-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwordclouds
54 lines (41 loc) · 1.46 KB
/
wordclouds
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# import the necessary modules
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import csv
import codecs
def read_csv(line, path='scraped_articles_DATE.csv'):
    '''
    Returns: the content of a single article, read from a preexisting
    csv file of scraped articles.

    Rows are counted as CSV records, with the header as row 1, so the
    first article is row 2. Each article row is expected to hold, in
    order: url, headline, byline, date, content.

    Parameters:
        line: number of the row whose content is wanted (2 or greater).
        path: csv file to read; defaults to the file name this script
            has always used.

    If the requested row does not exist, or it has no content column,
    an empty string is returned.
    '''
    content_column = 4
    # newline='' lets the csv module itself handle line breaks that occur
    # inside quoted fields, such as multi-paragraph article text
    with open(path, encoding="ISO-8859-1", newline='') as file:
        csv_reader = csv.reader(file)
        # skip the headers
        next(csv_reader, None)
        # the header was row 1, so numbering of data rows starts at 2
        for row_number, row in enumerate(csv_reader, start=2):
            if row_number != line:
                continue
            # only the requested row is indexed, so short or blank rows
            # elsewhere in the file cannot raise IndexError
            if len(row) > content_column:
                return row[content_column]
            return ''
    return ''
# Words that are left out of the cloud. Every entry needs its own comma:
# Python silently joins adjacent string literals, so a missing comma would
# merge two words into one entry that never matches anything.
CLOUD_STOPWORDS = [
    "the", "and", "but", "if", "to", "of", "at", "with",
    "we", "it", "on", "in", "from", "for", "her", "him",
    "up", "just", "had", "they", "that", "said", "got", "did",
    "really", "their", "or", "who", "is", "its", "be", "am", "are",
    "was", "about", "so", "also", "this", "you", "would", "she",
    "could", "can",
]

# Ask which csv row to visualise; read_csv treats the header as row 1,
# so the first article is row 2.
lineYo = int(input('What line?'))
article = read_csv(lineYo)
if not article:
    # read_csv returns '' when the row is missing; an empty string gives
    # WordCloud nothing to draw, so stop with a readable message instead.
    raise SystemExit('No article content found on line {}.'.format(lineYo))

# Build a 480x480 cloud from the article text, ignoring the stopwords.
wordcloud = WordCloud(width=480, height=480,
                      stopwords=CLOUD_STOPWORDS).generate(article)

# Show the cloud as an image with no axes or margins around it.
plt.figure()
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()