-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathfrequency.py
67 lines (46 loc) · 1.79 KB
/
frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
import json
# Path of the tweet file to analyse, taken from the first command-line
# argument (e.g. output.txt).
# NOTE: this is evaluated at import time, so loading the module without a
# command-line argument raises IndexError.
twitterData = sys.argv[1]
def tweet_dict(twitterData):
    """Load a file of line-delimited JSON into a list of parsed values.

    Args:
        twitterData: Path of a text file holding one JSON document per line
            (for example the ``output.txt`` passed on the command line).

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
    """
    twitter_list_dict = []
    # ``with`` closes the handle even when a line fails to parse.
    with open(twitterData) as twitterfile:
        for line in twitterfile:
            # json.loads raises on an empty document, so skip lines that
            # contain nothing but whitespace instead of aborting the load.
            if not line.strip():
                continue
            twitter_list_dict.append(json.loads(line))
    return twitter_list_dict
def _term_frequencies(tweets, strip_chars='?:!.,;"@\''):
    """Return a dict mapping each term to its share of all term occurrences.

    A term is a whitespace-separated token of an entry's "text" value with
    any trailing characters from *strip_chars* removed.  Entries without a
    "text" key are ignored, and so are tokens that consist solely of
    stripped characters (they would otherwise be counted as an empty term).
    """
    counts = {}
    total_terms = 0
    for tweet in tweets:
        if "text" not in tweet:
            continue
        for token in tweet["text"].split():
            term = token.rstrip(strip_chars)
            if not term:
                continue
            counts[term] = counts.get(term, 0) + 1
            total_terms += 1
    # The denominator is the number of term occurrences, not the number of
    # distinct terms.  It is only evaluated when counts is non-empty, so it
    # can never be zero here.
    return dict(
        (term, float(count) / total_terms) for term, count in counts.items()
    )


def main():
    """Print the relative frequency of every term in the tweet file.

    Reads the file named by the module-level ``twitterData`` through
    ``tweet_dict`` and writes one line per distinct term to standard output,
    formatted as ``<term> <frequency>`` with the frequency rounded to three
    decimals.  The frequency is the term's occurrence count divided by the
    total number of term occurrences across all entries that have a "text"
    key.  Output order is the dictionary's iteration order.
    """
    tweets = tweet_dict(twitterData)
    for term, frequency in _term_frequencies(tweets).items():
        line = u"%s %.3f" % (term, frequency)
        if sys.version_info[0] < 3:
            # Python 2: emit UTF-8 bytes explicitly so that non-ASCII terms
            # do not fail when stdout has no encoding (e.g. when piped).
            line = line.encode("utf-8")
        print(line)
# Run the frequency report only when executed as a script, not on import.
if __name__ == "__main__":
    main()