-
Notifications
You must be signed in to change notification settings - Fork 0
/
emoji_analise.py
95 lines (85 loc) · 2.58 KB
/
emoji_analise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import csv
import pprint
import emoji
import advertools as adv
from emosent import get_emoji_sentiment_rank
import re
import pickle
import emoji
from emot.emo_unicode import UNICODE_EMOJI
from emot.emo_unicode import EMOTICONS_EMO
import pandas as pd
# Module-level tables. `emojis` is filled by the tweet-processing script and
# `taxonomia` by find_taxonomia; the module-level `result` and `emoticons`
# are not read by the code visible in this file.
emojis = {}
result = {}
emoticons = {}
taxonomia = {}

# ASCII emoticon -> emoji replacement table used by convert_emoticons_to_emoji.
rawEmojis = {
    ":)": "😀",
    "=)": "😀",
    ":(": "😞",
    ":D": "😄",
    ":/": "😕",
    ":'(": "😢",
    ":P": "😛",
    "XD": "😆",
    ":3": "🐱",
    "DX": "😫",
    "d:": "😦",
    "XP": "😝",
    "D8": "😱",
    ":o": "😯",
}
# Load the emoticon table from a local pickle file; its keys are the
# alternatives that find_emoticons joins into a regular expression.
# NOTE(security): unpickling can execute arbitrary code, so this file must
# come from a trusted source.
with open('Emoticon_Dict.p', 'rb') as fp:
    Emoticon_Dict = pickle.load(fp)
def find_emoticons(text, emoticon_dict=None):
    """Return the emoticons found in ``text``, with hyphens removed.

    Args:
        text: String to scan.
        emoticon_dict: Optional mapping whose keys are regular-expression
            fragments, one per emoticon. Defaults to the module-level
            ``Emoticon_Dict``.

    Returns:
        A list with one string per match, in order of appearance. Every
        ``-`` is removed from each match, and a match that ends up being a
        lone ``)`` is dropped.
    """
    if emoticon_dict is None:
        emoticon_dict = Emoticon_Dict
    if not emoticon_dict:
        # With no alternatives the pattern would be empty, and an empty
        # pattern matches at every position, producing a list of ''.
        return []
    # Keys are joined as regex alternatives without escaping; presumably
    # they are already regex-escaped (verify against the pickle's contents).
    pattern = '|'.join(emoticon_dict)
    found = []
    # finditer + group(0) always yields the whole match as a string, even
    # when a key contains its own capture group (findall would give tuples).
    for match in re.finditer(pattern, text):
        emoticon = match.group(0).replace('-', '')
        if emoticon != ')':
            found.append(emoticon)
    return found
def extract_emojis(s):
    """Return the characters of ``s`` that are keys of ``emoji.UNICODE_EMOJI['pt']``.

    Characters are tested one at a time and the matching ones are
    concatenated in their original order.
    """
    kept = []
    for ch in s:
        if ch in emoji.UNICODE_EMOJI['pt']:
            kept.append(ch)
    return ''.join(kept)
def convert_text_to_emojis(text):
    """Return ``text`` passed through ``emoji.emojize`` with ``language='pt'``."""
    emojized = emoji.emojize(text, language='pt')
    return emojized
def convert_emojis_to_text(text):
    """Return ``text`` passed through ``emoji.demojize`` with ``language='pt'``."""
    demojized = emoji.demojize(text, language='pt')
    return demojized
def convert_emoticons_to_emoji(text, mapping=None):
    """Replace known ASCII emoticons in ``text`` with their emoji.

    The text is split on single spaces; each piece that is exactly a key of
    the mapping is replaced by its value, and every other piece is kept.

    Args:
        text: String to convert.
        mapping: Optional dict from emoticon to replacement. Defaults to the
            module-level ``rawEmojis`` table.

    Returns:
        One leading space followed by the converted pieces, each followed by
        one space. This padding is the function's long-standing output
        format and is kept so existing callers see no change.
    """
    if mapping is None:
        mapping = rawEmojis
    # NOTE: only the plain space is a separator, so an emoticon glued to a
    # tab, newline or punctuation mark is not recognised.
    pieces = [mapping.get(word, word) + " " for word in text.split(" ")]
    # A single join instead of repeated += on a growing string.
    return " " + "".join(pieces)
def convert_emoticons_to_text(text):
    """Convert the emoticons and emoji in ``text`` to textual emoji names.

    Emoticons are first replaced by emoji with convert_emoticons_to_emoji,
    then every emoji is replaced by its name with ``emoji.demojize``.

    Args:
        text: String to convert.

    Returns:
        The converted string, in the padded format produced by
        convert_emoticons_to_emoji.
    """
    with_emojis = convert_emoticons_to_emoji(text)
    # Pass language='pt' like every other emojize/demojize call in this
    # file; without it demojize falls back to its default language.
    return emoji.demojize(with_emojis, language='pt')
def find_taxonomia(text):
    """Map each emoji found in ``text`` to its advertools ``group`` value.

    Args:
        text: String to scan for emoji (via extract_emojis).

    Returns:
        A dict from emoji character to the ``group`` of the first row
        returned by ``adv.emoji_search`` for that emoji. Empty when the text
        contains no emoji. The same entries are also stored in the
        module-level ``taxonomia`` dict, as before.
    """
    result = {}
    for emoj in extract_emojis(text):
        # NOTE(review): indexing [0] assumes emoji_search returns at least
        # one row for this emoji; confirm against advertools.
        group = adv.emoji_search(emoj)['group'][0]
        # Previously only the global table was filled and the returned dict
        # was always empty; fill both so the return value is usable.
        result[emoj] = group
        taxonomia[emoj] = group
    return result
# Build a per-emoji summary (taxonomy group, textual name, sentiment) from at
# most the first 3000 rows of the tweets CSV, then export it as an HTML table.
with open('Tweets_pt_pt.csv', 'r', encoding="utf-8") as file:
    tweets = csv.DictReader(file, skipinitialspace=True)
    aux = 0  # number of tweets processed so far
    for tweet in tweets:
        aux += 1
        mojis = extract_emojis(convert_emoticons_to_emoji(tweet["tweet_text"]))
        for elem in mojis:
            if elem in emojis:
                # The entry depends only on the emoji itself, so repeated
                # occurrences do not need the lookups to be run again.
                continue
            # NOTE(review): indexing [0] assumes emoji_search returns at
            # least one row for this emoji; confirm against advertools.
            emojis[elem] = [
                {'taxonomia': adv.emoji_search(elem)['group'][0]},
                {'emoji em texto': convert_emojis_to_text(elem)},
            ]
            try:
                a = get_emoji_sentiment_rank(elem)
                emojis[elem].append({
                    'sentimento': a['sentiment_score'],
                    'positividade': a['positive'],
                    'neutral': a['neutral'],
                    'negatividade': a['negative'],
                })
            except Exception:
                # Best effort: presumably the emoji has no entry in the
                # sentiment ranking. Store a dict (the old code stored a
                # set literal by mistake) so all cells share one shape.
                emojis[elem].append({'sentimento': 'NA'})
        if aux == 3000:
            pprint.pprint(emojis)
            break

# Build and write the table once, after the loop. Doing it for every tweet
# rewrote the same file thousands of times with the same final content.
df = pd.DataFrame(data=emojis)
df = df.fillna(' ').T
# After the transpose the columns are the list positions 0, 1 and 2.
df = df.rename(columns={0: 'Taxonomia', 1: 'Emoji em texto', 2: 'Sentimento'})
# Explicit UTF-8: the table contains emoji, which many platform default
# encodings cannot represent.
with open('output.html', 'w', encoding='utf-8') as f:
    f.write(df.to_html())