-
Notifications
You must be signed in to change notification settings - Fork 1
/
sentimentData.py
49 lines (42 loc) · 1.81 KB
/
sentimentData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import snscrape.modules.twitter as sntwitter
import re
import flair
from datetime import datetime, timedelta
class sentimentData:
    """Score the Twitter sentiment around natural gas for a single day.

    Tweets are fetched with snscrape, stripped of mentions, hashtags and
    links, and classified with flair's ``en-sentiment`` text classifier.
    """

    # Mentions, hashtags and links carry no sentiment of their own.  The
    # match is case-insensitive so ``HTTP://`` links are removed as well.
    _NOISE_RE = re.compile(r"(?i)@\S*|#\S*|https?://\S*")
    _WHITESPACE_RE = re.compile(r"\s+")
    # Loaded lazily and shared between calls: loading the classifier is far
    # more expensive than classifying a handful of tweets.
    _sentiment_model = None

    def nextDay(self, prevDate):
        """Return midnight of the day after *prevDate*.

        Args:
            prevDate: A date string in ``YYYYMMDD`` form.

        Returns:
            A naive ``datetime`` at 00:00:00 on the following calendar day.

        Raises:
            ValueError: If *prevDate* is not a valid ``YYYYMMDD`` date.
        """
        return datetime.strptime(prevDate, '%Y%m%d') + timedelta(days=1)

    def cleanText(self, text):
        """Strip mentions, hashtags and URLs from *text*.

        Whitespace is collapsed after the removals so no gaps are left
        where a token used to be, and the result is trimmed.

        Args:
            text: Raw tweet text.

        Returns:
            The cleaned text; an empty string if nothing but noise remained.
        """
        without_noise = sentimentData._NOISE_RE.sub('', text)
        return sentimentData._WHITESPACE_RE.sub(' ', without_noise).strip()

    def _loadModel(self):
        """Return the shared sentiment classifier, loading it on first use."""
        if sentimentData._sentiment_model is None:
            sentimentData._sentiment_model = (
                flair.models.TextClassifier.load('en-sentiment')
            )
        return sentimentData._sentiment_model

    def getSentiment(self, currDay):
        """Return the mean signed sentiment of tweets posted on *currDay*.

        At most ``MAX_TWEETS`` tweets are read.  A POSITIVE label adds its
        score and a NEGATIVE label subtracts it; the sum is divided by the
        number of tweets that received a label.

        Args:
            currDay: The day to analyse, as a ``YYYYMMDD`` string.

        Returns:
            The mean signed score as a float, or ``0.0`` when no tweet
            could be scored.

        Raises:
            ValueError: If *currDay* is not a valid ``YYYYMMDD`` date.
        """
        from itertools import islice

        MAX_TWEETS = 25
        until = self.nextDay(currDay)
        since = until - timedelta(days=1)
        # Both operators take a bare date.  Interpolating the datetime
        # directly would add " 00:00:00" to the query as a search term.
        QUERY = (
            "natural gas (natural OR gas OR import OR export OR price) "
            f"until:{until:%Y-%m-%d} since:{since:%Y-%m-%d} "
            "-filter:links -filter:replies"
        )
        sentiment_model = self._loadModel()

        total = 0.0
        scored = 0
        tweets = sntwitter.TwitterSearchScraper(QUERY).get_items()
        # islice stops after MAX_TWEETS without pulling an extra item from
        # the scraper.
        for tweet in islice(tweets, MAX_TWEETS):
            text = self.cleanText(tweet.content)
            if not text:
                # Nothing left to classify once the noise is removed.
                continue
            sentence = flair.data.Sentence(text)
            sentiment_model.predict(sentence)
            if not sentence.labels:
                continue
            label = sentence.labels[0]
            if label.value == 'POSITIVE':
                total += label.score
            elif label.value == 'NEGATIVE':
                total -= label.score
            scored += 1

        # No scored tweets means no signal; report neutral rather than
        # dividing by zero.
        return total / scored if scored else 0.0
if __name__ == '__main__':
    # Example run for a single day; print the score so that running the
    # script actually reports something instead of discarding the result.
    sen = sentimentData()
    print(sen.getSentiment('20220426'))