twitternewsbot.py
!pip install nltk
!pip install newspaper3k
!pip install tweepy   # usually preinstalled on Colab; listed here since the script imports it

# NLP / article extraction
import re
import nltk
from newspaper import Article

# Twitter bot
import tweepy
import csv

nltk.download('punkt')  # sentence tokenizer data used by newspaper3k's nlp()

l = input('Search keyword: ')
a = '@' + l   # mention query
b = '#' + l   # hashtag query
print(a, b)
print("Some news sources retrieved based on the keyword you searched for are shown below.\n")
# Input your Twitter API credentials here
consumer_key = "insert_your_key"
consumer_secret = "insert_your_secret"
access_token = "insert_your_token"
access_token_secret = "insert_your_token_secret"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)
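# Optional sanity check (a minimal sketch, not part of the original script): confirm
# the credentials above work before searching. API.verify_credentials() returns the
# authenticated user, or raises an error if the keys are invalid.
try:
    me = api.verify_credentials()
    print("Authenticated as:", me.screen_name)
except Exception as err:
    raise SystemExit(f"Twitter authentication failed: {err}")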
# Open/create a CSV file to append the collected tweet data to
csvFile = open('s.csv', 'a', newline='', encoding='utf-8')
csvWriter = csv.writer(csvFile)
def user(a):
    # Fetch the latest English tweet mentioning the @handle (retweets excluded).
    # Note: api.search is the Tweepy 3.x name; Tweepy 4.x renamed it to api.search_tweets.
    for tweet in tweepy.Cursor(api.search, q=a + ' -filter:retweets',
                               tweet_mode="extended", lang="en").items(1):
        print(tweet.created_at, tweet.full_text)
        try:
            # Pull any article links out of the tweet text
            urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                              tweet.full_text)
            if 'media' in tweet.entities and tweet.entities['media'][0]["type"] == 'photo':
                media_url = tweet.entities['media'][0]["media_url"]
                print("Source:", media_url, "\n")
            print("Other news sources found in this tweet are summarized below.\n")
            for url in urls:
                summarizer(url)
        except Exception:
            # Skip tweets whose linked articles cannot be downloaded or parsed
            pass
        csvWriter.writerow([tweet.created_at,
                            tweet.full_text.replace('\n', ' '),
                            tweet.user.screen_name,
                            [e['text'] for e in tweet._json['entities']['hashtags']],
                            tweet.user.followers_count])
def hashtags(b):
    # Fetch the latest English tweet containing the #hashtag (retweets excluded)
    for tweet in tweepy.Cursor(api.search, q=b + ' -filter:retweets',
                               tweet_mode="extended", lang="en").items(1):
        print(tweet.created_at, tweet.full_text)
        try:
            # Pull any article links out of the tweet text
            urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                              tweet.full_text)
            if 'media' in tweet.entities and tweet.entities['media'][0]["type"] == 'photo':
                media_url = tweet.entities['media'][0]["media_url"]
                print("Source:", media_url, "\n")
            print("Other news sources found in this tweet are summarized below.\n")
            for url in urls:
                summarizer(url)
        except Exception:
            # Skip tweets whose linked articles cannot be downloaded or parsed
            pass
        csvWriter.writerow([tweet.created_at,
                            tweet.full_text.replace('\n', ' '),
                            tweet.user.screen_name,
                            [e['text'] for e in tweet._json['entities']['hashtags']],
                            tweet.user.followers_count])
# Article summarizer: download, parse, and summarize a single news article URL
def summarizer(nurl):
    article = Article(nurl)
    article.download()
    article.parse()
    article.nlp()   # keyword extraction and summarization (uses the punkt data downloaded above)
    print(article.title)
    print(article.top_image)
    print(article.text)
    print(article.summary)
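# Standalone check (a sketch, not part of the original flow): summarizer() can be
# called directly on any article URL, independent of Twitter. The URL below is a
# placeholder assumption; replace it with a real news article link.
test_url = 'https://example.com/some-news-article'   # hypothetical URL
try:
    summarizer(test_url)
except Exception as err:
    print("Could not summarize", test_url, "-", err)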
user(a)
hashtags(b)
csvFile.close()   # flush the collected rows to s.csv
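# A short read-back sketch (column names are assumed labels, since s.csv is written
# without a header row): load the appended rows with pandas to inspect what was collected.
import pandas as pd
columns = ['created_at', 'text', 'screen_name', 'hashtags', 'followers']   # assumed labels
collected = pd.read_csv('s.csv', names=columns)
print(collected.tail())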