-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDownload_Data.py
61 lines (53 loc) · 2.27 KB
/
Download_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import csv
import pandas as pd
import sys
import os
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
import argparse
import pickle
def save_object(obj, filename):
    """Pickle ``obj`` into the file at ``filename``.

    The target is opened in binary write mode (``'wb'``), so any existing
    file at that path is overwritten. Returns ``None``.
    """
    with open(filename, 'wb') as sink:
        # Pickler(...).dump is what pickle.dump does under the hood.
        pickle.Pickler(sink).dump(obj)
def Descargar_Datos(consumer_token,consumer_secret,access_token,access_token_secret):
    """Download the tweets listed in the ID files and pickle them per dataset.

    Each ID file is read as a comma-separated CSV. For every row, the first
    column is passed to ``api.get_status`` and the second column is stored
    as the tweet's label. Tweets that are retrieved are kept as dictionaries
    with the keys ``id``, ``name``, ``text`` and ``label``; rows whose lookup
    fails are skipped. The collected list for each dataset is written with
    ``save_object`` to ``<dataset name>.pkl`` (relative to the current
    working directory).

    Args:
        consumer_token: First argument given to ``tweepy.OAuthHandler``.
        consumer_secret: Second argument given to ``tweepy.OAuthHandler``.
        access_token: First argument given to ``auth.set_access_token``.
        access_token_secret: Second argument given to
            ``auth.set_access_token``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # Files holding the tweet IDs of the different datasets. The paths are
    # built with os.path.join so they work on every platform (the previous
    # literals relied on unescaped backslashes, and one had a trailing space).
    names = ['Waseem_Dataset', 'Data_new']
    archivos_ids = [
        os.path.join('.', 'Data', 'Waseem_IDS.csv'),
        os.path.join('.', 'Data', 'Data_new_IDS.csv'),
    ]

    total = 0  # rows processed across all datasets (progress output only)
    for posicion, (nombre, archivo) in enumerate(zip(names, archivos_ids), start=1):
        c = 0  # tweets successfully retrieved for the current dataset
        print('Downloading ' + str(posicion) + ' of ' + str(len(archivos_ids)) + ' datasets ...')
        # Only ``delimiter`` is given: passing a positional 'r' as well made
        # pandas receive two conflicting separator arguments.
        data = pd.read_csv(archivo, delimiter=',', encoding='utf-8')
        str_json = []
        for j in data.values:
            total += 1
            if total % 100 == 0:
                print(total)
            try:
                tweet = api.get_status(j[0])._json
                str_json.append({"id":tweet['id'],"name": tweet['user']['name'], "text":tweet['text'], "label": j[1]})
            except Exception:
                # Best-effort download: a tweet that cannot be fetched or
                # lacks the expected fields is skipped. Catching Exception
                # instead of using a bare ``except`` keeps Ctrl-C and
                # SystemExit able to stop the run.
                continue
            c += 1
            if c % 100 == 0:
                print('recuperados')
                print(c)
        print(str(c) + ' tweets downloaded')
        save_object(str_json, nombre + '.pkl')
if __name__ == "__main__":
    # Command-line entry point: the four Twitter API credentials are all
    # required options and are forwarded unchanged to Descargar_Datos.
    parser = argparse.ArgumentParser(description='Descargar datos usando el api de twitter')
    parser.add_argument('-ct', '--consumer_token', required=True)
    parser.add_argument('-cs', '--consumer_secret', required=True)
    parser.add_argument('-at', '--access_token', required=True)
    parser.add_argument('-ats', '--access_token_secret', required=True)
    # Parse the command line once and reuse the resulting namespace instead
    # of re-parsing sys.argv for every credential.
    args = parser.parse_args()
    Descargar_Datos(
        args.consumer_token,
        args.consumer_secret,
        args.access_token,
        args.access_token_secret,
    )