-
Notifications
You must be signed in to change notification settings - Fork 9
/
replies.py
155 lines (119 loc) · 4.76 KB
/
replies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import sys
import argparse
import requests
import time
from collections import Counter
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE: verbose=True only makes python-dotenv report a missing .env file;
# it does not raise, so execution continues with whatever is already set.
load_dotenv(verbose=True)
# Twitter API credentials read from the environment. os.getenv returns None
# for any variable that is unset.
consumer_key = os.getenv("TWITTER_CONSUMER_KEY")
consumer_secret = os.getenv("TWITTER_CONSUMER_SECRET")
access_token = os.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret = os.getenv("TWITTER_ACCESS_TOKEN_SECRET")
# Bearer token that bearer_oauth() places in the Authorization header.
bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
# Argparse for CLI options. Run `python3 replies.py -h` to see the list of arguments.
# NOTE: parse_args() at the end of this section runs at module level, so the
# command line is parsed (and a missing required --tweet_id aborts the
# process) as soon as this file is executed or imported.
parser = argparse.ArgumentParser()
# Required: the Tweet whose conversation is searched (used as conversation_id).
parser.add_argument(
"-t",
"--tweet_id",
required=True,
help="ID of the Tweet for which you want to pull replies",
)
# Optional lower bound; only forwarded to the search when provided.
parser.add_argument(
"-s",
"--start_time",
help="The oldest UTC timestamp from which the replies will be provided. Format: YYYY-MM-DDTHH:mm:ssZ; for example: 2021-12-04T01:30:00Z. If unspecified, will default to returning replies from up to 30 days ago.",
)
# Optional upper bound; only forwarded to the search when provided.
parser.add_argument(
"-e",
"--end_time",
help="The newest, most recent UTC timestamp to which the replies will be provided. Format: YYYY-MM-DDTHH:mm:ssZ; for example: 2021-12-04T01:30:00Z. If unspecified, will default to [now - 30 seconds].",
)
# Parsed options, read by get_parameters().
args = parser.parse_args()
def bearer_oauth(r):
    """Attach bearer-token authentication headers to an outgoing request.

    Used in this file as the ``auth=`` callable for ``requests.request``.

    Args:
        r: The request object being prepared; its ``headers`` mapping is
            modified in place.

    Returns:
        The same request object, with ``Authorization`` and ``User-Agent``
        headers set.
    """
    auth_headers = {
        "Authorization": f"Bearer {bearer_token}",
        "User-Agent": "v2FullArchiveSearchPython",
    }
    r.headers.update(auth_headers)
    return r
def get_parameters():
    """Build the search query parameters from the parsed CLI arguments.

    Returns:
        A ``(params, tweet_id)`` tuple: the dict of query parameters for the
        search request, and the Tweet ID given on the command line.
    """
    tweet_id = args.tweet_id
    query_params = {
        "query": f"conversation_id:{tweet_id}",
        "tweet.fields": "in_reply_to_user_id,author_id,conversation_id,entities",
        "max_results": "500",
    }
    # Time bounds are optional; a missing/empty value is simply not sent.
    for bound in ("start_time", "end_time"):
        value = getattr(args, bound)
        if value:
            query_params[bound] = value
    return (query_params, tweet_id)
def get_replies(parameters):
    """Collect every Tweet returned by the full-archive search, across pages.

    Follows ``meta.next_token`` until the API stops returning one. An empty
    page in the middle of pagination no longer discards the replies gathered
    so far, and the caller's ``parameters`` dict is left untouched.

    Args:
        parameters: Query parameters for the search request (see
            ``get_parameters``).

    Returns:
        A ``(replies, request_count)`` tuple: the list of Tweet objects from
        all pages, and the number of successful requests made.

    Raises:
        Exception: If any request returns a non-200 status; carries the
            status code and response text.
        SystemExit: If the search produced no Tweets at all.
    """
    search_url = "https://api.twitter.com/2/tweets/search/all"
    # Paginate on a copy so the next_token never leaks into the caller's dict.
    query = dict(parameters)
    replies = []
    request_count = 0
    while True:
        response = requests.request(
            "GET", search_url, auth=bearer_oauth, params=query
        )
        if response.status_code != 200:
            raise Exception(response.status_code, response.text)
        request_count += 1
        response_payload = response.json()
        meta = response_payload["meta"]
        # A page with result_count == 0 may omit "data" entirely; treat it as
        # an empty page rather than aborting the whole run.
        replies.extend(response_payload.get("data", []))
        next_token = meta.get("next_token")
        if next_token is None:
            break
        query["next_token"] = next_token
        # Pause between pages (presumably to stay within the endpoint's
        # per-second rate limit).
        time.sleep(1)
    if not replies:
        sys.exit("No replies to analyze")
    return replies, request_count
def get_author(tweet_id):
    """Look up the author of a Tweet.

    Requests the Tweet with the ``author_id`` expansion and picks the
    expanded user whose id matches the Tweet's ``author_id``, instead of
    whichever user happens to be last in the list.

    Args:
        tweet_id: ID of the Tweet to look up.

    Returns:
        An ``(author_id, author_username)`` tuple.

    Raises:
        Exception: If the request returns a non-200 status (carries the
            status code and response text), or if the response does not
            include a user matching the Tweet's author.
    """
    tweet_lookup_url = f"https://api.twitter.com/2/tweets/{tweet_id}"
    parameters = {
        "tweet.fields": "author_id",
        "expansions": "author_id",
        "user.fields": "username",
    }
    response = requests.request(
        "GET", tweet_lookup_url, auth=bearer_oauth, params=parameters
    )
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    response_payload = response.json()
    author_id = response_payload["data"]["author_id"]
    for user in response_payload["includes"]["users"]:
        if user.get("id") == author_id:
            return (author_id, user["username"])
    # Previously this path surfaced as an UnboundLocalError on the return.
    raise Exception(
        f"Author {author_id} of Tweet {tweet_id} not found in lookup response"
    )
def get_usernames(author_id, replies):
    """Extract mentioned usernames from replies addressed to the author.

    A reply is kept when its ``in_reply_to_user_id`` equals ``author_id``
    (i.e. it replies to the original author's account). Replies lacking
    that field are skipped, and replies without ``entities`` or
    ``entities.mentions`` still count as replies but contribute no usernames.

    Args:
        author_id: User ID of the original Tweet's author.
        replies: Iterable of Tweet dicts as returned by the search endpoint.

    Returns:
        A ``(usernames, replies_ids)`` tuple: every mentioned username
        (duplicates preserved, in encounter order) and the IDs of the
        replies that were kept.
    """
    usernames = []
    replies_ids = []
    for reply in replies:
        replied_to = reply.get("in_reply_to_user_id")
        if replied_to is None or replied_to != author_id:
            continue
        mentions = reply.get("entities", {}).get("mentions", [])
        usernames.extend(mention["username"] for mention in mentions)
        replies_ids.append(reply["id"])
    return usernames, replies_ids
def count_and_sort(usernames, author_username):
    """Tally how often each username was mentioned, excluding the author.

    Args:
        usernames: Iterable of mentioned usernames (duplicates expected).
        author_username: Username of the original Tweet's author; removed
            from the tally if present.

    Returns:
        A ``collections.Counter`` mapping username to mention count.
    """
    ordered_usernames = Counter(usernames)
    # Remove mentions of original author from results. A default is supplied
    # so an author who was never mentioned does not raise KeyError.
    ordered_usernames.pop(author_username, None)
    return ordered_usernames
def results(ordered_usernames, replies_ids, request_count):
    """Persist the kept reply IDs and print a summary report.

    Writes the string form of ``replies_ids`` to ``replies.txt`` in the
    current working directory (overwriting it), then prints the username
    tally and three summary counts to stdout.

    Args:
        ordered_usernames: Tally of mentioned usernames.
        replies_ids: IDs of the replies that were kept.
        request_count: Number of search requests that were made.
    """
    divider = "============================"
    with open("replies.txt", "w") as id_file:
        id_file.write(str(replies_ids))

    print(divider)
    print(ordered_usernames)
    print(divider)
    summary = (
        ("* Number of direct replies to original Tweet:", len(replies_ids)),
        ("* Number of usernames mentioned:", len(ordered_usernames)),
        ("* Total request count:", request_count),
    )
    for label, value in summary:
        print(label, value)
    print(divider)
def main():
    """Run the pipeline: search, resolve author, tally mentions, report."""
    search_params, root_tweet_id = get_parameters()
    # Fetch the conversation first; this exits early when there is nothing
    # to analyze, before the author lookup is attempted.
    collected, pages_fetched = get_replies(search_params)
    root_author_id, root_author_name = get_author(root_tweet_id)
    mentioned, direct_reply_ids = get_usernames(root_author_id, collected)
    tally = count_and_sort(mentioned, root_author_name)
    results(tally, direct_reply_ids, pages_fetched)


if __name__ == "__main__":
    main()