-
Notifications
You must be signed in to change notification settings - Fork 4
/
subreddit_dl.py
44 lines (29 loc) · 1.01 KB
/
subreddit_dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
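'''
Downloads top-scoring submissions from the reddit TIL (todayilearned)
subreddit that match a "site:wikipedia" search, saving each one as a JSON
file under data/reddit/. Results are limited to the top 1000; if you want
more than that you will have to obtain them some other way.
'''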
import praw, os, json, codecs
from pprint import pprint
# Subreddit whose submissions are searched and downloaded.
subreddit_name = "todayilearned"

# Login (only a user agent is supplied here; no credentials are passed)
user_agent = "Subdownloaded 0.1 by /u/hookedon"
agent = praw.Reddit(user_agent=user_agent)
sub = agent.get_subreddit(subreddit_name)

# Create save directories.
# os.makedirs builds the whole data/reddit chain without spawning a shell,
# so this also works where `mkdir -p` is unavailable, and a real failure
# (e.g. permissions) raises instead of being silently ignored.
save_dir = os.path.join("data", "reddit")
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# Search the subreddit for wikipedia links, best-scoring first, over all time.
submissions = sub.search("site:wikipedia",
                         limit=None,
                         sort="top",
                         period="all")

for k, result in enumerate(submissions):
    # vars() returns the submission's live __dict__; work on a shallow copy
    # so the edits below do not alter the PRAW object itself.
    js = dict(vars(result))

    # Replace the author/subreddit objects with their string form and drop
    # the session handle, leaving a dict that json.dumps can serialize.
    js["author"] = str(js["author"])
    js["subreddit"] = str(js["subreddit"])
    js.pop("reddit_session", None)

    # One file per submission, named after the submission's "name" field.
    name = js["name"]
    jstr = json.dumps(js, indent=2)
    f_out = os.path.join(save_dir, name + '.json')
    with codecs.open(f_out, "w", "utf-8") as fout:
        fout.write(jstr)

    # Progress line: index, score, output path, first 40 chars of the title.
    # A single formatted argument prints identically on Python 2 and 3.
    print("%s %s %s %s" % (k, js["score"], f_out, js["title"][:40]))