-
Notifications
You must be signed in to change notification settings - Fork 67
/
update.py
64 lines (51 loc) · 1.35 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Be considerate of Feedly's servers: every run issues one large (n=10000)
# search request per interest, so don't run this script more often than needed.
try:
import urllib2 as request
except ImportError:
from urllib import request
import json
# Search terms sent to the Feedly feed search, one request per term.
# Order matters only for the progress output printed while fetching.
interests = [
    'gaming', 'architecture', 'design',
    'politics', 'art', 'painting',
    'technology', 'writing', 'books',
    'engineering', 'science', 'economics',
    'history', 'diy', 'cooking',
    'movies', 'television', 'philosophy',
    'psychology', 'sports', 'programming',
]
# Maps each interest to the list of 'website' values found in the Feedly
# search results for that interest.
all_interests = {}
for i, interest in enumerate(interests):
    print('Retreiving {}, ({} / {})'.format(interest, i + 1, len(interests)))
    url = 'http://feedly.com/v3/search/feeds?q='+interest+'&n=10000'
    # Close the response explicitly so the connection is not leaked.
    # try/finally is used instead of `with` because the object returned by
    # urllib2.urlopen on Python 2 is not a context manager.
    connection = request.urlopen(url)
    try:
        response = connection.read()
    finally:
        connection.close()
    # Python 2 returns str here, Python 3 returns bytes; normalise to text
    # before parsing.
    if isinstance(response, bytes):
        response = response.decode('utf-8')
    results = json.loads(response)['results']
    # Not every result carries a 'website' key; keep only those that do.
    all_interests[interest] = [
        result['website'] for result in results if 'website' in result
    ]
# Invert the mapping: each website URL points at the list of interests whose
# search results contained it.
by_url = {}
for interest, urls in all_interests.items():
    for url in urls:
        # setdefault creates the list the first time a URL is seen.
        by_url.setdefault(url, []).append(interest)
# Persist the URL -> interests mapping as JSON in 'sites.json' (relative to
# the current working directory), overwriting any existing file.
with open('sites.json', 'w') as sites_file:
    serialized = json.dumps(by_url)
    sites_file.write(serialized)