-
Notifications
You must be signed in to change notification settings - Fork 21
/
dblp.py
74 lines (68 loc) · 2.06 KB
/
dblp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# How to use (from data folder):
# python ../dblp.py
# python ../dblp.py -test (only test the cached files)
import csv
import sys
import requests
import time
import xmltodict
import os
def get_dblp_file(pid, prof, cache_dir='../cache/dblp/', timeout=60):
    """Return the DBLP XML record of a researcher, downloading it if needed.

    The record is looked up in ``cache_dir`` under the researcher's name with
    spaces replaced by dashes and an ``.xml`` suffix. When that file exists
    its content is returned as-is; otherwise the record is fetched from
    ``http://dblp.org/pid/<pid>.xml``, written to the cache and returned.

    Args:
        pid: DBLP person id, inserted verbatim into the download URL.
        prof: Researcher name, used to derive the cache file name.
        cache_dir: Directory holding the cached XML files. Defaults to the
            location the script has always used (relative to the data folder).
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            cannot block the run forever.

    Returns:
        The XML document as text.

    Exits the process with status 1 (after printing the error) when the
    download fails, including when the server answers with an HTTP error
    status.
    """
    file = os.path.join(cache_dir, prof.replace(" ", "-") + '.xml')

    if os.path.exists(file):
        with open(file) as f:
            return f.read()

    try:
        url = "http://dblp.org/pid/" + pid + ".xml"
        response = requests.get(url, timeout=timeout)
        # Without this check a 404/429 error page would be cached and later
        # handed to the XML parser as if it were a real record.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(1)

    bibfile = response.text
    # Create the cache directory on first use instead of failing on open().
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(file, 'w') as f:
        f.write(bibfile)
    return bibfile
def parse_dblp(_, dblp):
    """Streaming callback for ``xmltodict.parse``; accepts every item.

    The script only checks that each cached file parses, so no item is
    inspected. The previous body probed ``dblp`` for 'journal'/'booktitle'
    but returned True on both paths, and that probe raised ``TypeError``
    whenever the item was ``None``.

    Args:
        _: Path of the item inside the document (unused).
        dblp: The parsed item (unused).

    Returns:
        Always True, which tells xmltodict to keep parsing.
    """
    return True
# Script body. By default every researcher's DBLP record is downloaded again
# and then all cached files are parsed; passing "-test" as the only argument
# skips the download phase.
download = not (len(sys.argv) == 2 and sys.argv[1] == "-test")

if download:
    start_time = time.time()
    # Make sure the cache directory exists before the first write.
    os.makedirs('../cache/dblp/', exist_ok=True)
    # The with-block closes the CSV even when sys.exit() fires below;
    # newline='' is how the csv module expects its input file to be opened.
    with open("all-researchers.csv", 'r', newline='') as researchers_csv:
        for count, researcher in enumerate(csv.reader(researchers_csv), start=1):
            prof = researcher[0]
            department = researcher[1]
            pid = researcher[2]
            print(str(count) + " >> " + prof + "," + department)
            file = '../cache/dblp/' + prof.replace(" ", "-") + '.xml'
            try:
                url = "http://dblp.org/pid/" + pid + ".xml"
                # The timeout keeps one stalled connection from hanging the run.
                response = requests.get(url, timeout=60)
                # Reject HTTP error pages instead of caching them as records.
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(e)
                sys.exit(1)
            with open(file, 'w') as f:
                f.write(response.text)
            # Pause between requests to avoid hammering the DBLP server.
            time.sleep(2)
    elapsed_time = round((time.time() - start_time) / 60, 2)
    print("Elapsed time (min): " + str(elapsed_time))

print("Testing files ....")
with open("all-researchers.csv", 'r', newline='') as researchers_csv:
    for count, researcher in enumerate(csv.reader(researchers_csv), start=1):
        prof = researcher[0]
        department = researcher[1]
        pid = researcher[2]
        print(str(count) + " >> " + prof + "," + department)
        bibfile = get_dblp_file(pid, prof)
        # Parsing is the actual check: malformed XML raises here.
        xmltodict.parse(bibfile, item_depth=3, item_callback=parse_dblp)