-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathconvert_paper.py
49 lines (38 loc) · 1.18 KB
/
convert_paper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import argparse
import re
import requests
def print_error():
    """Print the generic failure notice to standard output.

    The notice lists the two conditions the user is asked to verify: that
    the paper ID carries a version number and that the version is the most
    current one. Returns None.
    """
    # The literal deliberately starts with a newline and ends with a
    # trailing space; both are part of the emitted text.
    notice = """
FAILURE... PLEASE MAKE SURE THE FOLLOWING ARE TRUE
* paper is correctly formatted with version number (e.g. 2307.11045v1)
* version is most current version """
    print(notice)
# arXiv ID format used from 2007-04 to present.
# Named groups captured by the pattern:
#   yymm    - the four leading digits (also captured separately as yy and mm)
#   num     - the 4- or 5-digit number that follows the dot
#   version - optional; the digits after a literal "v", with no leading zero
# An optional trailing part that begins with "#" or "/" is accepted as well,
# so a match alone does not guarantee that a version was supplied.
RE_ARXIV_NEW_ID = re.compile(
r'^(?P<yymm>(?P<yy>\d\d)(?P<mm>\d\d))\.(?P<num>\d{4,5})'
r'(v(?P<version>[1-9]\d*))?([#\/].*)?$'
)
# Command-line interface: one required positional argument, the arXiv paper
# ID (the failure notice asks for a versioned ID such as 2307.11045v1).
parser = argparse.ArgumentParser(
    description='Convert arxiv paper to html',
)
parser.add_argument('paper')
args = parser.parse_args()

# (connect, read) timeouts in seconds for the conversion request. Without a
# timeout, requests waits indefinitely on an unresponsive server.
# NOTE(review): the service's conversion latency is not visible from this
# script; the read timeout is deliberately generous -- tune as needed.
REQUEST_TIMEOUT = (10, 300)

# An empty string cannot satisfy the anchored pattern, so a single match
# test covers both the "no paper given" and the "malformed ID" cases.
id_match = RE_ARXIV_NEW_ID.match(args.paper)
if id_match is None:
    print_error()
else:
    archive = 'arxiv'
    orig = 'ftp'
    # The YYMM prefix is already captured by the pattern; reuse it rather
    # than re-splitting the raw argument.
    yymm = id_match.group('yymm')
    # Presumably the key of the paper's source tarball inside the bucket
    # named below -- confirm against the service's expectations.
    blob = f'{orig}/{archive}/papers/{yymm}/{args.paper}.tar.gz'
    try:
        response = requests.post(
            'https://services.arxiv.org/latexml/single-convert',
            json={
                'id': args.paper,
                'blob': blob,
                'bucket': 'arxiv-production-data',
            },
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # Connection failures and timeouts are unrelated to the ID format,
        # so report them separately instead of showing the formatting hints
        # or a raw traceback.
        print(f'FAILURE... could not reach the conversion service: {exc}')
    else:
        # Any status other than 200 is treated as a failed conversion
        # request; success is silent.
        if response.status_code != 200:
            print_error()