forked from wagdav/atomic
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathadas.py
191 lines (156 loc) · 5.75 KB
/
adas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os
import errno
import shutil
import urllib.request, urllib.parse, urllib.error
# Base URL of the OpenADAS web site. Search-page and download URLs in this
# module are built by appending a relative path or query to this prefix, so
# the trailing slash is significant.
open_adas = 'http://open.adas.ac.uk/'
class OpenAdas(object):
    """Search the OpenADAS web site and download data files from it.

    The ``search_*`` methods delegate to ``AdasSearch`` and return its
    result unchanged; each entry of that result is a ``(url, name)`` tuple
    that can be handed to ``fetch``.
    """

    def search_adf11(self, element, year='', ms='metastable_unresolved'):
        """Search the ``adf11`` class for data files of *element*.

        ``year`` is sent as the ``year`` query parameter.  ``ms`` is the
        name of an additional query flag that is sent with the value 1.
        """
        query = [
            ('element', element),
            ('year', year),
            (ms, 1),
            ('searching', 1),
        ]
        return AdasSearch('adf11').search(query)

    def search_adf15(self, element, charge=''):
        """Search the ``adf15`` class for data files of *element*.

        ``charge`` is sent as the ``charge`` query parameter; results are
        always requested with ``resolveby=file``.
        """
        query = [
            ('element', element),
            ('charge', charge),
            ('resolveby', 'file'),
            ('searching', 1),
        ]
        return AdasSearch('adf15').search(query)

    def fetch(self, url_filename, dst_directory=None):
        """Download one search result unless the file already exists.

        ``url_filename`` is a ``(url, name)`` tuple.  ``dst_directory``
        defaults to the current directory and is remembered on
        ``self.dst_directory``.  The data is first retrieved into a
        temporary file and only then moved to its final location.
        """
        if dst_directory is None:
            dst_directory = os.curdir
        self.dst_directory = dst_directory

        url = self._construct_url(url_filename)

        nested = False  # this switch makes files save flat
        if nested:
            path = self._construct_path(url_filename)
        else:
            _, path = url_filename

        dst_filename = os.path.join(self.dst_directory, path)
        if os.path.exists(dst_filename):
            # Already downloaded; do not hit the network again.
            return

        tmpfile, _ = urllib.request.urlretrieve(url)
        self._mkdir_p(os.path.dirname(dst_filename))
        shutil.move(tmpfile, dst_filename)

    def _construct_url(self, url_filename):
        """Return the absolute download URL for a search result.

        >>> db = OpenAdas()
        >>> db._construct_url(('detail/adf11/prb96/prb96_c.dat', 'foo.dat'))
        'http://open.adas.ac.uk/download/adf11/prb96/prb96_c.dat'
        """
        url, _ = url_filename
        query = url.replace('detail', 'download')
        return open_adas + query

    def _construct_path(self, url_filename):
        """Return the relative path used to store a file in nested mode.

        The leading ``detail/`` is dropped and every ``][`` is replaced by
        ``#``.

        >>> db = OpenAdas()
        >>> db._construct_path(('detail/adf11/prb96/prb96_c.dat', 'foo.dat'))
        'adf11/prb96/prb96_c.dat'
        """
        url, _ = url_filename
        return url.replace('detail/', '').replace('][', '#')

    def _mkdir_p(self, path):
        """Create *path* and any missing parents, like ``mkdir -p``.

        An existing directory is not an error.  An empty *path* is a no-op:
        ``os.path.dirname`` yields ``''`` for a bare file name, and
        ``os.makedirs('')`` would raise ``FileNotFoundError``.  ``OSError``
        is still raised when *path* exists but is not a directory.
        """
        if not path:
            return
        os.makedirs(path, exist_ok=True)
class AdasSearch(object):
    """Run one search against an OpenADAS class page and parse the result.

    Only the ``adf11`` and ``adf15`` classes are supported.
    """

    def __init__(self, class_):
        """Prepare a search for *class_*.

        Raises ``NotImplementedError`` for any class other than ``adf11``
        or ``adf15``.
        """
        if class_ not in ['adf11', 'adf15']:
            # The message must be formatted with ``class_``; formatting it
            # with an undefined name would raise NameError instead.
            raise NotImplementedError(
                'ADAS class %s is not supported.' % class_)

        self.url = open_adas + '%s.php?' % class_
        self.class_ = class_
        self.data = 0  # replaced by the page text once a search has run
        self.parameters = []

    def search(self, parameters):
        """Submit *parameters* and return the parsed search results.

        *parameters* is passed to ``urllib.parse.urlencode``.  The return
        value is a list of ``(url, name)`` tuples, or an empty dict when
        the result page contains no rows.
        """
        self.parameters = parameters
        self._retrieve_search_page()
        return self._parse_data()

    def _retrieve_search_page(self):
        """Download the search result page and store its text in ``self.data``."""
        search_url = self.url + urllib.parse.urlencode(self.parameters)
        res, _ = urllib.request.urlretrieve(search_url)
        try:
            # Close the handle deterministically instead of leaking it.
            with open(res) as page:
                self.data = page.read()
        finally:
            # Remove the temporary download even if reading it failed.
            os.remove(res)

    def _parse_data(self):
        """Extract ``(url, name)`` tuples from the page in ``self.data``.

        The first parsed row is treated as the table header and skipped.
        Every remaining row must have exactly the number of fields expected
        for the class, otherwise the tuple unpacking raises ``ValueError``.
        """
        parser = SearchPageParser()
        parser.feed(self.data)
        lines = parser.lines
        if not lines:
            # Kept as a dict for backward compatibility with callers.
            return {}

        db = []
        for row in lines[1:]:
            if self.class_ == 'adf11':
                (_element, _class, _comment, _year, _resolved,
                 url, _cl, _typ, name) = row
            elif self.class_ == 'adf15':
                _element, _ion, _w_lo, _w_hi, url, _cl, _typ, name = row
            else:
                raise NotImplementedError('this should never happen')
            db.append((url, name.strip()))
        return db

    def _strip_url(self, url):
        """Return the integer after the single ``=`` in *url*."""
        _, id_ = url.split('=')
        return int(id_)
from html.parser import HTMLParser
class SearchPageParser(HTMLParser):
    """
    Filling in a search form on http://open.adas.ac.uk generates a HTML
    document with a table that has the following structure:

    >>> html = '''
    ... <table summary='Search Results'>
    ... <tr>
    ... <td>Ne</td> <td><a href='filedetail.php?id=32147'>rc89_ne.dat</a></td>
    ... </tr>
    ... <tr>
    ... <td>C</td> <td><a href='filedetail.php?id=32154'>rc89_c.dat</a></td>
    ... </tr>
    ... </table>'''

    The SearchPageParser can parse this document looking for a table whose
    `summary` attribute contains the word `Results`.  Every closing `</tr>`
    inside that table ends one row; a row is the list of the non-blank text
    fragments and link targets seen since the previous row ended.

    >>> parser = SearchPageParser()
    >>> parser.feed(html)
    >>> for l in parser.lines: print(l)
    ['Ne', 'filedetail.php?id=32147', 'rc89_ne.dat']
    ['C', 'filedetail.php?id=32154', 'rc89_c.dat']
    """

    def reset(self):
        """Clear the collected rows together with the base parser state."""
        self.search_results = False  # True while inside the results table
        self.line = []  # fields of the row currently being read
        self.lines = []  # all completed rows
        HTMLParser.reset(self)

    def handle_starttag(self, tag, attrs):
        """Detect the results table and record link targets inside it."""
        attrs = dict(attrs)
        if (tag == 'table'
                and 'summary' in attrs
                and 'Results' in attrs['summary']):
            self.search_results = True
        if not self.search_results:
            return

        if tag == 'a' and self.line is not None:
            # Anchors without a target carry no URL; skip them rather than
            # failing with KeyError.
            href = attrs.get('href')
            if href is not None:
                self.line.append(href)

    def handle_endtag(self, tag):
        """Finish the current row on ``</tr>``; stop collecting on ``</table>``."""
        if tag == 'table':
            self.search_results = False
        if not self.search_results:
            return

        if tag == 'tr':
            self.lines.append(self.line)
            self.line = []

    def handle_data(self, data):
        """Append non-blank text found inside the results table to the row."""
        if not self.search_results:
            return
        if data.strip() != '':
            self.line.append(data)
if __name__ == '__main__':
    # Executed as a script: run every doctest embedded in this module.
    import doctest

    doctest.testmod()