-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchguadiana_scrap.py
executable file
·39 lines (34 loc) · 1.24 KB
/
chguadiana_scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
import json
import requests
from lxml import etree
import datetime
from serialize import saveValues, loadStations
def getData(station):
    """Yield ``(timestamp, value)`` pairs scraped from a station's web page.

    The page at ``station['url']`` is downloaded and parsed as HTML, then
    every ``<tr>`` element is visited in document order:

    * a row without ``<td>`` cells is taken as a header row; the text of its
      ``<th>`` cells becomes the column names for the rows that follow;
    * a row with ``<td>`` cells is a data row; its cell texts are paired
      with the current column names.

    For each data row the ``Fecha`` column is parsed with the format
    ``%Y-%m-%d %H:%M:%S`` and the first measurement column present (flow,
    then level, then volume) is converted to ``float``.  Data rows that
    carry none of those columns yield nothing.

    Args:
        station: mapping with at least a ``'url'`` key.

    Yields:
        ``(datetime.datetime, float)`` tuples, in page order (not sorted).

    Raises:
        KeyError: a data row has no ``Fecha`` column (for example because no
            header row preceded it).
        ValueError: the date or the measurement text cannot be parsed.
    """
    url = station['url']
    print(url)
    r = requests.get(url)
    # Re-encode the decoded text so lxml receives bytes in the encoding that
    # requests reported for the response.
    t = etree.HTML(r.text.encode(r.encoding))

    # Measurement columns in priority order; only the first one found in a
    # row is reported.
    measurement_columns = (
        u'Caudal (m\xb3/s)',
        u'Nivel (m)',
        u'Volumen (hm\xb3)',
    )

    columns = []
    for tr in t.xpath('//tr'):
        tds = tr.xpath('td')
        if not tds:
            # Header row.  Build a real list: on Python 3 ``map`` returns a
            # one-shot iterator that the first data row would exhaust,
            # leaving every later row without column names.
            columns = [th.text for th in tr.xpath('th')]
            continue

        values = dict(zip(columns, [td.text for td in tds]))
        d = datetime.datetime.strptime(values['Fecha'], '%Y-%m-%d %H:%M:%S')
        # ``in`` replaces ``dict.has_key``, which no longer exists on
        # Python 3.
        for name in measurement_columns:
            if name in values:
                yield d, float(values[name])
                break
def main():
    """Scrape every 'chguadiana' station and persist its readings.

    Stations whose page produced no readings are skipped; for the others
    the readings are stored under the key ``<type>_<station_id>``.
    """
    for station in loadStations('chguadiana'):
        # saveValues requires its input ordered by timestamp.
        readings = sorted(getData(station), key=lambda pair: pair[0])
        if not readings:
            continue
        series_name = '%(type)s_%(station_id)s' % station
        saveValues('chguadiana', series_name, readings)
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()