-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchcantabrico_get_stations.py
executable file
·92 lines (83 loc) · 3.15 KB
/
chcantabrico_get_stations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
import requests
from lxml import etree
import json
import re
try:
from urllib.parse import urlparse
from urllib.parse import parse_qs
except ImportError:
from urlparse import urlparse
from urlparse import parse_qs
# In this C.H. we can get the history of water levels in meters, but the flow measurement in m3/s is only available as
# a single point. So we save the correspondence between level and flow in 'flow4level'.
def getFlow4Level(url):
    """Scrape the water level / flow pair shown on a station detail page.

    The page is scanned cell by cell: the cell following the label
    'Nivel del agua' (water level) is read as the level, and the cell
    following 'Caudal circulante' (circulating flow) is read as the flow.

    Args:
        url: address of the station detail page to download.

    Returns:
        A dict mapping level (float) to flow (float). It is empty when the
        page does not provide both readings.

    Raises:
        ValueError: if a reading cannot be converted to float.
    """
    r = requests.get(url)
    # Parse the raw response bytes so lxml handles the decoding itself
    t = etree.HTML(r.content)
    next_is_level = False
    next_is_flow = False
    flow = ''
    level = ''
    flow4level = {}
    for td in t.xpath("//td"):
        if td.text is None:
            continue
        label = td.text.strip()
        # Branch order matters: a cell that directly follows a label is
        # always consumed as that label's value, whatever its text is.
        if label == 'Nivel del agua':
            next_is_level = True
        elif next_is_level:
            level = _cellText(td)
            next_is_level = False
        elif label == 'Caudal circulante':
            next_is_flow = True
        elif next_is_flow:
            flow = _cellText(td)
            next_is_flow = False
        if flow != '' and level != '':
            # some readings are empty: a bare 'm' is all that is left
            # (presumably just the unit suffix), so there is nothing to store
            if level == 'm' or flow == 'm':
                continue
            # drop surrounding spaces and 'm' characters before converting
            flow4level[float(level.strip(' m'))] = float(flow.strip(' m'))
    return flow4level


def _cellText(td):
    """Return the stripped reading held by a table cell, or '' if it has none."""
    spans = td.xpath('span')
    # Prefer the first <span> child when there is one, otherwise the cell's own text
    text = spans[0].text if spans else td.text
    # An empty element has text None; treat it as an empty reading
    return (text or '').strip()
# Matches the JavaScript statement that builds the map centre on a station page and captures latitude and longitude.
# Dots are escaped so they only match literal dots; the fractional part is optional so whole-number coordinates
# are accepted too.
findgeo = re.compile(
    r'var\s+myCenter\s*=\s*new\s+google\.maps\.LatLng\((-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\);'
)


def getGeo(station_id):
    """Return the coordinates of a station as a (latitude, longitude) tuple of floats.

    Args:
        station_id: value substituted into the 'cod_estacion' query parameter
            of the station detail page URL.

    Returns:
        A 2-tuple of floats taken from the first 'myCenter' LatLng call found
        in the page.

    Raises:
        IndexError: if the page contains no matching coordinates.
    """
    url = 'https://www.chcantabrico.es/sistema-automatico-de-informacion-detalle-estacion?cod_estacion=%s' % station_id
    r = requests.get(url)
    # Search the decoded text: findgeo is a str pattern, and on Python 3
    # matching a str pattern against bytes raises TypeError.
    match = findgeo.search(r.text)
    if match is None:
        raise IndexError('no map coordinates found in %s' % url)
    return tuple(float(value) for value in match.groups())
def getStations():
    """Yield one metadata dict per station listed on the 'caudal circulante' page.

    For every row of the 'caudales tablefixedheader' table, the river name,
    station name and detail-page link are read from the cells that follow the
    'codigo' cell. The detail page is then scraped for the level/flow pair and
    the coordinates. Each dict is printed before it is yielded.

    Yields:
        dict with keys 'id', 'river', 'name', 'url', 'flow4level', 'lat',
        'lon', 'url_scrap' and 'unit'.

    Raises:
        IndexError: if the table, the 'codigo' cell or the station link is
            missing from the page.
    """
    r = requests.get("https://www.chcantabrico.es/web/guest/caudal-circulante")
    # Parse the raw response bytes so lxml handles the decoding itself
    t = etree.HTML(r.content)
    table = t.xpath("//table[@class='caudales tablefixedheader']")[0]
    for tr in table.xpath('tbody/tr'):
        tds = tr.xpath("td")
        # The river and station cells are located relative to the 'codigo' cell
        codigo_pos = tds.index(tr.xpath("td[@class='codigo']")[0])
        rio = tds[codigo_pos + 1]
        links = rio.xpath("a")
        if len(links) == 1:
            river = links[0].text.strip()  # river is in a link
        else:
            river = rio.text.strip()  # river is just text
        a = tds[codigo_pos + 2].xpath("a")[0]
        station_name = a.text.strip()
        url = a.get('href')
        # The station id is taken from the link's query string
        station_id = parse_qs(urlparse(url).query)['cod_estacion'][0]
        flow4level = getFlow4Level(url)
        lat, lon = getGeo(station_id)
        station = {
            'id': station_id,
            'river': river,
            'name': station_name,
            'url': url,
            'flow4level': flow4level,
            'lat': lat,
            'lon': lon,
            'url_scrap': 'https://www.chcantabrico.es/evolucion-de-niveles/-/descarga/csv/nivel/%s' % station_id,
            'unit': 'm',
        }
        print(station)
        yield station
def main():
    """Scrape every station and write the resulting list to stations_chcantabrico.json."""
    # Scrape first, so a failure while scraping never opens (and truncates) the output file
    stations = list(getStations())
    # The context manager closes the file and flushes the JSON to disk
    with open("stations_chcantabrico.json", "w") as out:
        json.dump(stations, out)


if __name__ == '__main__':
    main()