-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvigicrues_get_stations.py
executable file
·43 lines (36 loc) · 1.44 KB
/
vigicrues_get_stations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python
import requests #for http requests
import re
import json
# Start of the anchor attribute that links the home page to a basin page;
# the basin's numeric id follows immediately after the '='.
BASSINPREFIX = 'href="./niv2-bassin.php?CdEntVigiCru='

# Start of the (relative) URL of a station page as it appears on a basin page.
STATIONPREFIX = 'niv3-station.php?'

# Captures the value of the CdStationHydro query parameter
# (upper-case letters and digits only).
EXTRACTSTATIONIDRE = re.compile(r'CdStationHydro=([A-Z0-9]+)')
def extract_stationid(url):
    """Return the station id carried by the ``CdStationHydro`` parameter of *url*.

    Only the first occurrence matched by ``EXTRACTSTATIONIDRE`` is returned.

    Raises:
        IndexError: if *url* contains no ``CdStationHydro=<ID>`` parameter.
    """
    station_ids = EXTRACTSTATIONIDRE.findall(url)
    return station_ids[0]
def parse_page(t, prefix):
    """Yield each substring of *t* that starts with *prefix* and ends just
    before the next double quote.

    The scan is a plain left-to-right text search (no HTML parsing): after a
    match, searching resumes at the double quote that terminated it.

    Args:
        t: the text to scan (typically the HTML source of a page).
        prefix: non-empty marker that every yielded substring starts with.

    Yields:
        The matched substrings, prefix included, closing quote excluded.

    Raises:
        ValueError: if *prefix* is empty (raised when iteration starts).
    """
    if not prefix:
        # An empty prefix matches at every position and the scan would never
        # advance past the terminating quote, i.e. it could loop forever.
        raise ValueError('prefix must not be empty')

    start = t.find(prefix)
    while start != -1:
        end = t.find('"', start + len(prefix))
        if end == -1:
            # No closing quote: slicing with -1 would silently drop the last
            # character, so yield the remainder of the text and stop.
            yield t[start:]
            return
        yield t[start:end]
        start = t.find(prefix, end)
def main():
    """Scrape www.vigicrues.gouv.fr and write every station to
    ``stations_vigicrues.json``.

    Steps:
      1. Fetch the home page and collect the basin ids found in links that
         start with ``BASSINPREFIX``.
      2. Fetch each basin page and collect the station ids found in links
         that start with ``STATIONPREFIX``.
      3. Query the station JSON service for each station and keep its id,
         river name (``LbCoursEau``) and station name (``LbStationHydro``).
      4. Dump the resulting list of dicts as JSON in the current directory.

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        IndexError: if a station link carries no ``CdStationHydro`` id.
        KeyError: if the station service response lacks an expected field.
    """
    timeout = 30  # seconds; without it a stalled server hangs the script

    r = requests.get('http://www.vigicrues.gouv.fr', timeout=timeout)
    r.raise_for_status()
    # r.text is already decoded text. Re-encoding it to bytes made the str
    # prefix search fail on Python 3 (and crashed when r.encoding was None).
    bassin_links = set(parse_page(r.text, BASSINPREFIX))
    # Sorted so that the crawl order (and the log output) is reproducible.
    bassins = sorted(int(link[len(BASSINPREFIX):]) for link in bassin_links)

    station_ids = set()
    for bassin in bassins:
        print('Bassin %s' % bassin)
        r = requests.get(
            'http://www.vigicrues.gouv.fr/niv2-bassin.php?CdEntVigiCru=%s' % bassin,
            timeout=timeout,
        )
        r.raise_for_status()
        station_ids.update(
            map(extract_stationid, parse_page(r.text, STATIONPREFIX))
        )

    all_stations_obj = []
    # Sorted so that the output file is stable from one run to the next.
    for station in sorted(station_ids):
        r = requests.get(
            'http://www.vigicrues.gouv.fr/services/station.json/index.php?CdStationHydro=%s' % station,
            timeout=timeout,
        )
        r.raise_for_status()
        info = r.json()  # parse the body once instead of once per field
        # Keep the values as text: bytes are not JSON-serialisable.
        all_stations_obj.append({
            'id': station,
            'river': info['LbCoursEau'],
            'name': info['LbStationHydro'],
        })

    # The context manager guarantees the file is flushed and closed.
    with open('stations_vigicrues.json', 'w') as out:
        json.dump(all_stations_obj, out)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()