-
Notifications
You must be signed in to change notification settings - Fork 0
/
evo_import.py
executable file
·71 lines (61 loc) · 2.42 KB
/
evo_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
# -*- coding: utf8 -*-
#~ tomerge = {"Ardèche (haute)":"Ardèche",
#~ "Ardèche (basse)":"Ardèche",
#~ "Ariège (Haute)":"Ariège",
#~ "Garonne - Les Roches":"Garonne",
#~ 'Rhône (entier)':'Rhône',
#~ #"Ouvèze de l'ardèche":"Ouvèze",
#~ "Ouvèze des Baronnies":"Ouvèze", # at Evo, "Ouvèze des Baronnies" and "Ouvèze" are the same river
#~ }
# Rename table consulted by main(): a river whose name is found here is stored
# under the mapped name instead. Currently empty, so no river is renamed.
tomerge = {}
import json
import pymongo
from pprint import pprint
# Target MongoDB collection: collection "evo" of database "wwsupdb", reached
# through a MongoClient built with its default connection settings.
collection = pymongo.MongoClient().wwsupdb.evo
def insert(id, river):
    """Store ``river`` in ``collection`` under ``_id == id``.

    The document is upserted: an existing document with that ``_id`` is
    replaced as a whole, otherwise a new one is created.

    Args:
        id: value used as the document ``_id`` (main() passes the river name).
        river: the document (a dict) to store.
    """
    selector = {'_id': id}
    # Collection.update() is deprecated since PyMongo 3 and removed in
    # PyMongo 4; replace_one() is the equivalent whole-document upsert.
    # Look the method up on the class: attribute access on a Collection
    # *instance* never fails (unknown names yield a sub-collection), so
    # hasattr(collection, ...) would always be true.
    if getattr(type(collection), 'replace_one', None) is not None:
        collection.replace_one(selector, river, upsert=True)
    else:
        # Older PyMongo without replace_one(): keep the legacy call.
        collection.update(selector, river, upsert=True)
def _as_text(value):
    """Return ``value`` as text, decoding it from UTF-8 if it is a byte string."""
    if isinstance(value, bytes):
        return value.decode('utf8', 'ignore')
    return value


def _merged_name(name):
    """Return the ``tomerge`` replacement for ``name``, or ``name`` unchanged."""
    # The table may be keyed by text or by UTF-8 byte strings (the latter is
    # what non-ASCII literals are in a Python 2 source file), so try both.
    for key in (name, name.encode('utf8')):
        merged = tomerge.get(key)
        if merged is not None:
            # Normalise to text so a merged name groups with an identical
            # name that came straight from the JSON input.
            return _as_text(merged)
    return name


def _emit(text):
    """Print ``text``; on Python 2 it is UTF-8 encoded first."""
    if str is bytes:
        # Python 2: printing non-ASCII unicode fails when stdout is ASCII.
        text = text.encode('utf8')
    print(text)


def _disambiguate(name, rivers):
    """Rename, in place, every river of a group sharing ``name``.

    The first index ``depth`` at which all ``river['situation'][depth]``
    values differ is used as a suffix: ``"<name> (<situation[depth]>)"``.
    If some river's ``situation`` is exhausted before such an index is
    found, the rivers are numbered instead: ``"<name> (0)"``, ``"<name> (1)"``...
    """
    depth = 0
    while True:
        if any(depth >= len(river['situation']) for river in rivers):
            # No distinguishing situation entry exists: fall back to numbering.
            for index, river in enumerate(rivers):
                river['name'] = '%s (%s)' % (name, index)
            return
        candidates = [river['situation'][depth] for river in rivers]
        if len(set(candidates)) == len(candidates):
            # All values differ at this depth: use them as suffixes.
            for river, candidate in zip(rivers, candidates):
                river['name'] = '%s (%s)' % (name, candidate)
            return
        depth += 1


def main():
    """Load ``evo.json`` and rebuild the ``collection`` from its rivers.

    Steps:
      1. Read the list of river documents from ``evo.json``.
      2. Apply the ``tomerge`` rename table and group rivers by name.
      3. Drop the existing collection (destructive!).
      4. For each group with more than one river, report it and give every
         river a unique name (see ``_disambiguate``).
      5. Upsert every river, using its final name as ``_id``.

    Raises:
        IOError/OSError: if ``evo.json`` cannot be opened.
        ValueError: if ``evo.json`` is not valid JSON.
        KeyError: if a river lacks ``name``, or a duplicated river lacks
            ``situation``.
    """
    with open('evo.json', 'r') as f:
        loaded_rivers = json.load(f)

    rivers_grouped = {}
    for river in loaded_rivers:
        river['name'] = _merged_name(river['name'])
        rivers_grouped.setdefault(river['name'], []).append(river)

    # Everything previously stored is discarded before re-importing.
    collection.drop()

    for name, rivers in rivers_grouped.items():
        if len(rivers) > 1:
            _emit('%d duplicates found for %s' % (len(rivers), name))
            _disambiguate(name, rivers)
            _emit(','.join(river['name'] for river in rivers))
        for river in rivers:
            insert(river['name'], river)
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()