-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataedit.py
33 lines (24 loc) · 841 Bytes
/
dataedit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import sys
import json
from pprint import pprint
import unicodedata
def removeAccents(string):
    """Return ``str(string)`` with its combining marks removed.

    The argument is converted with ``str()``, decomposed using NFKD
    normalization, and every character that ``unicodedata.combining``
    reports as a combining character (non-zero class) is dropped.
    The remaining characters are joined back into a single string.
    """
    decomposed = unicodedata.normalize("NFKD", str(string))
    kept = []
    for char in decomposed:
        # A non-zero combining class marks an accent/diacritic code point.
        if unicodedata.combining(char):
            continue
        kept.append(char)
    return u"".join(kept)
# Script body: convert a JSON mapping into a sorted table plus a
# first-character index.
#
#   sys.argv[1]  input file; its first 5 bytes are skipped and the rest is
#                parsed as a JSON object (presumably the 5 bytes are a
#                non-JSON prefix -- confirm against the real input format).
#   sys.argv[2]  output file; receives "lang = <table>\nindex = <index>"
#                encoded as UTF-8.
#
# Each table row is [sort_key, original_key, value], where sort_key is the
# accent-stripped, lower-cased key with double quotes removed.

# Read and close the input before touching the output, so a malformed input
# does not leave a truncated output file behind.
with open(sys.argv[1], "rb") as source:
    data = json.loads(source.read()[5:])

# Keep only entries with a non-empty key whose value does not contain ''
# (presumably value is a list of strings -- verify against the input data).
newdata = [
    [removeAccents(key).lower().replace('\"', ''), key, value]
    for key, value in data.items()
    if key != '' and '' not in value
]
newdata.sort(key=lambda d: d[0])

# Map each distinct leading character of the sort key to the position of the
# first row that starts with it. Rows are sorted, so equal leading characters
# are contiguous.
index = ""
indexobj = {}
for i, item in enumerate(newdata):
    # Slicing instead of item[0][0] avoids an IndexError when a key reduces
    # to an empty sort key (e.g. a key made only of '"'); such rows sort
    # first and get no index entry.
    first = item[0][:1]
    if first != index:
        index = first
        indexobj[first] = i

# Assemble the whole payload, then write it once; the context manager
# guarantees the file is flushed and closed.
payload = (
    "lang = "
    + json.dumps(newdata, ensure_ascii=False)
    + "\nindex = "
    + json.dumps(indexobj, ensure_ascii=False)
)
with open(sys.argv[2], "wb") as f:
    f.write(payload.encode("utf-8"))