-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprep.py
executable file
·76 lines (60 loc) · 2.07 KB
/
prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
from collections import defaultdict
import csv
import json
import os
from nltk.corpus import wordnet as wn
from textblob import TextBlob
# import epitran
# epi = epitran.Epitran('eng-Latn')
# Directory that the data files used by this script are resolved against.
DATA_DIR = 'data'

# WordNet synsets filtered out of hyponym listings when place nouns are
# collected.
exclusions = list(map(wn.synset, (
    'axis.n.01',
    'point_source.n.01',
    'groundcover.n.01',
)))
def munge(s):
    """Return *s* title-cased with one of its words forced into the plural.

    The word that gets pluralised is the one immediately before the last
    token tagged ``'IN'`` by TextBlob's tagger (the Penn Treebank tag for
    prepositions). When no token carries that tag, the final word of the
    phrase is pluralised instead. The chosen word is singularised first so
    that a word which is already plural is not pluralised twice.

    Args:
        s: The phrase to transform.

    Returns:
        The words of the phrase joined by single spaces and passed through
        ``str.title()``, or an empty string when the phrase has no words.
    """
    blob = TextBlob(s)
    words = blob.words
    if not words:
        # Empty or whitespace-only input: there is nothing to pluralise and
        # indexing words[-1] below would raise IndexError.
        return ''

    # TODO proper pluralisation using head nouns
    idx = -1
    for i, tag in enumerate(blob.tags):
        if tag[1] == 'IN':
            # No break: the last matching tag wins. A tag at position 0
            # gives -1, which selects the final word.
            idx = i - 1

    # NOTE(review): `tags` and `words` come from separate TextBlob
    # properties; fall back to the final word if the tag position does not
    # exist in `words` -- TODO confirm whether the two can actually diverge.
    if idx >= len(words):
        idx = -1

    words[idx] = words[idx].singularize().pluralize()
    return ' '.join(words).title()
def _group_by_initial(words):
    """Group the non-empty strings in *words* by their first character."""
    groups = defaultdict(list)
    for word in words:
        if word:  # an empty string has no first character to key on
            groups[word[0]].append(word)
    return groups


def prep(save=False):
    """Build first-letter lookup tables of place names and monster names.

    Place names are the lemma names (underscores replaced by spaces) of every
    WordNet synset reachable through hyponym links from a fixed list of seed
    synsets, ignoring hyponyms listed in the module-level ``exclusions``.
    Monster names are taken from the third column of ``kfc-monsters.csv``
    in ``DATA_DIR``; the first row of that file is skipped (presumably a
    header row). Every name is passed through ``munge`` and then grouped by
    its first character.

    Args:
        save: When true, additionally write the two tables to
            ``dungeons.json`` and ``dragons.json`` inside ``DATA_DIR``.

    Returns:
        A ``(dungeons, dragons)`` tuple of ``defaultdict(list)`` objects, each
        mapping a first character to the munged names that start with it.
    """
    # TODO better source of place nouns
    location = [
        wn.synset('location.n.01'),
        wn.synset('building.n.01'),
        wn.synset('geological_formation.n.01'),
        wn.synset('land.n.02'),
        wn.synset('vegetation.n.01'),
    ]

    def hypo(s):
        # Relation handed to Synset.closure(): direct hyponyms minus the
        # excluded synsets.
        return [x for x in s.hyponyms() if x not in exclusions]

    # The set removes lemma names that occur under several synsets. Sorting
    # makes the generated tables reproducible from run to run; iterating the
    # set directly would yield an arbitrary order.
    locs = sorted({
        z.replace('_', ' ')
        for x in location
        for y in x.closure(hypo)
        for z in y.lemma_names()
    })

    # with open(os.path.join(DATA_DIR, 'monsters.json'), 'r') as f:
    #     data = json.load(f)
    #     names = [x['name'] for x in data]
    # newline='' is what the csv module asks for when it is given a file.
    with open(os.path.join(DATA_DIR, 'kfc-monsters.csv'), 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the first row
        # Blank lines and short rows have no third column; skip them, and
        # blank names too, instead of raising IndexError.
        names = [row[2] for row in reader if len(row) > 2 and row[2].strip()]

    locs = [munge(x) for x in locs]
    names = [munge(x) for x in names]

    # locs_ipa = [epi.transliterate(w) for w in locs]
    # names_ipa = [epi.transliterate(w) for w in names]

    dungeons = _group_by_initial(locs)
    dragons = _group_by_initial(names)

    if save:
        with open(os.path.join(DATA_DIR, 'dungeons.json'), 'w') as f:
            json.dump(dungeons, f)
        with open(os.path.join(DATA_DIR, 'dragons.json'), 'w') as f:
            json.dump(dragons, f)

    return dungeons, dragons
# Script entry point: build both tables and write them out as JSON.
if __name__ == '__main__':
    prep(save=True)