-
Notifications
You must be signed in to change notification settings - Fork 4
/
pan-cluster
executable file
·86 lines (65 loc) · 2.36 KB
/
pan-cluster
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/python
import argparse
import os
import subprocess
from meta import read_info_json, CLUSTERINESS
# Directory containing this script. It is made absolute because
# os.path.dirname(__file__) is the empty string when the script is invoked by
# bare name, and '%s/config/...' % '' would then point at the filesystem root.
HERE = os.path.dirname(os.path.abspath(__file__))
def read_config_file(fn):
    """Parse a whitespace-delimited config file into a list of token lists.

    Each line is stripped of surrounding whitespace. Lines that are then
    empty, or that start with '#', are skipped. Every remaining line is split
    on whitespace and contributes one list of tokens.

    Args:
        fn: path of the file to read.

    Returns:
        A list with one list of string tokens per retained line, in file
        order.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(fn) as f:
        stripped_lines = (raw.strip() for raw in f)
        return [line.split()
                for line in stripped_lines
                if line and not line.startswith('#')]
def get_commands(lang, input_dir, output_dir):
    """Build the training command lines for one language.

    Two config files next to this script are read with read_config_file():
    ``config/<lang>-common`` supplies command prefixes and ``config/<lang>``
    supplies command suffixes. Every prefix is combined with every suffix.

    Each resulting command has the form
    ``prefix + ['-i', input_dir] + suffix + ['--save', pickle]``, where
    ``pickle`` is ``<output_dir>/<lang>-<n>.pickle`` and ``n`` counts the
    combinations starting from 1.

    Args:
        lang: language identifier, used in the config file names and in the
            pickle file names.
        input_dir: directory passed to each command via ``-i``.
        output_dir: directory in which the pickle paths are placed.

    Returns:
        A dict mapping each pickle path to its command (a list of strings).
    """
    # os.path.join copes with an empty HERE (script invoked by bare name);
    # plain '%s/config/...' formatting would yield a path under '/'.
    config_dir = os.path.join(HERE, 'config')
    starts = read_config_file(os.path.join(config_dir, '%s-common' % lang))
    ends = read_config_file(os.path.join(config_dir, lang))

    commands = {}
    i = 0
    for s in starts:
        # Build a new list instead of extending the parsed row in place.
        prefix = s + ['-i', input_dir]
        for e in ends:
            i += 1
            pickle = '%s/%s-%s.pickle' % (output_dir, lang, i)
            commands[pickle] = prefix + e + ['--save', pickle]
    return commands
def write_answers_for_lang(lang, input_dir, output_dir, skip_training):
    """Train the models for one language, then run ``./emit-opinions``.

    The commands from get_commands() are run one by one unless
    ``skip_training`` is true. A pickle is kept only if its command did not
    fail and the pickle file exists afterwards. All kept pickles are then
    passed to ``./emit-opinions`` with ``-i``, together with the clusteriness
    values looked up in CLUSTERINESS for ``lang``.

    Args:
        lang: language identifier; must be a key of CLUSTERINESS.
        input_dir: corpus directory, passed to the training commands and as
            ``--info-dir`` to ``./emit-opinions``.
        output_dir: directory for the pickles, passed as ``-o`` to
            ``./emit-opinions``.
        skip_training: if true, run no training command and only use pickles
            that already exist.

    Raises:
        KeyError: if ``lang`` is not in CLUSTERINESS.
        subprocess.CalledProcessError: if ``./emit-opinions`` exits non-zero.
    """
    commands = get_commands(lang, input_dir, output_dir)
    pickles = []
    for pickle, cmd in commands.items():
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("training: %s" % ' '.join(cmd))
        if not skip_training:
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError as e:
                # A failed trainer is reported and its pickle is left out;
                # the remaining commands still run.
                print("failed to create %s with %s" % (pickle, e))
                continue
        if os.path.exists(pickle):
            pickles.append(pickle)

    cl_articles, cl_reviews = CLUSTERINESS[lang]
    # NOTE: './emit-opinions' is resolved against the current working
    # directory, not against the directory of this script.
    cmd = ['./emit-opinions', '-o', output_dir,
           '--clusteriness-articles', str(cl_articles),
           '--clusteriness-reviews', str(cl_reviews),
           '--info-dir', input_dir,
           '--strategy', 'exp',
           '-n', 'scale_l2']
    for p in pickles:
        cmd.extend(['-i', p])
    print("running %s" % ' '.join(cmd))
    subprocess.check_call(cmd)
def main():
    """Parse the command line and process every language in the corpus.

    The corpus description is loaded with read_info_json() from the input
    directory. The second element of each entry is taken as its language
    (presumably a language code; confirm against read_info_json), and
    write_answers_for_lang() is called once per distinct language.
    """
    parser = argparse.ArgumentParser()
    # Both directories are required: left as None they end up inside the
    # command lists and fail much later with a TypeError in ' '.join(cmd).
    parser.add_argument('-i', '--input-dir', required=True,
                        help="find testing corpus here")
    parser.add_argument('-o', '--output-dir', required=True,
                        help="write results here")
    parser.add_argument('--skip-training', action='store_true',
                        help="don't actually train; assume pickles exist")
    args = parser.parse_args()

    info = read_info_json(args.input_dir)
    langs = set(x[1] for x in info)
    for lang in langs:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("looking at %s" % lang)
        write_answers_for_lang(lang, args.input_dir, args.output_dir,
                               args.skip_training)
# Run only when executed as a script, so that loading this file as a module
# (e.g. from a test) does not start parsing sys.argv and training.
if __name__ == '__main__':
    main()