forked from uwdata/termite-data-server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.py
executable file
·86 lines (71 loc) · 3.1 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import subprocess
# Dataset used when no dataset is named on the command line.
DEFAULT_DATASET = 'infovis'

# Dataset identifiers offered as choices for the `dataset` argument
# (the default comes first).
DATASETS = [
    DEFAULT_DATASET,
    '20newsgroups',
    'nsfgrants',
    'nsf25k',
    'nsf10k',
    'nsf1k',
    'poliblogs',
    'gjp',
    'fomc',
    'CR_financial_collapse',
    'CR_stock_market_plunge',
    'FCIC_final_report',
    'FCIC_first_hearing',
    'FR_federal_open_market_committee',
    'FR_monetary_policy_hearings',
]

# Model type used when no model is named on the command line.
DEFAULT_MODEL = 'mallet'

# Model types offered as choices for the `model` argument
# (the default comes first).
MODELS = [
    DEFAULT_MODEL,
    'treetm',
    'stmt',
    'stm',
    'gensim',
]
def Shell(command):
    """Run an external command and echo its output line by line.

    The child's stderr is merged into its stdout. Each non-empty output line
    is printed as soon as it is read; empty lines are skipped.

    Args:
        command: Argument list handed to ``subprocess.Popen`` (program first).

    Returns:
        The exit status of the child process.
    """
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    try:
        # Read until EOF rather than until the child exits: a process that
        # terminates quickly can leave many lines buffered in the pipe, and
        # polling for termination would drop them.
        for raw_line in iter(p.stdout.readline, b''):
            line = raw_line.rstrip(b'\n')
            if not isinstance(line, str):
                # Pipes yield bytes on Python 3; on Python 2 bytes is str and
                # the line is printed unchanged.
                line = line.decode('utf-8', 'replace')
            if line:
                print(line)
    finally:
        # Release the pipe and reap the child so no zombie is left behind.
        p.stdout.close()
        p.wait()
    return p.returncode
def Demonstrate(dataset, model, is_quiet, force_overwrite):
    """Build a topic model on a demo dataset by running the ``bin/`` scripts.

    A summary banner is printed first. The following commands are then run
    through ``Shell``, in this order: fetch the dataset, set up the selected
    model, set up MALLET, set up CoreNLP, train the model, import the model.

    Args:
        dataset: Dataset identifier; used in the data folder paths and in the
            application name.
        model: Model type; selects ``bin/setup_<model>.sh``,
            ``bin/train_<model>.py`` and ``bin/read_<model>.py``.
        is_quiet: When true, ``--quiet`` is passed to the train and import
            scripts.
        force_overwrite: When true, ``--overwrite`` is passed to the train and
            import scripts.
    """
    # NOTE(review): the database folder is the same path as the corpus
    # folder -- confirm this is intended.
    database_folder = 'data/demo/{}/corpus'.format(dataset)
    corpus_folder = 'data/demo/{}/corpus'.format(dataset)
    model_folder = 'data/demo/{}/model-{}'.format(dataset, model)
    app_name = '{}_{}'.format(dataset, model)

    # Optional switches shared by the training and import scripts.
    optional_flags = []
    if is_quiet:
        optional_flags.append('--quiet')
    if force_overwrite:
        optional_flags.append('--overwrite')

    # One entry per step, in execution order.
    pipeline = [
        # Fetch the dataset.
        ['bin/fetch_dataset.sh', dataset],
        # Set up the selected modelling tool.
        ['bin/setup_{}.sh'.format(model)],
        # Set up the tools that are prepared regardless of the selected model.
        ['bin/setup_mallet.sh'],
        ['bin/setup_corenlp.sh'],
        # Train the model.
        ['bin/train_{}.py'.format(model), corpus_folder, model_folder] + optional_flags,
        # Import the trained model as an application.
        ['bin/read_{}.py'.format(model), app_name, model_folder, corpus_folder, database_folder] + optional_flags,
    ]

    rule = '--------------------------------------------------------------------------------'
    print(rule)
    print('Build a topic model ({}) using a demo dataset ({})'.format(model, dataset))
    print(' database = {}'.format(database_folder))
    print(' corpus = {}'.format(corpus_folder))
    print(' model = {}'.format(model_folder))
    print(' app = {}'.format(app_name))
    print(rule)

    for step_command in pipeline:
        Shell(step_command)
def main():
    """Parse the command line and run the demo for the chosen dataset and model.

    Both positional arguments are optional and fall back to
    ``DEFAULT_DATASET`` and ``DEFAULT_MODEL``. ``--quiet`` and ``--overwrite``
    are boolean switches that default to off.
    """
    parser = argparse.ArgumentParser(description='Import a MALLET topic model as a web2py application.')
    parser.add_argument(
        'dataset',
        nargs='?',
        type=str,
        default=DEFAULT_DATASET,
        choices=DATASETS,
        help='Dataset identifier',
    )
    parser.add_argument(
        'model',
        nargs='?',
        type=str,
        default=DEFAULT_MODEL,
        choices=MODELS,
        help='Model type',
    )
    parser.add_argument('--quiet', action='store_true', help='Show fewer debugging messages')
    parser.add_argument('--overwrite', action='store_true', help='Overwrite any existing model')

    options = parser.parse_args()
    Demonstrate(options.dataset, options.model, options.quiet, options.overwrite)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()