-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_models.py
115 lines (94 loc) · 3.52 KB
/
run_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from __future__ import division, print_function
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor
from util.util import generate_submission
from util.const import *
from preprocessing.feature_engineering import engineer_data
from util.timer import Timer
from model import IntegratedRegressor
import argparse
import json
import os
def _decode_list(data):
rv = []
for item in data:
if isinstance(item, unicode):
item = item.encode('utf-8')
elif isinstance(item, list):
item = _decode_list(item)
elif isinstance(item, dict):
item = _decode_dict(item)
rv.append(item)
return rv
def _decode_dict(data):
rv = {}
for key, value in data.iteritems():
if isinstance(key, unicode):
key = key.encode('utf-8')
if isinstance(value, unicode):
value = value.encode('utf-8')
elif isinstance(value, list):
value = _decode_list(value)
elif isinstance(value, dict):
value = _decode_dict(value)
rv[key] = value
return rv
def _process_dict(data):
rv = {}
for key, value in data.iteritems():
if key == 'base_estimator' and value == 'BayesianRidge':
value = globals()[value](fit_intercept=True)
elif key == 'base_estimator' and value == 'RandomForestRegressor':
value = globals()[value](n_estimators=500, min_samples_split=7, oob_score=True, n_jobs=-1)
rv[key] = value
return rv
if __name__ == '__main__':
    # Script entry point: load a JSON run configuration, build the feature
    # matrices, fit the configured regressor and write a submission.

    #################################
    ### Parse arguments from JSON ###
    #################################
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    cli_args = parser.parse_args()
    # Use a context manager so the config file handle is closed promptly
    # instead of being leaked until garbage collection.
    with open(cli_args.filename) as config_file:
        config = json.load(config_file, object_hook=_decode_dict)

    timer = Timer()
    print('Loading data... ', end='')

    #####################
    ### Load the data ###
    #####################
    # The first three characters of each path constant are dropped;
    # presumably a leading '../' -- confirm against util.const.
    train = pd.read_csv(TRAIN_DATA[3:])
    test = pd.read_csv(TEST_DATA[3:])

    # 'features' is a single string of names separated by ', '.
    features = config['features'].split(', ')

    # Flags are stored as the strings "True"/"False" in the config.
    # Normalisation is off unless explicitly set to "True".
    normalize = config.get('normalize') == "True"

    X, y = engineer_data(train, features, TARGETS, normalize=normalize)
    X_test = engineer_data(test, features, normalize=normalize)

    print('Elapsed: {}'.format(timer.elapsed()))

    #################################
    ### Create a classifier model ###
    #################################
    arguments = _process_dict(config['classifier_args'])
    # Resolve the estimator class by name among the module-level imports
    # (at module scope globals() and locals() are the same mapping).
    predictor = globals()[config['classifier']](**arguments)

    # predict_log is on unless explicitly set to "False".
    predict_log = config.get('predict_log') != "False"
    predictor = IntegratedRegressor(predictor, predict_log=predict_log)

    #################################
    ### Train a classifier model ###
    #################################
    print('{} Training {}... '.format(timer.elapsed(), config['classifier']))
    predictor.fit(X, y)
    pred = predictor.predict(X_test)

    # round and convert to int
    pred = np.intp(pred.round())

    #############################
    ### Write the predictions ###
    #############################
    # NOTE(review): the working directory is changed before writing;
    # presumably generate_submission writes relative to ./model -- confirm.
    os.chdir('./model')
    generate_submission(test, pred, config)

    print('\nTotal time: {}\n\n'.format(timer.elapsed()))