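"""curve_baseline.py

Baseline regressors for learning-curve extrapolation: each model learns to
predict the next point of a learning curve from the 5 hyperparameter values
plus the last 4 observed curve points, and is then rolled out
autoregressively to extrapolate the remainder of the curve.
"""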
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# noinspection PyUnresolvedReferences
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor, RandomForestRegressor, \
    BaggingRegressor
# noinspection PyUnresolvedReferences
from sklearn.svm import SVR
# noinspection PyUnresolvedReferences
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from util.common import loss, print_pd_frame_from_multi_input_performances
from util.loader import load_data_as_numpy
from sklearn.preprocessing import StandardScaler


def predict_curve(model, config, start_points, prediction_length):
    """Extrapolate a learning curve to `prediction_length` points.

    The feature vector is the 5-value configuration followed by a sliding
    window of the last 4 curve values; each prediction is fed back into
    the window until the curve is complete.
    """
    x = np.zeros((1, 9))
    x[0, :5] = config
    predicted_curve = np.zeros(prediction_length)
    predicted_curve[:start_points.shape[0]] = start_points
    curr_point = start_points.shape[0]
    x[0, 5:9] = start_points[curr_point - 4:curr_point]
    while curr_point < prediction_length:
        # predict() returns a length-1 array; take the scalar so the
        # assignments below stay valid on recent NumPy versions
        point = model.predict(x)[0]
        predicted_curve[curr_point] = point
        curr_point += 1
        # shift the window one step left and append the new prediction
        x[0, 5:9] = np.roll(x[0, 5:9], -1)
        x[0, 8] = point
    return predicted_curve
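

# a minimal usage sketch (assuming the names defined in the CV loop below):
# extrapolate one full test curve from its first 5 observed points with an
# already-fitted model:
#   curve = predict_curve(clf, test_configs[0], test_curves[0, :5], test_curves.shape[1])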

# standardize hyperparameter configurations per fold
normalize = True

configs, learning_curves = load_data_as_numpy()

# models are given as source strings and instantiated via eval() inside the
# CV loop, so every fold starts from a fresh, unfitted estimator
estimators = [
    'GradientBoostingRegressor(learning_rate=0.033, n_estimators=300)',
    # 'linear_model.SGDRegressor(loss=\'squared_loss\')',
    # 'linear_model.SGDRegressor(loss=\'epsilon_insensitive\', epsilon=0.005)',
    'LinearRegression()',
    # 'linear_model.Ridge(alpha=0.1)',
    'RandomForestRegressor(n_estimators=30)',
    # 'SVR(C=2.0, kernel=\'linear\', epsilon=0.005)',
    'BaggingRegressor()'
]

k_fold = KFold(n_splits=3, shuffle=True, random_state=1)
# loss per (estimator, fold, number of observed input points)
performances = np.zeros((len(estimators), 3, 4))

for m_idx, model_desc in enumerate(estimators):
    current_fold = 0
    print(model_desc)
    for train_indices, test_indices in k_fold.split(configs):
        # split into training and test data
        train_configs = configs[train_indices]
        train_curves = learning_curves[train_indices]
        test_configs = configs[test_indices]
        test_curves = learning_curves[test_indices]
        if normalize:
            scaler = StandardScaler()
            train_configs = scaler.fit_transform(train_configs)
            test_configs = scaler.transform(test_configs)
        # one training sample per window position: 5 config values plus the
        # preceding 4 curve points predict the next point (36 windows for
        # the 40-point curves)
        windows = train_curves.shape[1] - 4
        train_x = np.zeros((train_configs.shape[0] * windows, 9))
        train_y = np.zeros(train_configs.shape[0] * windows)
        for i in range(train_configs.shape[0]):
            for j in range(4, train_curves.shape[1]):
                train_x[i * windows + j - 4, :5] = train_configs[i]
                train_x[i * windows + j - 4, 5:9] = train_curves[i, j - 4:j]
                train_y[i * windows + j - 4] = train_curves[i, j]
        clf = eval(model_desc)
        clf.fit(train_x, train_y)
        # evaluate extrapolation from 5, 10, 20 and 30 observed points
        for k, input_points in enumerate([5, 10, 20, 30]):
            pred_curves = np.array(
                [predict_curve(clf, test_configs[t], test_curves[t, :input_points], test_curves.shape[1])
                 for t in range(test_configs.shape[0])]
            )
            # score only the final value of each extrapolated curve
            fold_loss = loss(pred_curves[:, -1], test_curves[:, -1])
            performances[m_idx, current_fold, k] = fold_loss
        print('fold {0} loss: {1}'.format(current_fold, performances[m_idx, current_fold]))
        current_fold += 1
    print('mean CV performance: {0}\n'.format(performances[m_idx].mean(axis=0)))

frame = print_pd_frame_from_multi_input_performances(performances, [e.split('(')[0] for e in estimators])
frame = frame.sort_values('loss_mean')
# writes the result table for the report; assumes the out/ directory exists
frame.to_latex('out/task3_point4_baselines.tex')
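
# optional: uncomment the block below to visualize the last fitted model's
# extrapolations from the first 4 observed points of 20 random test curves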
# steps = test_curves.shape[1]
# fig = plt.figure(figsize=(10, 10))
# for i, idx in enumerate(np.random.choice(np.arange(0, test_configs.shape[0]), 20)):
#     y_hat = predict_curve(clf, test_configs[idx], test_curves[idx, :4], steps)
#
#     plt.subplot(4, 5, i + 1)
#     plt.plot(range(40), test_curves[idx], "g")
#     plt.plot(range(steps), y_hat, "r")
#     plt.ylim(0, 1)
#
# plt.tight_layout()
# plt.show()