gridSearch_d0.py
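# Leave-one-video-out grid search over min_samples_leaf for a
# DecisionTreeClassifier, run separately for each QP (22, 27, 32, 37) on the
# depth-0 feature CSVs. The per-fold validation accuracies, the candidate
# classifiers, and the selected classifier per QP are pickled at the end.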
from bib import *
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import pickle
from sklearn.model_selection import PredefinedSplit
# Later I'll do a 10-fold cross-validation, so I don't need an explicit validation set.
train_QP_22 = ['ChristmasTree_QP_22_depth0.csv','CrowdRun_QP_22_depth0.csv','DucksTakeOff_QP_22_depth0.csv',
'PedestrianArea_QP_22_depth0.csv','RushHour_QP_22_depth0.csv','Sunflower_QP_22_depth0.csv','Tractor_QP_22_depth0.csv','Wisley_QP_22_depth0.csv']
train_QP_27 = ['ChristmasTree_QP_27_depth0.csv','CrowdRun_QP_27_depth0.csv','DucksTakeOff_QP_27_depth0.csv',
'PedestrianArea_QP_27_depth0.csv','RushHour_QP_27_depth0.csv','Sunflower_QP_27_depth0.csv','Tractor_QP_27_depth0.csv','Wisley_QP_27_depth0.csv']
train_QP_32 = ['ChristmasTree_QP_32_depth0.csv','CrowdRun_QP_32_depth0.csv','DucksTakeOff_QP_32_depth0.csv',
'PedestrianArea_QP_32_depth0.csv','RushHour_QP_32_depth0.csv','Sunflower_QP_32_depth0.csv','Tractor_QP_32_depth0.csv','Wisley_QP_32_depth0.csv']
train_QP_37 = ['ChristmasTree_QP_37_depth0.csv','CrowdRun_QP_37_depth0.csv','DucksTakeOff_QP_37_depth0.csv',
'PedestrianArea_QP_37_depth0.csv','RushHour_QP_37_depth0.csv','Sunflower_QP_37_depth0.csv','Tractor_QP_37_depth0.csv','Wisley_QP_37_depth0.csv']
train_d0 = [train_QP_22,train_QP_27,train_QP_32,train_QP_37]
# Grid of min_samples_leaf values to search over (one column of accs_mat per candidate).
leaf_grid = np.linspace(2, 10000, num=100, dtype=np.int64)

accs = []
clfs = []
for item in train_d0:
    accs_mat = np.zeros((8, len(leaf_grid)))
    for i in range(8):
        # Hold out one video as the validation fold and train on the other seven.
        valid = [item[i]]
        train = [item[j] for j in range(8) if j != i]
        data = Data()
        data.load_data(train, valid)
        # Matrix with the first 10 features of the training set
        X_train = data.features_train[:, :10]
        # Matrix with the first 10 features of the validation set
        X_valid = data.features_valid[:, :10]
        # Column vector with the classes of the training set
        y_train = data.classes_train
        y_train.shape = (y_train.shape[0], 1)
        # Column vector with the classes of the validation set
        y_valid = data.classes_valid
        y_valid.shape = (y_valid.shape[0], 1)
        # PredefinedSplit: -1 keeps a sample in the training set for every split,
        # 0 puts it in the single validation fold (the held-out video).
        test_fold = np.full(X_train.shape[0] + X_valid.shape[0], -1)
        test_fold[X_train.shape[0]:] = 0
        ps = PredefinedSplit(test_fold=test_fold)
        X = np.vstack((X_train, X_valid))
        y = np.vstack((y_train, y_valid))
        pipe = Pipeline([('classifier', DecisionTreeClassifier())])
        # Here I choose which parameters I want to run the grid search on.
        param_grid = [
            {'classifier': [DecisionTreeClassifier()],
             'classifier__random_state': [42],
             'classifier__criterion': ['gini'],
             'classifier__splitter': ['best'],
             'classifier__min_samples_leaf': leaf_grid
             }]
        # Create the grid search object and evaluate every candidate on the predefined split.
        grid = GridSearchCV(pipe, param_grid=param_grid, cv=ps, verbose=True, n_jobs=-1)
        grid.fit(X, y.ravel())
        df = pd.DataFrame(grid.cv_results_)
        # Validation accuracy of every candidate for this held-out video.
        accs_mat[i, :] = df.split0_test_score.values
        # The candidate list is identical for every fold, so keeping the last copy is enough.
        clfs_qp = df.param_classifier
    accs.append(accs_mat)
    clfs.append(clfs_qp)

# Save the per-fold validation accuracies and the candidate classifiers for each QP.
res = [accs, clfs]
with open("GridSearch_results_d0.pkl", "wb") as fp:
    pickle.dump(res, fp)

# For each QP, pick the candidate whose mean validation accuracy across the
# eight folds is highest relative to its standard deviation.
best_clfs = []
for acc_qp, clf_qp in zip(accs, clfs):
    accs_mean = np.mean(acc_qp, axis=0)
    accs_std = np.std(acc_qp, axis=0)
    scores = accs_mean / accs_std
    idx = np.argmax(scores)
    best_clfs.append(clf_qp[idx])

with open("best_clfs_d0.pkl", "wb") as fp:
    pickle.dump(best_clfs, fp)