-
Notifications
You must be signed in to change notification settings - Fork 0
/
cv_toytrees_EMD.py
75 lines (56 loc) · 2.19 KB
/
cv_toytrees_EMD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import os,sys
# Make the project-local modules imported below (`graph`, `NN`) importable by
# putting their directory on sys.path.
# NOTE(review): this is a hard-coded absolute path on the author's machine;
# on any other machine it must be adapted for the imports below to resolve.
module_path = os.path.abspath('/Users/vayer/Documents/OT/Python/GW_tests/')
if module_path not in sys.path:
    # Guarded so that re-running the script in the same interpreter does not
    # append the same entry twice.
    sys.path.append(module_path)
from graph import *
import copy
import NN,time
from sklearn.model_selection import GridSearchCV
# ---------------------------------------------------------------------------
# Experiment script: grid-search cross-validation of NN.Tree_EMD_1NN_Classifier
# on a generated toy dataset, with a plain-text report written to
# ./result_toytrees_EMD.csv.
# ---------------------------------------------------------------------------

# Dataset parameters, forwarded verbatim to build_binary_uniform_dataset
# (presumably provided by `from graph import *`; their exact meaning is
# defined there, not in this file).
nTree = 100
depth = 2
c = 20
d = 30
dataset = build_binary_uniform_dataset(
    nTree1=nTree, nTree2=nTree, maxdepth=depth, c=c, d=d
)
# The dataset is unpacked as a sequence of (sample, label) pairs.
X, y = zip(*dataset)

# Hold out part of the data: the 0.8 ratio is passed to split_train_test, and
# the first returned part is used for cross-validation, the second for the
# final test score.
rationtraintest = 0.8
A, B = split_train_test(dataset, rationtraintest)
x_train, y_train = zip(*A)
x_test, y_test = zip(*B)

dir_path = './'
result_file = 'result_toytrees_EMD.csv'
n_splits = 10

# The report file is managed by a context manager so it is flushed and closed
# even if the grid search or the prediction raises.
with open(os.path.join(dir_path, result_file), 'w') as text_file:
    start_time = time.time()

    # --- report header ---
    print('CV Nb_splits : ', n_splits, file=text_file)
    print('Data size : ', len(X), file=text_file)
    # Written to the report like the other header lines (it previously went
    # to stdout only, leaving the report without the split ratio).
    print('Train/test : ', rationtraintest, file=text_file)

    tuned_parameters = [{'features_metric': ['sqeuclidean']}]
    print('Tuned tuned_parameters : ', tuned_parameters, file=text_file)

    # --- cross-validated grid search on the training part ---
    emd_1NN = NN.Tree_EMD_1NN_Classifier()
    clf = GridSearchCV(
        emd_1NN, tuned_parameters, cv=n_splits, verbose=1, scoring='accuracy'
    )
    # Samples are passed as a single-column array, one sample per row.
    clf.fit(np.array(x_train).reshape(-1, 1), np.array(y_train))

    print('--------------------------', file=text_file)
    print('--------------------------', file=text_file)
    print('', file=text_file)
    print("Best parameters set found on development set:", file=text_file)
    print('', file=text_file)
    print(clf.best_params_, file=text_file)
    print('', file=text_file)
    print('--------------------------', file=text_file)
    print('--------------------------', file=text_file)
    print("Grid scores on development set:", file=text_file)
    print('', file=text_file)

    # One line per parameter combination: mean CV accuracy and twice the
    # standard deviation across folds.
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params), file=text_file)
    # Blank separator after the score table (the grid above holds a single
    # candidate, so exactly one score line precedes it).
    print('', file=text_file)

    # The clock is stopped here, so 'All Time' below covers the grid search
    # and reporting but not the test-set prediction.
    end_time = time.time()
    print('--------------------------', file=text_file)
    print('--------------------------', file=text_file)

    # --- final evaluation on the held-out part ---
    # NOTE(review): fit() receives X reshaped to (-1, 1) but predict() gets
    # the un-reshaped array; confirm that the classifier accepts both
    # layouts before relying on this score.
    preds = clf.predict(np.array(x_test))
    # Fraction of held-out samples whose prediction equals the true label.
    nested_scores = np.sum(preds == np.array(y_test)) / len(y_test)
    print('Score on test set with best_params_ : ', nested_scores, file=text_file)
    print('All Time :', end_time - start_time, file=text_file)