-
Notifications
You must be signed in to change notification settings - Fork 12
/
train_second_layer.py
124 lines (97 loc) · 3.56 KB
/
train_second_layer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
'''
Author : Siddharth Nayak
email:[email protected]
'''
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import scipy as sp
import os, signals1
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
'''
This module is for the second layer of SVM to classify between gestures
having similiarity in the dynamic parts.
'''
#Check if the module is executed as main, needed for parallel processing
if __name__ == '__main__':
#List of parameters
SHOW_CONFUSION_MATRIX = False
x_data = []
y_data = []
classes = {}
root="data1234" #Default directory containing the dataset
print( "Loading the dataset from '{directory}'...".format(directory=root),)
#Fetch all the data files from the root directory of the dataset
for path, subdirs, files in os.walk(root):
for name in files:
#Get the filename
filename = os.path.join(path, name)
if filename[-6:]!='_Store':
print(filename)
#Load the sample from file
sample = signals1.Sample.load_from_file(filename)
#Linearize the sample and then add it to the x_data list
x_data.append(sample.get_linearized())
#Extract the category from the file name
#For example, the file "a_sample_0.txt" will be considered as "a"
category = name.split("_")[0]
#Get a number for the category, as an offset from the category
#to the a char in Ascii
if category=='d':
number=0
elif category=='e':
number=1
elif category=='i':
number=2
elif category=='j':
number=3
elif category=='g':
number=4
elif category=='k':
number=5
#Add the category to the y_data list
y_data.append(number)
#Include the category and the corresponding number into a dictionary
#for easy access and referencing
classes[number] = category
print('done')
print ("DONE")
#Parameters used in the cross-validated training process
#The library automatically tries every possible combination to
#find the best scoring one.
params = {'C':[0.001,0.01,0.1,1], 'kernel':['rbf']}
#params= {'max_depth': [1, 2, 3, 4, 5],'max_features': [1, 2, 3, 4]}
#Inizialize the model
svc = svm.SVC(probability = True)
#clf = DecisionTreeClassifier(random_state=0)
#Inizialize the GridSearchCV with 8 processing cores and maximum verbosity
clf = GridSearchCV(svc, params,verbose =10, n_jobs=8)
#grid_search = GridSearchCV(clf, param_grid = params,cv = cross_validation)
#Split the dataset into two subset, one used for training and one for testing
X_train, X_test, Y_train, Y_test = train_test_split(x_data,
y_data, test_size=0.35, random_state=0)
print ("Starting the training process...")
#Start the training process
clf.fit(X_train, Y_train)
#If SHOW_CONFUSION_MATRIX is true, prints the confusion matrix
if SHOW_CONFUSION_MATRIX:
print ("Confusion Matrix:")
Y_predicted = clf.predict(X_test)
print (confusion_matrix(Y_test, Y_predicted))
print ("\nBest estimator parameters: ")
#print (clf.best_estimator_)
#print (clf.best_score_)
#Calculates the score of the best estimator found.
score = clf.score(X_test, Y_test)
print ("\nSCORE: {score}\n".format(score = score))
print ("Saving the model...",)
#Saves the model to the "model.pkl" file
joblib.dump(clf, 'model_flex1.pkl')
#Saves the classes to the "classes.pkl" file
joblib.dump(classes, 'classes_flex1.pkl')
print ("DONE")