forked from medhini/Malicious_Website_Detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
50 lines (45 loc) · 1.45 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# sklearn.externals.joblib was removed in scikit-learn 0.23; prefer the
# standalone joblib package and fall back to the vendored copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Train an SVM classifier on the extracted feature vectors and persist it.
#
# Input : f_vectors.csv and f_vectors2.csv -- comma-separated, one header row;
#         columns 0-7 hold the features and column 8 holds the class label.
# Output: trainmodel.pkl (the fitted SVC, written with joblib), plus a
#         classification report and confusion matrix printed to stdout.

# Feature columns come first; the label sits in the column right after them.
N_FEATURES = 8

# Load both CSV files as numpy matrices. np.genfromtxt returns a 1-D array when
# a file contains a single data row, which would break the concatenation and
# the column slicing below; np.atleast_2d keeps each file as (rows, columns)
# and leaves the usual multi-row result untouched.
dataset = np.atleast_2d(
    np.genfromtxt("f_vectors.csv", delimiter=',', skip_header=1))
dataset2 = np.atleast_2d(
    np.genfromtxt("f_vectors2.csv", delimiter=',', skip_header=1))
dataset = np.concatenate((dataset, dataset2))

# Separate the data from the target attribute.
X = dataset[:, :N_FEATURES]
y = dataset[:, N_FEATURES]

# Normalised (nX) and standardised (sX) copies of the features.
# NOTE(review): the model below is fitted on the raw X, not on nX or sX.
# Switching would change the persisted model, so whatever loads
# trainmodel.pkl would have to apply the same transform -- confirm before
# changing.
nX = preprocessing.normalize(X)
sX = preprocessing.scale(X)

# Fit a support vector classifier with scikit-learn's default hyperparameters.
model = SVC()
model.fit(X, y)
print(model)

# Predict on the same rows the model was trained on. The report and confusion
# matrix below therefore summarise the fit on the training data only; they are
# not a held-out generalisation estimate.
expected = y
predicted = model.predict(X)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Persist the fitted model to disk.
joblib.dump(model, 'trainmodel.pkl')