-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclassification.py
51 lines (43 loc) · 1.34 KB
/
classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
Perform the classification based on features
"""
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from IPython import embed
class Classification(object):
    """Group FQDNs into named classes with a one-vs-rest linear SVM.

    The model is trained on the labelled FQDNs listed in a tab-separated
    file (see ``learning``) and then applied to every row of
    ``features.X_scaled`` (see ``compute``).

    Args:
        features: object exposing ``X_scaled``, an indexable collection of
            feature rows; row ``i`` is used as the features of
            ``p.fqdn[i]``.
        p: object exposing ``fqdn``, an indexable sequence of FQDN names.
    """

    def __init__(self, features, p):
        self.p = p
        self.features = features
        # {class_name: [fqdn, ...]}, filled by compute().
        self.classes = {}
        # Label names; a label's numeric id is its position in this list.
        self.labels = []
        # (feature rows, numeric label ids) as two parallel lists.
        self.training = ([], [])
        # Every FQDN classified by compute(), in p.fqdn order.
        self.all = []
        # Fitted estimator, set by learning().
        self.classifier = None

    def _make_classifier(self):
        """Return the unfitted estimator that learning() will fit."""
        return OneVsRestClassifier(LinearSVC(random_state=0))

    def learning(self, fp='training.txt'):
        """Read the labelled samples in *fp* and fit the classifier.

        Each non-blank line of *fp* is ``<fqdn>\\t<label>``; any further
        tab-separated columns are ignored. Samples are appended to
        ``self.training`` and new label names to ``self.labels``, so calling
        this method again adds to the data gathered by earlier calls.

        Args:
            fp: path of the tab-separated training file.

        Raises:
            ValueError: if a line has no label column or names an FQDN that
                is not in ``self.p.fqdn``.
        """
        # Build the name -> position table once instead of scanning the
        # list for every training line. setdefault keeps the first
        # occurrence, which is what list.index() would return.
        positions = {}
        for position, name in enumerate(self.p.fqdn):
            positions.setdefault(name, position)

        samples, targets = self.training
        # The with-block closes the file even when a line is rejected.
        with open(fp) as handle:
            for line_number, raw_line in enumerate(handle, 1):
                # Strip only the line terminator: the last line may not
                # have one, and the label must not lose a real character.
                line = raw_line.rstrip('\r\n')
                if not line.strip():
                    continue
                fields = line.split('\t')
                if len(fields) < 2:
                    raise ValueError(
                        '%s:%d: expected "<fqdn>\\t<label>", got %r'
                        % (fp, line_number, line))
                name, label = fields[0], fields[1]
                if name not in positions:
                    raise ValueError(
                        '%s:%d: unknown fqdn %r' % (fp, line_number, name))
                # Compare the same cleaned value that gets stored, so each
                # label name appears exactly once in self.labels.
                if label not in self.labels:
                    self.labels.append(label)
                samples.append(self.features.X_scaled[positions[name]])
                targets.append(self.labels.index(label))

        self.classifier = self._make_classifier().fit(samples, targets)

    def compute(self):
        """Train on ``training.txt`` and classify every known FQDN.

        Fills ``self.classes`` with ``{label_name: sorted list of FQDNs}``
        and appends each classified FQDN to ``self.all``. Both containers
        are extended, not reset, so a second call adds to the first.
        """
        self.learning()
        predictions = self.classifier.predict(self.features.X_scaled)
        for position, label_id in enumerate(predictions):
            name = self.p.fqdn[position]
            self.classes.setdefault(self.labels[label_id], []).append(name)
            self.all.append(name)
        for members in self.classes.values():
            members.sort()