-
Notifications
You must be signed in to change notification settings - Fork 9
/
mean_classifier.py
67 lines (54 loc) · 1.72 KB
/
mean_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from distances import *
import random
def distance(p1, p2):
    """Return the squared Euclidean distance between two 2-D points.

    Only coordinates 0 and 1 of ``p1`` and ``p2`` are read, and no square
    root is taken, so the result is the squared length of ``p1 - p2``.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return dx * dx + dy * dy
def vplus(p1, p2):
    """Return the component-wise sum of two 2-D vectors as a new list.

    Only coordinates 0 and 1 of each argument are read.
    """
    return [p1[axis] + p2[axis] for axis in (0, 1)]
def ktimesv(k, p):
    """Return the 2-D vector ``p`` scaled by ``k`` as a new list.

    Only coordinates 0 and 1 of ``p`` are read.
    """
    scaled = []
    for axis in (0, 1):
        scaled.append(k * p[axis])
    return scaled
def random_labels(points, k):
    """Return one random integer label in ``0 .. k-1`` per element of ``points``.

    The contents of ``points`` are not inspected; only the number of
    elements matters.
    """
    labels = []
    for _ in points:
        # randint is inclusive at both ends, hence the k - 1 upper bound.
        labels.append(random.randint(0, k - 1))
    return labels
def points_with_label(label, points, labels):
    """Return the points whose label equals ``label``, in input order.

    ``labels[i]`` is taken as the label of ``points[i]``; ``labels`` is
    indexed once for every position in ``points``.
    """
    return [points[i] for i in range(len(points)) if labels[i] == label]
def mean(points):
    """Return the centroid of a non-empty sequence of 2-D points.

    The points are summed with ``vplus`` starting from ``[0, 0]`` and the
    total is scaled by ``1.0 / len(points)`` with ``ktimesv``.

    Raises:
        ZeroDivisionError: if ``points`` is empty.
    """
    # ``reduce`` is a builtin only on Python 2; functools provides it on
    # Python 2.6+ and Python 3 alike, so bring it into scope here.
    from functools import reduce

    count = len(points)
    if count == 0:
        # Same exception type the bare ``1.0 / 0`` produced, but with a
        # message that names the actual cause.
        raise ZeroDivisionError("cannot compute the mean of zero points")
    return ktimesv(1.0 / count, reduce(vplus, points, [0, 0]))
def train(points, labels):
    """Compute one mean per label from labelled points.

    The number of clusters ``k`` is one more than the largest value of
    ``labels[i]`` over the indices of ``points``. Labels below zero never
    raise ``k`` and are not assigned to any cluster.

    Returns:
        A list of ``k`` entries, where entry ``j`` is
        ``mean(points_with_label(j, points, labels))``.
    """
    k = 0
    for i in range(len(points)):
        # ``>=`` rather than ``>``: a label equal to the current k is not yet
        # covered by range(k). With ``>`` the labels [0, 0] gave k == 0 and
        # [1, 2] gave k == 2, dropping the last cluster.
        if labels[i] >= k:
            k = labels[i] + 1
    return [mean(points_with_label(j, points, labels)) for j in range(k)]
# Starting "best distance" for classify: any distance below it wins.
infinity = float("inf")


def classify(point, means):
    """Return the index of the mean closest to ``point``.

    Closeness is measured with ``distance``; on ties the lowest index wins
    because only a strictly smaller distance replaces the current best.
    Returns -1 when no mean has a distance below ``infinity`` (for example
    when ``means`` is empty).
    """
    winner = -1
    shortest = infinity
    for index, centre in enumerate(means):
        candidate = distance(point, centre)
        if candidate < shortest:
            winner, shortest = index, candidate
    return winner
def reclassify_all(points, means):
    """Return the ``classify`` result for every point, in input order."""
    labels = []
    for point in points:
        labels.append(classify(point, means))
    return labels
def cost(points, labels, means):
    """Return the accumulated point-to-mean distance over all clusters.

    For each cluster index ``j`` in ``range(len(means))`` the values
    ``distance(point, means[j])`` of the points labelled ``j`` are folded
    with ``plus`` starting from 0; the per-cluster results are then folded
    with ``plus`` again, also starting from 0.

    NOTE(review): ``plus`` is not defined in this file; it is presumably a
    two-argument addition supplied by ``from distances import *`` -- confirm.
    """
    # ``reduce`` is a builtin only on Python 2; functools provides it on
    # Python 2.6+ and Python 3 alike, so bring it into scope here.
    from functools import reduce

    per_cluster = []
    for j in range(len(means)):
        members = points_with_label(j, points, labels)
        within = [distance(point, means[j]) for point in members]
        per_cluster.append(reduce(plus, within, 0))
    return reduce(plus, per_cluster, 0)
def all_labeled(labels):
    """Return True when no entry of ``labels`` equals -1, else False."""
    return not any(label == -1 for label in labels)
def all_labels(labels, k):
    """Return True when every label ``0 .. k-1`` occurs in ``labels``.

    Returns False as soon as one label in that range has no matching entry.
    """
    for wanted in range(k):
        if not any(entry == wanted for entry in labels):
            return False
    return True