-
Notifications
You must be signed in to change notification settings - Fork 3
/
Cluster.py
137 lines (121 loc) · 3.98 KB
/
Cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 27 13:32:24 2016
@author: mattwallingford
"""
import preprocess
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import pandas as pd
from collections import Counter
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AffinityPropagation
import random
from matplotlib import colors
import preprocess
# Exclusive upper bound on the cluster counts swept by plot_elbow
# (k = 1 .. MAX_CLUSTER - 1); also the right-hand x-axis limit of that plot.
MAX_CLUSTER = 4
def find_best_cluster(df, max_cluster):
    """Return the mean nearest-centroid distance for each candidate k.

    One KMeans model (``random_state=0``) is fitted for every k in
    ``range(1, max_cluster)``. For each model, the Euclidean distance from
    every row of ``df`` to its closest cluster centre is averaged over all
    rows.

    Args:
        df: Two-dimensional data (DataFrame or array) with one sample per
            row, as accepted by ``KMeans.fit`` and ``cdist``.
        max_cluster: Exclusive upper bound on the number of clusters tried.

    Returns:
        A list with one average distance per k, ordered by increasing k.
        The list is empty when ``max_cluster <= 1``.
    """
    cluster_counts = range(1, max_cluster)
    models = [KMeans(n_clusters=k, random_state=0).fit(df) for k in cluster_counts]
    print('Completed: training...')
    centroids = [model.cluster_centers_ for model in models]
    print('Computed: centroids...')
    # Reduce each (n_samples, k) distance matrix to per-row minima right away
    # so that only one full matrix is alive at a time.
    nearest = [cdist(df, centres, 'euclidean').min(axis=1) for centres in centroids]
    print('Computed: distances...')
    # The row-wise minima have one entry per sample, so .mean() divides by the
    # sample count without needing a second column to measure the length.
    return [distances.mean() for distances in nearest]
# TODO: try more clusters.
def plot_elbow(df, max_cluster=None):
    """Plot average nearest-centroid distance against the number of clusters.

    Args:
        df: Data passed straight through to ``find_best_cluster``.
        max_cluster: Exclusive upper bound on the cluster counts to try.
            Defaults to the module-level ``MAX_CLUSTER``.

    Returns:
        A tuple ``(x, y)`` where ``x`` is ``range(1, max_cluster)`` and ``y``
        is the matching list of average distances.
    """
    if max_cluster is None:
        max_cluster = MAX_CLUSTER
    # find_best_cluster requires the upper bound explicitly; calling it with
    # only ``df`` raises TypeError.
    y = find_best_cluster(df, max_cluster)
    x = range(1, max_cluster)
    plt.plot(x, y)
    # The y-limits (2..8) are fixed; only the x-range follows max_cluster.
    plt.axis([1, max_cluster, 2, 8])
    return x, y
def plot_clusters_k_means(df, n_clusters=10):
    """Show a 3-D scatter of ``df`` coloured by KMeans cluster.

    The data is projected onto its first three principal components for
    display only; clustering is performed on the original, untransformed
    ``df``.

    Args:
        df: Two-dimensional data with one sample per row.
        n_clusters: Number of KMeans clusters (``random_state=3``).
            Defaults to 10.
    """
    fig = plt.figure()
    # Figure.gca() no longer accepts a projection keyword in current
    # matplotlib; add_subplot works on both old and new releases.
    ax = fig.add_subplot(111, projection='3d')

    coords = np.asarray(PCA(n_components=3).fit_transform(df))
    print('Transformed data...')

    predictions = KMeans(n_clusters=n_clusters, random_state=3).fit_predict(df)
    # Named point_colors so the module-level ``colors`` import is not shadowed.
    point_colors = convert_cluster_to_color(predictions)
    print('Predicted Colors...')

    ax.scatter(xs=coords[:, 0], ys=coords[:, 1], zs=coords[:, 2], c=point_colors)
    plt.show()
def plot_clusters_spectral(df):
    """Show a 3-D scatter of ``df`` coloured by AffinityPropagation cluster.

    Despite the function name, the model fitted here is
    ``AffinityPropagation``. It is trained on a random subsample in which
    each row is kept with probability 0.01, then used to label every row of
    ``df``. The points are drawn at their first three principal components,
    with all three axes limited to [-1000, 1000].

    Args:
        df: DataFrame with one sample per row; it is indexed with a boolean
            row mask to draw the subsample.

    Raises:
        ValueError: If the random subsample happens to contain no rows.
    """
    fig = plt.figure()
    # Figure.gca() no longer accepts a projection keyword in current
    # matplotlib; add_subplot works on both old and new releases.
    ax = fig.add_subplot(111, projection='3d')

    samples = np.random.choice([False, True], len(df), p=[.99, .01])
    df1 = df[samples]
    print(len(df1))
    if len(df1) == 0:
        # Fail early with a clear message instead of an obscure estimator error.
        raise ValueError(
            'random 1% subsample is empty; need more rows to fit the model'
        )

    # Keep the PCA output for plotting (not the raw frame).
    coords = np.asarray(PCA(n_components=3).fit_transform(df))
    print('Transformed data...')

    model = AffinityPropagation().fit(df1)
    print('Constructed: Model')
    predictions = model.predict(df)
    print(len(model.cluster_centers_indices_))
    point_colors = convert_cluster_to_color(predictions)
    print('Predicted Colors...')

    ax.scatter(xs=coords[:, 0], ys=coords[:, 1], zs=coords[:, 2], c=point_colors)
    # Apply the limits to the axes that holds the scatter; creating a fresh
    # Axes3D here would leave the plotted axes untouched.
    ax.set_xlim3d(-1000, 1000)
    ax.set_ylim3d(-1000, 1000)
    ax.set_zlim3d(-1000, 1000)
    plt.show()
def convert_cluster_to_color(predictions):
    """Map cluster ids to matplotlib colour names.

    Ids below 150 pick every fifth entry of ``matplotlib.colors.cnames``,
    wrapping around when the end of the table is reached; ids of 150 and
    above are all drawn in ``'red'``.

    Args:
        predictions: Iterable of integer cluster ids.

    Returns:
        A list of colour-name strings, one per prediction, in input order.
    """
    # Build the name table once rather than once per point.
    names = list(colors.cnames)
    count = len(names)
    # The modulo keeps the stride-5 lookup inside the table; indices that
    # were already in range (including negative ones) select the same colour.
    return [
        names[(pred * 5) % count] if pred < 150 else 'red'
        for pred in predictions
    ]
def convert_label_to_color(labels):
    """Translate class labels 0-10 into colour names.

    Each label is compared against every known code; a label that matches
    none of them contributes nothing to the result, so the output can be
    shorter than the input.

    Args:
        labels: Iterable of labels.

    Returns:
        A list of colour-name strings for the recognised labels, in input
        order.
    """
    # (code, colour) pairs, listed in the order the codes are checked.
    palette = (
        (1, 'black'),
        (0, 'yellow'),
        (2, 'red'),
        (3, 'blue'),
        (4, 'orange'),
        (5, 'white'),
        (6, 'purple'),
        (7, 'green'),
        (8, 'brown'),
        (9, 'pink'),
        (10, 'lavender'),
    )
    return [
        name
        for label in labels
        for code, name in palette
        if label == code
    ]
if __name__ == "__main__":
    # Load the '5-param' data from the 'cmsdata-2' folder, then pass it
    # through preprocess.remove_labels (presumably drops the label column;
    # confirm against preprocess).
    df = preprocess.remove_labels(
        preprocess.process_container_folder('cmsdata-2', '5-param')
    )
    # Alternative that keeps the frame as loaded:
    # df = preprocess.process_container_folder('cmsdata-2', '5-param')