-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_analysis.py
109 lines (79 loc) · 3.22 KB
/
data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import json
#import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from collections import Counter
from calculations import calculate_avg_time
def main():
    """Script entry point: run the Strava lap analysis.

    BUG FIX: the original body was `plt.show()`, but the matplotlib import
    at the top of the file is commented out, so `main()` raised NameError
    on every run. The analysis call itself was commented out. This restores
    the evident intent: run the analysis pipeline.
    """
    analisys()
def edit_distance_features(features, predicted_distances):
    """Snap column 0 of *features* to the nearest predicted distance.

    Mutates *features* in place: each raw lap distance (column 0) is
    replaced by whichever value in *predicted_distances* is closest to it.
    Returns the same (mutated) array for convenience.
    """
    for row in range(features.shape[0]):
        raw = features[row, 0]
        nearest = min(predicted_distances, key=lambda candidate: abs(candidate - raw))
        features[row, 0] = nearest
    return features
def predict_distances(features, n_clusters=3):
    """Cluster lap distances and snap each cluster mean to a canonical distance.

    Parameters
    ----------
    features : ndarray, shape (n_laps, 3)
        Column 0 holds the raw lap distance (meters, per setup_data).
    n_clusters : int, optional
        Number of KMeans clusters to fit. Default 3 preserves the
        original hard-coded behavior.

    Returns
    -------
    ndarray, shape (n_nonempty_clusters,)
        One canonical distance per cluster that actually received laps,
        ordered by ascending cluster label (same order as the original).

    Notes
    -----
    KMeans uses random initialization here, so cluster assignment (and
    therefore the output) is not deterministic across runs.
    """
    # Canonical interval lengths (meters) common in track/road workouts.
    true_distance = [50, 60, 80, 100, 150, 200, 250, 300, 350, 400, 500, 600, 800, 1000, 1200, 1400, 1600, 1609, 2000,
                     2500, 3000]
    kmeans = KMeans(n_clusters=n_clusters).fit(features[:, 0].reshape(-1, 1))
    labels = kmeans.labels_
    # BUG FIX: the original sized its arrays by len(set(labels)) but indexed
    # them by raw label value; with an empty cluster the surviving labels
    # need not be contiguous (e.g. {0, 2}) and indexing raised IndexError.
    # Iterating the labels actually present avoids that entirely.
    unique_labels = sorted(set(labels))
    predicted_distance = np.zeros(shape=len(unique_labels))
    for i, label in enumerate(unique_labels):
        mean_distance = features[labels == label, 0].mean()
        predicted_distance[i] = min(true_distance, key=lambda x: abs(x - mean_distance))
    return predicted_distance
def setup_data(data):
    """Build a feature matrix from raw Strava lap dicts.

    Laps with average_speed below 3 are discarded as too slow to be real
    intervals. Each surviving lap contributes one row of
    [distance, average_cadence, elapsed_time].

    Returns
    -------
    tuple
        (features, timestamps, distances): an (n, 3) ndarray, the list of
        start_date values, and the list of raw distances, all filtered the
        same way.
    """
    distances = []
    cadences = []
    times = []
    timestamps = []
    for lap in data:
        # Skip laps whose pace is implausibly slow.
        if lap["average_speed"] < 3:
            continue
        distances.append(lap["distance"])
        cadences.append(lap["average_cadence"])
        times.append(lap["elapsed_time"])
        timestamps.append(lap["start_date"])
    features = np.column_stack((
        np.asarray(distances),
        np.asarray(cadences),
        np.asarray(times),
    ))
    return features, timestamps, distances
def normalize_array(array):
    """Scale *array* to unit Euclidean (L2) norm.

    BUG FIX: the original divided unconditionally, so an all-zero (or
    empty) array produced NaNs and a RuntimeWarning. A zero-norm input is
    now returned unchanged.
    """
    norm = np.linalg.norm(array)
    if norm == 0:
        return array
    return array / norm
def analisys():
    """Run the full lap-analysis pipeline on strava_laps_2.json.

    Loads the laps, builds features, clusters and snaps the distance
    column to canonical values, drops the last five rows, and returns the
    result of calculations.calculate_avg_time on the cleaned matrix.
    """
    with open('strava_laps_2.json') as json_file:
        r = json.load(json_file)
    # BUG FIX: setup_data returns (features, timestamps, distances); the
    # original bound the whole tuple to `features`, so the downstream
    # features[:, 0] indexing raised TypeError. Unpack the tuple.
    features, timestamps, distances = setup_data(r)
    predicted_distances = predict_distances(features)
    new_features = edit_distance_features(features, predicted_distances)
    # Drop the last five rows (the original deleted them one np.delete
    # call at a time).
    new_features = new_features[:-5]
    # NOTE(review): the original also called plt.scatter here, but the
    # matplotlib import is commented out at the top of the file, so that
    # line raised NameError; plotting is removed until the import returns.
    print(new_features)
    return calculate_avg_time(new_features)
# plt.scatter(features[:, 0], features[:, 1], c=kmeans.labels_, cmap='rainbow')
# npdistance = normalize_array(npdistance)
# npcadence = normalize_array(npcadence)
# npspeed = normalize_array(npspeed)
# features = np.concatenate((npdistance, npcadence, npspeed), axis=1)
# new_data = pca(features, 2)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()