-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnbastats.py
109 lines (86 loc) · 2.41 KB
/
nbastats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
V = (v1 - v2) / (v1 + v2)
Transform the values as above,
v1 being the stats for home and
v2 being the stats for away.
Calculate rDiff for each game and store it in a single column:
rDiff = (home - away) / (home + away)
Run a linear regression on xV = rDiff,
where x is the matrix of coefficients,
V is the transformed values, and
rDiff is the actual diff.
"""
import sys
import csv
from sklearn import linear_model
import matplotlib.pyplot as plt
def read(fileName):
    """Fit and score linear and ridge regressions on a game-stats CSV.

    Every CSV row is converted by ``readLine`` into a list whose last
    element is the regression target and whose remaining elements are the
    features. The final ``testSetSize`` rows are held out for scoring and
    all earlier rows are used for training. For each model the fitted
    coefficients and the value of ``score`` on the held-out rows are
    printed to stdout.

    Note: with ``testSetSize`` or fewer rows the training slice is empty.

    Args:
        fileName: Path of the CSV file to read.

    Returns:
        None. Results are only printed.
    """
    # Python 2 needs 'U' for universal-newline handling; on Python 3 text
    # mode already translates newlines and the 'U' flag was removed in 3.11.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    # The context manager guarantees the file is closed, even on a bad row.
    with open(fileName, mode) as fileHandle:
        values = [readLine(line) for line in csv.DictReader(fileHandle)]

    stats = [row[:-1] for row in values]
    rDiff = [row[-1] for row in values]

    # The last `testSetSize` rows form the test set; everything before
    # them is the training set.
    testSetSize = 300
    trainStats = stats[:-testSetSize]
    trainrDiff = rDiff[:-testSetSize]
    testStats = stats[-testSetSize:]
    testrDiff = rDiff[-testSetSize:]

    # The model must be created before fitting; one fit on the training
    # set is sufficient (refitting per game would give the same result).
    clf = linear_model.LinearRegression()
    clf.fit(trainStats, trainrDiff)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("linearRegression")
    print(clf.coef_)
    print(clf.score(testStats, testrDiff))

    ridge = linear_model.Ridge(alpha=.5)
    ridge.fit(trainStats, trainrDiff)
    print("ridgeLinearRegression")
    print(ridge.coef_)
    print(ridge.score(testStats, testrDiff))
def plot(x, y, model):
    """Display the observations with the model's predictions drawn on top.

    Args:
        x: Inputs; used for the horizontal axis and passed to
            ``model.predict``.
        y: Observed values, drawn as black scatter points.
        model: Object exposing ``predict(x)``; its output is drawn as a
            blue line.
    """
    plt.scatter(x, y, color='black')

    predicted = model.predict(x)
    plt.plot(x, predicted, linewidth=3, color='blue')

    # Empty tick sequences are passed for both axes.
    noTicks = ()
    plt.xticks(noTicks)
    plt.yticks(noTicks)

    plt.show()
def readLine(row):
    """Convert one CSV row into a list of normalized values.

    Args:
        row: Mapping for a single game, as produced by ``csv.DictReader``.

    Returns:
        A list holding ``getValue(row, name)`` for each feature column, in
        the order listed below, followed by the value for "Points" as the
        last element.
    """
    featureNames = (
        "Assists",
        "Blocks",
        "Steals",
        "Turnovers",
        "FreeThrowsAttempted",
        "FreeThrowsMade",
        "ThreePointersMade",
        "OffensiveRebounds",
        "DefensiveRebounds",
        "Fouls",
        "TwoPointersAttempted",
        "TwoPointersMade",
        "ThreePointersAttempted",
    )
    normalized = [getValue(row, statName) for statName in featureNames]
    # The "Points" value goes last so callers can split it off with [-1].
    normalized.append(getValue(row, "Points"))
    return normalized
def getValue(row, statName):
    """Return the normalized home-vs-away value of one stat for a game.

    Args:
        row: Mapping for a single game; values must be convertible by
            ``int``.
        statName: Column name of the home stat.

    Returns:
        The result of ``getNormalizedValue`` for the home and away values.
    """
    home = int(row.get(statName))
    # Opponent columns use the same stat name prefixed with "O.".
    opponentKey = "O." + statName
    away = int(row.get(opponentKey))
    return getNormalizedValue(home, away)
def getNormalizedValue(homeStat, awayStat):
    """Return (homeStat - awayStat) / (homeStat + awayStat) as a float.

    Args:
        homeStat: Numeric stat for the home side.
        awayStat: Numeric stat for the away side.

    Returns:
        The normalized difference as a float. ``0.0`` is returned when the
        two stats are equal or when their sum is zero, which avoids a
        division by zero.
    """
    numer = homeStat - awayStat
    denom = homeStat + awayStat
    if numer == 0 or denom == 0:
        # Always return a float so every value in a feature row has the
        # same type (previously this branch returned the int 0).
        return 0.0
    # float() forces true division on Python 2 as well.
    return numer / float(denom)
if __name__ == '__main__':
    # The CSV path is the only required argument. Exit with a usage message
    # rather than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <stats.csv>" % sys.argv[0])
    read(sys.argv[1])