# scipySVDcontrol.py
# Daniel Alabi and Cody Wang
import math
from scipy.linalg import *
import numpy as np
import time
# Print arrays with three decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
class SvdMatrix(object):
    """Truncated-SVD rating predictor over a dense user x movie matrix.

    Ratings are read from tab-separated text files whose first three fields
    are ``userid``, ``movieid`` and ``rating`` (integers, ids 1-based); any
    further fields on a line are ignored.

    Attributes:
        M: ``nusers x nmovies`` ``np.matrix`` of training ratings in which
            every unrated cell is replaced by that user's mean rating.
        Mtest: ``nusers x nmovies`` list of lists filled by ``test()``;
            ``None`` marks a cell without a rating.
        Ahat: low-rank reconstruction of ``M`` built by ``train()``
            (``None`` until then).
        U, S, Vt: truncated SVD factors kept by ``train()``
            (``None`` until then).
        nusers, nmovies: dimensions of the matrices above.
    """

    def __init__(self, trainfile, nusers, nmovies):
        """Load the training ratings and build the dense utility matrix.

        Args:
            trainfile: path of the tab-separated training ratings file.
            nusers: number of users (rows).
            nmovies: number of movies (columns).

        Raises:
            IndexError: if the file contains an id outside ``1..nusers`` /
                ``1..nmovies``.
            ValueError: if a non-blank line cannot be parsed.
        """
        self.Mtest = [[None] * nmovies for _ in range(nusers)]
        self.Ahat = None
        self.U = None
        self.S = None
        self.Vt = None
        self.nusers = nusers
        self.nmovies = nmovies

        # NaN marks "not rated" while the utility matrix is being filled in.
        ratings = np.full((nusers, nmovies), np.nan)
        for user, movie, rating in self._read_ratings(trainfile):
            ratings[user, movie] = rating
        self.M = np.matrix(self._fill_missing(ratings))

    def _read_ratings(self, fname):
        """Yield ``(user_index, movie_index, rating)`` with 0-based indices."""
        with open(fname) as ratings_file:
            for lineno, line in enumerate(ratings_file, 1):
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                fields = line.split("\t")
                userid, movieid, rating = [int(field) for field in fields[:3]]
                # Reject ids <= 0 explicitly: ``id - 1`` would otherwise be a
                # negative index and silently address the last row/column.
                if not (1 <= userid <= self.nusers
                        and 1 <= movieid <= self.nmovies):
                    raise IndexError(
                        "%s:%d: user/movie id (%d, %d) outside 1..%d / 1..%d"
                        % (fname, lineno, userid, movieid,
                           self.nusers, self.nmovies))
                yield userid - 1, movieid - 1, rating

    def _fill_missing(self, ratings):
        """Return a copy of ``ratings`` with NaN cells set to the row mean.

        A user without any rating gets the mean of all known ratings
        (0.0 when there are none at all) instead of triggering a 0/0.
        """
        known = ~np.isnan(ratings)
        counts = known.sum(axis=1)
        totals = np.where(known, ratings, 0.0).sum(axis=1)
        nrated = counts.sum()
        overall = totals.sum() / float(nrated) if nrated else 0.0
        # np.maximum avoids dividing by zero for users without ratings; those
        # rows are then overridden with the overall mean.
        user_means = np.where(counts > 0,
                              totals / np.maximum(counts, 1),
                              overall)
        return np.where(known, ratings, user_means[:, np.newaxis])

    def test(self, fname):
        """Load held-out ratings from ``fname`` into ``self.Mtest``.

        Raises the same errors as the constructor for malformed lines or
        out-of-range ids.
        """
        for user, movie, rating in self._read_ratings(fname):
            self.Mtest[user][movie] = rating

    def train(self, keep_energy=0.8):
        """Compute the truncated SVD of ``M`` and the reconstruction ``Ahat``.

        Args:
            keep_energy: fraction of the total "energy" to retain, where
                energy is the plain sum of the singular values (not of their
                squares). The leading singular values are kept until their
                running sum reaches this fraction. Defaults to 0.8.
        """
        U, Sd, Vt = svd(np.asarray(self.M), full_matrices=False)

        target = np.sum(Sd) * keep_energy
        kept = 0.0
        rank = 0
        # The bound on ``rank`` guards against rounding leaving ``kept`` a
        # hair below ``target`` after every singular value has been added.
        while rank < len(Sd) and kept < target:
            kept += Sd[rank]
            rank += 1

        self.U = np.matrix(U[:, :rank])
        self.S = Sd[:rank].copy()
        self.Vt = np.matrix(Vt[:rank, :])
        self.Ahat = self.U * np.diag(self.S) * self.Vt

    def _rmse(self, errors):
        """Return the root of the mean of the squared ``errors``."""
        if errors.size == 0:
            # Same exception type the element-by-element loop produced.
            raise ZeroDivisionError("RMSE is undefined: no ratings to compare")
        return math.sqrt(float(np.sum(np.square(errors))) / errors.size)

    def calcrmsetest(self, arr):
        """Return the RMSE of ``Ahat`` against the ratings present in ``arr``.

        Args:
            arr: ``nusers x nmovies`` nested sequence of ratings in which
                ``None`` marks a missing rating (e.g. ``self.Mtest``).

        Raises:
            ZeroDivisionError: if ``arr`` contains no rating at all.

        ``train()`` must have been called first.
        """
        # With dtype=float NumPy turns None into NaN, which gives the mask
        # of cells that actually carry a rating.
        observed = np.array(arr, dtype=float)[:self.nusers, :self.nmovies]
        rated = ~np.isnan(observed)
        predicted = np.asarray(self.Ahat)[:self.nusers, :self.nmovies]
        return self._rmse(observed[rated] - predicted[rated])

    def calcrmsetrain(self):
        """Return the RMSE of ``Ahat`` against every cell of ``M``.

        The mean-filled cells of ``M`` are included, not only the original
        ratings. ``train()`` must have been called first.
        """
        return self._rmse(np.asarray(self.M) - np.asarray(self.Ahat))
if __name__ == "__main__":
    # Train on the "ua.base" split, evaluate on "ua.test", and report both
    # RMSE values together with the wall-clock time of the whole run.
    #
    # Single-argument print(...) with %-formatting runs unchanged on Python 2
    # and Python 3; the two spaces after each colon reproduce the output of
    # the former ``print "label: ", value`` statements.
    init = time.time()
    svdM = SvdMatrix("ua.base", 943, 1682)
    svdM.train()
    print("rmse of trainset:  %s" % svdM.calcrmsetrain())
    svdM.test("ua.test")
    print("rmse of testset:  %s" % svdM.calcrmsetest(svdM.Mtest))
    print("time used:  %s" % (time.time() - init))