-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataVis.py
65 lines (52 loc) · 2.12 KB
/
dataVis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import random
import numpy as np
import scipy.sparse as sp
import matplotlib.pyplot as plt
class DataVisualization:
    """PCA-based visualization on MovieLens data sets.

    Holds a dense ``M x N`` matrix. Each of the ``N`` columns is treated as
    one sample: it is projected onto the principal directions of the data
    and drawn as a single point in the plot.
    """

    def __init__(self, M, N, txt_data=None, reg_data=None):
        """Build the dense ``M x N`` data matrix.

        Parameters
        ----------
        M, N : int
            Number of rows and columns of the data matrix.
        txt_data : str or file-like, optional
            Whitespace-separated text file of integers. In every record the
            first field is the 1-based column index, the second field the
            1-based row index and the third field the value stored there;
            any further fields are ignored. Takes precedence over
            ``reg_data`` when both are given.
            NOTE(review): for MovieLens ``u.data`` this presumably means
            columns are users and rows are movies -- confirm.
        reg_data : array-like, optional
            Ready-made data that can be assigned into an ``M x N`` array.

        Raises
        ------
        ValueError
            If neither ``txt_data`` nor ``reg_data`` is supplied.
        """
        if txt_data is None and reg_data is None:
            # Fail at construction time instead of leaving a half-built
            # object that only breaks later inside gen_plot().
            raise ValueError("either txt_data or reg_data must be provided")

        self._M = M
        self._N = N
        # Column-major storage, as the columns are the samples.
        self._data = np.zeros((M, N), order='F')

        if txt_data is not None:
            # atleast_2d: a file with a single record is parsed as 1-D.
            raw_data = np.atleast_2d(np.genfromtxt(txt_data, dtype=np.int32))
            col_ids = raw_data[:, 0] - 1
            row_ids = raw_data[:, 1] - 1
            values = raw_data[:, 2].astype(np.float64)
            # Convenient to build a sparse matrix first; the SVD needs a
            # dense one. Duplicate (row, col) entries are summed.
            R = sp.coo_matrix((values, (row_ids, col_ids)), shape=(M, N))
            self._data[:, :] = R.toarray()
        else:
            self._data[:, :] = reg_data

    def _principal_components(self):
        """Center the data and project it onto its principal directions.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(min(M, N), N)``; row ``k`` holds the ``k``-th
            principal component of every column of the data.

        Side effect: ``self._data`` is replaced by its row-centered version
        (the mean over the columns is subtracted from every row).
        """
        row_means = self._data.mean(axis=1)
        self._data = self._data - row_means.reshape((self._M, 1))
        U, _, _ = np.linalg.svd(self._data, full_matrices=False)
        return U.T.dot(self._data)

    def gen_plot(self, figname, special=None):
        """Save a scatter plot of the first two principal components.

        Parameters
        ----------
        figname : str
            Path the figure is written to (passed to ``plt.savefig``).
        special : sequence of int, optional
            Column indices drawn with a distinct marker (green triangles)
            instead of the regular one (yellow squares).
        """
        # None instead of a mutable default; copy so the caller's sequence
        # is never shared or modified.
        special = [] if special is None else list(special)
        highlighted = set(special)
        regular = [i for i in range(self._N) if i not in highlighted]

        princ_comps = self._principal_components()

        # Generate plot: one call per marker group instead of one per point.
        plt.figure()
        if regular:
            idx = np.asarray(regular, dtype=np.intp)
            plt.plot(princ_comps[0, idx], princ_comps[1, idx], 'sy')
        # Give a different marker to those in the special list.
        if special:
            idx = np.asarray(special, dtype=np.intp)
            plt.plot(princ_comps[0, idx], princ_comps[1, idx], '^g')
        plt.xlabel('First Principal Component')
        plt.ylabel('Second Principal Component')
        plt.savefig(figname)
# Plot the MovieLens 100k data set only when this file is run as a script;
# when imported, the module just provides the class interface.
def main():
    """Plot the MovieLens 100k data, marking columns 5 and 9 as special."""
    visualizer = DataVisualization(
        M=1682,
        N=943,
        txt_data='data/ml-100k/u.data',
    )
    visualizer.gen_plot('plots/init100k.pdf', special=[5, 9])


if __name__ == "__main__":
    main()