-
Notifications
You must be signed in to change notification settings - Fork 2
/
test.py~
executable file
·159 lines (120 loc) · 3.86 KB
/
test.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#Carlyn Lee
#test.py
#wrapper to test Extr_PCA_Feature. Projects over randomly selected negative and positive samples from 'imputed-liver.txt' and generates two dimensional data using primary components
#thank you to Dr. Charles Lee for helping me write this script and for the POD
from numpy import *
import random
def Extr_PCA_Features(xData,indInt):
ns=len(indInt)
x=xData[:,indInt]
gridSize= x.shape[0]
theta=zeros((ns,ns))
theta=mat(theta)
for i in range(0,ns):
vi=x[:,i]
for j in range(i,ns):
vj=x[:,j]
theta[i,j]= float(vi.transpose() * vj /gridSize/ns)
theta[j,i]= theta[i,j]
lam,u=linalg.eig(theta)
I = argsort(-1*lam)
lam=lam[I,:]
u=u[I,:]
#normalize so that first eigenvector has unit length
for i in range(i,ns):
u[:,i]=u[:,i]/sum(abs(u[:,i]))
#normalized eigenvectors
normPhi=zeros(ns)
phi=zeros((gridSize,ns))
phi=mat(phi)
for i in range(0,ns):
for j in range(0,ns):
phi[:,i]=phi[:,i]+u[j,i]*x[:,j]
phi[:,i]=phi[:,i]/linalg.norm(phi[:,i])
normPhi[i]=phi[:,i].transpose() * phi[:,i]
#Determine sign for dominant eigenvector
sumP1=zeros(ns)
projMatA=zeros((xData.shape[1],ns))
for j in range(0,ns):
sumP1[j]=0
for i in range(0,ns):
sumP1[j]=sumP1[j]+sum( x[:,i].transpose() * phi[:,j]/normPhi[j])
if sumP1[j] < 0:
phi[:,j]=-phi[:,j]
for i in range(0, xData.shape[1]):
projMatA[i,j] = xData[:,i].transpose() * phi[:,j]/normPhi[j]
return projMatA
nIndex=range(115,191)
nni=len(nIndex)
primaryTIndexAll=[3]
primaryTIndexAll.extend(range(8,112))
npta=len(primaryTIndexAll)
infile = open('imputed-liver.txt', 'r')
xraw=[]
for line in infile:
line.strip()
xraw.append(line.split())
infile.close()
for i in xrange(0,len(xraw)):
for j in xrange(0,len(xraw[0])):
xraw[i][j]=float(xraw[i][j])
xraw=mat(xraw)
#extract positives and negatives from data set
pxraw=xraw[:,primaryTIndexAll]
nxraw=xraw[:,nIndex]
xraw=bmat('pxraw nxraw')
#subtract the means of each row to eliminate outliers
mean=xraw.sum(1)/xraw.shape[1]
xraw=xraw-mean
class_num=2
primaryTIndexAll=range(0,npta)
nIndex=range(0+npta,npta+nni)
Ngenes=xraw.shape[0]
Ns=xraw.shape[1]
N_class1=len(primaryTIndexAll)
divide=random.sample(primaryTIndexAll,npta)
#separate positve and negative sets into train, validation, test sets
N_class1Train=int(round(N_class1*0.08))
N_class1Val=int(round(N_class1*0.32))
N_class1Test=int(round(N_class1*0.60))
TrainIndclass1= divide[0:N_class1Train]
ValIndclass1= divide[N_class1Train:N_class1Train+N_class1Val]
TestIndclass1= divide[N_class1Train+N_class1Val:N_class1Train+N_class1Val+N_class1Test]
N_class2=len(nIndex)
divide=random.sample(nIndex,nni)
N_class2Train=int(round(N_class2*0.08))
N_class2Val=int(round(N_class2*0.32))
N_class2Test=int(round(N_class2*0.60))
TrainIndclass2= divide[0:N_class2Train]
ValIndclass2= divide[N_class2Train:N_class2Train+N_class2Val]
TestIndclass2= divide[N_class2Train+N_class2Val:N_class2Train+N_class2Val+N_class2Test]
#POD
Cls1projMatA=Extr_PCA_Features(xraw,TrainIndclass1)
Cls2projMatA=Extr_PCA_Features(xraw,TrainIndclass2)
#using only principal components (normalized between 0 and 1)
Nfea=2
a=Cls1projMatA[:,1]
a=mat(a)
a= ( a-float(a.min(1)) )/( float(a.max(1)) - float(a.min(1)) )
b=Cls1projMatA[:,1]
b=mat(b)
b= ( b-float(b.min(1)) )/( float(b.max(1)) - float(b.min(1)) )
LiverData=bmat('a; b')
targets=list( ones(N_class1) )
targets.extend( list(zeros(N_class2)) )
targets=array(targets)
#construct train set and targets
extr_TrainInd=TrainIndclass1
extr_TrainInd.extend(TrainIndclass2)
TrainSet=LiverData[:,extr_TrainInd]
TrainTargets=targets[extr_TrainInd]
#construct validation set and targets
extr_ValInd=ValIndclass1
extr_ValInd.extend(ValIndclass2)
ValSet=LiverData[:,extr_ValInd]
ValTargets=targets[extr_ValInd]
#construct test set and targets
extr_TestInd=TestIndclass1
extr_TestInd.extend(TestIndclass2)
TestSet=LiverData[:,extr_TestInd]
TeTargets=targets[extr_TestInd]