-
Notifications
You must be signed in to change notification settings - Fork 18
/
ChemUtils.py
186 lines (148 loc) · 6.46 KB
/
ChemUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#Spectral Utilities
from __future__ import print_function
import numpy as np
import scipy #TODO reimplement as Numpy only
from scipy import newaxis as nA
class GlobalStandardScaler(object):
"""Scales to unit standard deviation and mean centering using global mean and std of X, skleran like API"""
def __init__(self,with_mean=True, with_std=True, normfact=1.0):
self._with_mean = with_mean
self._with_std = with_std
self.std = None
self.normfact=normfact
self.mean = None
self._fitted = False
def fit(self,X, y = None):
X = np.array(X)
self.mean = X.mean()
self.std = X.std()
self._fitted = True
def transform(self,X, y=None):
if self._fitted:
X = np.array(X)
if self._with_mean:
X=X-self.mean
if self._with_std:
X=X/(self.std*self.normfact)
return X
else:
print("Scaler is not fitted")
return
def inverse_transform(self,X, y=None):
if self._fitted:
X = np.array(X)
if self._with_std:
X=X*self.std*self.normfact
if self._with_mean:
X=X+self.mean
return X
else:
print("Scaler is not fitted")
return
def fit_transform(self,X, y=None):
self.fit(X)
return self.transform(X)
class SavGolFilt(object):
"""Applies a Savitsky-Golay filter of order k and frame width F.
The order must be odd and the frame width (F) a positive integer of
a value greater than k
"""
#TODO use the scipy implementation
def __init__(self, order=1, width=11):
self.k = order
self.frame = width
def transform(self,myarray,y=None):
"""Applies a Savitsky-Golay filter of order k and frame width F.
The order must be odd and the frame width (F) a positive integer of
a value greater than k
"""
frange = scipy.arange(-(self.frame-1)/2,((self.frame-1)/2)+1)
f, vande = 0, scipy.zeros((self.frame,self.frame))
while f < self.frame: # compute Vandemonde matrix
vande[f,:] = frange**f
f = f+1
vande = scipy.transpose(vande,(1,0))
vande = vande[:,0:self.k+1]
Q,R = scipy.linalg.qr(vande,vande.shape[1]) # Do QR decomposition
# print vande.shape
# print Q.shape
# print R[0:vande.shape[1]]
G = scipy.dot(vande,scipy.dot(scipy.linalg.inv(R[0:vande.shape[1]]),
scipy.transpose(scipy.linalg.inv(R[0:vande.shape[1]])))) # Find the matrix of differentiators
B = scipy.dot(G,scipy.transpose(vande)) # Projection matrix
myarray = scipy.transpose(myarray)
extract_array, extract_B = myarray[0:self.frame,:], B[(((self.frame-1)/2)+1):self.frame,:]
start_array = scipy.dot(extract_B[::-1],extract_array[::-1]) # first bins
array_size = myarray.shape
last, mid_array = (self.frame-1)/2, scipy.zeros((array_size[0],array_size[1]),'d')
extract_B = scipy.reshape(B[((self.frame-1)/2),:],(self.frame,1))
while last < array_size[0]-((self.frame-1)/2):
mid_array[last,:] = sum((extract_B*myarray[last-((self.frame-1)/2):last+((self.frame-1)/2)+1,:]),0) #middle bit
last = last+1
extract_array, extract_B = myarray[array_size[0]-self.frame:array_size[0],:], B[0:(self.frame-1)/2,:]
end_array = scipy.dot(extract_B[::-1],extract_array[::-1]) # last bins
mid_array[0:(self.frame-1)/2,:], mid_array[array_size[0]-((self.frame-1)/2):array_size[0],:] = start_array, end_array
return scipy.transpose(mid_array)
def fit(self, X,y=None):
print("Fit not needed for filter")
pass
def fit_transform(self, X,y=None):
return self.transform(X)
class EmscScaler(object):
def __init__(self,order=1):
self.order = order
self._mx = None
def mlr(self,x,y):
"""Multiple linear regression fit of the columns of matrix x
(dependent variables) to constituent vector y (independent variables)
order - order of a smoothing polynomial, which can be included
in the set of independent variables. If order is
not specified, no background will be included.
b - fit coeffs
f - fit result (m x 1 column vector)
r - residual (m x 1 column vector)
"""
if self.order > 0:
s=scipy.ones((len(y),1))
for j in range(self.order):
s=scipy.concatenate((s,(scipy.arange(0,1+(1.0/(len(y)-1)),1.0/(len(y)-1))**j)[:,nA]),1)
X=scipy.concatenate((x, s),1)
else:
X = x
#calc fit b=fit coefficients
b = scipy.dot(scipy.dot(scipy.linalg.pinv(scipy.dot(scipy.transpose(X),X)),scipy.transpose(X)),y)
f = scipy.dot(X,b)
r = y - f
return b,f,r
def inverse_transform(self, X, y=None):
print("Warning: inverse transform not possible with Emsc")
return X
def fit(self, X, y=None):
"""fit to X (get average spectrum), y is a passthrough for pipeline compatibility"""
self._mx = scipy.mean(X,axis=0)[:,nA]
def transform(self, X, y=None, copy=None):
if type(self._mx) == type(None):
print("EMSC not fit yet. run .fit method on reference spectra")
else:
#do fitting
corr = scipy.zeros(X.shape)
for i in range(len(X)):
b,f,r = self.mlr(self._mx, X[i,:][:,nA])
corr[i,:] = scipy.reshape((r/b[0,0]) + self._mx, (corr.shape[1],))
return corr
def fit_transform(self, X, y=None):
self.fit(X)
return self.transform(X)
def dataaugment(x, betashift = 0.05, slopeshift = 0.05,multishift = 0.05):
#Shift of baseline
#calculate arrays
beta = np.random.random(size=(x.shape[0],1))*2*betashift-betashift
slope = np.random.random(size=(x.shape[0],1))*2*slopeshift-slopeshift + 1
#Calculate relative position
axis = np.array(range(x.shape[1]))/float(x.shape[1])
#Calculate offset to be added
offset = slope*(axis) + beta - axis - slope/2. + 0.5
#Multiplicative
multi = np.random.random(size=(x.shape[0],1))*2*multishift-multishift + 1
x = multi*x + offset
return x