import numpy as np
from scipy import signal
import random
import math
class FCLayer():
"""Fully connected layer; neurons connect to every neuron in the
previous layer, with randomized weights and biases
"""
def __init__(self, prevNeurons, numNeurons, actFunc="relu"):
"""FCLayer constructor
Arguments:
prevNeurons {int} -- the number of neurons in the previous layer
numNeurons {int} -- the number of neurons in this layer
Keyword Arguments:
actFunc {str} -- the layer's activation function: "relu", "sigmoid" (default: {"relu"})
"""
self.actFunc = actFunc
self.prevNeurons = prevNeurons
self.numNeurons = numNeurons
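        # Weights and biases are initialized uniformly at random in [-1, 1)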
self.weights = np.matrix([[random.random()*2-1 for j in range(prevNeurons)] for i in range(numNeurons)])
self.biases = np.matrix([random.random()*2-1 for i in range(numNeurons)]).transpose()
class ConvLayer():
"""Convolutional layer + activation function, with stride size 1
"""
def __init__(self, currentInputRows, currentInputCols, numFeatureMaps=5, windowRows=4, windowCols=4, actFunc="relu"):
"""ConvLayer constructor
        Arguments:
            currentInputRows {int} -- number of rows in this layer's input
            currentInputCols {int} -- number of cols in this layer's input
        Keyword Arguments:
numFeatureMaps {int} -- number of feature maps, which are each applied to the input image (default: {5})
windowRows {int} -- number of rows in the sliding feature map window (default: {4})
windowCols {int} -- number of cols in the sliding feature map window (default: {4})
actFunc {str} -- the layer's activation function: "relu", "sigmoid" (default: {"relu"})
"""
self.outputRows = currentInputRows - windowRows + 1
self.outputCols = currentInputCols - windowCols + 1
self.numFeatureMaps = numFeatureMaps
self.windowRows = windowRows
self.windowCols = windowCols
self.actFunc = actFunc # "sigmoid", "relu"
self.weights = []
self.biases = []
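        # One weight window and one bias per feature map, initialized
        # uniformly at random in [-1, 1)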
for _ in range(numFeatureMaps):
            self.weights.append(np.matrix([[random.random()*2-1 for j in range(windowCols)] for i in range(windowRows)]))
self.biases.append(random.random()*2-1)
class PoolingLayer():
"""Pooling layer, shrinks input images based on its activation function
"""
def __init__(self, windowRows=2, windowCols=2, strideSize=2, actFunc="max"):
"""PoolingLayer constructor
Keyword Arguments:
windowRows {int} -- number of rows in the sliding pooling window (default: {2})
windowCols {int} -- number of cols in the sliding pooling window (default: {2})
strideSize {int} -- number of rows/cols to step the window (default: {2})
actFunc {str} -- the layer's pooling mode: "max" (default: {"max"})
"""
self.windowRows = windowRows
self.windowCols = windowCols
self.strideSize = strideSize
self.actFunc = actFunc
        # Absolute (row, col) coordinates of the neurons selected by max
        # pooling on the most recent forward pass, used by backpropagation
self.forwardSelections = []
class ConvNet():
"""Convolutional neural network. Expected architecture is mixed convolutional and pooling
layers, followed by some number of fully connected layers. Expected input is a 2D list
"""
def __init__(self, inRows, inCols, learningRate=.1, layerTypes=[], layerInputs=[]):
"""ConvNet constructor
Arguments:
inRows {int} -- number of rows in the input image
inCols {int} -- number of cols in the input image
Keyword Arguments:
learningRate {float} -- the degree to which backpropagation adjusts weights
during the gradient descent algorithm (default: {.1})
layerTypes {list} -- a list of the layer types: "fc" for FCLayer, "c" for ConvLayer,
"p" for PoolingLayer (default: {[]})
layerInputs {list} -- a list of parameters for the layers; for ConvLayers
and PoolingLayers, a tuple of parameters is expected; for FCLayer,
an int for the number of neurons in that layer (default: {[]})
"""
self.sigFunc = np.vectorize(ConvNet.sigmoid)
self.dsigFunc = np.vectorize(ConvNet.dsigmoid)
self.reluFunc = np.vectorize(ConvNet.relu)
self.dreluFunc = np.vectorize(ConvNet.drelu)
self.inRows = inRows
self.inCols = inCols
self.learningRate = learningRate
self.layers = []
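        # Track the shape and number of maps flowing through the network so
        # each layer can be constructed with the right input size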
inputIsPicture = True
currentInputRows = inRows
currentInputCols = inCols
currentFeatureMaps = 1
for i in range(len(layerTypes)):
if layerTypes[i] == "c":
                if layerInputs[i] is None:
newLayer = ConvLayer(currentInputRows, currentInputCols)
else:
                    newLayer = ConvLayer(currentInputRows, currentInputCols, *layerInputs[i])
self.layers.append(newLayer)
currentFeatureMaps *= newLayer.numFeatureMaps
currentInputRows = newLayer.outputRows
currentInputCols = newLayer.outputCols
elif layerTypes[i] == "p":
                if layerInputs[i] is None:
newLayer = PoolingLayer()
else:
newLayer = PoolingLayer(*layerInputs[i])
self.layers.append(newLayer)
currentInputRows = math.ceil(currentInputRows/newLayer.strideSize)
currentInputCols = math.ceil(currentInputCols/newLayer.strideSize)
elif layerTypes[i] == "fc":
if inputIsPicture:
inputIsPicture = False
inputNeurons = currentFeatureMaps * currentInputRows * currentInputCols
else:
inputNeurons = self.layers[-1].numNeurons
self.layers.append(FCLayer(inputNeurons, layerInputs[i]))
@staticmethod
def sigmoid(x):
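        # Logistic function; note that math.exp(-x) can overflow for large
        # negative x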
return 1 / (1 + math.exp(-x))
@staticmethod
def dsigmoid(x):
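        # Takes the sigmoid's *output* y rather than its input x: dy/dx = y*(1-y)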
return x * (1-x)
@staticmethod
def relu(x):
return max(0, x)
@staticmethod
def drelu(x):
        # The derivative is undefined at exactly 0; 0 is used by convention
        return 1 if x > 0 else 0
@staticmethod
def flatten(inputs, vertical=False):
"""Flattens the 2D numpy matrices taken in, and concatenates them"""
output = inputs[0].flatten()
for inputMat in inputs[1:]:
output = np.append(output, inputMat)
if vertical:
output.shape = (len(output), 1)
return output
@staticmethod
    def unflatten(inputMat, rows, cols):
        """Splits a flat vector back into a list of rows x cols matrices"""
        outputs = np.split(inputMat, len(inputMat)//(rows*cols))
for mat in outputs:
mat.shape = (rows, cols)
return outputs
def fullyConnectedOperation(self, layer, inputs):
"""Returns the output of the FCLayer when given the inputs"""
if layer.actFunc == "relu":
outputs = self.reluFunc(layer.weights * inputs + layer.biases)
elif layer.actFunc == "sigmoid":
outputs = self.sigFunc(layer.weights * inputs + layer.biases)
return outputs
def layerConvolute(self, layer, inputs):
"""Returns the output of the ConvLayer when given the inputs"""
outputs = []
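        # Every input map is convolved with every feature map, so the number
        # of output maps is len(inputs) * numFeatureMaps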
for inputMat in inputs:
for i in range(layer.numFeatureMaps):
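                # scipy's convolve2d performs a true (kernel-flipping)
                # convolution; since the weights are learned, this is
                # equivalent in expressive power to cross-correlation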
if layer.actFunc == "relu":
newMat = self.reluFunc(signal.convolve2d(layer.weights[i], inputMat, "valid") + layer.biases[i])
elif layer.actFunc == "sigmoid":
newMat = self.sigFunc(signal.convolve2d(layer.weights[i], inputMat, "valid") + layer.biases[i])
outputs.append(newMat)
return outputs
def pool(self, layer, inputs):
"""Returns the output of the PoolingLayer when given the inputs"""
if layer.actFunc == "max":
layer.forwardSelections = []
outputs = []
for inputMat in inputs:
inputRows = inputMat.shape[0]
inputCols = inputMat.shape[1]
newMat = np.empty((math.ceil(inputRows/layer.strideSize), math.ceil(inputCols/layer.strideSize)))
newMatRow = 0
for overallRow in range(0, inputRows, layer.strideSize):
newMatCol = 0
for overallCol in range(0, inputCols, layer.strideSize):
if layer.actFunc == "max":
                        maxVal = -math.inf
                        chosenRow = -1
                        chosenCol = -1
                        # Clamp the window so it never reads past the edge of the input
                        for row in range(min(layer.windowRows, inputRows-overallRow)):
                            for col in range(min(layer.windowCols, inputCols-overallCol)):
                                newVal = inputMat[overallRow+row, overallCol+col]
                                if newVal > maxVal:
                                    maxVal = newVal
                                    chosenRow = row
                                    chosenCol = col
                        newMat[newMatRow, newMatCol] = maxVal
                        # Store absolute coordinates so backpropagation can route
                        # each error back to the neuron that produced the max
                        layer.forwardSelections.append((overallRow+chosenRow, overallCol+chosenCol))
newMatCol += 1
newMatRow += 1
outputs.append(newMat)
return outputs
def feedForwardHelper(self, inputs):
"""Internal feedForward method, returns a list of the outputs at each layer"""
currentInputIsPicture = True
outputsL = []
for layer in self.layers:
if currentInputIsPicture and type(layer) is FCLayer:
inputs = ConvNet.flatten(inputs, True)
currentInputIsPicture = False
if type(layer) is FCLayer:
inputs = self.fullyConnectedOperation(layer, inputs)
elif type(layer) is ConvLayer:
inputs = self.layerConvolute(layer, inputs)
elif type(layer) is PoolingLayer:
inputs = self.pool(layer, inputs)
outputsL.append(inputs)
return outputsL
def feedForward(self, inputs):
"""Feeds the input image through the network and returns the output as a list
Arguments:
inputs {list} -- 2D list representing an image
Returns:
list -- the output of the output layer as a list
"""
output = self.feedForwardHelper([np.matrix(inputs)])[-1].tolist()
return [i[0] for i in output]
    def maxPoolingError(self, dError, prevShape, poolLayer, mapIndex=0):
        """Returns the new dError propagated back through a PoolingLayer;
        each error value is routed to the input position that won the max
        on the forward pass. mapIndex selects this map's slice of
        poolLayer.forwardSelections"""
        newMat = np.zeros(prevShape)
        loc = mapIndex * dError.shape[0] * dError.shape[1]
        for row in range(dError.shape[0]):
            for col in range(dError.shape[1]):
                newMat[poolLayer.forwardSelections[loc][0], poolLayer.forwardSelections[loc][1]] = dError[row, col]
                loc += 1
        return newMat
    def convLayerError(self, layerIndex, dError, inputs, outputsL):
        """Backpropagates the error through a ConvLayer, updating its weights
        and biases; dError is a list with one matrix per output map. Pairing
        output map i with input map i % len(inputs) simplifies multi-map backprop"""
        currentLayer = self.layers[layerIndex]
        prevOutputs = outputsL[layerIndex-1] if layerIndex > 0 else [inputs]
        newDError = []
        for i in range(currentLayer.numFeatureMaps):
            prevMat = prevOutputs[i % len(prevOutputs)]
            if layerIndex > 0:
                # Error for the previous layer: full convolution of the error
                # with the rotated (pre-update) weights, scaled by the
                # activation derivative at the previous layer's outputs
                propagated = signal.convolve2d(dError[i], np.rot90(currentLayer.weights[i], 2), "full")
                if currentLayer.actFunc == "relu":
                    propagated = np.multiply(propagated, self.dreluFunc(prevMat))
                elif currentLayer.actFunc == "sigmoid":
                    propagated = np.multiply(propagated, self.dsigFunc(prevMat))
                newDError.append(propagated)
            # Weight gradient: valid convolution of the error map with the
            # rotated layer input
            weightsChange = signal.convolve2d(dError[i], np.rot90(prevMat, 2), "valid")
            currentLayer.weights[i] += weightsChange * self.learningRate
            currentLayer.biases[i] += np.sum(dError[i]) * self.learningRate
        return newDError if layerIndex > 0 else dError
    def FCLayerError(self, layerIndex, dError, inputs, outputsL):
        """Backpropagates the error through an FCLayer, updating its weights
        and biases with gradient descent"""
        currentLayer = self.layers[layerIndex]
        if layerIndex > 0:
            prevOutput = outputsL[layerIndex-1]
            if isinstance(prevOutput, list):
                # The previous layer was conv/pooling; use its flattened output
                prevOutput = ConvNet.flatten(prevOutput, True)
        else:
            prevOutput = ConvNet.flatten([inputs], True)
        if layerIndex > 0:
            # Error propagated to the previous layer, computed with the
            # pre-update weights (assumes the previous activation is relu)
            newDError = np.multiply(currentLayer.weights.transpose() * dError,
                                    self.dreluFunc(prevOutput))
        currentLayer.weights += dError * prevOutput.transpose() * self.learningRate
        currentLayer.biases += dError * self.learningRate
        if layerIndex > 0:
            dError = newDError
        return dError
def backpropagate(self, inputs, expOutputs, outputsL):
"""Adjusts the weights and biases of the network with gradient descent"""
        # Starting error at the output layer, scaled by the derivative of
        # the output layer's activation function
        if self.layers[-1].actFunc == "sigmoid":
            dError = np.multiply(expOutputs-outputsL[-1], self.dsigFunc(outputsL[-1]))
        else:
            dError = np.multiply(expOutputs-outputsL[-1], self.dreluFunc(outputsL[-1]))
        errorIsFlat = True
        for layerIndex in range(len(self.layers)-1, -1, -1):
            currentLayer = self.layers[layerIndex]
            if type(currentLayer) is FCLayer:
                dError = self.FCLayerError(layerIndex, dError, inputs, outputsL)
            elif type(currentLayer) is ConvLayer:
                if errorIsFlat:
                    errorIsFlat = False
                    dError = ConvNet.unflatten(dError, currentLayer.outputRows, currentLayer.outputCols)
                dError = self.convLayerError(layerIndex, dError, inputs, outputsL)
            elif type(currentLayer) is PoolingLayer:
                if layerIndex > 0:
                    if errorIsFlat:
                        # The error is still a flat vector from the FC layers;
                        # reshape it to match this layer's pooled output maps
                        errorIsFlat = False
                        pooledRows, pooledCols = outputsL[layerIndex][0].shape
                        dError = ConvNet.unflatten(dError, pooledRows, pooledCols)
                    if currentLayer.actFunc == "max":
                        # Route each map's error back to the input positions
                        # that won the max on the forward pass
                        prevShape = outputsL[layerIndex-1][0].shape
                        dError = [self.maxPoolingError(mat, prevShape, currentLayer, mapIndex)
                                  for mapIndex, mat in enumerate(dError)]
def train(self, inputsL, expOutputsL = []):
"""Train the network on a pair of inputs and expected outputs
Arguments:
inputsL {list} -- 2D list representing an image
Keyword Arguments:
expOutputsL {list} -- the expected output of the network.
If this is not provided, inputsL is assumed to contain
the inputs as its first element and the expected output
as its second (default: {[]})
"""
        if expOutputsL != []:
            inputs = np.matrix(inputsL)
            expOutputs = np.matrix(expOutputsL).transpose()
        else:
            inputs = np.matrix(inputsL[0])
            expOutputs = np.matrix(inputsL[1]).transpose()
        # The image is wrapped in a list so the forward pass sees the same
        # format that feedForward uses
        outputsL = self.feedForwardHelper([inputs])
        self.backpropagate(inputs, expOutputs, outputsL)
def trainMultiple(self, examples, numTrain):
"""Train the network on random choices of input/output pairs
in examples, numTrain times
Arguments:
examples {list} -- a list of input/expected output pairs
numTrain {int} -- number of times to sample an example randomly
and train the network on it
"""
for _ in range(numTrain):
self.train(random.choice(examples))
if __name__ == "__main__":
n = ConvNet(30, 30, .1, ["c", "p", "c", "p", "fc", "fc"],
[None, None, None, None, 40, 5])
print(n.feedForward([[i for i in range(30)] for j in range(30)]))
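    # A minimal training sketch on toy data: the checkerboard image and the
    # one-hot target below are made up for illustration, not from a real
    # dataset. Each call samples the example and nudges the weights toward
    # the expected output.
    example = ([[(i+j) % 2 for i in range(30)] for j in range(30)],
               [1, 0, 0, 0, 0])
    n.trainMultiple([example], 10)
    print(n.feedForward(example[0]))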