neuralnet_starter.py
import numpy as np
import pickle
config = {}
config['layer_specs'] = [784, 100, 100, 10]  # Layer sizes: the first element is the input dimension, the last is the output dimension, and the elements in between are the hidden layer sizes.
config['activation'] = 'sigmoid' # Takes values 'sigmoid', 'tanh' or 'ReLU'; denotes activation function for hidden layers
config['batch_size'] = 1000 # Number of training samples per batch to be passed to network
config['epochs'] = 50 # Number of epochs to train the model
config['early_stop'] = True # Implement early stopping or not
config['early_stop_epoch'] = 5  # Number of consecutive epochs of increasing validation loss before training stops (taken as a sign of overfitting)
config['L2_penalty'] = 0 # Regularization constant
config['momentum'] = False # Denotes if momentum is to be applied or not
config['momentum_gamma'] = 0.9 # Denotes the constant 'gamma' in momentum expression
config['learning_rate'] = 0.0001 # Learning rate of gradient descent algorithm
def softmax(x):
  """
  Softmax activation function: takes in a numpy array and returns a numpy array.
  """
  # Subtract the row-wise max for numerical stability before exponentiating,
  # and normalize along the last axis so batched inputs are handled correctly.
  exps = np.exp(x - np.max(x, axis=-1, keepdims=True))
  output = exps / np.sum(exps, axis=-1, keepdims=True)
  return output
def load_data(fname):
  """
  Read the data and return it as 2 numpy arrays.
  Labels are converted to one-hot encoded format.
  """
  # Assumes the pickle holds an array whose rows are the 784 pixel values
  # followed by the integer class label.
  images = []
  labels = []
  with open(fname, 'rb') as d:
    data = pickle.load(d)
  for datum in data:
    label_onehot = np.zeros(10)
    images.append(datum[:-1])
    label_onehot[int(datum[-1])] = 1
    labels.append(label_onehot)
  images = np.array(images)
  labels = np.array(labels)
  return images, labels
class Activation:
def __init__(self, activation_type = "sigmoid"):
self.activation_type = activation_type
self.x = None # Save the input 'x' for sigmoid or tanh or ReLU to this variable since it will be used later for computing gradients.
  def forward_pass(self, a):
    if self.activation_type == "sigmoid":
      return self.sigmoid(a)
    elif self.activation_type == "tanh":
      return self.tanh(a)
    elif self.activation_type == "ReLU":
      return self.ReLU(a)
def backward_pass(self, delta):
if self.activation_type == "sigmoid":
grad = self.grad_sigmoid()
elif self.activation_type == "tanh":
grad = self.grad_tanh()
elif self.activation_type == "ReLU":
grad = self.grad_ReLU()
return grad * delta
def sigmoid(self, x):
"""
Write the code for sigmoid activation function that takes in a numpy array and returns a numpy array.
"""
self.x = x
output = 1.0/(1.0+np.exp(-self.x))
return output
def tanh(self, x):
"""
Write the code for tanh activation function that takes in a numpy array and returns a numpy array.
"""
self.x = x
output = np.tanh(self.x)
return output
def ReLU(self, x):
"""
Write the code for ReLU activation function that takes in a numpy array and returns a numpy array.
"""
self.x = x
output = np.maximum(0,self.x)
return output
def grad_sigmoid(self):
"""
Write the code for gradient through sigmoid activation function that takes in a numpy array and returns a numpy array.
"""
grad = np.divide(np.exp(-self.x),np.square(1.0+np.exp(-self.x)))
return grad
def grad_tanh(self):
"""
Write the code for gradient through tanh activation function that takes in a numpy array and returns a numpy array.
"""
grad = 1.0-np.square(np.tanh(self.x))
return grad
def grad_ReLU(self):
"""
Write the code for gradient through ReLU activation function that takes in a numpy array and returns a numpy array.
"""
    grad = 1.0 * (self.x > 0)
return grad
class Layer():
def __init__(self, in_units, out_units):
np.random.seed(42)
self.w = np.random.randn(in_units, out_units) # Weight matrix
self.b = np.zeros((1, out_units)).astype(np.float32) # Bias
self.x = None # Save the input to forward_pass in this
self.a = None # Save the output of forward pass in this (without activation)
self.d_x = None # Save the gradient w.r.t x in this
self.d_w = None # Save the gradient w.r.t w in this
self.d_b = None # Save the gradient w.r.t b in this
  def forward_pass(self, x):
    """
    Forward pass through the layer. Do not apply the activation function here.
    """
    self.x = x
    # x has shape (batch_size, in_units) and w has shape (in_units, out_units),
    # so the pre-activation is x.w + b.
    self.a = np.dot(self.x, self.w) + self.b
    return self.a
  def backward_pass(self, delta):
    """
    Backward pass through the layer. Takes in the gradient from the next layer,
    computes the gradients for the weights and bias, and returns the delta to
    pass to the previous layer.
    """
    self.d_w = np.dot(self.x.T, delta)
    self.d_b = np.sum(delta, axis=0, keepdims=True)
    self.d_x = np.dot(delta, self.w.T)
    return self.d_x
class Neuralnetwork():
def __init__(self, config):
self.layers = []
self.x = None # Save the input to forward_pass in this
self.y = None # Save the output vector of model in this
self.targets = None # Save the targets in forward_pass in this variable
for i in range(len(config['layer_specs']) - 1):
self.layers.append( Layer(config['layer_specs'][i], config['layer_specs'][i+1]) )
if i < len(config['layer_specs']) - 2:
self.layers.append(Activation(config['activation']))
  def forward_pass(self, x, targets=None):
    """
    Forward pass through all layers of the model; returns the loss and predictions.
    If targets is None, the loss is None. Otherwise the computed loss is returned.
    """
    self.x = x
    self.targets = targets
    self.y = np.copy(x)
    for l in self.layers:
      self.y = l.forward_pass(self.y)
    self.y = softmax(self.y)
    loss = self.loss_func(self.y, targets)
    return loss, self.y
  def loss_func(self, logits, targets):
    '''
    Cross-entropy loss between logits and targets; returns None if targets is None.
    '''
    output = None
    if targets is not None:
      output = np.sum(-targets * np.log(logits))
    return output
  def backward_pass(self):
    '''
    Backward pass for the whole network, reusing the per-layer backward passes.
    '''
    # For softmax outputs with cross-entropy loss, the output-layer delta
    # simplifies to (targets - y).
    delta = self.targets - self.y
    for l in self.layers[::-1]:
      delta = l.backward_pass(delta)
def trainer(model, X_train, y_train, X_valid, y_valid, config):
  """
  Train the network. Uses values from config to set parameters
  such as the L2 penalty, number of epochs, momentum, etc.
  """
def test(model, X_test, y_test, config):
  """
  Run the model on the data passed as input and return the accuracy.
  """
  _, predictions = model.forward_pass(X_test, y_test)
  accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
  return accuracy
if __name__ == "__main__":
train_data_fname = 'MNIST_train.pkl'
valid_data_fname = 'MNIST_valid.pkl'
test_data_fname = 'MNIST_test.pkl'
### Train the network ###
model = Neuralnetwork(config)
X_train, y_train = load_data(train_data_fname)
X_valid, y_valid = load_data(valid_data_fname)
X_test, y_test = load_data(test_data_fname)
trainer(model, X_train, y_train, X_valid, y_valid, config)
test_acc = test(model, X_test, y_test, config)
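  # Print the final test-set accuracy.
  print('Test accuracy:', test_acc)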