linear_svm.py
import numpy as np
from random import shuffle
def svm_loss_naive(W, X, y, reg):
  """
  Structured SVM loss function, naive implementation (with loops).
  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.
  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength
  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
  dW = np.zeros(W.shape) # initialize the gradient as zero
  # compute the loss and the gradient
  num_classes = W.shape[1]
  num_train = X.shape[0]
  loss = 0.0
  for i in range(num_train):
    scores = X[i].dot(W)
    #print X.shape <- (500, 3073)
    #print X[i].shape <- (3073,)
    #print W.shape <- (3073,10)
    #print scores.shape <- (10,)
    correct_class_score = scores[y[i]]
    #print 'Example {}, correct class {}'.format(i, y[i])
    outOfMargin = 0
    for j in range(num_classes):
      margin = scores[j] - correct_class_score + 1 # note delta = 1
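      # Hinge term for class j: max(0, s_j - s_{y_i} + delta) with delta = 1;
      # only classes whose score comes within delta of the correct score contribute.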
      #print 'Example {}, class {}: margin = {}'.format(i, j, margin)
      if j == y[i]:
        continue
      if margin > 0:
        #print 'OutOfMargin !'
        dW[:, j] += X[i].T
        outOfMargin += 1
        loss += margin
      #else:
      #  #print 'InMargin !'
      #  dW[:, j] += np.zeros(X[i].T.shape)
    # Now handle the correct class case
    #print 'outOfMargin = {}'.format(outOfMargin)
    dW[:, y[i]] += -1 * outOfMargin * X[i].T
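    # Each margin-violating class contributed +X[i] to its own column above, while
    # the correct class column accumulates -X[i] once per violation, i.e.
    # dL_i/dW[:, y[i]] = -(number of classes with margin > 0) * X[i].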
    #print dW[:, y[i]]
  # Right now the loss is a sum over all training examples, but we want it
  # to be an average instead so we divide by num_train.
  loss /= num_train
  dW /= num_train
  # Add regularization to the loss.
  loss += 0.5 * reg * np.sum(W * W)
  dW += reg * W
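  # The 0.5 factor in the L2 penalty 0.5 * reg * sum(W * W) makes its gradient
  # come out to exactly reg * W, which is what is added to dW above.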
  #############################################################################
  # TODO:                                                                     #
  # Compute the gradient of the loss function and store it in dW.             #
  # Rather than first computing the loss and then computing the derivative,   #
  # it may be simpler to compute the derivative at the same time that the     #
  # loss is being computed. As a result you may need to modify some of the    #
  # code above to compute the gradient.                                       #
  #############################################################################
  return loss, dW
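
# A numerical gradient check is a useful sanity test for the analytic gradient
# computed above. The helper below is only a sketch added for illustration (the
# name `numerical_gradient`, the step size h, and the choice of 5 random entries
# are assumptions made here, not part of the original assignment code): it
# perturbs individual entries of W and returns finite-difference slopes that can
# be compared against the corresponding entries of dW.
def numerical_gradient(f, W, h=1e-5, num_checks=5, seed=0):
  """Return a list of (index, numeric_gradient) pairs for a few randomly
  chosen entries of W, where f(W) evaluates the scalar loss at W."""
  rng = np.random.RandomState(seed)
  checks = []
  for _ in range(num_checks):
    ix = tuple(rng.randint(n) for n in W.shape)  # pick a random entry of W
    old = W[ix]
    W[ix] = old + h
    fxph = f(W)   # loss with this entry nudged up
    W[ix] = old - h
    fxmh = f(W)   # loss with this entry nudged down
    W[ix] = old   # restore the original value
    checks.append((ix, (fxph - fxmh) / (2 * h)))
  return checks
# Example comparison against the analytic gradient (shapes chosen arbitrarily):
#   loss, dW = svm_loss_naive(W, X, y, 0.0)
#   for ix, num in numerical_gradient(lambda w: svm_loss_naive(w, X, y, 0.0)[0], W):
#     print(ix, num, dW[ix])
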
def svm_loss_vectorized(W, X, y, reg):
  """
  Structured SVM loss function, vectorized implementation.
  Inputs and outputs are the same as svm_loss_naive.
  """
  loss = 0.0
  dW = np.zeros(W.shape) # initialize the gradient as zero
  #############################################################################
  # TODO:                                                                     #
  # Implement a vectorized version of the structured SVM loss, storing the    #
  # result in loss.                                                           #
  #############################################################################
  num_classes = W.shape[1]
  num_train = X.shape[0]
  #print X.shape <- (500, 3073)
  #print X[i].shape <- (3073,)
  #print W.shape <- (3073,10)
  #print scores.shape <- (10,)
  scores = X.dot(W) # (500, 10) matrix of scores
  y = y[:, np.newaxis] # (500, 1) treat y like a col vector
  # Create a col vector of the correct score values
  rows = np.arange(num_train)
  rows = rows[:, np.newaxis]
  # Index the scores to pull out the correct values
  correctScores = scores[rows, y]
  # print 'correctScores.shape {} '.format(correctScores.shape)
  #
  # print scores[:2,]
  # print y[:2]
  # print correctScores[:2,]
  scoresDiff = scores - correctScores
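  # correctScores has shape (N, 1), so subtracting it from the (N, C) scores
  # matrix broadcasts the correct-class score across every column of each row.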
  #print scoresDiff[:2,]
  margins = np.maximum(0, scoresDiff + 1)
  margins[rows, y] = 0
  #print margins[:2,]
  #print y[:2,]
  # Final loss calculation - add up all margins and normalise by the number of
  # training examples
  loss = np.sum(margins)
  loss /= num_train
  # Don't forget to regularize
  loss += 0.5 * reg * np.sum(W * W)
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################
  #############################################################################
  # TODO:                                                                     #
  # Implement a vectorized version of the gradient for the structured SVM     #
  # loss, storing the result in dW.                                           #
  #                                                                           #
  # Hint: Instead of computing the gradient from scratch, it may be easier    #
  # to reuse some of the intermediate values that you used to compute the     #
  # loss.                                                                     #
  #############################################################################
  #print X.shape <- (500, 3073)
  #print X[i].shape <- (3073,)
  #print W.shape <- (3073,10)
  #print scores.shape <- (10,)
  # margins for the correct class have already been set to 0
  margins[margins > 0] = 1
  marginsSum = np.sum(margins, axis=-1) # Correct class already set to 0
  # Replace the correct-class entry with minus the count of margin-violating classes
  margins[rows, y] = -1 * marginsSum[:, np.newaxis]
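  # margins now holds, per example, the coefficient of X[i] for each class column:
  # +1 for every class that violated the margin and -(violation count) for the
  # correct class, matching the per-example updates in the naive loop.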
  #print '--- Vector section ---'
  #print margins[:2,]
  #print marginsSum[:2,]
  #print scores[:2,]
  #print y[:2,]
  # Now the margins matrix is ready, take the matrix multiply and normalize
  dW = X.T.dot(margins)
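  # X.T is (D, N) and margins is (N, C), so column j of dW is the sum over all
  # examples of margins[i, j] * X[i], i.e. the accumulated gradient for class j.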
  dW /= num_train
  # Now add in regularization
  dW += reg * W
  #############################################################################
  #                              END OF YOUR CODE                             #
  #############################################################################
  return loss, dW
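
# Quick self-check comparing the two implementations on random data. This is a
# sketch added for illustration (the shapes, regularization strength, and seed
# below are arbitrary choices, not part of the original assignment code): the
# naive and vectorized versions should agree on both the loss and the gradient.
if __name__ == "__main__":
  np.random.seed(0)
  D, C, N = 3073, 10, 500
  W = np.random.randn(D, C) * 0.0001
  X = np.random.randn(N, D)
  y = np.random.randint(C, size=N)
  reg = 0.5

  loss_naive, dW_naive = svm_loss_naive(W, X, y, reg)
  loss_vec, dW_vec = svm_loss_vectorized(W, X, y, reg)

  # Both differences should be very close to zero (up to floating point error).
  print('loss difference: {}'.format(abs(loss_naive - loss_vec)))
  print('gradient difference: {}'.format(np.linalg.norm(dW_naive - dW_vec, ord='fro')))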