-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAdalineGD.py
168 lines (140 loc) · 5.53 KB
/
AdalineGD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
class AdalineGD(object):
"""
Adaptive linear neuron classifier
"""
def __init__(self, eta=0.01, epochs=10):
"""
:param eta: float
Learning rate [0.0, 1.0]
:param epochs: int
Number of iterations to pass over trainingset
:param w_: numpy.array
shape = [n_features]
Weights after fitting
:param errors_: list
Number of miscalculations in every epoch
"""
self.eta = eta
self.epochs = epochs
def fit(self, X, y):
"""
Fit training data, training step
..math:
J(w) = \frac{1}{2}\Sigma_i(y_i - phi(z_i))^2
\phi(z_i) = w^Tx
w = w + \delta w
\dela w = \eta * -\nabla J(w)
\nabla J(w) = - \Sigma_i (y_i - \phi(z_i))x_i
:param X: numpy.array
shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features
:param Y: numpy.array
shape=[n_samples]
Target values
:return: self: object
"""
# Weight is a vector of size of the features + 1 which is the
# threshold theta
self.w_ = np.zeros(1 + X.shape[1])
self.cost_ = []
for i in range(self.epochs):
# Calculate w^Tx
output = self.net_input(X)
errors = (y - output)
# Gradient for w, \nabla J(w)
self.w_[0] += self.eta * errors.sum()
self.w_[1:] += self.eta * np.dot(X.T, errors)
# Sum squared error (SSE)
cost = np.square(errors).sum() / 2.0
self.cost_.append(cost)
return
def net_input(self, X):
"""
Calculate the net input
..math:
w^Tx
:param X: Feature matrix
:return: numpy.array
Dot product of `X` and weights `w_`
"""
# wTx + w0, w0 is theta and w[1:] are the weights without theta
return np.dot(X, self.w_[1:]) + self.w_[0]
def activation(self, X):
return self.net_input(X)
def predict(self, X,):
return np.where(self.activation(X) >= 0.0, 1, -1)
def plot_learning_rate_sse(X, y):
ada1 = AdalineGD(epochs=10, eta=0.01)
ada1.fit(X, y)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')
ada2 = AdalineGD(eta=0.0001, epochs=10)
ada2.fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')
plt.show()
def plot_decision_regions(X, y, classifier, resolution=0.22):
from matplotlib.colors import ListedColormap
# setup marker generator and color map
markers = ('s', 'x', 'o', '^', 'v')
colors = ('red', 'blue', 'lightblue', 'gray', 'cyan')
cmap = ListedColormap(colors[:len(np.unique(y))])
# plot decision surface
x1_min, x1_max = np.min(X[:, 0]) - 1, np.max(X[:, 0]) + 1
x2_min, x2_max = np.min(X[:, 1]) - 1, np.max(X[:, 1]) + 1
# create a pair of grid arrays
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(
x2_min, x2_max, resolution))
# classifier model is trained on two feature dimensions, we need to
# flatten the grid arrays and create a matrix which has the same number
# of columns as the Iris data subset, then it is possible to use the
# predict method to predict the class labels
z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
# reshape predicted labels into grid with same dimension as xx1 and xx2
z = z.reshape(xx1.shape)
plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)
plt.xlim(xx1.min(), xx1.max())
plt.ylim(xx2.min(), xx2.max())
# plot class samples
for idx, cl in enumerate(np.unique(y)):
plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8, cmap=cmap(
idx), marker=markers[idx], label=cl)
def plot_standardized_sse(X, y, eta, epochs):
X_std = np.copy(X)
X_std[:, 0] = (X_std[:, 0] - X_std[:, 0].mean()) / X_std[:, 0].std()
X_std[:, 1] = (X_std[:, 1] - X_std[:, 1].mean()) / X_std[:, 1].std()
ada = AdalineGD(eta=eta, epochs=epochs)
ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.title('Adaline eta: {}, epochs: {}'.format(eta, epochs))
plt.show()
plt.title('Adaline eta: {}, epochs: {}'.format(eta, epochs))
plt.xlabel('Epochs')
plt.ylabel('log(Sum-squared-error)')
plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='o')
plt.show()
# load iris data set
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases'
'/iris/iris.data', header=None)
# choose setose and versicolor, from 0 to 100
y = df.iloc[0:100, 4].values
# transform to labels +1 and -1, where setosa is -1 and versicolor is +1
y = np.where(y == 'Iris-setosa', -1, 1)
# sepal length and petal length, column 0 and 2
X = df.iloc[:100, [0, 2]].values
plot_learning_rate_sse(X, y)
plot_standardized_sse(X, y, eta=0.01, epochs=20)