fully_connected_layer.go
package cnns

import (
    "fmt"
    "math/rand"

    "github.com/LdDl/cnns/tensor"
    "github.com/pkg/errors"
    "gonum.org/v1/gonum/mat"
)

// FullyConnectedLayer FC is a simple layer structure (so this layer can be used for simple neural networks, e.g. for the XOR problem)
/*
    Oj - O{j}, activated output of the previous layer for the j-th neuron (in other words: this layer's input)
    Ok - O{k}, activated output of the current layer for the k-th neuron (in other words: the activated summation input)
    SumInput - Σ{k}, non-activated output of the current layer for the k-th neuron (in other words: the summation input)
    LocalDelta - δ{k}, delta of the current layer for the k-th neuron
    NextDeltaWeightSum - SUM(δ{k}*w{j,k}), summation component for evaluating δ{j} of the previous layer for the j-th neuron
    Weights - w{j,k}, weight from the j-th neuron of the previous layer to the k-th neuron of the current layer
*/
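/*
    For reference, the backward pass implemented below follows the standard delta rule
    (η is the learning rate, μ is the momentum; see CalculateGradients() and UpdateWeights()):
        δ{k} = ∂E/∂O{k} * f'(Σ{k})
        summation component for the previous layer = SUM(δ{k}*w{j,k})
        Δw{j,k} = -η*(1-μ)*δ{k}*O{j} + μ*Δw_prev{j,k}
*/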
type FullyConnectedLayer struct {
    Oj                   *mat.Dense
    Ok                   *mat.Dense
    NextDeltaWeightSum   *mat.Dense
    Weights              *mat.Dense
    PreviousWeightsState *mat.Dense
    LocalDelta           *mat.Dense
    SumInput             *mat.Dense
    ActivationFunc       func(v float64) float64
    ActivationDerivative func(v float64) float64
    OutputSize           *tensor.TDsize
    inputSize            *tensor.TDsize
    trainMode            bool
}

// NewFullyConnectedLayer Constructor for the fully-connected layer. You need to specify the input size and the output size
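//
// A minimal usage sketch (the TDsize values below are illustrative, not part of this file):
//    in := &tensor.TDsize{X: 2, Y: 1, Z: 1} // two input values
//    layer := NewFullyConnectedLayer(in, 1) // one output neuron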
func NewFullyConnectedLayer(inSize *tensor.TDsize, outSize int) Layer {
    newLayer := &FullyConnectedLayer{
        inputSize:            inSize,
        OutputSize:           &tensor.TDsize{X: outSize, Y: 1, Z: 1},
        Ok:                   &mat.Dense{},
        Oj:                   mat.NewDense(inSize.Total(), 1, nil), // Oj holds this layer's input (previous layer's activated output), so it has inSize.Total() rows
        SumInput:             mat.NewDense(outSize, 1, nil),
        Weights:              mat.NewDense(outSize, inSize.Total(), nil),
        PreviousWeightsState: mat.NewDense(outSize, inSize.Total(), nil),
        ActivationFunc:       ActivationTanh,           // Default activation function is TanH
        ActivationDerivative: ActivationTanhDerivative, // Default derivative of activation function is 1 - TanH(x)*TanH(x)
        trainMode:            false,
    }
    newLayer.PreviousWeightsState.Zero()
    // Initialize weights uniformly in [-0.5, 0.5)
    for i := 0; i < outSize; i++ {
        for h := 0; h < inSize.Total(); h++ {
            newLayer.Weights.Set(i, h, rand.Float64()-0.5)
        }
    }
    return newLayer
}

// SetCustomWeights Set user's weights for the fully-connected layer (use it carefully)
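//
// The slice must contain exactly one matrix of shape outSize x inSize.Total().
// A hypothetical sketch for a layer with 2 inputs and 1 output neuron:
//    fc.SetCustomWeights([]*mat.Dense{mat.NewDense(1, 2, []float64{0.5, -0.5})})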
func (fc *FullyConnectedLayer) SetCustomWeights(weights []*mat.Dense) {
    if len(weights) != 1 {
        fmt.Println("You must provide a slice of exactly one weights matrix (for a fully-connected layer)")
        return
    }
    r, c := weights[0].Dims()
    fc.Weights = mat.NewDense(r, c, nil)
    fc.Weights.CloneFrom(weights[0])
}

// GetInputSize Returns dimensions of the incoming data for the fully-connected layer
func (fc *FullyConnectedLayer) GetInputSize() *tensor.TDsize {
    return fc.inputSize
}

// GetOutputSize Returns output size (dimensions) of the fully-connected layer
func (fc *FullyConnectedLayer) GetOutputSize() *tensor.TDsize {
    return fc.OutputSize
}

// GetActivatedOutput Returns the fully-connected layer's output
func (fc *FullyConnectedLayer) GetActivatedOutput() *mat.Dense {
    return fc.Ok // ACTIVATED values
}

// GetWeights Returns the fully-connected layer's weights
func (fc *FullyConnectedLayer) GetWeights() []*mat.Dense {
    return []*mat.Dense{fc.Weights}
}

// GetGradients Returns the fully-connected layer's gradients matrix
func (fc *FullyConnectedLayer) GetGradients() *mat.Dense {
    return fc.NextDeltaWeightSum
}

// FeedForward Feed data to the fully-connected layer
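//
// The input may come in any shape whose total element count matches inSize.Total();
// it is reshaped into a column vector internally (via Reshape). A hypothetical sketch:
//    err := fc.FeedForward(mat.NewDense(2, 1, []float64{0.0, 1.0}))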
func (fc *FullyConnectedLayer) FeedForward(input *mat.Dense) error {
    r, _ := input.Dims()
    _, weightsCC := fc.Weights.Dims()
    if r != weightsCC {
        // Try to reshape input Dense to match matrix multiplication
        temp, err := Reshape(input, weightsCC, 1)
        if err != nil {
            return errors.Wrap(err, "Can't call FeedForward() on fully-connected layer [1]")
        }
        fc.Oj = temp
    } else {
        fc.Oj.CloneFrom(input)
    }
    err := fc.doActivation()
    if err != nil {
        return errors.Wrap(err, "Can't call FeedForward() on fully-connected layer [2]")
    }
    return nil
}

// doActivation Applies the activation function to the fully-connected layer's output
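//
// The computation is: Σ = Weights·Oj (a copy is kept in SumInput), then Ok = f(Σ) applied element-wise.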
func (fc *FullyConnectedLayer) doActivation() error {
    if fc.Oj == nil {
        return fmt.Errorf("can't call doActivation() on fully-connected layer: input Oj is nil")
    }
    fc.Ok.Mul(fc.Weights, fc.Oj)
    fc.SumInput.Copy(fc.Ok)
    rawMatrix := fc.Ok.RawMatrix().Data
    for i := range rawMatrix {
        rawMatrix[i] = fc.ActivationFunc(rawMatrix[i])
    }
    return nil
}

// CalculateGradients Evaluate fully-connected layer's gradients
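//
// In the notation above: LocalDelta = errorsDense ⊙ f'(SumInput) (element-wise product),
// and NextDeltaWeightSum = Weightsᵀ·LocalDelta, which carries the error back to the previous layer.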
func (fc *FullyConnectedLayer) CalculateGradients(errorsDense *mat.Dense) error {
    // Evaluate ∂O{k}/∂Σ{k} (note: this overwrites SumInput in place with the derivative values)
    rawMatrix := fc.SumInput.RawMatrix().Data
    for i := range rawMatrix {
        rawMatrix[i] = fc.ActivationDerivative(rawMatrix[i])
    }
    // Evaluate ∂E{k}/∂O{k} * ∂O{k}/∂Σ{k}
    fc.LocalDelta = &mat.Dense{}
    fc.LocalDelta.MulElem(errorsDense, fc.SumInput)
    // Evaluate ∂E{k}/∂O{k} for the next layers in the backpropagation direction
    fc.NextDeltaWeightSum = &mat.Dense{}
    fc.NextDeltaWeightSum.Mul(fc.Weights.T(), fc.LocalDelta)
    return nil
}

// UpdateWeights Update fully-connected layer's weights
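//
// The update applies gradient descent with momentum:
//    Δw = -η*(1-μ)*LocalDelta·Ojᵀ + μ*Δw_prev, then w = w + Δw
// where η is lp.LearningRate and μ is lp.Momentum.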
func (fc *FullyConnectedLayer) UpdateWeights(lp *LearningParams) {
    // Evaluate ∂Σ{k}/∂w{j,k}
    Δw := &mat.Dense{}
    Δw.Mul(fc.LocalDelta, fc.Oj.T())
    Δw.Scale(-1.0*lp.LearningRate, Δw)
    // Momentum (inertia) term, applied as a separate Scale() call
    // @todo - this should be optional.
    Δw.Scale(1.0-lp.Momentum, Δw)
    fc.PreviousWeightsState.Scale(lp.Momentum, fc.PreviousWeightsState)
    Δw.Add(Δw, fc.PreviousWeightsState)
    fc.PreviousWeightsState.CloneFrom(Δw)
    // Update weights: w = w + Δw
    fc.Weights.Add(fc.Weights, Δw)
}

// PrintOutput Pretty print fully-connected layer's output
func (fc *FullyConnectedLayer) PrintOutput() {
    fmt.Println("Printing fully-connected Layer output...")
    rows, _ := fc.Ok.Dims()
    for r := 0; r < rows; r++ {
        fmt.Printf("\t%v\n", fc.Ok.RawRowView(r))
    }
}

// PrintWeights Pretty print fully-connected layer's weights
func (fc *FullyConnectedLayer) PrintWeights() {
    fmt.Println("Printing fully-connected Layer weights...")
    rows, _ := fc.Weights.Dims()
    for r := 0; r < rows; r++ {
        fmt.Printf("\t%v\n", fc.Weights.RawRowView(r))
    }
}

// SetActivationFunc Set activation function for the fully-connected layer. You need to specify the function: func(v float64) float64
func (fc *FullyConnectedLayer) SetActivationFunc(f func(v float64) float64) {
    fc.ActivationFunc = f
}

// SetActivationDerivativeFunc Set derivative of the activation function for the fully-connected layer. You need to specify the function: func(v float64) float64
func (fc *FullyConnectedLayer) SetActivationDerivativeFunc(f func(v float64) float64) {
    fc.ActivationDerivative = f
}
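
// A hypothetical sketch of swapping the default TanH for ReLU
// (the derivative must match the chosen activation function):
//    fc.SetActivationFunc(func(v float64) float64 {
//        if v > 0 {
//            return v
//        }
//        return 0
//    })
//    fc.SetActivationDerivativeFunc(func(v float64) float64 {
//        if v > 0 {
//            return 1
//        }
//        return 0
//    })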

// GetStride Returns stride of the fully-connected layer (not applicable here, so it is always 0)
func (fc *FullyConnectedLayer) GetStride() int {
    return 0
}

// GetType Returns "fc" as the layer's type
func (fc *FullyConnectedLayer) GetType() string {
    return "fc"
}