loss.py
import numpy as np


class Loss:
    def __init__(self, name):
        self.name = name
        # small constant used to avoid log(0) and division by zero
        self.epsilon = 1e-8

    def loss_function(self, output, label):
        """
        :return: the loss summed over the samples in the batch
        """
        raise NotImplementedError()

    def error_function(self, output, label, activation_function):
        """
        Returns the derivative of loss_function with respect to the network output.
        :return: a boolean that is True if a "shortcut" combination with the activation
                 function was used, meaning that the activation derivative does not need
                 to be applied separately during backpropagation,
                 and a numpy array containing the derivative of the loss function
        """
        raise NotImplementedError()

class CrossEntropy(Loss):
    def loss_function(self, output, label):
        """
        The loss is summed over the samples in the batch.
        A small epsilon is added so the value inside the log is never 0.
        """
        loss = -np.sum(label * np.log(output + self.epsilon))
        return loss

    def error_function(self, output, label, activation_function):
        if activation_function.name == "softmax":
            return True, self.error_function_with_softmax(output, label)
        return False, self.regular_error_function(output, label)

    def error_function_with_softmax(self, output, label):
        # combined derivative of cross-entropy and softmax
        return output - label

    def regular_error_function(self, output, label):
        # epsilon avoids division by 0, analogous to the log in loss_function
        return -label / (output + self.epsilon)

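# --- Illustrative sketch (not part of the original file) ---------------------
# A minimal example of how the (shortcut, error) pair returned by
# CrossEntropy.error_function might be consumed. The _SoftmaxStub class and
# _demo_cross_entropy function are hypothetical names used only for this
# sketch; the project's real activation classes are assumed to expose a
# `name` attribute, which error_function above relies on.
class _SoftmaxStub:
    name = "softmax"


def _demo_cross_entropy():
    # softmax-like outputs and one-hot labels for a batch of 2 samples
    output = np.array([[0.7, 0.2, 0.1],
                       [0.1, 0.8, 0.1]])
    label = np.array([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0]])
    ce = CrossEntropy("cross_entropy")
    print("loss:", ce.loss_function(output, label))
    shortcut, error = ce.error_function(output, label, _SoftmaxStub())
    # shortcut is True here, so `error` is already the combined derivative
    # (output - label) and the softmax derivative must not be applied again
    print("shortcut:", shortcut)
    print("error:", error)
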
class BinaryCrossEntropy(Loss):
    """
    Helpful Stack Exchange answer on the derivative:
    https://math.stackexchange.com/questions/2503428/derivative-of-binary-cross-entropy-why-are-my-signs-not-right
    """
    def loss_function(self, output, label):
        inside_of_sum_term = label * np.log(output + self.epsilon) + (1 - label) * np.log(1 - output + self.epsilon)
        # axis=-1 because axis 0 is the batch dimension and we want the mean per sample
        loss = -np.sum(np.mean(inside_of_sum_term, axis=-1))
        return loss

    def error_function(self, output, label, activation_function):
        if activation_function.name == "sigmoid":
            return True, self.error_function_with_sigmoid(output, label)
        return False, self.regular_error_function(output, label)

    def error_function_with_sigmoid(self, output, label):
        # combined derivative of binary cross-entropy and sigmoid
        return output - label

    def regular_error_function(self, output, label):
        # here you can clearly see how the sigmoid derivative cancels out :)
        # epsilon prevents division by 0
        return (output - label) / (output * (1 - output) + self.epsilon)

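# --- Illustrative sketch (not part of the original file) ---------------------
# A small numerical check of the comment in regular_error_function above:
# multiplying the "regular" error by the sigmoid derivative a * (1 - a)
# recovers the shortcut form (output - label), up to the tiny epsilon term.
# _demo_bce_cancellation is a hypothetical helper name used only here.
def _demo_bce_cancellation():
    output = np.array([[0.9, 0.2],
                       [0.4, 0.7]])          # sigmoid activations
    label = np.array([[1.0, 0.0],
                      [0.0, 1.0]])
    bce = BinaryCrossEntropy("binary_cross_entropy")
    regular = bce.regular_error_function(output, label)
    shortcut = bce.error_function_with_sigmoid(output, label)
    # chain rule: dL/dz = dL/da * da/dz, with da/dz = a * (1 - a) for sigmoid
    recovered = regular * output * (1 - output)
    print(np.allclose(recovered, shortcut, atol=1e-6))  # prints True
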
class MeanSquaredError(Loss):
    def loss_function(self, output, label):
        # np.mean over the last axis already divides by the number of output nodes per sample
        return np.sum(np.mean(np.square(label - output), axis=-1))

    def error_function(self, output, label, activation_function):
        return False, self.regular_error_function(output, label)

    def regular_error_function(self, output, label):
        """
        derivative of the MSE with respect to the output:
        -2 * (label - output) / output_nodes
        """
        return -2 * (label - output) / output.shape[-1]

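# --- Illustrative sketch (not part of the original file) ---------------------
# A quick finite-difference check that regular_error_function matches the
# gradient of loss_function for a single-sample batch. _demo_mse_gradient is
# a hypothetical helper name used only for this sketch.
def _demo_mse_gradient():
    mse = MeanSquaredError("mse")
    output = np.array([[0.2, 0.7, 0.1]])
    label = np.array([[0.0, 1.0, 0.0]])
    analytic = mse.regular_error_function(output, label)
    # perturb each output node in turn and approximate the gradient numerically
    numeric = np.zeros_like(output)
    h = 1e-6
    for i in range(output.shape[-1]):
        bumped = output.copy()
        bumped[0, i] += h
        numeric[0, i] = (mse.loss_function(bumped, label) - mse.loss_function(output, label)) / h
    print(np.allclose(analytic, numeric, atol=1e-4))  # prints True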