# TL_model_embed.py
## imports assumed by this excerpt (the embedding tensors train_embedding / valid_embedding and
## the label tensors train_Ys / valid_Ys are assumed to be built earlier in the pipeline)
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

## device is not set in this excerpt; a standard choice is to use the GPU if one is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# create your dataset
train_dataset = TensorDataset(train_embedding, train_Ys)
batch_size = 64
## creating a dataloader
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# create your dataset
valid_dataset = TensorDataset(valid_embedding, valid_Ys)
## creating a dataloader
valid_dataloader = DataLoader(valid_dataset, shuffle=True, batch_size=batch_size)
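## optional sanity check (not in the original script): inspect one batch to confirm the tensor
## shapes the model below expects (the default input_dim suggests 2048-dimensional embeddings)
Xb, yb = next(iter(train_dataloader))
print(Xb.shape, yb.shape)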
class Deep_LR(torch.nn.Module):
    def __init__(self, input_dim=2048, hidden_dim=[], dropout_fraction=0.0):
        super().__init__()
        self.layers = nn.Sequential()
        # each hidden layer is made of a linear layer followed by a ReLU activation and a dropout layer
        for i in range(len(hidden_dim)):
            self.layers.append(nn.Linear(input_dim, hidden_dim[i]))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout_fraction))
            input_dim = hidden_dim[i]  ## update the input dimension for the next layer
        # final layer: a single output unit with a sigmoid, i.e. a logistic-regression head
        self.layers.append(nn.Linear(input_dim, 1))
        self.layers.append(nn.Sigmoid())

    def forward(self, x):
        out = self.layers(x)
        return out

model = Deep_LR().to(device)
print(model)
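## optional sanity check (not in the original script): push a dummy batch through the untrained
## model to confirm the output shape; the 2048 here matches the default input_dim above
with torch.no_grad():
    dummy = torch.randn(4, 2048, device=device)
    print(model(dummy).shape)  # expected: torch.Size([4, 1])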
## we also define this to easily get accuracy
def get_model_accuracy(model, dataloader):
    model.eval()  # evaluation mode, so dropout is disabled
    Ys = np.array([], dtype='float32')
    Ps = np.array([], dtype='float32')
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device)
            pred = model(X)
            ## bring labels and thresholded predictions back to the CPU before converting to numpy
            Ys = np.concatenate([Ys, y.squeeze().cpu().numpy()])
            Ps = np.concatenate([Ps, (pred > 0.5).squeeze().cpu().numpy()])
    return np.mean(Ys == Ps)
def train(dataloader, model, loss_fn, optimizer, echo=True, echo_batch=False):
    size = len(dataloader.dataset)  # how many samples we have in total
    model.train()  # sets the module in training mode
    for batch, (X, y) in enumerate(dataloader):  # for each batch
        X = X.to(device)  # send the data to the GPU or whatever device you use for training
        y = y.to(device)
        # Compute prediction error
        pred = model(X)  # prediction of the model -> forward pass
        loss = loss_fn(pred, y)  # loss function from these predictions
        # Backpropagation
        loss.backward()  # backward propagation
        # https://ml-cheatsheet.readthedocs.io/en/latest/backpropagation.html
        # https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html
        optimizer.step()
        optimizer.zero_grad()  # reset the gradients
        # https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch
        if echo_batch:
            current = batch * dataloader.batch_size + len(X)
            print(f"Train loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")
    if echo:
        print(f"Train loss: {loss.item():>7f}")
    # return the last batch loss
    return loss.item()
def valid(dataloader, model, loss_fn, echo=True):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()  # sets the module in evaluation mode
    valid_loss = 0
    with torch.no_grad():  ## disables gradient tracking: prevents accidental training + speeds up computation
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            valid_loss += loss_fn(pred, y).item()  ## accumulating the loss function over the batches
    valid_loss /= num_batches
    if echo:
        print(f"Valid Error: {valid_loss:>8f}")
    ## return the average loss per batch
    return valid_loss
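## EarlyStopping is not defined in this excerpt; the original presumably imports it from the
## pytorchtools helper (https://github.com/Bjarten/early-stopping-pytorch), which writes the best
## weights to 'checkpoint.pt'. Below is a minimal stand-in with the same interface so the script
## stays self-contained; swap in the real helper if you have it installed.
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, path='checkpoint.pt'):
        self.patience = patience  # how many epochs to wait after the last improvement
        self.verbose = verbose
        self.path = path          # where to store the best model weights
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss:
            # validation loss improved: reset the counter and checkpoint the model
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), self.path)
            if self.verbose:
                print(f"validation loss improved to {val_loss:.6f}, checkpoint saved")
        else:
            # no improvement: increase the counter and flag early stop once patience runs out
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True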
model = Deep_LR(hidden_dim=[1028,512]).to(device)
loss = nn.BCELoss()
## following the usage of https://github.com/liyu95/Deep_learning_examples/blob/master/4.ResNet_X-ray_classification/Densenet_fine_tune.ipynb
optimizer = torch.optim.SGD(model.parameters(), lr = 10**-2 )
## container to keep the scores across all epochs
train_scores = []
valid_scores = []
early_stopping = EarlyStopping(patience=10, verbose=False)
## naive performance
print( "train accuracy:", get_model_accuracy(model, train_dataloader) )
print( "valid accuracy:", get_model_accuracy(model, valid_dataloader) )
## let's do a single round, to learn how long it takes
train_scores.append(train(train_dataloader, model, loss, optimizer, echo=True, echo_batch=True))
valid_scores.append(valid(valid_dataloader, model, loss, echo=True))
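## optional (not in the original): the round above is meant to gauge runtime, so one simple way
## to actually measure it is to time one extra epoch explicitly
import time
start = time.perf_counter()
train_scores.append(train(train_dataloader, model, loss, optimizer, echo=False))
valid_scores.append(valid(valid_dataloader, model, loss, echo=False))
print(f"one train + valid epoch took {time.perf_counter() - start:.1f}s")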
n_epochs = 50
for t in range(n_epochs):
    echo = t % 10 == 0
    if echo:
        print('Epoch', len(train_scores) + 1)
    train_scores.append(train(train_dataloader, model, loss, optimizer, echo=echo, echo_batch=False))
    valid_scores.append(valid(valid_dataloader, model, loss, echo=echo))
    # early_stopping needs the validation loss to check if it has decreased,
    # and if it has, it will make a checkpoint of the current model
    early_stopping(valid_scores[-1], model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

# load the last checkpoint with the best model
model.load_state_dict(torch.load('checkpoint.pt'))
plt.plot(train_scores, label='train')
plt.plot(valid_scores, label='validation')
plt.axvline(np.argmin(valid_scores), linestyle='--', color='r', label='Early Stopping Checkpoint')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('BCE loss')
plt.show()

print("train accuracy:", get_model_accuracy(model, train_dataloader))
print("valid accuracy:", get_model_accuracy(model, valid_dataloader))
# For ref:
# * no hidden layer , SGD(10**-1) -> 0.970 , 0.878 in 1.5s
# * [1028,512] , SGD(10**-1) -> 0.976 , 0.885 in 30s
# * logistic regression : 0.82 on valid
# * with some l1 (C=774263682681127) -> 0.867
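## for comparison, a sketch of the plain logistic-regression baseline mentioned in the notes above
## (assumes the embeddings/labels are the torch tensors used by the TensorDataset earlier; the
## C value here is illustrative, not the one from the note)
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(penalty='l1', solver='liblinear', C=1.0, max_iter=1000)
clf.fit(train_embedding.cpu().numpy(), train_Ys.squeeze().cpu().numpy())
print("sklearn valid accuracy:", clf.score(valid_embedding.cpu().numpy(), valid_Ys.squeeze().cpu().numpy()))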