LSTM_train.py
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1"
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
from keras import Input, Model, regularizers
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, TensorBoard, ModelCheckpoint
import numpy as np
from keras.layers import Bidirectional, LSTM, TimeDistributed, Dense, Permute, Lambda, RepeatVector, multiply, \
    Flatten, CuDNNLSTM, Softmax, Multiply
from modules.utils import read_from_file, read_features, mask_data, phase_length, sample_weights, cal_avg_len, \
    train_generator, vali_generator
from sklearn.utils import class_weight
# import tensorflow as tf
# from keras.backend.tensorflow_backend import set_session
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.3
# set_session(tf.Session(config=config))
local_feats_path = '/Users/seanxiang/data/cholec80/feats/'
remote_feats_path = '/home/cxia8134/dev/baseline/feats/'
model_name = 'BiLSTM-500nodes-noMask-densenetFeats-1'
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=7, min_lr=0.5e-6, mode='auto')
early_stopper = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)
tensor_board = TensorBoard('log/' + model_name)
# save the model only when validation loss improves
checkpointer = ModelCheckpoint(filepath='/home/cxia8134/dev/baseline/temp/' + model_name
                                        + '-{epoch:02d}-{val_loss:.2f}.hdf5',
                               verbose=1,
                               save_best_only=True)

def attention_3d_block(inputs):
    # inputs.shape = (batch_size, time_steps, input_dim)
    # Soft attention over timesteps: learn a weight per timestep and rescale
    # the inputs with it. Note the Dense width must equal the (fixed) number
    # of timesteps, so this block assumes fixed-length sequences; n_nodes
    # stands in for that length here.
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    a = Dense(n_nodes, activation='softmax', name='weighting')(a)
    if SINGLE_ATTENTION_VECTOR:
        # collapse to one attention vector shared across feature dimensions
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul
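
# Hedged usage sketch (mirrors the commented-out call further down); assumes
# the inputs are padded to a fixed length equal to the Dense width above:
# x = Bidirectional(LSTM(n_nodes, return_sequences=True))(inputs)
# x = attention_3d_block(x)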

def attention_block(inputs, length):
    # Run a BiLSTM, score every timestep with a shared Dense(1), softmax the
    # scores over the time axis, and reweight the BiLSTM outputs with them.
    lstm = Bidirectional(LSTM(length, return_sequences=True,
                              dropout=0.5,
                              recurrent_dropout=0.5,
                              kernel_regularizer=regularizers.l2(l2_norm)))(inputs)
    attention = TimeDistributed(Dense(1))(lstm)
    attention = Softmax(axis=1, name='attention_vec')(attention)
    context = Multiply(name='attention_mul')([attention, lstm])
    return context
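
# Hedged usage sketch: the attended context keeps its time axis, so it can
# feed the same TimeDistributed classifier built below:
# x = attention_block(inputs, attention_length)
# out = TimeDistributed(Dense(nb_classes, activation='softmax'))(x)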

n_nodes = 500           # LSTM units per direction
nb_epoch = 200
nb_classes = 7          # number of surgical phases in Cholec80
batch_size = 1          # one variable-length video per batch
n_feat = 2048           # overwritten below depending on the feature extractor
max_len = 6000
l2_norm = 0.01
attention_length = 100
SINGLE_ATTENTION_VECTOR = False
# select which pre-extracted CNN features to train on
feats = ['resnet', 'densenet'][1]
if feats == 'resnet':
    n_feat = 2048
elif feats == 'densenet':
    n_feat = 1024
path = remote_feats_path
X_train, Y_train = read_features(path, feats, 'train')
X_vali, Y_vali = read_features(path, feats, 'vali')
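# Assumed data layout (inferred from how the generators consume these): X_* is
# a list of per-video feature arrays of shape (n_frames, n_feat), and Y_* the
# matching one-hot phase labels of shape (n_frames, nb_classes).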
# TODO: append frame id to feature
# X_train_m, Y_train_, M_train = mask_data(X_train, Y_train, max_len, mask_value=-1)
# X_vali_m, Y_vali_, M_vali = mask_data(X_vali, Y_vali, max_len, mask_value=-1)
# calculate sample weights based on ground-truth label distributions
# sample_weights = sample_weights(Y_train)
# find the average length of the training samples
# avg_len = cal_avg_len(X_train)
inputs = Input(shape=(None, n_feat))
# model = attention_block(inputs, 100)
model = Bidirectional(LSTM(n_nodes,
                           return_sequences=True,
                           dropout=0.5,
                           name='bilstm',
                           recurrent_dropout=0.25))(inputs)
# model = LSTM(n_nodes,
#              return_sequences=True,
#              dropout=0.5,
#              name='lstm',
#              recurrent_dropout=0.25)(inputs)
# attention layer
# model = attention_3d_block(model)
# Output FC layer
model = TimeDistributed(Dense(nb_classes, activation="softmax"))(model)
model = Model(inputs=inputs, outputs=model)
# model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              sample_weight_mode="temporal",
              metrics=['accuracy'])
model.summary()
# train on videos with sample weighting
# model.fit(x=X_train_m,
#           y=Y_train_,
#           validation_data=(X_vali_m, Y_vali_, M_vali[:, :, 0]),
#           epochs=nb_epoch,
#           batch_size=batch_size,
#           verbose=1,
#           # sample_weight=M_train[:, :, 0],
#           sample_weight=sample_weights,
#           callbacks=[lr_reducer, early_stopper, tensor_board, checkpointer])
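
# Minimal sketch of the generator contract assumed by fit_generator below.
# The real train_generator/vali_generator live in modules.utils; this
# hypothetical helper only illustrates the expected shapes: one video per
# step with a leading batch axis of 1, matching batch_size = 1 and the
# (None, n_feat) input signature.
def _generator_sketch(X, Y):
    while True:
        for x, y in zip(X, Y):
            yield np.expand_dims(x, 0), np.expand_dims(y, 0)
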
model.fit_generator(train_generator(X_train, Y_train),
                    verbose=1,
                    epochs=nb_epoch,
                    steps_per_epoch=50,
                    validation_steps=10,
                    validation_data=vali_generator(X_vali, Y_vali),
                    callbacks=[lr_reducer, early_stopper, tensor_board, checkpointer])
model.save('trained/' + model_name + '.h5')
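
# Usage note: the saved weights can later be restored with Keras' standard
# loader (not part of the original training run):
# from keras.models import load_model
# model = load_model('trained/' + model_name + '.h5')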