-
Notifications
You must be signed in to change notification settings - Fork 5
/
lstm_testing.py
executable file
·184 lines (168 loc) · 7.45 KB
/
lstm_testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: winston lin
"""
import numpy as np
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers import LSTM, Input, Multiply
import random
import time
from utils import getPaths, DynamicChunkSplitTestingData, evaluation_metrics
from scipy.io import loadmat
from model_utils import mean, reshape
from model_utils import atten_gated, atten_rnn, atten_selfMH, output_net
import argparse
# Ignore warnings & Fix random seed
import warnings
warnings.filterwarnings("ignore")
# Seed Python's RNG; `random_seed` is reused further down to seed NumPy
# before the test set is shuffled into subsets.
random.seed(999)
random_seed = 99

# Command-line interface.
# The parser is bound to its own name (`parser`) so that the `argparse`
# module itself is not shadowed by the parser instance.
parser = argparse.ArgumentParser()
parser.add_argument("-ep", "--epoch", required=True, type=int,
                    help="number of training epochs (part of the weight file name)")
parser.add_argument("-batch", "--batch_size", required=True, type=int,
                    help="training batch size (part of the weight file name)")
# Accepted values mirror the branches handled later in this script, so a typo
# is rejected at start-up instead of failing after the data has been loaded.
parser.add_argument("-emo", "--emo_attr", required=True,
                    choices=['Act', 'Dom', 'Val'],
                    help="emotional attribute to evaluate")
parser.add_argument("-atten", "--atten_type", required=True,
                    choices=['GatedVec', 'RnnAttenVec', 'SelfAttenVec', 'NonAtten'],
                    help="attention variant of the model to rebuild")
args = vars(parser.parse_args())

# Parameters
batch_size = int(args['batch_size'])
epochs = int(args['epoch'])
emo_attr = args['emo_attr']
atten_type = args['atten_type']
# Data/Label/Model Dir
label_dir = '/media/winston/UTD-MSP/Speech_Datasets/MSP-PODCAST-Publish-1.6/Labels/labels_concensus.csv'
root_dir = '/media/winston/UTD-MSP/Speech_Datasets/MSP-PODCAST-Publish-1.6/Features/OpenSmile_lld_IS13ComParE/feat_mat/'
# The weight file name encodes epochs, batch size, attention type and emotion.
#model_path = './Models/LSTM_model[epoch'+str(epochs)+'-batch'+str(batch_size)+']_'+atten_type+'_'+emo_attr+'_weights.h5'
model_path = './trained_model_v1.6/LSTM_model[epoch'+str(epochs)+'-batch'+str(batch_size)+']_'+atten_type+'_'+emo_attr+'_weights.h5'

# Loading Norm-Parameters
# Feature statistics are applied to every utterance before prediction.
Feat_mean = loadmat('./NormTerm/feat_norm_means.mat')['normal_para']
Feat_std = loadmat('./NormTerm/feat_norm_stds.mat')['normal_para']

# Label statistics are stored per emotional attribute; the file prefix is the
# lower-case attribute name. An unknown attribute is rejected here, otherwise
# Label_mean/Label_std would stay undefined and the script would only fail
# with a NameError when the predictions are de-normalised.
_label_norm_prefix = {'Act': 'act', 'Dom': 'dom', 'Val': 'val'}
if emo_attr not in _label_norm_prefix:
    raise ValueError(
        "Unsupported emo_attr {!r}; expected one of {}".format(
            emo_attr, sorted(_label_norm_prefix)))
_label_norm_stem = './NormTerm/' + _label_norm_prefix[emo_attr]
# [0][0] extracts the single scalar stored in the 'normal_para' entry.
Label_mean = loadmat(_label_norm_stem + '_norm_means.mat')['normal_para'][0][0]
Label_std = loadmat(_label_norm_stem + '_norm_stds.mat')['normal_para'][0][0]

# Regression Task
# getPaths is a project helper; it is used below as two parallel, indexable
# collections (file names and their target labels) for the requested split.
test_file_path, test_file_tar = getPaths(label_dir, split_set='Test', emo_attr=emo_attr)
#test_file_path, test_file_tar = getPaths(label_dir, split_set='Validation', emo_attr=emo_attr)
# Setting Online Prediction Model Graph (predict sentence by sentence rather than a data batch)
time_step = 62   # same as the number of frames within a chunk (i.e., m)
feat_num = 130   # number of LLDs features

# Reject unknown attention types before any layer is created; otherwise no
# branch would define `model` and the failure would surface as a NameError
# at load_weights.
_atten_types = ('GatedVec', 'RnnAttenVec', 'SelfAttenVec', 'NonAtten')
if atten_type not in _atten_types:
    raise ValueError(
        "Unsupported atten_type {!r}; expected one of {}".format(
            atten_type, list(_atten_types)))

# LSTM Layer
# The encoder is identical for every attention variant, so it is built once.
# Layers are still created in the same order as before (Input, LSTM, LSTM,
# BatchNormalization, reshape, attention, output).
inputs = Input((time_step, feat_num))
encode = LSTM(units=feat_num, activation='tanh', dropout=0.5, return_sequences=True)(inputs)
encode = LSTM(units=feat_num, activation='tanh', dropout=0.5, return_sequences=False)(encode)
encode = BatchNormalization()(encode)
# `reshape` comes from model_utils; presumably it regroups the per-chunk
# encodings of one sentence -- confirm against model_utils.
encode = reshape()(encode)

# Attention Layer
# Each variant reduces `encode` to the vector that is fed to the output net.
if atten_type == 'GatedVec':
    a_weighted = atten_gated(feat_num=feat_num, C=11)(encode)
    attention_vector = Multiply()([encode, a_weighted])
    attention_vector = mean()(attention_vector)
elif atten_type == 'RnnAttenVec':
    attention_vector = atten_rnn(feat_num=feat_num, C=11)(encode)
elif atten_type == 'SelfAttenVec':
    attention_vector = atten_selfMH(feat_num=feat_num, C=11)(encode)
else:
    # 'NonAtten': no attention module, only the `mean` layer from model_utils.
    attention_vector = mean()(encode)

# Output Layer
outputs = output_net(feat_num)(attention_vector)
model = Model(inputs=inputs, outputs=outputs)

# loading model trained weights
model.load_weights(model_path)
#print(model.summary())
# Split testing set into 15 subsets for statistical T-test
subset_num = 15
if len(test_file_path) < subset_num:
    # With fewer utterances than subsets some subsets would be empty and
    # their scores meaningless, so fail early with a clear message.
    raise ValueError(
        "Need at least {} test utterances to build {} subsets, got {}".format(
            subset_num, subset_num, len(test_file_path)))

# Shuffle file names and targets with one shared, seeded permutation so the
# two stay aligned and the subsets are reproducible between runs.
# Indexing with an index array relies on both collections supporting it
# (e.g. NumPy arrays).
indexes = np.arange(len(test_file_path))
np.random.seed(random_seed)
np.random.shuffle(indexes)
test_file_path = test_file_path[indexes]
test_file_tar = test_file_tar[indexes]

# np.array_split always returns exactly `subset_num` contiguous index ranges
# whose sizes differ by at most one. The previous rounded-stride arange could
# yield an extra, tiny trailing subset (or a zero stride for small sets) and
# relied on a bare `except` to close the last range.
Subset_Idx = np.array_split(np.arange(len(test_file_path)), subset_num)
# Online Testing Process for subsets of the test set
# Every utterance is predicted on its own; one score and one mean latency are
# collected per subset.
Pred_Rsl = []    # first value returned by evaluation_metrics, one per subset
Time_Cost = []   # mean prediction time per utterance (ms), one per subset
for subset_indices in Subset_Idx:
    Test_pred = []
    Test_label = []
    Time_cost = []
    for i in subset_indices:
        data = loadmat(root_dir + test_file_path[i].replace('.wav', '.mat'))['Audio_data']
        data = data[:, 1:]  # remove time-info
        data = (data - Feat_mean) / Feat_std  # Feature Normalization
        # Bounded NormFeat Ranging from -3~3 and assign NaN to 0
        data[np.isnan(data)] = 0
        data = np.clip(data, -3, 3)
        # split sentence into data chunks (chunk length m matches the model's time_step)
        chunk_data = DynamicChunkSplitTestingData([data], m=time_step, C=11, n=1)
        # Recording prediction time cost
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations; only the predict call is timed.
        tic = time.perf_counter()
        pred = model.predict(chunk_data)
        toc = time.perf_counter()
        Time_cost.append((toc - tic) * 1000)  # unit of time = 10^-3
        # Output prediction results
        # All values predicted for the utterance's chunks are averaged into one score.
        pred = np.mean(pred)
        Test_pred.append(pred)
        Test_label.append(test_file_tar[i])
    Test_pred = np.array(Test_pred)
    Test_label = np.array(Test_label)
    # Time Cost Result
    Time_cost = np.array(Time_cost)
    # Regression Task => Prediction & De-Normalize Target
    Test_pred = (Label_std * Test_pred) + Label_mean
    Pred_Rsl.append(evaluation_metrics(Test_label, Test_pred)[0])
    Time_Cost.append(np.mean(Time_cost))
# Subset results for Statistic Test
# Turn the per-subset lists into arrays, then summarise them as mean/std.
Pred_Rsl = np.array(Pred_Rsl)
Time_Cost = np.array(Time_Cost)

ccc_avg = np.mean(Pred_Rsl)
ccc_std = np.std(Pred_Rsl)
time_avg = np.mean(Time_Cost)
time_std = np.std(Time_Cost)

# Run configuration first, followed by the performance and latency summary.
report_lines = [
    'Model_type: LSTM',
    'Epochs: '+str(epochs),
    'Batch_size: '+str(batch_size),
    'Emotion: '+emo_attr,
    'Chunk_type: dynamicOverlap',
    'Atten_type: '+atten_type,
    'Avg. CCC testing performance: '+str(ccc_avg),
    'Std. CCC testing performance: '+str(ccc_std),
    'Avg. Prediction Time(ms/uttr): '+str(time_avg),
    'Std. Prediction Time(ms/uttr): '+str(time_std),
]
for report_line in report_lines:
    print(report_line)