run_demo.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Demo for Error Detection Model described in
Ribeiro, M.S., Cleland, J., Eshky, A., Richmond, K., and Renals. S. (Under Revision)
Automatic detection of speech articulation errors using ultrasound tongue imaging
Submitted to Speech Communication.
Steps applied in this demo are:
# step 1: load input data (MFCCs, ultrasound, normalisers)
# step 2: resize ultrasound and compute speaker mean
# step 3: build input sample
# step 4: ultrasound pre-processing (normalise + append normalised speaker mean)
# step 5: mfcc pre-processing (normalise)
# step 7: forward pass (load model from file and forward pass to get probability distribution)
# step 8: compute final score
This script shows the sequence of steps that are run.
For the details of each step, check the corresponding function in steps.py
Date: 2020
Author: M. Sam Ribeiro
"""

import os
import sys
import argparse

import steps


def main(config):

    ## step 1: load input data
    # MFCCs data (normalised and with deltas)
    mfccs = steps.read_text_array(config['data'], config['file_id']+'.mfccs')

    # raw ultrasound data
    ultrasound, params = steps.read_ultrasound_data(config['data'], config['file_id'])

    # data normalisers
    ultra_mvn = steps.read_text_array(config['norm'], 'ultrasound.mvn.txt')
    audio_mvn = steps.read_text_array(config['norm'], 'audio.mvn.txt')
    speaker_mvn = steps.read_text_array(config['norm'], 'speaker.mvn.txt')
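    # (The *.mvn.txt files presumably hold mean/variance normalisation statistics;
    # they are applied in the normalisation steps 4 and 5 below.)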

    ## step 2: resize ultrasound and compute speaker mean
    ultrasound, speaker_mean = steps.preprocess_ultrasound(ultrasound)

    ## step 3: build input sample
    mfcc_sample, ultra_sample = steps.build_sample(mfccs, ultrasound, params, config['timestamp'])
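    # (An interpretation, not taken from steps.py: build_sample appears to select
    # the model's input window around the anchor frame at config['timestamp'],
    # using the ultrasound parameters to align the audio and ultrasound streams.)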

    ## step 4: ultrasound pre-processing
    ultra_sample = steps.normalise_ultrasound(ultra_sample, speaker_mean,
                                              normaliser=ultra_mvn,
                                              speaker_normaliser=speaker_mvn)

    ## step 5: mfcc pre-processing
    mfcc_sample = steps.normalise_mfcc(mfcc_sample, normaliser=audio_mvn)

    ## step 6: forward pass
    model_scores = steps.forward_pass(config['model'], mfcc_sample, ultra_sample)
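
    # (A note on scoring, as an interpretation rather than a quote from steps.py:
    # the forward pass yields a probability distribution over phone classes;
    # score_sample then weighs the expected class (e.g. velar) against the
    # competing class (e.g. alveolar) and applies the rejection threshold k from
    # Equation 3 of the paper to decide whether to flag an articulation error.)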

    ## step 7: score
    steps.score_sample(model_scores, config['expected_class'], config['competing_class'], config['threshold'])
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileid', default='07F-028A', type=str, help='file identifier')
parser.add_argument('-t', '--timestamp', default=2.83, type=float, help='timestamp for anchor frame')
parser.add_argument('-k', '--threshold', default=0.0, type=float, help='rejection threshold')
parser.add_argument('-ec', '--expected_class', default='velar', type=str, help='expected class')
parser.add_argument('-cc', '--competing_class', default='alveolar', type=str, help='competing class')
parser.add_argument('-dir', '--base_dir', default='.', type=str, help='base directory')
args = parser.parse_args()
# use a single object with all required parameters
config = {
# target file identifier
# we'll look for it (*.ult, *.param, *.mfccs) in the data directory
'file_id' : args.fileid,
# mid-frame of target phone (given by user or discovered automatically)
# also referred to as 'anchor frame'
'timestamp' : args.timestamp,
# competing phone classes. supported clases are:
# alveolar, dental, labial, labiovelar, lateral, palatal, postalveolar, rhotic, velar
'expected_class' : args.expected_class,
'competing_class' : args.competing_class,
# threshold k in the paper (Equation 3, see also Figure 9)
# higher (positive) values tends to increases recall and decrease precision.
# lower (negative) values tends to decrease recall and increase precision.
'threshold' : args.threshold,
# input directories with relevant data
# this script expects to find sub-directories 'data', 'normalisers', and 'model'
'data' : os.path.join(args.base_dir, 'sample_data'),
'norm' : os.path.join(args.base_dir, 'normalisers'),
'model' : os.path.join(args.base_dir, 'model'),
}
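
    # Expected on-disk layout under base_dir (inferred from the paths and file
    # names used above; a sketch, not an exhaustive listing):
    #
    #   sample_data/ : <file_id>.ult, <file_id>.param, <file_id>.mfccs
    #   normalisers/ : ultrasound.mvn.txt, audio.mvn.txt, speaker.mvn.txt
    #   model/       : the trained error-detection model loaded by steps.forward_pass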

    main(config)