-
Notifications
You must be signed in to change notification settings - Fork 1
/
generateAllFeatures.py
75 lines (49 loc) · 2.46 KB
/
generateAllFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
This performs feature extraction for specified languages, feature types, and
subsets of data. This should be run from the LDCData directory. It will put all new files
in a new directory called features_<featuretype>.
Usage:
1) edit the dictionary 'configFiles' below to include all desired feature config files
2) edit 'languages' array to include desired languages (lowercase, complete name)
3) verify subsets contains the appropriate suffixes to the subset filenames
4) delete any existing output files or directories you want to regenerate
5) run: python generateAllFeatures.py
LDCData must contain the following files:
1) htk config files
2) .scp files containing all paths of wav files (one for each language and subset, e.g. germanwavstrong2_0.scp)
3) wav files in directories for individual languages
Flags that would be convenient but aren't implemented:
- Option to just run HCOPY
- Specify the languages, featuretype, and/or data subsets
'''
import csv
import sys
import generateFilenames as gen
import os
if __name__ == '__main__':
    # Script entry point: for every (feature type, language, subset)
    # combination, build the HCOPY file list and run HCOPY on it.
    # Paths are relative, so this is meant to be run from the data directory
    # that holds the config files, .scp lists and wav directories.

    # Languages to process (lowercase, complete name).
    languages = ['german', 'mandarin']

    # Suffixes of the .scp subset files; the full name is
    # <language> + 'wav' + <suffix>.
    # Other suffixes previously used here:
    # 'strong2_0.scp', 'stronger2_5.scp', 'strongest2_7.scp'
    subsets = ['strength0.scp']

    # Maps each feature type to the HTK config file handed to HCOPY via -C.
    configFiles = {
        'mfcc': 'config_files/mfccconfig.txt',
        'fbank': 'config_files/fbankconfig.txt',
        'lpc': 'config_files/lpcconfig.txt',
        'mfccplus': 'config_files/mfccplusconfig.txt',
    }
    # Disabled entry kept for reference: 'mfcchamming' -> 'mfccconfighamming'

    # Second value returned by gen.genFilenames for every combination.
    # It is collected here but not consumed anywhere else in this script.
    all_feature_only_files = []

    for featuretype, configfilename in configFiles.items():
        # One top-level output directory per feature type.
        feature_dir = 'features_' + featuretype
        # Guarded creation so re-runs do not fail (or print shell errors)
        # when the directory is already there.
        if not os.path.isdir(feature_dir):
            os.makedirs(feature_dir)

        for lang in languages:
            for degreeSet in subsets:
                # One sub-directory per language and subset; [0:-4] drops the
                # trailing '.scp' from the subset suffix.
                feature_strength_dir = (
                    feature_dir + '/' + lang + 'wav' + degreeSet[0:-4])
                if not os.path.isdir(feature_strength_dir):
                    os.makedirs(feature_strength_dir)

                wavfiles = lang + 'wav' + degreeSet

                # Generates a file listing pairs of
                # (.wav file, .<featuretype> file) to be used with HCOPY.
                wav_feature_pair_file, feature_only_file = gen.genFilenames(
                    wavfiles, featuretype, lang)
                all_feature_only_files.append(feature_only_file)

                hcopycommand = ('HCOPY -C ' + configfilename +
                                ' -S ' + wav_feature_pair_file)
                # Parenthesised single-argument print behaves the same on
                # Python 2 and Python 3.
                print(hcopycommand)

                # Report a failed HCOPY run instead of ignoring it, but keep
                # going so one bad combination does not abort the batch.
                status = os.system(hcopycommand)
                if status != 0:
                    sys.stderr.write(
                        'warning: HCOPY returned status %s for %s\n'
                        % (status, wav_feature_pair_file))