-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathobjective_measures.py
44 lines (39 loc) · 1.49 KB
/
objective_measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
'''
TODO: logSpecDbDist appropriate? (both mels & mags?)
TODO: compute output length error?
TODO: work out best way of handling the fact that predicted *coarse* features
can correspond to text but be arbitrarily 'out of phase' with reference.
Multiple references? Or compare against full-time resolution reference?
'''
import logging
from mcd import dtw
import mcd.metrics_fast as mt
def compute_dtw_error(reference, predictions):
    """Return the mean DTW-aligned ``mt.logSpecDbDist`` cost per reference frame.

    Reference and predicted utterances are paired positionally. Each pair is
    cast to float64 and aligned with ``dtw.dtw`` using ``mt.logSpecDbDist``
    as the frame-level cost. The minimum alignment costs are summed over all
    pairs and divided by the total number of *reference* frames. The result
    is also printed to stdout as the "overall LSD".

    Args:
        reference: iterable of natural (reference) feature arrays; each item
            must support ``astype`` and ``len``.
        predictions: iterable of synthesised feature arrays, paired with
            ``reference`` by position. Pairing uses ``zip``, so surplus items
            in the longer iterable are ignored.

    Returns:
        Summed minimum DTW cost divided by the summed reference frame count.

    Raises:
        ValueError: if no reference frames were seen (empty input or only
            zero-length references), since the mean is then undefined.
    """
    total_cost = 0.0
    total_frames = 0
    for nat, synth in zip(reference, predictions):
        nat = nat.astype('float64')
        synth = synth.astype('float64')
        # dtw.dtw yields a (cost, path) pair; only the cost is used here.
        min_cost, _ = dtw.dtw(nat, synth, mt.logSpecDbDist)
        total_cost += min_cost
        # Normalise by the reference length, not the predicted length.
        total_frames += len(nat)
    if total_frames == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError(
            'compute_dtw_error: no reference frames to average over')
    mean_score = total_cost / total_frames
    print('overall LSD = %f (%s frames nat/synth)' % (mean_score, total_frames))
    return mean_score
def compute_simple_LSD(reference_list, prediction_list):
    """Return the mean frame-wise ``mt.logSpecDbDist`` with no time alignment.

    Reference and predicted utterances are paired positionally, and within
    each pair frame *i* of the reference is compared with frame *i* of the
    prediction. Both arrays are cast to float64 first. Frame costs are summed
    over all pairs and divided by the total number of frames.

    Args:
        reference_list: iterable of natural (reference) feature arrays; each
            item must support ``astype`` and ``len``.
        prediction_list: iterable of synthesised feature arrays, paired with
            ``reference_list`` by position. Pairing uses ``zip``, so surplus
            items in the longer iterable are ignored.

    Returns:
        Summed frame cost divided by the summed frame count.

    Raises:
        ValueError: if a prediction and its reference differ in length, or if
            no frames were seen at all (the mean is then undefined).
    """
    total_cost = 0.0
    total_frames = 0
    for synth, nat in zip(prediction_list, reference_list):
        # Explicit raise rather than ``assert``: asserts are stripped under
        # ``python -O``, after which the frame-level zip below would silently
        # truncate while the frame count still used the full reference length.
        if len(synth) != len(nat):
            raise ValueError(
                'compute_simple_LSD: prediction has %d frames but reference '
                'has %d' % (len(synth), len(nat)))
        nat = nat.astype('float64')
        synth = synth.astype('float64')
        total_cost += sum(
            mt.logSpecDbDist(nat_frame, synth_frame)
            for nat_frame, synth_frame in zip(nat, synth)
        )
        total_frames += len(nat)
    if total_frames == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError('compute_simple_LSD: no frames to average over')
    return total_cost / total_frames