-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathaudio_preprocessing_essentia.py
64 lines (53 loc) · 1.97 KB
/
audio_preprocessing_essentia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from general.parameters import *
from general.filePathHsmm import kerasScaler_path
from general.Fprev_sub import Fprev_sub
from audio_preprocessing import feature_reshape
import essentia.standard as ess
import pickle
import numpy as np
# ---------------------------------------------------------------------------
# Shared analysis chain: window -> magnitude spectrum -> mel bands / MFCC.
# The Essentia algorithm objects are built once at import time and reused for
# every frame. `framesize`, `fs` and `highFrequencyBound` are expected to come
# from the `general.parameters` star import.
# ---------------------------------------------------------------------------
winAnalysis = 'hann'

# FFT size: every frame is zero-padded by one extra framesize before the FFT.
N = framesize * 2

WINDOW = ess.Windowing(zeroPadding=N - framesize, type=winAnalysis)

SPECTRUM = ess.Spectrum(size=N)

# inputSize is framesize + 1, i.e. N / 2 + 1 -- presumably the number of bins
# SPECTRUM emits for an N-point FFT (confirm against the Essentia docs).
MFCC = ess.MFCC(
    numberBands=80,
    inputSize=framesize + 1,
    highFrequencyBound=highFrequencyBound,
    sampleRate=fs,
)
def getMFCCBands2D(audio, framesize, hopsize, nbf=False, nlen=10):
    """
    Frame-wise mel band feature, optionally stacked with neighbouring frames.

    Every frame goes through the module-level WINDOW, SPECTRUM and MFCC
    objects; only the band output of MFCC is kept, the cepstral coefficients
    are discarded.

    NOTE(review): WINDOW / SPECTRUM / MFCC are configured from the module-level
    framesize, so the ``framesize`` argument presumably has to match it.

    :param audio: audio samples handed to ess.FrameGenerator
    :param framesize: frame size passed to ess.FrameGenerator
    :param hopsize: hop size passed to ess.FrameGenerator
    :param nbf: bool, if True stack each frame with its neighbouring frames
    :param nlen: number of shifts (1..nlen) stacked on each side when nbf is set
    :return: float32 array with one row per frame
    """
    frame_iter = ess.FrameGenerator(audio, frameSize=framesize, hopSize=hopsize)

    # MFCC returns a (bands, coefficients) pair; index 0 selects the bands.
    band_rows = [MFCC(SPECTRUM(WINDOW(frame)))[0] for frame in frame_iter]

    if not nbf:
        # no context requested: rows are the plain per-frame band vectors
        return np.array(band_rows, dtype='float32')

    # one column per frame, so shifted copies can be stacked along axis 0
    bands_by_frame = np.array(band_rows).transpose()
    stacked = np.array(bands_by_frame, copy=True)

    for shift in range(1, nlen + 1):
        # Fprev_sub is defined elsewhere; presumably it shifts the frames by
        # `w` positions in either direction -- verify against its source.
        shifted_right = Fprev_sub(bands_by_frame, w=shift)
        shifted_left = Fprev_sub(bands_by_frame, w=-shift)
        # grow the stack outwards: right shift on top, left shift at the bottom
        stacked = np.vstack((shifted_right, stacked, shifted_left))

    # back to one row per frame
    return np.array(stacked.transpose(), dtype='float32')
def mfccFeature_pipeline(filename_wav):
    """
    Compute the scaled and reshaped log mel band feature of an audio file.

    Steps: load the file with ess.MonoLoader at sample rate ``fs``, extract the
    mel bands with neighbouring frames (getMFCCBands2D, nbf=True), apply
    log(100000 * x + 1) compression, scale with the pickled scaler found at
    ``kerasScaler_path`` and reshape with feature_reshape.

    :param filename_wav: path of the audio file to load
    :return: tuple (feature, mfcc); ``mfcc`` is the log-compressed band feature
             before scaling, ``feature`` is the scaled and reshaped version
    """
    audio = ess.MonoLoader(downmix='left', filename=filename_wav, sampleRate=fs)()

    # NOTE(security): unpickling can execute arbitrary code, so
    # kerasScaler_path must point to a trusted file.
    # The context manager closes the file handle, which the previous
    # pickle.load(open(...)) form left open.
    with open(kerasScaler_path, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    feature = getMFCCBands2D(audio, framesize, hopsize, nbf=True)

    # log compression of the band energies; +1 keeps the argument >= 1 for
    # non-negative input
    mfcc = np.log(100000 * feature + 1)

    feature = scaler.transform(mfcc)
    feature = feature_reshape(feature)
    return feature, mfcc