-
Notifications
You must be signed in to change notification settings - Fork 142
/
audio_processor.py
38 lines (31 loc) · 1.19 KB
/
audio_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import librosa
import numpy as np
def compute_melgram(audio_path):
    """Compute a log-amplitude mel-spectrogram of an audio file.

    The signal is loaded at 12 kHz and forced to exactly 29.12 seconds
    (zero-padded at the end if shorter, centre-cropped if longer) so that
    the result has a fixed number of time frames.

    Parameters
    ----------
    audio_path: path for the audio file.
        Any format that ``librosa.load`` can decode will work.
        More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load

    Returns
    -------
    numpy.ndarray
        Array of shape (1, 1, 96, 1366), where 96 == #mel-bins and
        1366 == #time frames.
    """
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # seconds; chosen to make it 1366 frames

    src, _ = librosa.load(audio_path, sr=SR)  # whole signal, resampled to SR
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)

    if n_sample < n_sample_fit:  # too short: zero-pad the tail
        src = np.hstack((src, np.zeros((n_sample_fit - n_sample,))))
    elif n_sample > n_sample_fit:  # too long: keep the centre segment
        # Floor division keeps the slice bounds integral; true division
        # yields floats on Python 3, which are not valid slice indices.
        start = (n_sample - n_sample_fit) // 2
        src = src[start:start + n_sample_fit]

    # The squaring is kept as in the original pipeline so that the numeric
    # output stays compatible with existing consumers of this function.
    power = librosa.feature.melspectrogram(y=src, sr=SR, hop_length=HOP_LEN,
                                           n_fft=N_FFT, n_mels=N_MELS) ** 2

    # librosa.logamplitude was removed in librosa 0.6 in favour of
    # power_to_db (with ref_power renamed to ref); support both.
    logam = getattr(librosa, 'logamplitude', None)
    if logam is not None:
        ret = logam(power, ref_power=1.0)
    else:
        ret = librosa.power_to_db(power, ref=1.0)

    # Prepend two singleton axes: (96, 1366) -> (1, 1, 96, 1366).
    ret = ret[np.newaxis, np.newaxis, :]
    return ret