-
Notifications
You must be signed in to change notification settings - Fork 1
/
cepstrum.py
105 lines (90 loc) · 3.63 KB
/
cepstrum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 3 11:41:47 2017
@author: danny
"""
from preproc import four,pad,preemph, hamming
from filters import apply_filterbanks,filter_centers, create_filterbanks
from scipy.fftpack import dct
import numpy
import math
# functions to get the cepstrum and delta features of the cepstrum
def get_cepstrum(frames, fs, windowsize, filterbanks, filt):
    """Turn frames into filterbank features or cepstral coefficients.

    The frames are handed to ``four`` to obtain a frequency spectrum, which
    is then passed through ``apply_filterbanks``.

    Args:
        frames: frames forwarded unchanged to ``four``.
        fs: sampling frequency, forwarded to ``four``.
        windowsize: window size, forwarded to ``four``.
        filterbanks: filterbanks forwarded to ``apply_filterbanks``.
        filt: when truthy the filterbank output is returned as is; otherwise
            it is converted to cepstral coefficients.

    Returns:
        The filterbank output when ``filt`` is truthy. Otherwise columns
        1 to 12 of the DCT (scipy ``dct`` with default arguments) taken over
        every filterbank column except the first.
    """
    spectrum = four(frames, fs, windowsize)
    fbank_features = apply_filterbanks(spectrum, filterbanks)

    if filt:
        # filterbank features were requested: skip the cepstral step
        return fbank_features

    # the first filterbank column is left out of the transform
    coefficients = dct(fbank_features[:, 1:])
    # drop coefficient 0 and keep the following twelve
    return coefficients[:, 1:13]
def delta(data, n):
    """Compute delta (regression) features along the first axis of ``data``.

    For every frame ``t`` the result is::

        sum_{i=1..n} i * (data[t + i] - data[t - i]) / (2 * sum_{i=1..n} i**2)

    Neighbouring frames that fall outside ``data`` are treated as zero, so
    the output always has the same shape as the input.

    Args:
        data: array whose first axis indexes frames.
        n: number of frames to look backward and forward; must be >= 1.

    Returns:
        numpy array with the same shape as ``data``.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    data = numpy.asarray(data)
    if not numpy.issubdtype(data.dtype, numpy.inexact):
        # integer / boolean input: the result is fractional, so work in float
        data = data.astype(float)

    nframes = data.shape[0]
    dt = numpy.zeros_like(data)

    # Lags of nframes or more have no in-range neighbour on either side and
    # contribute nothing, so the loop stops at nframes - 1.
    for i in range(1, min(n, nframes - 1) + 1):
        # Each lag is weighted by the lag itself (i), which is what the
        # 2 * sum(i**2) normaliser below assumes.
        dt[:-i] += i * data[i:]    # forward neighbour data[t + i]
        dt[i:] -= i * data[:-i]    # backward neighbour data[t - i]

    norm = 2 * sum(i * i for i in range(1, n + 1))
    return dt / norm
def get_mfcc(input_data, alpha, nfilters, windowsize, frameshift, filt, use_deltas):
    """Extract per-frame log energy plus cepstral (or filterbank) features.

    Args:
        input_data: indexable pair; ``input_data[0]`` is used as the sampling
            frequency and ``input_data[1]`` as the sample array (presumably
            the ``(rate, data)`` pair returned by a wav reader -- confirm
            against the caller).
        alpha: pre-emphasis coefficient, forwarded to ``preemph``.
        nfilters: number of filters, forwarded to ``filter_centers`` and
            ``create_filterbanks``.
        windowsize: length of every frame in samples.
        frameshift: distance between consecutive frame starts in samples.
        filt: forwarded to ``get_cepstrum``; truthy selects filterbank
            features instead of cepstral coefficients.
        use_deltas: when truthy, delta and double-delta features (window of
            2 frames) are appended to the feature matrix.

    Returns:
        Tuple ``(features, frame_nrs)``. ``features`` holds the log frame
        energy in column 0 followed by the output of ``get_cepstrum`` and,
        if requested, the delta and double-delta columns. ``frame_nrs`` is a
        list with one ``[start, start + frameshift]`` pair of sample indices
        per frame.
    """
    # sampling frequency and raw samples
    fs = input_data[0]
    samples = input_data[1]

    # one frame per full frameshift contained in the unpadded signal
    nframes = math.floor(samples.size / frameshift)
    # pad the data
    data = pad(samples, windowsize, frameshift)

    # slice the frames and remember the sample range each one is anchored to
    starts = [x * frameshift for x in range(nframes)]
    frame_list = [data[start:start + windowsize] for start in starts]
    frame_nrs = [[start, start + frameshift] for start in starts]

    # Log energy per frame. The samples are squared in float so that narrow
    # integer sample types cannot wrap around; log(0) for silent frames is
    # expected here, hence the suppressed divide warning.
    with numpy.errstate(divide='ignore'):
        energy = numpy.array([
            numpy.log(numpy.sum(numpy.square(numpy.asarray(frame, dtype=float)), 0))
            for frame in frame_list
        ])
    # a silent frame has log energy -inf; clamp it to -50
    # (log of 2e-22 or approx 0)
    energy[numpy.isneginf(energy)] = -50

    frames = numpy.array(frame_list)
    # apply preemphasis
    frames = preemph(frames, alpha)
    # apply hamming window
    frames = hamming(frames)

    # Frequency axis for the filter response, from 0 up to the nyquist
    # frequency 1 / (2 / fs). numpy.linspace requires an integer number of
    # points, and windowsize / 2 is a float in Python 3, so floor-divide.
    # NOTE(review): the reason for the 56 extra points is not visible in this
    # file; presumably it has to match the spectrum width produced by `four`.
    npoints = int(windowsize // 2) + 56
    xf = numpy.linspace(0.0, 1.0 / (2 / fs), npoints)
    # get the filter centers
    fc = filter_centers(nfilters, xf)
    # create filterbanks
    filterbanks = create_filterbanks(nfilters, xf, fc)

    # get the cepstrum and prepend the log energy as the first column
    mfcc = get_cepstrum(frames, fs, windowsize, filterbanks, filt)
    mfcc = numpy.concatenate([energy[:, None], mfcc], 1)

    # add delta and double delta features if needed
    if use_deltas:
        single_delta = delta(mfcc, 2)
        double_delta = delta(single_delta, 2)
        mfcc = numpy.concatenate([mfcc, single_delta, double_delta], 1)
    return (mfcc, frame_nrs)