forked from Romaisaa/Voice-Passsword-Checker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Gmm.py
143 lines (117 loc) · 4.71 KB
/
Gmm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import pickle
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
import librosa
import numpy
from python_speech_features import sigproc
from scipy.fftpack import dct
def mel2hz(mel):
    """Inverse Mel-scale mapping: convert a value in Mels to Hertz."""
    ratio = mel / 2595.0
    return 700 * (10 ** ratio - 1)
def hz2mel(hz):
    """Map a frequency in Hertz onto the Mel scale."""
    scaled = 1 + hz / 700.0
    return 2595 * numpy.log10(scaled)
def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
highfreq= highfreq or samplerate/2
assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
# compute points evenly spaced in mels
lowmel = hz2mel(lowfreq)
highmel = hz2mel(highfreq)
melpoints = numpy.linspace(lowmel,highmel,nfilt+2)
# our points are in Hz, but we use fft bins, so we have to convert
# from Hz to fft bin number
bin = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate)
fbank = numpy.zeros([nfilt,nfft//2+1])
for j in range(0,nfilt):
for i in range(int(bin[j]), int(bin[j+1])):
fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
for i in range(int(bin[j+1]), int(bin[j+2])):
fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
return fbank
def lifter(cepstra, L=22):
    """Apply sinusoidal liftering to a 2-D cepstra matrix (frames x coeffs).

    Boosts higher-order cepstral coefficients; L <= 0 disables liftering
    and returns the input unchanged.
    """
    if L <= 0:
        return cepstra
    _, ncoeff = numpy.shape(cepstra)
    window = 1 + (L / 2.0) * numpy.sin(numpy.pi * numpy.arange(ncoeff) / L)
    return cepstra * window
def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
          winfunc=lambda x: numpy.ones((x,))):
    """Compute Mel-filterbank energies from an audio signal.

    Returns a (feat, energy) pair: ``feat`` has one row of nfilt filterbank
    energies per frame, ``energy`` holds the total power of each frame.
    """
    if not highfreq:
        highfreq = samplerate / 2
    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen * samplerate,
                              winstep * samplerate, winfunc)
    pspec = sigproc.powspec(frames, nfft)
    # Total energy per frame; zeros are clamped to eps so a later log is finite.
    energy = numpy.sum(pspec, 1)
    energy = numpy.where(energy == 0, numpy.finfo(float).eps, energy)
    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    # Filterbank energies, likewise clamped away from zero for the log step.
    feat = numpy.dot(pspec, filters.T)
    feat = numpy.where(feat == 0, numpy.finfo(float).eps, feat)
    return feat, energy
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=None, lowfreq=0, highfreq=None,
         preemph=0.97, ceplifter=22, appendEnergy=True, winfunc=lambda x: numpy.ones((x,))):
    """Compute MFCC features: one row of ``numcep`` coefficients per frame.

    NOTE: this local definition shadows the module imported as ``mfcc`` at
    the top of the file; all in-file callers get this function.
    """
    if nfft is None:
        # BUGFIX: nfft previously defaulted to None and was passed straight to
        # fbank()/get_filterbanks(), where nfft//2+1 raises a TypeError.
        # Default to the smallest power of two that fits one analysis window.
        nfft = 1
        while nfft < winlen * samplerate:
            nfft *= 2
    feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft,
                         lowfreq, highfreq, preemph, winfunc)
    feat = numpy.log(feat)
    # Type-II DCT of the log energies, keeping the first numcep coefficients.
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy:
        # Replace the first cepstral coefficient with log of frame energy.
        feat[:, 0] = numpy.log(energy)
    return feat
def calculate_delta(array, n_mfcc):
    """Compute delta (trajectory) features over a 2-D feature matrix.

    For each frame the delta is a weighted difference of the frames at
    offsets +-1 and +-2, clamped at the matrix edges, divided by 10
    (the standard normalizer 2 * (1^2 + 2^2)).
    """
    rows, _ = array.shape
    deltas = np.zeros((rows, n_mfcc))
    last = rows - 1
    for i in range(rows):
        # Neighbor indices, clamped to the valid frame range.
        prev1, next1 = max(i - 1, 0), min(i + 1, last)
        prev2, next2 = max(i - 2, 0), min(i + 2, last)
        deltas[i] = (array[next1] - array[prev1]
                     + 2 * (array[next2] - array[prev2])) / 10
    return deltas
def extract_features(audio, rate, mode):
    """Extract a 40-dim feature matrix: 20 scaled MFCCs + 20 delta features.

    ``mode`` selects the analysis window length/overlap: "Voice" uses the
    standard 25 ms / 10 ms, "Voc" uses half-size windows.
    """
    options = {
        "Voice": {
            "wenlen": 0.025,
            "wenlap": 0.01
        },
        "Voc": {
            "wenlen": 0.0125,
            "wenlap": 0.005
        }
    }
    params = options[mode]
    mfcc_feature = mfcc(audio, rate, params["wenlen"], params["wenlap"], 20,
                        nfft=1200, appendEnergy=True)
    # Standardize each coefficient column (zero mean, unit variance).
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature, 20)
    return np.hstack((mfcc_feature, delta))
def predict(mode, username="Unknown"):
    """Score "audio.wav" against every stored GMM for the given mode.

    ``mode`` is "Voice" (speaker models in ./models) or "Voc" (per-user
    password models in ./models_voc/<username>). Returns a numpy array with
    one log-likelihood score per .gmm model file found.
    """
    modelpath = {
        "Voice": "models",
        "Voc": "models_voc",
    }
    path = modelpath[mode]
    if mode == "Voc":
        # BUGFIX: was path + "\\" + username — a Windows-only separator;
        # os.path.join is portable.
        path = os.path.join(path, username)
    gmm_files = [os.path.join(path, fname)
                 for fname in os.listdir(path) if fname.endswith('.gmm')]
    # Load the Gaussian models.
    # BUGFIX: pickle.load(open(...)) leaked file handles; use `with` to close.
    # NOTE(review): unpickling model files is only safe if the models
    # directory is trusted — pickle executes arbitrary code on load.
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(pickle.load(fh))
    # Read the test audio and score it against each model.
    audio, sr = librosa.load("audio.wav")
    vector = extract_features(audio, sr, mode)
    score_list = np.zeros(len(models))
    for i, gmm in enumerate(models):
        score_list[i] = np.array(gmm.score(vector)).sum()
    return score_list