-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathadaptecdemo.m
executable file
·285 lines (247 loc) · 10 KB
/
adaptecdemo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
%% Acoustic Echo Cancellation (AEC)
% This example shows how to apply adaptive filters to acoustic echo
% cancellation (AEC).
%
% Author(s): Scott C. Douglas
% Copyright 1999-2014 The MathWorks, Inc.
%% Introduction
% Acoustic echo cancellation is important for audio teleconferencing when
% simultaneous communication (or full-duplex transmission) of speech is
% necessary. In acoustic echo cancellation, a measured microphone signal
% |d(n)| contains two signals:
%
% * the near-end speech signal |v(n)|
% * the far-end echoed speech signal |dhat(n)|
%
% The goal is to remove the far-end echoed speech signal from the
% microphone signal so that only the near-end speech signal is transmitted.
% This example has some sound clips, so you might want to adjust your
% computer's volume now.
%% The Room Impulse Response
%
% First, we describe the acoustics of the loudspeaker-to-microphone signal
% path where the speakerphone is located. We can use a long finite impulse
% response filter to describe these characteristics. The following sequence
% of commands generates a random impulse response that is not unlike what a
% conference room would exhibit assuming a system sampling rate of |fs =
% 16000 Hz|.
% Sampling rate in Hz, number of impulse-response samples (fs/2 + 1), and
% the frame size used by the streaming loops in this example.
fs = 16000;
M = fs/2 + 1;
frameSize = 8192;

% Chebyshev Type II design (cheby2 order argument 4, 20 dB stopband
% attenuation, normalized band edges 0.1 and 0.7). Six zeros are prepended
% to the numerator coefficients before building the IIR filter object.
[B, A] = cheby2(4, 20, [0.1 0.7]);
delayedNumerator = [zeros(1, 6), B];
IIR = dsp.IIRFilter( ...
    'Numerator',   delayedNumerator, ...
    'Denominator', A);

% Open the filter in the Filter Visualization Tool on a white background
FVT = fvtool(IIR); % Analyze the filter
FVT.Color = [1 1 1];
%%
% Build a synthetic room impulse response: pass exponentially decaying,
% randomly signed noise (M samples, as a column vector) through the IIR
% filter, then rescale the result so that its norm equals 4.
H = step(IIR, ...
    (log(0.99*rand(1,M)+0.01).*sign(randn(1,M)).*exp(-0.002*(1:M)))');
H = H/norm(H)*4; % Room Impulse Response

% FIR model of the room; the coefficients are passed as a row vector
firRoom = dsp.FIRFilter('Numerator', H');

% Plot the response against time. The time axis is derived from the length
% of H instead of a hard-coded 0.5 s end point, so the plot remains valid
% if M or fs is changed.
fig = figure;
plot((0:numel(H)-1)/fs, H);
xlabel('Time [sec]');
ylabel('Amplitude');
title('Room Impulse Response');
fig.Color = [1 1 1];
%% The Near-End Speech Signal
%
% The teleconferencing system's user is typically located near the system's
% microphone. Here is what a male speaker's voice sounds like at the
% microphone.
% Load the near-end speech data (expected to provide the variable v that
% the signal source below reads -- confirm against the MAT-file).
load('nearspeech');

% Audio output device object, also used by the later playback loops
AP = dsp.AudioPlayer('SampleRate', fs);

% Frame-based reader over the near-end speech samples
nearSpeechSrc = dsp.SignalSource('Signal', v, ...
    'SamplesPerFrame', frameSize);

% Time scope for the near-end speech
nearSpeechScope = dsp.TimeScope( ...
    'SampleRate',   fs, ...
    'TimeSpan',     35, ...
    'YLimits',      [-1.5 1.5], ...
    'BufferLength', length(v), ...
    'Title',        'Near-End Speech Signal', ...
    'ShowGrid',     true);

% Stream processing loop: read one frame, play it, plot it
while ~isDone(nearSpeechSrc)
    nearSpeech = step(nearSpeechSrc);     % next frame of speech samples
    step(AP, nearSpeech);                 % send to the output audio device
    step(nearSpeechScope, nearSpeech);    % update the scope
end
%% The Far-End Speech Signal
%
% Now we describe the path of the far-end speech signal. A male voice
% travels out the loudspeaker, bounces around in the room, and then is
% picked up by the system's microphone. Let's listen to what his speech
% sounds like if it is picked up at the microphone without the near-end
% speech present.
% Load the far-end speech data (expected to provide the variable x that
% the signal source below reads -- confirm against the MAT-file).
load('farspeech');

% Frame-based reader over the far-end speech and a sink that logs the
% echoed version for the later sections
farSpeechSrc = dsp.SignalSource('Signal', x, ...
    'SamplesPerFrame', frameSize);
farSpeechSink = dsp.SignalSink;

% Time scope for the (un-echoed) far-end speech
farSpeechScope = dsp.TimeScope( ...
    'SampleRate',   fs, ...
    'TimeSpan',     35, ...
    'YLimits',      [-0.5 0.5], ...
    'BufferLength', length(x), ...
    'Title',        'Far-End Speech Signal', ...
    'ShowGrid',     true);

% Stream processing loop
while ~isDone(farSpeechSrc)
    farSpeech = step(farSpeechSrc);             % next frame of far-end speech
    farSpeechEcho = step(firRoom, farSpeech);   % apply the room response
    step(AP, farSpeechEcho);                    % play the echoed speech
    step(farSpeechScope, farSpeech);            % plot the far-end speech
    step(farSpeechSink, farSpeechEcho);         % log the echo for later use
end
%% The Microphone Signal
%
% The signal at the microphone contains both the near-end speech and the
% far-end speech that has been echoed throughout the room. The goal of the
% acoustic echo canceler is to cancel out the far-end speech, such that
% only the near-end speech is transmitted back to the far-end listener.
% Rewind the near-end source and create a frame-based reader over the
% echoed far-end speech that was logged in farSpeechSink
reset(nearSpeechSrc);
farSpeechEchoSrc = dsp.SignalSource( ...
    'Signal',          farSpeechSink.Buffer, ...
    'SamplesPerFrame', frameSize);

% Sink that logs the microphone signal, and a scope to display it
micSink = dsp.SignalSink;
micScope = dsp.TimeScope( ...
    'SampleRate',   fs, ...
    'TimeSpan',     35, ...
    'YLimits',      [-1 1], ...
    'BufferLength', length(x), ...
    'Title',        'Microphone Signal', ...
    'ShowGrid',     true);

% Stream processing loop
while ~isDone(farSpeechEchoSrc)
    % Microphone signal = echoed far-end + near-end + noise
    echoFrame = step(farSpeechEchoSrc);
    nearFrame = step(nearSpeechSrc);
    micSignal = echoFrame + nearFrame + 0.001*randn(frameSize,1);

    step(AP, micSignal);         % play the microphone signal
    step(micScope, micSignal);   % plot it
    step(micSink, micSignal);    % log it
end
%% The Frequency-Domain Adaptive Filter (FDAF)
%
% The algorithm that we will use in this example is the *Frequency-Domain
% Adaptive Filter (FDAF)*. This algorithm is very useful when the impulse
% response of the system to be identified is long. The FDAF uses a fast
% convolution technique to compute the output signal and filter updates.
% This computation executes quickly in MATLAB(R). It also has improved
% convergence performance through frequency-bin step size normalization.
% We'll pick some initial parameters for the filter and see how well the
% far-end speech is cancelled in the error signal.
% Construct the Frequency-Domain Adaptive Filter
FDAF = dsp.FrequencyDomainAdaptiveFilter( ...
    'Length',          2048, ...
    'StepSize',        0.025, ...
    'InitialPower',    0.01, ...
    'AveragingFactor', 0.98, ...
    'Method',          'Unconstrained FDAF');

% Four-input time scope laid out as four stacked displays
AECScope1 = dsp.TimeScope(4, fs, ...
    'LayoutDimensions', [4,1], ...
    'TimeSpan',         35, ...
    'BufferLength',     length(x));

% Displays 1-3 share the grid and y-limit settings and differ only in
% their titles. ActiveDisplay selects which display the following
% property assignments apply to.
signalTitles = {'Near-End Speech Signal', ...
                'Microphone Signal', ...
                'Output of Acoustic Echo Canceller mu=0.025'};
for displayIdx = 1:3
    AECScope1.ActiveDisplay = displayIdx;
    AECScope1.ShowGrid = true;
    AECScope1.YLimits = [-1.5 1.5];
    AECScope1.Title = signalTitles{displayIdx};
end

% Display 4 shows the ERLE in dB
AECScope1.ActiveDisplay = 4;
AECScope1.ShowGrid = true;
AECScope1.YLimits = [0 50];
AECScope1.YLabel = 'ERLE [dB]';
AECScope1.Title = 'Echo Return Loss Enhancement mu=0.025';

% Release the three signal sources and re-assign their frame size
release(nearSpeechSrc);        % near-end speech signal
nearSpeechSrc.SamplesPerFrame = frameSize;
release(farSpeechSrc);         % far-end speech signal
farSpeechSrc.SamplesPerFrame = frameSize;
release(farSpeechEchoSrc);     % far-end speech signal echoed by the room
farSpeechEchoSrc.SamplesPerFrame = frameSize;
%% Echo Return Loss Enhancement (ERLE)
%
% Since we have access to both the near-end and far-end speech signals, we
% can compute the *echo return loss enhancement (ERLE)*, which is a
% smoothed measure of the amount (in dB) that the echo has been attenuated.
% From the plot, we see that we have achieved about a 35 dB ERLE at the end
% of the convergence period.
% Two identical 1024-tap all-ones FIR filters (moving sums). They smooth
% the numerator and denominator of the ERLE ratio computed in the loop.
firERLE1 = dsp.FIRFilter('Numerator', ones(1,1024));
firERLE2 = clone(firERLE1);
% Show the smoothing filter in the already open Filter Visualization Tool
setfilter(FVT,firERLE1);
% Frame-based reader over the microphone signal logged in micSink
micSrc = dsp.SignalSource('Signal', micSink.Buffer, ...
    'SamplesPerFrame', frameSize);
% Stream processing loop - adaptive filter step size = 0.025
while(~isDone(nearSpeechSrc))
    nearSpeech = step(nearSpeechSrc);
    farSpeech = step(farSpeechSrc);
    farSpeechEcho = step(farSpeechEchoSrc);
    micSignal = step(micSrc);
    % Apply FDAF. Only the second output (e) is used below, so the first
    % output is discarded instead of being stored in an unused variable.
    [~,e] = step(FDAF, farSpeech, micSignal);
    % Send the speech samples to the output audio device
    step(AP, e);
    % Compute ERLE: smoothed (e - nearSpeech).^2 divided by smoothed
    % farSpeechEcho.^2, converted to dB with the sign flipped
    erle = step(firERLE1,(e-nearSpeech).^2)./ ...
        (step(firERLE2, farSpeechEcho.^2));
    erledB = -10*log10(erle);
    % Plot near-end speech, microphone signal, AEC output and ERLE
    step(AECScope1, nearSpeech, micSignal, e, erledB);
end
%% Effects of Different Step Size Values
%
% To get faster convergence, we can try using a larger step size value.
% However, this increase causes another effect, that is, the adaptive
% filter is "mis-adjusted" while the near-end speaker is talking. Listen
% to what happens when we choose a step size that is 60% larger than
% before.
% Change the step size value in FDAF (reset first, then set the new value)
reset(FDAF);
FDAF.StepSize = 0.04;
% Clone the first scope and retitle displays 3 and 4 for the new step size
AECScope2 = clone(AECScope1);
AECScope2.ActiveDisplay = 3;
AECScope2.Title = 'Output of Acoustic Echo Canceller mu=0.04';
AECScope2.ActiveDisplay = 4;
AECScope2.Title = 'Echo Return Loss Enhancement mu=0.04';
% Reset all signal sources and both ERLE smoothing filters before the
% second pass
reset(nearSpeechSrc);
reset(farSpeechSrc);
reset(farSpeechEchoSrc);
reset(micSrc);
reset(firERLE1);
reset(firERLE2);
% Stream processing loop - adaptive filter step size = 0.04
while(~isDone(nearSpeechSrc))
    nearSpeech = step(nearSpeechSrc);
    farSpeech = step(farSpeechSrc);
    farSpeechEcho = step(farSpeechEchoSrc);
    micSignal = step(micSrc);
    % Apply FDAF. Only the second output (e) is used below, so the first
    % output is discarded instead of being stored in an unused variable.
    [~,e] = step(FDAF, farSpeech, micSignal);
    % Send the speech samples to the output audio device
    step(AP, e);
    % Compute ERLE: smoothed (e - nearSpeech).^2 divided by smoothed
    % farSpeechEcho.^2, converted to dB with the sign flipped
    erle = step(firERLE1,(e-nearSpeech).^2)./ ...
        (step(firERLE2, farSpeechEcho.^2));
    erledB = -10*log10(erle);
    % Plot near-end speech, microphone signal, AEC output and ERLE
    step(AECScope2, nearSpeech, micSignal, e, erledB);
end
%% Echo Return Loss Enhancement Comparison
%
% With a larger step size, the ERLE performance is not as good due to the
% misadjustment introduced by the near-end speech. To deal with this
% performance difficulty, acoustic echo cancellers include a detection
% scheme to tell when near-end speech is present and lower the step size
% value over these periods. Without such detection schemes, the performance
% of the system with the larger step size is not as good as that with the
% smaller step size, as can be seen from the ERLE plots.
% All playback loops have finished: release the audio player so that the
% audio output device is not left held by this script.
release(AP);
displayEndOfDemoMessage(mfilename)