Skip to content

Commit

Permalink
feat: integrate rnnoise based service for voice activity (VAD) detection
Browse files Browse the repository at this point in the history
  • Loading branch information
andrei-gavrilescu authored and saghul committed Oct 4, 2019
1 parent 11d3a34 commit 761ac6a
Show file tree
Hide file tree
Showing 12 changed files with 888 additions and 5 deletions.
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ CLEANCSS = ./node_modules/.bin/cleancss
DEPLOY_DIR = libs
LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
LIBFLAC_DIR = node_modules/libflacjs/dist/min/
RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
NODE_SASS = ./node_modules/.bin/node-sass
NPM = npm
OUTPUT_DIR = .
Expand All @@ -20,7 +21,7 @@ compile:
clean:
rm -fr $(BUILD_DIR)

deploy: deploy-init deploy-appbundle deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local

deploy-init:
rm -fr $(DEPLOY_DIR)
Expand All @@ -47,6 +48,8 @@ deploy-appbundle:
$(BUILD_DIR)/analytics-ga.min.map \
$(BUILD_DIR)/video-blur-effect.min.js \
$(BUILD_DIR)/video-blur-effect.min.map \
$(BUILD_DIR)/rnnoise-processor.min.js \
$(BUILD_DIR)/rnnoise-processor.min.map \
$(DEPLOY_DIR)

deploy-lib-jitsi-meet:
Expand All @@ -63,6 +66,11 @@ deploy-libflac:
$(LIBFLAC_DIR)/libflac4-1.3.2.min.js.mem \
$(DEPLOY_DIR)

# Copy the rnnoise WebAssembly binary (rnnoise.wasm) from $(RNNOISE_WASM_DIR) into $(DEPLOY_DIR).
deploy-rnnoise-binary:
cp \
$(RNNOISE_WASM_DIR)/rnnoise.wasm \
$(DEPLOY_DIR)

deploy-css:
$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
$(CLEANCSS) $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
Expand All @@ -71,7 +79,7 @@ deploy-css:
deploy-local:
([ ! -x deploy-local.sh ] || ./deploy-local.sh)

dev: deploy-init deploy-css deploy-lib-jitsi-meet deploy-libflac
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac
$(WEBPACK_DEV_SERVER)

source-package:
Expand Down
33 changes: 33 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
"react-transition-group": "2.4.0",
"redux": "4.0.4",
"redux-thunk": "2.2.0",
"rnnoise-wasm": "github:jitsi/rnnoise-wasm.git#db96d11f175a22ef56c7db1ba9550835b716e615",
"styled-components": "3.4.9",
"util": "0.12.1",
"uuid": "3.1.0",
Expand Down
44 changes: 44 additions & 0 deletions react/features/rnnoise/functions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// @flow

import { getJitsiMeetGlobalNS, loadScript } from '../base/util';

let loadRnnoisePromise;

/**
 * Loads the rnnoise processor script on demand and then creates a processor through the
 * {@code createRnnoiseProcessor} binding looked up on the JitsiMeet global namespace.
 *
 * The script load is shared between calls. A failed load is not cached, so a later call attempts the load again.
 *
 * @returns {Promise<RnnoiseProcessor>} - Resolves with a RnnoiseProcessor instance. Rejects if the script fails
 * to load or if the {@code createRnnoiseProcessor} binding is missing after the load.
 */
export function createRnnoiseProcessorPromise() {
    // Subsequent calls should not attempt to load the script multiple times.
    if (!loadRnnoisePromise) {
        loadRnnoisePromise = loadScript('libs/rnnoise-processor.min.js').catch(error => {
            // Forget the failed attempt; otherwise a single transient failure would make every future call
            // reject with the same cached error.
            loadRnnoisePromise = undefined;

            throw error;
        });
    }

    return loadRnnoisePromise.then(() => {
        const ns = getJitsiMeetGlobalNS();

        if (ns?.effects?.rnnoise?.createRnnoiseProcessor) {
            return ns.effects.rnnoise.createRnnoiseProcessor();
        }

        throw new Error('Rnnoise module binding createRnnoiseProcessor not found!');
    });
}

/**
 * Reads the PCM frame length accepted by the rnnoise library from the JitsiMeet global namespace.
 * We might want to expose it with flow libdefs.
 *
 * @returns {number} The value of {@code RNNOISE_SAMPLE_LENGTH} found on the namespace.
 * @throws {Error} If the value is not available on the namespace.
 */
export function getSampleLength() {
    const sampleLength = getJitsiMeetGlobalNS()?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH;

    // Guard clause inverted: return as soon as a usable value is found.
    if (sampleLength) {
        return sampleLength;
    }

    throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!');
}
2 changes: 2 additions & 0 deletions react/features/rnnoise/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

export * from './functions';
174 changes: 174 additions & 0 deletions react/features/stream-effects/rnnoise/RnnoiseProcessor.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// @flow

/**
* Constant. Rnnoise default sample size, samples of different size won't work.
*/
export const RNNOISE_SAMPLE_LENGTH: number = 480;

/**
* Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4.
*/
const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;

/**
* Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
* memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
* detection) scores.
*/
export default class RnnoiseProcessor {
/**
* Rnnoise context object needed to perform the audio processing.
*/
_context: ?Object;

/**
* State flag, check if the instance was destroyed.
*/
_destroyed: boolean = false;

/**
* WASM interface through which calls to rnnoise are made.
*/
_wasmInterface: Object;

/**
* WASM dynamic memory buffer used as input for rnnoise processing method.
*/
_wasmPcmInput: Object;

/**
* The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
*/
_wasmPcmInputF32Index: number;

/**
* WASM dynamic memory buffer used as output for rnnoise processing method.
*/
_wasmPcmOutput: Object;

/**
* Constructor.
*
* @class
* @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
*/
constructor(wasmInterface: Object) {
// Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
// i.e. in case of exception there are no side effects.
try {
this._wasmInterface = wasmInterface;

// For VAD score purposes only allocate the buffers once and reuse them
this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);

if (!this._wasmPcmInput) {
throw Error('Failed to create wasm input memory buffer!');
}

this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);

if (!this._wasmPcmOutput) {
wasmInterface._free(this._wasmPcmInput);
throw Error('Failed to create wasm output memory buffer!');
}

// The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model
// which is an array of bytes. This means we have to divide it by the size of a float to get the index
// relative to a Float32 Array.
this._wasmPcmInputF32Index = this._wasmPcmInput / 4;

this._context = this._wasmInterface._rnnoise_create();
} catch (error) {
// release can be called even if not all the components were initialized.
this._releaseWasmResources();
throw error;
}
}

/**
* Copy the input PCM Audio Sample to the wasm input buffer.
*
* @param {Float32Array} pcmSample - Array containing 16 bit format PCM sample stored in 32 Floats .
* @returns {void}
*/
_copyPCMSampleToWasmBuffer(pcmSample: Float32Array) {
this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index);
}

/**
* Convert 32 bit Float PCM samples to 16 bit Float PCM samples and store them in 32 bit Floats.
*
* @param {Float32Array} f32Array - Array containing 32 bit PCM samples.
* @returns {void}
*/
_convertTo16BitPCM(f32Array: Float32Array) {
for (const [ index, value ] of f32Array.entries()) {
f32Array[index] = value * 0x7fff;
}
}

/**
* Release resources associated with the wasm context. If something goes downhill here
* i.e. Exception is thrown, there is nothing much we can do.
*
* @returns {void}
*/
_releaseWasmResources() {
// For VAD score purposes only allocate the buffers once and reuse them
if (this._wasmPcmInput) {
this._wasmInterface._free(this._wasmPcmInput);
this._wasmPcmInput = null;
}

if (this._wasmPcmOutput) {
this._wasmInterface._free(this._wasmPcmOutput);
this._wasmPcmOutput = null;
}

if (this._context) {
this._wasmInterface._rnnoise_destroy(this._context);
this._context = null;
}
}

/**
* Release any resources required by the rnnoise context this needs to be called
* before destroying any context that uses the processor.
*
* @returns {void}
*/
destroy() {
// Attempting to release a non initialized processor, do nothing.
if (this._destroyed) {
return;
}

this._releaseWasmResources();

this._destroyed = true;
}

/**
* Calculate the Voice Activity Detection for a raw Float32 PCM sample Array.
* The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library.
*
* @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
* @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 .
*/
calculateAudioFrameVAD(pcmFrame: Float32Array) {
if (this._destroyed) {
throw new Error('RnnoiseProcessor instance is destroyed, please create another one!');
}

const pcmFrameLength = pcmFrame.length;

if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) {
throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was:${pcmFrameLength}`);
}

this._convertTo16BitPCM(pcmFrame);
this._copyPCMSampleToWasmBuffer(pcmFrame);

return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput);
}
}
36 changes: 36 additions & 0 deletions react/features/stream-effects/rnnoise/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// @flow

// The script expects to find the rnnoise webassembly binary in the same public path root; otherwise it won't load.
// During the build phase this needs to be taken care of manually.
import rnnoiseWasmInit from 'rnnoise-wasm';
import RnnoiseProcessor from './RnnoiseProcessor';

export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';
export type { RnnoiseProcessor };

let rnnoiseWasmInterface;
let initializePromise;

/**
 * Builds a new RnnoiseProcessor once the rnnoise wasm module has finished initializing. The initialization is
 * started by the first call and its promise is reused by every later call.
 *
 * @returns {Promise<RnnoiseProcessor>}
 */
export function createRnnoiseProcessor() {
    // Only the first call starts the wasm initialization; later calls chain onto the stored promise.
    initializePromise = initializePromise || new Promise((resolve, reject) => {
        rnnoiseWasmInterface = rnnoiseWasmInit({
            onRuntimeInitialized: () => {
                resolve();
            },
            onAbort: reason => {
                reject(reason);
            }
        });
    });

    return initializePromise.then(() => new RnnoiseProcessor(rnnoiseWasmInterface));
}
Loading

0 comments on commit 761ac6a

Please sign in to comment.