From 761ac6a730068bc3372065edfbb3a93987894207 Mon Sep 17 00:00:00 2001 From: Andrei Gavrilescu <51706180+andrei-gavrilescu@users.noreply.github.com> Date: Fri, 4 Oct 2019 13:55:18 +0300 Subject: [PATCH] feat: integrate rnnoise based service for voice activity (VAD) detection --- Makefile | 12 +- package-lock.json | 33 ++ package.json | 1 + react/features/rnnoise/functions.js | 44 +++ react/features/rnnoise/index.js | 2 + .../rnnoise/RnnoiseProcessor.js | 174 +++++++++++ .../features/stream-effects/rnnoise/index.js | 36 +++ .../features/vad-reporter/TrackVADEmitter.js | 258 ++++++++++++++++ react/features/vad-reporter/VADEvents.js | 7 + .../vad-reporter/VADReportingService.js | 284 ++++++++++++++++++ react/features/vad-reporter/logger.js | 5 + webpack.config.js | 37 ++- 12 files changed, 888 insertions(+), 5 deletions(-) create mode 100644 react/features/rnnoise/functions.js create mode 100644 react/features/rnnoise/index.js create mode 100644 react/features/stream-effects/rnnoise/RnnoiseProcessor.js create mode 100644 react/features/stream-effects/rnnoise/index.js create mode 100644 react/features/vad-reporter/TrackVADEmitter.js create mode 100644 react/features/vad-reporter/VADEvents.js create mode 100644 react/features/vad-reporter/VADReportingService.js create mode 100644 react/features/vad-reporter/logger.js diff --git a/Makefile b/Makefile index 969687fef8e0..bcbe3c69afd1 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ CLEANCSS = ./node_modules/.bin/cleancss DEPLOY_DIR = libs LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/ LIBFLAC_DIR = node_modules/libflacjs/dist/min/ +RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/ NODE_SASS = ./node_modules/.bin/node-sass NPM = npm OUTPUT_DIR = . 
@@ -20,7 +21,7 @@ compile: clean: rm -fr $(BUILD_DIR) -deploy: deploy-init deploy-appbundle deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local +deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local deploy-init: rm -fr $(DEPLOY_DIR) @@ -47,6 +48,8 @@ deploy-appbundle: $(BUILD_DIR)/analytics-ga.min.map \ $(BUILD_DIR)/video-blur-effect.min.js \ $(BUILD_DIR)/video-blur-effect.min.map \ + $(BUILD_DIR)/rnnoise-processor.min.js \ + $(BUILD_DIR)/rnnoise-processor.min.map \ $(DEPLOY_DIR) deploy-lib-jitsi-meet: @@ -63,6 +66,11 @@ deploy-libflac: $(LIBFLAC_DIR)/libflac4-1.3.2.min.js.mem \ $(DEPLOY_DIR) +deploy-rnnoise-binary: + cp \ + $(RNNOISE_WASM_DIR)/rnnoise.wasm \ + $(DEPLOY_DIR) + deploy-css: $(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \ $(CLEANCSS) $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \ @@ -71,7 +79,7 @@ deploy-css: deploy-local: ([ ! -x deploy-local.sh ] || ./deploy-local.sh) -dev: deploy-init deploy-css deploy-lib-jitsi-meet deploy-libflac +dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac $(WEBPACK_DEV_SERVER) source-package: diff --git a/package-lock.json b/package-lock.json index 535c72337c6d..b0201b8e8e2e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16413,6 +16413,39 @@ "inherits": "^2.0.1" } }, + "rnnoise-wasm": { + "version": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615", + "from": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615" + }, + "rollup-plugin-visualizer": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/rollup-plugin-visualizer/-/rollup-plugin-visualizer-1.1.1.tgz", + "integrity": "sha512-7xkSKp+dyJmSC7jg2LXqViaHuOnF1VvIFCnsZEKjrgT5ZVyiLLSbeszxFcQSfNJILphqgAEmWAUz0Z4xYScrRw==", + "optional": true, + "requires": { + "mkdirp": "^0.5.1", + "opn": "^5.4.0", + "source-map": "^0.7.3", + "typeface-oswald": "0.0.54" + }, + "dependencies": { + "opn": { + "version": 
"5.5.0", + "resolved": "https://registry.npmjs.org/opn/-/opn-5.5.0.tgz", + "integrity": "sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==", + "optional": true, + "requires": { + "is-wsl": "^1.1.0" + } + }, + "source-map": { + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", + "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", + "optional": true + } + } + }, "rsvp": { "version": "4.8.5", "resolved": "https://registry.npmjs.org/rsvp/-/rsvp-4.8.5.tgz", diff --git a/package.json b/package.json index d5d556d74c02..58cb65fb4275 100644 --- a/package.json +++ b/package.json @@ -87,6 +87,7 @@ "react-transition-group": "2.4.0", "redux": "4.0.4", "redux-thunk": "2.2.0", + "rnnoise-wasm": "github:jitsi/rnnoise-wasm.git#db96d11f175a22ef56c7db1ba9550835b716e615", "styled-components": "3.4.9", "util": "0.12.1", "uuid": "3.1.0", diff --git a/react/features/rnnoise/functions.js b/react/features/rnnoise/functions.js new file mode 100644 index 000000000000..3dad56cd64f5 --- /dev/null +++ b/react/features/rnnoise/functions.js @@ -0,0 +1,44 @@ +// @flow + +import { getJitsiMeetGlobalNS, loadScript } from '../base/util'; + +let loadRnnoisePromise; + +/** + * Returns promise that resolves with a RnnoiseProcessor instance. + * + * @returns {Promise} - Resolves with the blur effect instance. + */ +export function createRnnoiseProcessorPromise() { + // Subsequent calls should not attempt to load the script multiple times. 
+ if (!loadRnnoisePromise) { + loadRnnoisePromise = loadScript('libs/rnnoise-processor.min.js'); + } + + return loadRnnoisePromise.then(() => { + const ns = getJitsiMeetGlobalNS(); + + if (ns?.effects?.rnnoise?.createRnnoiseProcessor) { + return ns.effects.rnnoise.createRnnoiseProcessor(); + } + + throw new Error('Rnnoise module binding createRnnoiseProcessor not found!'); + }); +} + +/** + * Get the accepted sample length for the rnnoise library. We might want to expose it with flow libdefs. + * + * @returns {number} + */ +export function getSampleLength() { + const ns = getJitsiMeetGlobalNS(); + + const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH; + + if (!rnnoiseSample) { + throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!'); + } + + return rnnoiseSample; +} diff --git a/react/features/rnnoise/index.js b/react/features/rnnoise/index.js new file mode 100644 index 000000000000..9142d34e1873 --- /dev/null +++ b/react/features/rnnoise/index.js @@ -0,0 +1,2 @@ + +export * from './functions'; diff --git a/react/features/stream-effects/rnnoise/RnnoiseProcessor.js b/react/features/stream-effects/rnnoise/RnnoiseProcessor.js new file mode 100644 index 000000000000..fe565ae85aae --- /dev/null +++ b/react/features/stream-effects/rnnoise/RnnoiseProcessor.js @@ -0,0 +1,174 @@ +// @flow + +/** + * Constant. Rnnoise default sample size, samples of different size won't work. + */ +export const RNNOISE_SAMPLE_LENGTH: number = 480; + +/** + * Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4. + */ +const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4; + +/** + * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly + * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity + * detection) scores. 
+ */ +export default class RnnoiseProcessor { + /** + * Rnnoise context object needed to perform the audio processing. + */ + _context: ?Object; + + /** + * State flag, check if the instance was destroyed. + */ + _destroyed: boolean = false; + + /** + * WASM interface through which calls to rnnoise are made. + */ + _wasmInterface: Object; + + /** + * WASM dynamic memory buffer used as input for rnnoise processing method. + */ + _wasmPcmInput: Object; + + /** + * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer. + */ + _wasmPcmInputF32Index: number; + + /** + * WASM dynamic memory buffer used as output for rnnoise processing method. + */ + _wasmPcmOutput: Object; + + /** + * Constructor. + * + * @class + * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality. + */ + constructor(wasmInterface: Object) { + // Considering that we deal with dynamic allocated memory employ exception safety strong guarantee + // i.e. in case of exception there are no side effects. + try { + this._wasmInterface = wasmInterface; + + // For VAD score purposes only allocate the buffers once and reuse them + this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE); + + if (!this._wasmPcmInput) { + throw Error('Failed to create wasm input memory buffer!'); + } + + this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE); + + if (!this._wasmPcmOutput) { + wasmInterface._free(this._wasmPcmInput); + throw Error('Failed to create wasm output memory buffer!'); + } + + // The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model + // which is an array of bytes. This means we have to divide it by the size of a float to get the index + // relative to a Float32 Array. 
+ this._wasmPcmInputF32Index = this._wasmPcmInput / 4; + + this._context = this._wasmInterface._rnnoise_create(); + } catch (error) { + // release can be called even if not all the components were initialized. + this._releaseWasmResources(); + throw error; + } + } + + /** + * Copy the input PCM Audio Sample to the wasm input buffer. + * + * @param {Float32Array} pcmSample - Array containing 16 bit format PCM sample stored in 32 Floats . + * @returns {void} + */ + _copyPCMSampleToWasmBuffer(pcmSample: Float32Array) { + this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index); + } + + /** + * Convert 32 bit Float PCM samples to 16 bit Float PCM samples and store them in 32 bit Floats. + * + * @param {Float32Array} f32Array - Array containing 32 bit PCM samples. + * @returns {void} + */ + _convertTo16BitPCM(f32Array: Float32Array) { + for (const [ index, value ] of f32Array.entries()) { + f32Array[index] = value * 0x7fff; + } + } + + /** + * Release resources associated with the wasm context. If something goes downhill here + * i.e. Exception is thrown, there is nothing much we can do. + * + * @returns {void} + */ + _releaseWasmResources() { + // For VAD score purposes only allocate the buffers once and reuse them + if (this._wasmPcmInput) { + this._wasmInterface._free(this._wasmPcmInput); + this._wasmPcmInput = null; + } + + if (this._wasmPcmOutput) { + this._wasmInterface._free(this._wasmPcmOutput); + this._wasmPcmOutput = null; + } + + if (this._context) { + this._wasmInterface._rnnoise_destroy(this._context); + this._context = null; + } + } + + /** + * Release any resources required by the rnnoise context this needs to be called + * before destroying any context that uses the processor. + * + * @returns {void} + */ + destroy() { + // Attempting to release a non initialized processor, do nothing. 
+ if (this._destroyed) { + return; + } + + this._releaseWasmResources(); + + this._destroyed = true; + } + + /** + * Calculate the Voice Activity Detection for a raw Float32 PCM sample Array. + * The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library. + * + * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples. + * @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 . + */ + calculateAudioFrameVAD(pcmFrame: Float32Array) { + if (this._destroyed) { + throw new Error('RnnoiseProcessor instance is destroyed, please create another one!'); + } + + const pcmFrameLength = pcmFrame.length; + + if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) { + throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was:${pcmFrameLength}`); + } + + this._convertTo16BitPCM(pcmFrame); + this._copyPCMSampleToWasmBuffer(pcmFrame); + + return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput); + } +} diff --git a/react/features/stream-effects/rnnoise/index.js b/react/features/stream-effects/rnnoise/index.js new file mode 100644 index 000000000000..23fadefa4baf --- /dev/null +++ b/react/features/stream-effects/rnnoise/index.js @@ -0,0 +1,36 @@ +// @flow + +// Script expects to find rnnoise webassembly binary in the same public path root, otherwise it won't load +// During the build phase this needs to be taken care of manually +import rnnoiseWasmInit from 'rnnoise-wasm'; +import RnnoiseProcessor from './RnnoiseProcessor'; + +export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor'; +export type { RnnoiseProcessor }; + +let rnnoiseWasmInterface; +let initializePromise; + +/** + * Creates a new instance of RnnoiseProcessor. 
+ * + * @returns {Promise} + */ +export function createRnnoiseProcessor() { + if (!initializePromise) { + initializePromise = new Promise((resolve, reject) => { + rnnoiseWasmInterface = rnnoiseWasmInit({ + onRuntimeInitialized() { + resolve(); + }, + onAbort(reason) { + reject(reason); + } + }); + }); + } + + return initializePromise.then( + () => new RnnoiseProcessor(rnnoiseWasmInterface) + ); +} diff --git a/react/features/vad-reporter/TrackVADEmitter.js b/react/features/vad-reporter/TrackVADEmitter.js new file mode 100644 index 000000000000..0022d8fcba6c --- /dev/null +++ b/react/features/vad-reporter/TrackVADEmitter.js @@ -0,0 +1,258 @@ +// @flow + +import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/'; +import EventEmitter from 'events'; +import JitsiMeetJS from '../base/lib-jitsi-meet'; +import logger from './logger'; +import { VAD_SCORE_PUBLISHED } from './VADEvents'; + +/** + * The structure used by TrackVADEmitter to relay a score + */ +export type VADScore = { + + /** + * Device ID associated with the VAD score + */ + deviceId: string, + + /** + * The PCM score from 0 - 1 i.e. 0.60 + */ + score: number, + + /** + * Epoch time at which PCM was recorded + */ + timestamp: number + +}; + +/** + * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode. + * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM. + * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the + * score is published to consumers via an EventEmitter. + * After work is done with this service the destroy method needs to be called for a proper cleanup. + */ +export default class TrackVADEmitter extends EventEmitter { + /** + * The AudioContext instance. + */ + _audioContext: AudioContext; + + /** + * The MediaStreamAudioSourceNode instance. + */ + _audioSource: MediaStreamAudioSourceNode; + + /** + * The ScriptProcessorNode instance. 
+ */ + _audioProcessingNode: ScriptProcessorNode; + + /** + * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback + */ + _bufferResidue: Float32Array; + + /** + * State flag, check if the instance was destroyed + */ + _destroyed: boolean = false; + + /** + * The JitsiLocalTrack instance. + */ + _localTrack: Object; + + /** + * Device ID of the target microphone. + */ + _micDeviceId: string; + + /** + * Callback function that will be called by the ScriptProcessorNode with raw PCM data, depending on the set sample + * rate. + */ + _onAudioProcess: (audioEvent: Object) => void; + + /** + * Sample rate of the ScriptProcessorNode. + */ + _procNodeSampleRate: number; + + /** + * Rnnoise adapter that allows us to calculate VAD score for PCM samples + */ + _rnnoiseProcessor: Object; + + /** + * PCM Sample size expected by the RnnoiseProcessor instance. + */ + _rnnoiseSampleSize: number; + + /** + * Constructor. + * + * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024, + * 2048, 4096, 8192, 16384. Passing other values will default to closest neighbor. + * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score + * for PCM samples. + * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
+ */ + constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) { + super(); + this._procNodeSampleRate = procNodeSampleRate; + this._rnnoiseProcessor = rnnoiseProcessor; + this._localTrack = jitsiLocalTrack; + this._micDeviceId = jitsiLocalTrack.getDeviceId(); + this._bufferResidue = new Float32Array([]); + this._audioContext = new AudioContext(); + this._rnnoiseSampleSize = getSampleLength(); + this._onAudioProcess = this._onAudioProcess.bind(this); + + this._initializeAudioContext(); + this._connectAudioGraph(); + + logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`); + } + + /** + * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter. + * + * @param {string} micDeviceId - Target microphone device id. + * @param {number} procNodeSampleRate - Sample rate of the proc node. + * @returns {Promise} - Promise resolving in a new instance of TrackVADEmitter. + */ + static async create(micDeviceId: string, procNodeSampleRate: number) { + let rnnoiseProcessor = null; + let localTrack = null; + + try { + logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`); + + rnnoiseProcessor = await createRnnoiseProcessorPromise(); + localTrack = await JitsiMeetJS.createLocalTracks({ + devices: [ 'audio' ], + micDeviceId + }); + + // We only expect one audio track when specifying a device id. + if (!localTrack[0]) { + throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`); + } + + return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]); + } catch (error) { + logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`); + + if (rnnoiseProcessor) { + rnnoiseProcessor.destroy(); + } + + if (localTrack) { + localTrack.stopStream(); + } + + throw error; + } + } + + /** + * Sets up the audio graph in the AudioContext. 
+ * + * @returns {Promise} + */ + _initializeAudioContext() { + this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream); + + // TODO AudioProcessingNode is deprecated, check and replace with alternative. + // We don't need stereo for determining the VAD score so we create a single channel processing node. + this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1); + this._audioProcessingNode.onaudioprocess = this._onAudioProcess; + } + + /** + * ScriptProcessorNode callback, the input parameter contains the PCM audio that is then sent to rnnoise. + * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple + * of 480 thus after each _onAudioProcess callback there will remain a PCM buffer residue equal + * to _procNodeSampleRate % 480 which will be added to the next sample buffer and so on. + * + * @param {AudioProcessingEvent} audioEvent - Audio event. + * @returns {void} + */ + _onAudioProcess(audioEvent: Object) { + // Prepend the residue PCM buffer from the previous process callback. + const inData = audioEvent.inputBuffer.getChannelData(0); + const completeInData = [ ...this._bufferResidue, ...inData ]; + const sampleTimestamp = Date.now(); + + let i = 0; + + for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) { + const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize); + const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample); + + this.emit(VAD_SCORE_PUBLISHED, { + timestamp: sampleTimestamp, + score: vadScore, + deviceId: this._micDeviceId + }); + } + + this._bufferResidue = completeInData.slice(i, completeInData.length); + } + + /** + * Connects the nodes in the AudioContext to start the flow of audio data.
+ * + * @returns {void} + */ + _connectAudioGraph() { + this._audioSource.connect(this._audioProcessingNode); + this._audioProcessingNode.connect(this._audioContext.destination); + } + + /** + * Disconnects the nodes in the AudioContext. + * + * @returns {void} + */ + _disconnectAudioGraph() { + // Even though we disconnect the processing node it seems that some callbacks remain queued, + // resulting in calls with an uninitialized context. + // eslint-disable-next-line no-empty-function + this._audioProcessingNode.onaudioprocess = () => {}; + this._audioProcessingNode.disconnect(); + this._audioSource.disconnect(); + } + + /** + * Cleanup potentially acquired resources. + * + * @returns {void} + */ + _cleanupResources() { + logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`); + + this._disconnectAudioGraph(); + this._localTrack.stopStream(); + this._rnnoiseProcessor.destroy(); + } + + /** + * Destroy TrackVADEmitter instance (release resources and stop callbacks). + * + * @returns {void} + */ + destroy() { + if (this._destroyed) { + return; + } + + logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`); + this._cleanupResources(); + this._destroyed = true; + } +} diff --git a/react/features/vad-reporter/VADEvents.js b/react/features/vad-reporter/VADEvents.js new file mode 100644 index 000000000000..ff16eeeb947b --- /dev/null +++ b/react/features/vad-reporter/VADEvents.js @@ -0,0 +1,7 @@ +// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise. +// The generated objects are of type VADScore +export const VAD_SCORE_PUBLISHED = 'vad-score-published'; + +// Event generated by VADReportingService when it finishes creating a VAD report for the monitored devices. +// The generated objects are of type Array, one score for each monitored device.
+export const VAD_REPORT_PUBLISHED = 'vad-report-published'; diff --git a/react/features/vad-reporter/VADReportingService.js b/react/features/vad-reporter/VADReportingService.js new file mode 100644 index 000000000000..84c1e86428cb --- /dev/null +++ b/react/features/vad-reporter/VADReportingService.js @@ -0,0 +1,284 @@ +// @flow + +import EventEmitter from 'events'; +import logger from './logger'; +import TrackVADEmitter from './TrackVADEmitter'; +import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './VADEvents'; +import type { VADScore } from './TrackVADEmitter'; +export type { VADScore }; + +/** + * Sample rate used by TrackVADEmitter, this value determines how often the ScriptProcessorNode is going to call the + * process audio function and with what sample size. + * Basically lower values mean more callbacks with lower processing times, bigger values mean fewer callbacks with longer + * processing times. This value is somewhere in the middle, so we strike a balance between flooding with callbacks + * and processing time. Possible values 256, 512, 1024, 2048, 4096, 8192, 16384. Passing other values will default + * to closest neighbor. + */ +const SCRIPT_NODE_SAMPLE_RATE = 4096; + +/** + * Context that contains the emitter and additional information about the device. + */ +type VADDeviceContext = { + + /** + * MediaDeviceInfo for associated context + */ + deviceInfo: MediaDeviceInfo, + + /** + * Array with VAD scores published from the emitter. + */ + scoreArray: Array, + + /** + * TrackVADEmitter associated with media device + */ + vadEmitter: TrackVADEmitter +}; + +/** + * The structure used by VADReportingService to relay a score report + */ +export type VADReportScore = { + + /** + * Device ID associated with the VAD score + */ + deviceId: string, + + /** + * The PCM score from 0 - 1 i.e. 0.60 + */ + score: number, + + /** + * Epoch time at which PCM was recorded + */ + timestamp: number +}; + + +/** + * Voice activity detection reporting service.
The service creates TrackVADEmitters for the provided devices and + * publishes an average of their VAD score over the specified interval via EventEmitter. + * The service is not reusable: if destroyed, a new one needs to be created, i.e. when a new device is added to the system + * a new service needs to be created and the old discarded. + */ +export default class VADReportingService extends EventEmitter { + /** + * Map containing context for devices currently being monitored by the reporting service. + */ + _contextMap: Map; + + /** + * State flag, check if the instance was destroyed. + */ + _destroyed: boolean = false; + + /** + * Delay at which to publish VAD score for monitored devices. + */ + _intervalDelay: number; + + /** + * Identifier for the interval publishing stats on the set interval. + */ + _intervalId: ?IntervalID; + + /** + * Constructor. + * + * @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices. + * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score. + */ + constructor(intervalDelay: number) { + super(); + this._contextMap = new Map(); + this._intervalDelay = intervalDelay; + + logger.log(`Constructed VADReportingService with publish interval of: ${intervalDelay}`); + } + + /** + * Factory method that creates the TrackVADEmitters for the associated array of devices and instantiates + * a VADReportingService. + * + * @param {Array} micDeviceList - Device list that is monitored inside the service. + * @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices. + * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score.
+ * + * @returns {Promise} + */ + static create(micDeviceList: Array, intervalDelay: number) { + const vadReportingService = new VADReportingService(intervalDelay); + const emitterPromiseArray = []; + + // Create a TrackVADEmitter for each provided audioinput device. + for (const micDevice of micDeviceList) { + if (micDevice.kind !== 'audioinput') { + logger.warn(`Provided device ${micDevice.label} -> ${micDevice.deviceId}, is not audioinput ignoring!`); + + return; + } + + logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`); + + const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => { + emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService)); + + return { + vadEmitter: emitter, + deviceInfo: micDevice, + scoreArray: [] + }; + }); + + emitterPromiseArray.push(emitterPromise); + } + + // Once all the TrackVADEmitter promises are resolved check if all of them resolved properly if not reject + // the promise and clear the already created emitters. + // $FlowFixMe - allSettled is not part of flow prototype even though it's a valid Promise function + return Promise.allSettled(emitterPromiseArray).then(outcomeArray => { + const vadContextArray = []; + const rejectedEmitterPromiseArray = []; + + for (const outcome of outcomeArray) { + if (outcome.status === 'fulfilled') { + vadContextArray.push(outcome.value); + } else { + // Promise was rejected. + logger.error(`Create TrackVADEmitter promise failed with ${outcome.reason}`); + + rejectedEmitterPromiseArray.push(outcome); + } + } + + // Check if there were any rejected promises and clear the already created ones list. 
+ if (rejectedEmitterPromiseArray.length > 0) { + logger.error('Cleaning up remaining VADDeviceContext, due to create fail!'); + + for (const context of vadContextArray) { + context.vadEmitter.destroy(); + } + + // Reject create promise if one emitter failed to instantiate, we might want to just ignore it, + // leaving it like this for now + throw new Error('Create VADReportingService failed due to TrackVADEmitter creation issues!'); + } + + vadReportingService._setVADContextArray(vadContextArray); + vadReportingService._startPublish(); + + return vadReportingService; + }); + } + + /** + * Destroy TrackVADEmitters and clear the context map. + * + * @returns {void} + */ + _clearContextMap() { + for (const vadContext of this._contextMap.values()) { + vadContext.vadEmitter.destroy(); + } + this._contextMap.clear(); + } + + /** + * Set the watched device contexts. + * + * @param {Array} vadContextArray - List of mics. + * @returns {void} + */ + _setVADContextArray(vadContextArray: Array): void { + for (const vadContext of vadContextArray) { + this._contextMap.set(vadContext.deviceInfo.deviceId, vadContext); + } + } + + /** + * Start the setInterval reporting process. + * + * @returns {void}. + */ + _startPublish() { + logger.log('VADReportingService started publishing.'); + this._intervalId = setInterval(() => { + this._reportVadScore(); + }, this._intervalDelay); + } + + /** + * Function called at set interval with selected compute. The result will be published on the set callback. + * + * @returns {void} + */ + _reportVadScore() { + const vadComputeScoreArray = []; + const computeTimestamp = Date.now(); + + // Go through each device and compute the cumulative VAD score.
+ + for (const [ deviceId, vadContext ] of this._contextMap) { + const nrOfVADScores = vadContext.scoreArray.length; + let vadSum = 0; + + vadContext.scoreArray.forEach(vadScore => { + vadSum += vadScore.score; + }); + + // TODO For now we just calculate the average score for each device, more compute algorithms will be added. + const avgVAD = vadSum / nrOfVADScores; + + vadContext.scoreArray = []; + + vadComputeScoreArray.push({ + timestamp: computeTimestamp, + score: avgVAD, + deviceId + }); + } + + this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray); + } + + /** + * Callback method passed to vad emitters in order to publish their score. + * + * @param {VADScore} vadScore - Mic publishing the score. + * @returns {void} + */ + _devicePublishVADScore(vadScore: VADScore) { + const context = this._contextMap.get(vadScore.deviceId); + + if (context) { + context.scoreArray.push(vadScore); + } + } + + /** + * Destroy the VADReportingService, stops the setInterval reporting, destroys the emitters and clears the map. + * After this call the instance is no longer usable. + * + * @returns {void}. 
+ */ + destroy() { + if (this._destroyed) { + return; + } + + logger.log('Destroying VADReportingService.'); + + if (this._intervalId) { + clearInterval(this._intervalId); + this._intervalId = null; + } + this._clearContextMap(); + this._destroyed = true; + } + +} diff --git a/react/features/vad-reporter/logger.js b/react/features/vad-reporter/logger.js new file mode 100644 index 000000000000..859276ef7cd1 --- /dev/null +++ b/react/features/vad-reporter/logger.js @@ -0,0 +1,5 @@ +// @flow + +import { getLogger } from '../base/logging/functions'; + +export default getLogger('features/vad-reporter'); diff --git a/webpack.config.js b/webpack.config.js index 5edd336afbc5..d8a42f50e7f2 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -214,16 +214,46 @@ module.exports = [ }, performance: getPerformanceHints(5 * 1024) }), + + // Because both video-blur-effect and rnnoise-processor modules are loaded + // in a lazy manner using the loadScript function with a hard coded name, + // i.e.loadScript('libs/rnnoise-processor.min.js'), webpack dev server + // won't know how to properly load them using the default config filename + // and sourceMapFilename parameters which target libs without .min in dev + // mode. Thus we change these modules to have the same filename in both + // prod and dev mode. 
Object.assign({}, config, { entry: { 'video-blur-effect': './react/features/stream-effects/blur/index.js' }, output: Object.assign({}, config.output, { library: [ 'JitsiMeetJS', 'app', 'effects' ], - libraryTarget: 'window' + libraryTarget: 'window', + filename: '[name].min.js', + sourceMapFilename: '[name].min.map' }), performance: getPerformanceHints(1 * 1024 * 1024) }), + + Object.assign({}, config, { + entry: { + 'rnnoise-processor': './react/features/stream-effects/rnnoise/index.js' + }, + node: { + // Emscripten generated glue code "rnnoise.js" expects node fs module, + // we need to specify this parameter so webpack knows how to properly + // interpret it when encountered. + fs: 'empty' + }, + output: Object.assign({}, config.output, { + library: [ 'JitsiMeetJS', 'app', 'effects', 'rnnoise' ], + libraryTarget: 'window', + filename: '[name].min.js', + sourceMapFilename: '[name].min.map' + }), + performance: getPerformanceHints(30 * 1024) + }), + Object.assign({}, config, { entry: { 'external_api': './modules/API/external/index.js' @@ -249,7 +279,8 @@ function devServerProxyBypass({ path }) { if (path.startsWith('/css/') || path.startsWith('/doc/') || path.startsWith('/fonts/') || path.startsWith('/images/') || path.startsWith('/sounds/') - || path.startsWith('/static/')) { + || path.startsWith('/static/') + || path.endsWith('.wasm')) { return path; } @@ -258,7 +289,7 @@ function devServerProxyBypass({ path }) { /* eslint-disable array-callback-return, indent */ if ((Array.isArray(configs) ? configs : Array(configs)).some(c => { - if (path.startsWith(c.output.publicPath)) { + if (path.startsWith(c.output.publicPath)) { if (!minimize) { // Since webpack-dev-server is serving non-minimized // artifacts, serve them even if the minimized ones are