This commit is contained in:
186
react/features/stream-effects/rnnoise/RnnoiseProcessor.ts
Normal file
186
react/features/stream-effects/rnnoise/RnnoiseProcessor.ts
Normal file
@@ -0,0 +1,186 @@
|
||||
/* eslint-disable no-bitwise */
|
||||
|
||||
/**
 * Shape of the rnnoise WebAssembly module as consumed by this file: the generic Emscripten module
 * surface plus the three exported rnnoise entry points. All pointers/handles are plain numbers.
 */
interface IRnnoiseModule extends EmscriptenModule {
    /**
     * Creates an rnnoise context.
     *
     * @returns {number} Handle of the new context, to be passed to the other rnnoise calls.
     */
    _rnnoise_create(): number;

    /**
     * Destroys a context previously obtained from {@code _rnnoise_create}.
     *
     * @param {number} context - Handle of the context to destroy.
     * @returns {void}
     */
    _rnnoise_destroy(context: number): void;

    /**
     * Processes one frame of audio.
     *
     * @param {number} context - Handle of the context to process with.
     * @param {number} input - Address in the wasm heap of the input samples.
     * @param {number} output - Address in the wasm heap where the processed samples are written.
     * @returns {number} Score reported by rnnoise for the frame.
     */
    _rnnoise_process_frame(context: number, input: number, output: number): number;
}
|
||||
|
||||
/**
|
||||
* Constant. Rnnoise default sample size, samples of different size won't work.
|
||||
*/
|
||||
export const RNNOISE_SAMPLE_LENGTH = 480;
|
||||
|
||||
/**
|
||||
* Constant. Size in bytes of the rnnoise input buffer: 480 float32 PCM samples at 4 bytes each, thus 480*4.
|
||||
*/
|
||||
const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;
|
||||
|
||||
/**
|
||||
* Constant. Rnnoise only operates on 44.1Khz float 32 little endian PCM.
|
||||
*/
|
||||
const PCM_FREQUENCY = 44100;
|
||||
|
||||
/**
|
||||
* Scale factor (2^15) used to convert float PCM samples to the 16 bit PCM value range and back.
|
||||
*/
|
||||
const SHIFT_16_BIT_NR = 32768;
|
||||
|
||||
/**
 * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
 * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
 * detection) scores.
 *
 * An instance owns one wasm heap buffer and one rnnoise context; both are released by {@code destroy}, after
 * which the instance can no longer process audio.
 */
export default class RnnoiseProcessor {
    /**
     * Rnnoise context object needed to perform the audio processing. Zero while no context is held.
     */
    private _context = 0;

    /**
     * State flag, check if the instance was destroyed.
     */
    private _destroyed = false;

    /**
     * WASM interface through which calls to rnnoise are made.
     */
    private _wasmInterface: IRnnoiseModule;

    /**
     * WASM dynamic memory buffer used as input for rnnoise processing method. Zero while no buffer is held.
     */
    private _wasmPcmInput = 0;

    /**
     * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
     */
    private _wasmPcmInputF32Index = 0;

    /**
     * Constructor.
     *
     * @class
     * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
     * @throws {Error} If the input buffer or the rnnoise context could not be created. Anything that was
     * allocated before the failure is released first.
     */
    constructor(wasmInterface: IRnnoiseModule) {
        this._wasmInterface = wasmInterface;

        // Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
        // i.e. in case of exception there are no side effects.
        try {
            // Allocate the buffer once and reuse it for every processed frame.
            this._wasmPcmInput = wasmInterface._malloc(RNNOISE_BUFFER_SIZE);

            if (!this._wasmPcmInput) {
                throw new Error('Failed to create wasm input memory buffer!');
            }

            // Byte address -> index into the Float32 view of the heap (4 bytes per element).
            this._wasmPcmInputF32Index = this._wasmPcmInput >> 2;

            this._context = wasmInterface._rnnoise_create();

            if (!this._context) {
                throw new Error('Failed to create rnnoise context!');
            }
        } catch (error) {
            // release can be called even if not all the components were initialized.
            this.destroy();

            throw error;
        }
    }

    /**
     * Release resources associated with the wasm context. If something goes downhill here
     * i.e. Exception is thrown, there is nothing much we can do.
     *
     * Each handle is cleared once released, so calling this method more than once never frees the same
     * buffer or destroys the same context twice.
     *
     * @returns {void}
     */
    _releaseWasmResources(): void {
        if (this._wasmPcmInput) {
            this._wasmInterface._free(this._wasmPcmInput);
            this._wasmPcmInput = 0;
            this._wasmPcmInputF32Index = 0;
        }

        if (this._context) {
            this._wasmInterface._rnnoise_destroy(this._context);
            this._context = 0;
        }
    }

    /**
     * Rnnoise can only operate on a certain PCM array size.
     *
     * @returns {number} - The PCM sample array size as required by rnnoise.
     */
    getSampleLength(): number {
        return RNNOISE_SAMPLE_LENGTH;
    }

    /**
     * Rnnoise can only operate on a certain format of PCM sample namely float 32 44.1Khz.
     *
     * @returns {number} - PCM sample frequency as required by rnnoise.
     */
    getRequiredPCMFrequency(): number {
        return PCM_FREQUENCY;
    }

    /**
     * Release any resources required by the rnnoise context this needs to be called
     * before destroying any context that uses the processor. Calling it again is a no-op.
     *
     * @returns {void}
     */
    destroy(): void {
        // Already destroyed, do nothing.
        if (this._destroyed) {
            return;
        }

        this._releaseWasmResources();

        this._destroyed = true;
    }

    /**
     * Calculate the Voice Activity Detection for a raw Float32 PCM sample Array.
     * The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library.
     * The provided frame is left unmodified.
     *
     * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
     * @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90.
     * @throws {Error} If the processor was destroyed.
     * @throws {RangeError} If the frame holds fewer samples than rnnoise requires.
     */
    calculateAudioFrameVAD(pcmFrame: Float32Array): number {
        return this.processAudioFrame(pcmFrame);
    }

    /**
     * Process an audio frame, optionally denoising the input pcmFrame and returning the Voice Activity Detection score
     * for a raw Float32 PCM sample Array.
     * The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library.
     * Only the first 480 samples of a longer array are read (and written back when denoising).
     *
     * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples. Parameter is also used as output
     * when {@code shouldDenoise} is true.
     * @param {boolean} shouldDenoise - Should the denoised frame be returned in pcmFrame.
     * @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 .
     * @throws {Error} If the processor was destroyed (its wasm buffer and context are no longer valid).
     * @throws {RangeError} If the frame holds fewer samples than rnnoise requires.
     */
    processAudioFrame(pcmFrame: Float32Array, shouldDenoise: boolean = false): number {
        // Never touch the wasm heap through a buffer/context that was already released.
        if (this._destroyed || !this._context || !this._wasmPcmInput) {
            throw new Error('RnnoiseProcessor instance is destroyed and can no longer be used!');
        }

        // Missing samples would read as undefined and end up as NaN in the rnnoise input.
        if (pcmFrame.length < RNNOISE_SAMPLE_LENGTH) {
            throw new RangeError(
                `Expected a PCM frame of ${RNNOISE_SAMPLE_LENGTH} samples, received ${pcmFrame.length}!`);
        }

        const heapOffset = this._wasmPcmInputF32Index;
        const inputHeap = this._wasmInterface.HEAPF32;

        // Scale the float samples up to the 16 bit PCM value range before handing them to rnnoise.
        for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
            inputHeap[heapOffset + i] = pcmFrame[i] * SHIFT_16_BIT_NR;
        }

        // Use the same buffer for input/output, rnnoise supports this behavior
        const vadScore = this._wasmInterface._rnnoise_process_frame(
            this._context,
            this._wasmPcmInput,
            this._wasmPcmInput
        );

        // Rnnoise denoises the frame by default but we can avoid unnecessary operations if the calling
        // client doesn't use the denoised frame.
        if (shouldDenoise) {
            // Look the heap view up again after the call rather than assuming the earlier view is still current.
            const outputHeap = this._wasmInterface.HEAPF32;

            // Scale the samples back down to the original float range.
            for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
                pcmFrame[i] = outputHeap[heapOffset + i] / SHIFT_16_BIT_NR;
            }
        }

        return vadScore;
    }
}
|
||||
24
react/features/stream-effects/rnnoise/index.ts
Normal file
24
react/features/stream-effects/rnnoise/index.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
// Script expects to find rnnoise webassembly binary in the same public path root, otherwise it won't load
|
||||
// During the build phase this needs to be taken care of manually
|
||||
// @ts-expect-error
|
||||
import { createRNNWasmModule } from '@jitsi/rnnoise-wasm';
|
||||
|
||||
import RnnoiseProcessor from './RnnoiseProcessor';
|
||||
|
||||
export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';
|
||||
export type { RnnoiseProcessor };
|
||||
|
||||
/**
 * Pending or completed load of the rnnoise wasm module, shared by every processor created through
 * {@code createRnnoiseProcessor}. Undefined until the first call, and again after a failed load.
 */
let rnnoiseModule: Promise<any> | undefined;

/**
 * Creates a new instance of RnnoiseProcessor.
 *
 * The wasm module is requested on the first call and reused by later calls. If loading fails the cached
 * promise is dropped, so that a later call attempts the load again instead of receiving the same failure.
 *
 * @returns {Promise<RnnoiseProcessor>} Resolves with a new processor; rejects if the module fails to load or
 * the processor cannot be constructed.
 */
export function createRnnoiseProcessor(): Promise<RnnoiseProcessor> {
    let modulePromise = rnnoiseModule;

    if (!modulePromise) {
        const loading: Promise<any> = createRNNWasmModule();

        rnnoiseModule = modulePromise = loading;

        // Forget a failed load, but only if it is still the cached one. The rejection itself is still
        // delivered to callers through the promise returned below.
        loading.catch(() => {
            if (rnnoiseModule === loading) {
                rnnoiseModule = undefined;
            }
        });
    }

    return modulePromise.then(mod => new RnnoiseProcessor(mod));
}
|
||||
Reference in New Issue
Block a user