This commit is contained in:
@@ -0,0 +1,277 @@
|
||||
import { INoiseSuppressionConfig } from '../../base/config/configType';
|
||||
import { getBaseUrl } from '../../base/util/helpers';
|
||||
|
||||
import logger from './logger';
|
||||
|
||||
interface IKrispState {

    // Krisp-provided AudioWorkletNode which performs the actual filtering.
    filterNode?: AudioWorkletNode;

    // Set to true once the Krisp filter signals readiness via its callback.
    filterNodeReady: boolean;

    // Lazily loaded Krisp SDK instance (typed as any because the SDK is imported at runtime).
    sdk: any;

    // Set to true once sdk.init() has resolved.
    sdkInitialized: boolean;
}

// Module-level state shared across NoiseSuppressionEffect instances so the Krisp SDK
// and its filter node are created only once and then reused.
const krispState: IKrispState = {
    filterNode: undefined,
    filterNodeReady: false,
    sdk: undefined,
    sdkInitialized: false
};

// Shared AudioContext, created lazily on the first startEffect() call and suspended on stopEffect().
let audioContext: AudioContext;
|
||||
|
||||
/**
|
||||
* Class Implementing the effect interface expected by a JitsiLocalTrack.
|
||||
* Effect applies rnnoise denoising on a audio JitsiLocalTrack.
|
||||
*/
|
||||
export class NoiseSuppressionEffect {
|
||||
|
||||
/**
|
||||
* Source that will be attached to the track affected by the effect.
|
||||
*/
|
||||
private _audioSource: MediaStreamAudioSourceNode;
|
||||
|
||||
/**
|
||||
* Destination that will contain denoised audio from the audio worklet.
|
||||
*/
|
||||
private _audioDestination: MediaStreamAudioDestinationNode;
|
||||
|
||||
/**
|
||||
* `AudioWorkletProcessor` associated node.
|
||||
*/
|
||||
private _noiseSuppressorNode?: AudioWorkletNode;
|
||||
|
||||
/**
|
||||
* Audio track extracted from the original MediaStream to which the effect is applied.
|
||||
*/
|
||||
private _originalMediaTrack: MediaStreamTrack;
|
||||
|
||||
/**
|
||||
* Noise suppressed audio track extracted from the media destination node.
|
||||
*/
|
||||
private _outputMediaTrack: MediaStreamTrack;
|
||||
|
||||
/**
|
||||
* Configured options for noise suppression.
|
||||
*/
|
||||
private _options?: INoiseSuppressionConfig;
|
||||
|
||||
/**
|
||||
* Instantiates a noise suppressor audio effect which will use either rnnoise or krisp.
|
||||
*
|
||||
* @param {INoiseSuppressionConfig} options - Configured options.
|
||||
*/
|
||||
constructor(options?: INoiseSuppressionConfig) {
|
||||
this._options = options;
|
||||
|
||||
const useKrisp = options?.krisp?.enabled;
|
||||
|
||||
logger.info(`NoiseSuppressionEffect created with ${useKrisp ? 'Krisp' : 'RNNoise'}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Effect interface called by source JitsiLocalTrack.
|
||||
* Applies effect that uses a {@code NoiseSuppressor} service initialized with {@code RnnoiseProcessor}
|
||||
* for denoising.
|
||||
*
|
||||
* @param {MediaStream} audioStream - Audio stream which will be mixed with _mixAudio.
|
||||
* @returns {MediaStream} - MediaStream containing both audio tracks mixed together.
|
||||
*/
|
||||
startEffect(audioStream: MediaStream): MediaStream {
|
||||
this._originalMediaTrack = audioStream.getAudioTracks()[0];
|
||||
|
||||
if (!audioContext) {
|
||||
audioContext = new AudioContext();
|
||||
}
|
||||
|
||||
this._audioSource = audioContext.createMediaStreamSource(audioStream);
|
||||
this._audioDestination = audioContext.createMediaStreamDestination();
|
||||
this._outputMediaTrack = this._audioDestination.stream.getAudioTracks()[0];
|
||||
|
||||
let init;
|
||||
|
||||
if (this._options?.krisp?.enabled) {
|
||||
init = _initializeKrisp(this._options, audioStream).then(filterNode => {
|
||||
this._noiseSuppressorNode = filterNode;
|
||||
|
||||
if (krispState.filterNodeReady) {
|
||||
// @ts-ignore
|
||||
krispState.filterNode?.enable();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
init = _initializeKRnnoise().then(filterNode => {
|
||||
this._noiseSuppressorNode = filterNode;
|
||||
});
|
||||
}
|
||||
|
||||
// Connect the audio processing graph MediaStream -> AudioWorkletNode -> MediaStreamAudioDestinationNode
|
||||
|
||||
init.then(() => {
|
||||
if (this._noiseSuppressorNode) {
|
||||
this._audioSource.connect(this._noiseSuppressorNode);
|
||||
this._noiseSuppressorNode.connect(this._audioDestination);
|
||||
}
|
||||
});
|
||||
|
||||
// Sync the effect track muted state with the original track state.
|
||||
this._outputMediaTrack.enabled = this._originalMediaTrack.enabled;
|
||||
|
||||
// We enable the audio on the original track because mute/unmute action will only affect the audio destination
|
||||
// output track from this point on.
|
||||
this._originalMediaTrack.enabled = true;
|
||||
|
||||
return this._audioDestination.stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the JitsiLocalTrack supports this effect.
|
||||
*
|
||||
* @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
|
||||
* @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
|
||||
*/
|
||||
isEnabled(sourceLocalTrack: any): boolean {
|
||||
// JitsiLocalTracks needs to be an audio track.
|
||||
return sourceLocalTrack.isAudioTrack();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up resources acquired by noise suppressor and rnnoise processor.
|
||||
*
|
||||
* @returns {void}
|
||||
*/
|
||||
stopEffect(): void {
|
||||
// Sync original track muted state with effect state before removing the effect.
|
||||
this._originalMediaTrack.enabled = this._outputMediaTrack.enabled;
|
||||
|
||||
if (this._options?.krisp?.enabled) {
|
||||
// When using Krisp we'll just disable the filter which we'll keep reusing.
|
||||
|
||||
// @ts-ignore
|
||||
this._noiseSuppressorNode?.disable();
|
||||
} else {
|
||||
// Technically after this process the Audio Worklet along with it's resources should be garbage collected,
|
||||
// however on chrome there seems to be a problem as described here:
|
||||
// https://bugs.chromium.org/p/chromium/issues/detail?id=1298955
|
||||
this._noiseSuppressorNode?.port?.close();
|
||||
}
|
||||
|
||||
this._audioDestination?.disconnect();
|
||||
this._noiseSuppressorNode?.disconnect();
|
||||
this._audioSource?.disconnect();
|
||||
|
||||
audioContext.suspend();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the Krisp SDK and creates the filter node.
|
||||
*
|
||||
* @param {INoiseSuppressionConfig} options - Krisp options.
|
||||
* @param {MediaStream} stream - Audio stream which will be mixed with _mixAudio.
|
||||
*
|
||||
* @returns {Promise<AudioWorkletNode | undefined>}
|
||||
*/
|
||||
async function _initializeKrisp(
|
||||
options: INoiseSuppressionConfig,
|
||||
stream: MediaStream
|
||||
): Promise<AudioWorkletNode | undefined> {
|
||||
await audioContext.resume();
|
||||
|
||||
if (!krispState.sdk) {
|
||||
const baseUrl = `${getBaseUrl()}libs/krisp`;
|
||||
const { default: KrispSDK } = await import(/* webpackIgnore: true */ `${baseUrl}/krispsdk.mjs`);
|
||||
|
||||
const ncParams = {
|
||||
krisp: {
|
||||
models: {
|
||||
modelBVC: `${baseUrl}/models/${options?.krisp?.models?.modelBVC}`,
|
||||
model8: `${baseUrl}/models/${options?.krisp?.models?.model8}`,
|
||||
modelNC: `${baseUrl}/models/${options?.krisp?.models?.modelNC}`
|
||||
},
|
||||
logProcessStats: !options?.krisp?.logProcessStats,
|
||||
debugLogs: !options?.krisp?.debugLogs,
|
||||
useBVC: !options?.krisp?.useBVC,
|
||||
bvc: {
|
||||
allowedDevices: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevices}`,
|
||||
allowedDevicesExt: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevicesExt}`
|
||||
},
|
||||
inboundModels: {
|
||||
modelInbound8: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound8}`,
|
||||
modelInbound16: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound16}`
|
||||
},
|
||||
preloadModels: {
|
||||
modelBVC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelBVC}`,
|
||||
model8: `${baseUrl}/models/${options?.krisp?.preloadModels?.model8}`,
|
||||
modelNC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelNC}`
|
||||
},
|
||||
preloadInboundModels: {
|
||||
modelInbound8: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound8}`,
|
||||
modelInbound16: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound16}`
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
krispState.sdk = new KrispSDK({
|
||||
params: ncParams.krisp,
|
||||
callbacks: {}
|
||||
});
|
||||
}
|
||||
|
||||
if (!krispState.sdkInitialized) {
|
||||
// @ts-ignore
|
||||
await krispState.sdk?.init();
|
||||
|
||||
krispState.sdkInitialized = true;
|
||||
}
|
||||
|
||||
if (!krispState.filterNode) {
|
||||
try {
|
||||
// @ts-ignore
|
||||
krispState.filterNode = await krispState.sdk?.createNoiseFilter(
|
||||
{
|
||||
audioContext,
|
||||
stream
|
||||
},
|
||||
() => {
|
||||
logger.info('Krisp audio filter ready');
|
||||
|
||||
// Enable audio filtering.
|
||||
// @ts-ignore
|
||||
krispState.filterNode?.enable();
|
||||
krispState.filterNodeReady = true;
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
logger.error('Failed to create Krisp noise filter', e);
|
||||
|
||||
krispState.filterNode = undefined;
|
||||
krispState.filterNodeReady = false;
|
||||
}
|
||||
}
|
||||
|
||||
return krispState.filterNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the RNNoise audio worklet and creates the filter node.
|
||||
*
|
||||
* @returns {Promise<AudioWorkletNode | undefined>}
|
||||
*/
|
||||
async function _initializeKRnnoise(): Promise<AudioWorkletNode | undefined> {
|
||||
await audioContext.resume();
|
||||
|
||||
const baseUrl = `${getBaseUrl()}libs/`;
|
||||
const workletUrl = `${baseUrl}noise-suppressor-worklet.min.js`;
|
||||
|
||||
try {
|
||||
await audioContext.audioWorklet.addModule(workletUrl);
|
||||
} catch (e) {
|
||||
logger.error('Error while adding audio worklet module: ', e);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// After the resolution of module loading, an AudioWorkletNode can be constructed.
|
||||
|
||||
return new AudioWorkletNode(audioContext, 'NoiseSuppressorWorklet');
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
// @ts-expect-error
|
||||
import { createRNNWasmModuleSync } from '@jitsi/rnnoise-wasm';
|
||||
|
||||
import { leastCommonMultiple } from '../../base/util/math';
|
||||
import RnnoiseProcessor from '../rnnoise/RnnoiseProcessor';
|
||||
|
||||
|
||||
/**
 * Audio worklet which will denoise targeted audio stream using rnnoise.
 */
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
    /**
     * RnnoiseProcessor instance.
     */
    private _denoiseProcessor: RnnoiseProcessor;

    /**
     * Audio worklets work with a predefined sample rate of 128.
     */
    private _procNodeSampleRate = 128;

    /**
     * PCM Sample size expected by the denoise processor.
     */
    private _denoiseSampleSize: number;

    /**
     * Circular buffer data used for efficient memory operations.
     */
    private _circularBufferLength: number;

    // Backing storage for the circular buffer, sized to _circularBufferLength.
    private _circularBuffer: Float32Array;

    /**
     * The circular buffer uses a couple of indexes to track data segments. Input data from the stream is
     * copied to the circular buffer as it comes in, one `procNodeSampleRate` sized sample at a time.
     * _inputBufferLength denotes the current length of all gathered raw audio segments.
     */
    private _inputBufferLength = 0;

    /**
     * Denoising is done directly on the circular buffer using subArray views, but because
     * `procNodeSampleRate` and `_denoiseSampleSize` have different sizes, denoised samples lag behind
     * the current gathered raw audio samples so we need a different index, `_denoisedBufferLength`.
     */
    private _denoisedBufferLength = 0;

    /**
     * Once enough data has been denoised (size of procNodeSampleRate) it's sent to the
     * output buffer, `_denoisedBufferIndx` indicates the start index on the circular buffer
     * of denoised data not yet sent.
     */
    private _denoisedBufferIndx = 0;

    /**
     * C'tor.
     */
    constructor() {
        super();

        /**
         * The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
         * initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
         */
        this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());

        /**
         * PCM Sample size expected by the denoise processor.
         */
        this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();

        /**
         * In order to avoid unnecessary memory related operations a circular buffer was used.
         * Because the audio worklet input array does not match the sample size required by rnnoise two cases can occur
         * 1. There is not enough data in which case we buffer it.
         * 2. There is enough data but some residue remains after the call to `processAudioFrame`, so its buffered
         * for the next call.
         * A problem arises when the circular buffer reaches the end and a rollover is required, namely
         * the residue could potentially be split between the end of buffer and the beginning and would
         * require some complicated logic to handle. Using the lcm as the size of the buffer will
         * guarantee that by the time the buffer reaches the end the residue will be a multiple of the
         * `procNodeSampleRate` and the residue won't be split.
         */
        this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
        this._circularBuffer = new Float32Array(this._circularBufferLength);
    }

    /**
     * Worklet interface process method. The inputs parameter contains PCM audio that is then sent to rnnoise.
     * Rnnoise only accepts PCM samples of 480 bytes whereas `process` handles 128 sized samples, we take this into
     * account using a circular buffer.
     *
     * @param {Float32Array[]} inputs - Array of inputs connected to the node, each of them with their associated
     * array of channels. Each channel is an array of 128 pcm samples.
     * @param {Float32Array[]} outputs - Array of outputs similar to the inputs parameter structure, expected to be
     * filled during the execution of `process`. By default each channel is zero filled.
     * @returns {boolean} - Boolean value that returns whether or not the processor should remain active. Returning
     * false will terminate it.
     */
    process(inputs: Float32Array[][], outputs: Float32Array[][]) {

        // We expect the incoming track to be mono, if a stereo track is passed only one of its channels will get
        // denoised and sent back.
        // TODO Technically we can denoise both channels however this might require a new rnnoise context, some more
        // investigation is required.
        const inData = inputs[0][0];
        const outData = outputs[0][0];

        // Exit out early if there is no input data (input node not connected/disconnected)
        // as rest of worklet will crash otherwise
        if (!inData) {
            return true;
        }

        // Append new raw PCM sample.
        this._circularBuffer.set(inData, this._inputBufferLength);
        this._inputBufferLength += inData.length;

        // New raw samples were just added, start denoising frames, _denoisedBufferLength gives us
        // the position at which the previous denoise iteration ended, basically it takes into account
        // residue data.
        for (; this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength;
            this._denoisedBufferLength += this._denoiseSampleSize) {
            // Create view of circular buffer so it can be modified in place, removing the need for
            // extra copies.

            const denoiseFrame = this._circularBuffer.subarray(
                this._denoisedBufferLength,
                this._denoisedBufferLength + this._denoiseSampleSize
            );

            this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
        }

        // Determine how much denoised audio is available. If the start index of unsent denoised samples
        // is greater than _denoisedBufferLength, the denoise index has rolled over to the start of the
        // circular buffer while unsent data still sits at the end, so only the tail segment is counted here;
        // the wrapped-around part is picked up on a subsequent iteration after _denoisedBufferIndx resets.
        let unsentDenoisedDataLength;

        if (this._denoisedBufferIndx > this._denoisedBufferLength) {
            unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
        } else {
            unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
        }

        // Only copy denoised data to output when there's enough of it to fit the exact buffer length.
        // e.g. if the buffer size is 1024 samples but we only denoised 960 (this happens on the first iteration)
        // nothing happens, then on the next iteration 1920 samples will be denoised so we send 1024 which leaves
        // 896 for the next iteration and so on.
        if (unsentDenoisedDataLength >= outData.length) {
            const denoisedFrame = this._circularBuffer.subarray(
                this._denoisedBufferIndx,
                this._denoisedBufferIndx + outData.length
            );

            outData.set(denoisedFrame, 0);
            this._denoisedBufferIndx += outData.length;
        }

        // When the end of the circular buffer has been reached, start from the beginning. By the time the index
        // starts over, the data from the beginning is stale (has already been processed) and can be safely
        // overwritten.
        if (this._denoisedBufferIndx === this._circularBufferLength) {
            this._denoisedBufferIndx = 0;
        }

        // Because the circular buffer's length is the lcm of both input size and the processor's sample size,
        // by the time we reach the end with the input index the denoise length index will be there as well.
        if (this._inputBufferLength === this._circularBufferLength) {
            this._inputBufferLength = 0;
            this._denoisedBufferLength = 0;
        }

        return true;
    }
}
|
||||
|
||||
// Register the processor under the name the main thread uses when constructing its AudioWorkletNode.
registerProcessor('NoiseSuppressorWorklet', NoiseSuppressorWorklet);
|
||||
@@ -0,0 +1,3 @@
|
||||
import { getLogger } from '../../base/logging/functions';

// Dedicated logger instance for the noise-suppression stream effect modules.
export default getLogger('features/stream-effects/noise-suppression');
|
||||
Reference in New Issue
Block a user