This commit is contained in:
@@ -0,0 +1,277 @@
|
||||
import { INoiseSuppressionConfig } from '../../base/config/configType';
|
||||
import { getBaseUrl } from '../../base/util/helpers';
|
||||
|
||||
import logger from './logger';
|
||||
|
||||
interface IKrispState {

    // Krisp-provided AudioWorkletNode which performs the actual filtering.
    filterNode?: AudioWorkletNode;

    // Set to true once the Krisp filter signals readiness via its callback.
    filterNodeReady: boolean;

    // Lazily loaded Krisp SDK instance (typed as any because the SDK is imported at runtime).
    sdk: any;

    // Set to true once sdk.init() has resolved.
    sdkInitialized: boolean;
}

// Module-level state shared across NoiseSuppressionEffect instances so the Krisp SDK
// and its filter node are created only once and then reused.
const krispState: IKrispState = {
    filterNode: undefined,
    filterNodeReady: false,
    sdk: undefined,
    sdkInitialized: false
};

// Shared AudioContext, created lazily on the first startEffect() call and suspended on stopEffect().
let audioContext: AudioContext;
|
||||
|
||||
/**
|
||||
* Class Implementing the effect interface expected by a JitsiLocalTrack.
|
||||
* Effect applies rnnoise denoising on a audio JitsiLocalTrack.
|
||||
*/
|
||||
export class NoiseSuppressionEffect {
|
||||
|
||||
/**
|
||||
* Source that will be attached to the track affected by the effect.
|
||||
*/
|
||||
private _audioSource: MediaStreamAudioSourceNode;
|
||||
|
||||
/**
|
||||
* Destination that will contain denoised audio from the audio worklet.
|
||||
*/
|
||||
private _audioDestination: MediaStreamAudioDestinationNode;
|
||||
|
||||
/**
|
||||
* `AudioWorkletProcessor` associated node.
|
||||
*/
|
||||
private _noiseSuppressorNode?: AudioWorkletNode;
|
||||
|
||||
/**
|
||||
* Audio track extracted from the original MediaStream to which the effect is applied.
|
||||
*/
|
||||
private _originalMediaTrack: MediaStreamTrack;
|
||||
|
||||
/**
|
||||
* Noise suppressed audio track extracted from the media destination node.
|
||||
*/
|
||||
private _outputMediaTrack: MediaStreamTrack;
|
||||
|
||||
/**
|
||||
* Configured options for noise suppression.
|
||||
*/
|
||||
private _options?: INoiseSuppressionConfig;
|
||||
|
||||
/**
|
||||
* Instantiates a noise suppressor audio effect which will use either rnnoise or krisp.
|
||||
*
|
||||
* @param {INoiseSuppressionConfig} options - Configured options.
|
||||
*/
|
||||
constructor(options?: INoiseSuppressionConfig) {
|
||||
this._options = options;
|
||||
|
||||
const useKrisp = options?.krisp?.enabled;
|
||||
|
||||
logger.info(`NoiseSuppressionEffect created with ${useKrisp ? 'Krisp' : 'RNNoise'}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Effect interface called by source JitsiLocalTrack.
|
||||
* Applies effect that uses a {@code NoiseSuppressor} service initialized with {@code RnnoiseProcessor}
|
||||
* for denoising.
|
||||
*
|
||||
* @param {MediaStream} audioStream - Audio stream which will be mixed with _mixAudio.
|
||||
* @returns {MediaStream} - MediaStream containing both audio tracks mixed together.
|
||||
*/
|
||||
startEffect(audioStream: MediaStream): MediaStream {
|
||||
this._originalMediaTrack = audioStream.getAudioTracks()[0];
|
||||
|
||||
if (!audioContext) {
|
||||
audioContext = new AudioContext();
|
||||
}
|
||||
|
||||
this._audioSource = audioContext.createMediaStreamSource(audioStream);
|
||||
this._audioDestination = audioContext.createMediaStreamDestination();
|
||||
this._outputMediaTrack = this._audioDestination.stream.getAudioTracks()[0];
|
||||
|
||||
let init;
|
||||
|
||||
if (this._options?.krisp?.enabled) {
|
||||
init = _initializeKrisp(this._options, audioStream).then(filterNode => {
|
||||
this._noiseSuppressorNode = filterNode;
|
||||
|
||||
if (krispState.filterNodeReady) {
|
||||
// @ts-ignore
|
||||
krispState.filterNode?.enable();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
init = _initializeKRnnoise().then(filterNode => {
|
||||
this._noiseSuppressorNode = filterNode;
|
||||
});
|
||||
}
|
||||
|
||||
// Connect the audio processing graph MediaStream -> AudioWorkletNode -> MediaStreamAudioDestinationNode
|
||||
|
||||
init.then(() => {
|
||||
if (this._noiseSuppressorNode) {
|
||||
this._audioSource.connect(this._noiseSuppressorNode);
|
||||
this._noiseSuppressorNode.connect(this._audioDestination);
|
||||
}
|
||||
});
|
||||
|
||||
// Sync the effect track muted state with the original track state.
|
||||
this._outputMediaTrack.enabled = this._originalMediaTrack.enabled;
|
||||
|
||||
// We enable the audio on the original track because mute/unmute action will only affect the audio destination
|
||||
// output track from this point on.
|
||||
this._originalMediaTrack.enabled = true;
|
||||
|
||||
return this._audioDestination.stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the JitsiLocalTrack supports this effect.
|
||||
*
|
||||
* @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
|
||||
* @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
|
||||
*/
|
||||
isEnabled(sourceLocalTrack: any): boolean {
|
||||
// JitsiLocalTracks needs to be an audio track.
|
||||
return sourceLocalTrack.isAudioTrack();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up resources acquired by noise suppressor and rnnoise processor.
|
||||
*
|
||||
* @returns {void}
|
||||
*/
|
||||
stopEffect(): void {
|
||||
// Sync original track muted state with effect state before removing the effect.
|
||||
this._originalMediaTrack.enabled = this._outputMediaTrack.enabled;
|
||||
|
||||
if (this._options?.krisp?.enabled) {
|
||||
// When using Krisp we'll just disable the filter which we'll keep reusing.
|
||||
|
||||
// @ts-ignore
|
||||
this._noiseSuppressorNode?.disable();
|
||||
} else {
|
||||
// Technically after this process the Audio Worklet along with it's resources should be garbage collected,
|
||||
// however on chrome there seems to be a problem as described here:
|
||||
// https://bugs.chromium.org/p/chromium/issues/detail?id=1298955
|
||||
this._noiseSuppressorNode?.port?.close();
|
||||
}
|
||||
|
||||
this._audioDestination?.disconnect();
|
||||
this._noiseSuppressorNode?.disconnect();
|
||||
this._audioSource?.disconnect();
|
||||
|
||||
audioContext.suspend();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the Krisp SDK and creates the filter node.
|
||||
*
|
||||
* @param {INoiseSuppressionConfig} options - Krisp options.
|
||||
* @param {MediaStream} stream - Audio stream which will be mixed with _mixAudio.
|
||||
*
|
||||
* @returns {Promise<AudioWorkletNode | undefined>}
|
||||
*/
|
||||
async function _initializeKrisp(
|
||||
options: INoiseSuppressionConfig,
|
||||
stream: MediaStream
|
||||
): Promise<AudioWorkletNode | undefined> {
|
||||
await audioContext.resume();
|
||||
|
||||
if (!krispState.sdk) {
|
||||
const baseUrl = `${getBaseUrl()}libs/krisp`;
|
||||
const { default: KrispSDK } = await import(/* webpackIgnore: true */ `${baseUrl}/krispsdk.mjs`);
|
||||
|
||||
const ncParams = {
|
||||
krisp: {
|
||||
models: {
|
||||
modelBVC: `${baseUrl}/models/${options?.krisp?.models?.modelBVC}`,
|
||||
model8: `${baseUrl}/models/${options?.krisp?.models?.model8}`,
|
||||
modelNC: `${baseUrl}/models/${options?.krisp?.models?.modelNC}`
|
||||
},
|
||||
logProcessStats: !options?.krisp?.logProcessStats,
|
||||
debugLogs: !options?.krisp?.debugLogs,
|
||||
useBVC: !options?.krisp?.useBVC,
|
||||
bvc: {
|
||||
allowedDevices: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevices}`,
|
||||
allowedDevicesExt: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevicesExt}`
|
||||
},
|
||||
inboundModels: {
|
||||
modelInbound8: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound8}`,
|
||||
modelInbound16: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound16}`
|
||||
},
|
||||
preloadModels: {
|
||||
modelBVC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelBVC}`,
|
||||
model8: `${baseUrl}/models/${options?.krisp?.preloadModels?.model8}`,
|
||||
modelNC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelNC}`
|
||||
},
|
||||
preloadInboundModels: {
|
||||
modelInbound8: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound8}`,
|
||||
modelInbound16: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound16}`
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
krispState.sdk = new KrispSDK({
|
||||
params: ncParams.krisp,
|
||||
callbacks: {}
|
||||
});
|
||||
}
|
||||
|
||||
if (!krispState.sdkInitialized) {
|
||||
// @ts-ignore
|
||||
await krispState.sdk?.init();
|
||||
|
||||
krispState.sdkInitialized = true;
|
||||
}
|
||||
|
||||
if (!krispState.filterNode) {
|
||||
try {
|
||||
// @ts-ignore
|
||||
krispState.filterNode = await krispState.sdk?.createNoiseFilter(
|
||||
{
|
||||
audioContext,
|
||||
stream
|
||||
},
|
||||
() => {
|
||||
logger.info('Krisp audio filter ready');
|
||||
|
||||
// Enable audio filtering.
|
||||
// @ts-ignore
|
||||
krispState.filterNode?.enable();
|
||||
krispState.filterNodeReady = true;
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
logger.error('Failed to create Krisp noise filter', e);
|
||||
|
||||
krispState.filterNode = undefined;
|
||||
krispState.filterNodeReady = false;
|
||||
}
|
||||
}
|
||||
|
||||
return krispState.filterNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the RNNoise audio worklet and creates the filter node.
|
||||
*
|
||||
* @returns {Promise<AudioWorkletNode | undefined>}
|
||||
*/
|
||||
async function _initializeKRnnoise(): Promise<AudioWorkletNode | undefined> {
|
||||
await audioContext.resume();
|
||||
|
||||
const baseUrl = `${getBaseUrl()}libs/`;
|
||||
const workletUrl = `${baseUrl}noise-suppressor-worklet.min.js`;
|
||||
|
||||
try {
|
||||
await audioContext.audioWorklet.addModule(workletUrl);
|
||||
} catch (e) {
|
||||
logger.error('Error while adding audio worklet module: ', e);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// After the resolution of module loading, an AudioWorkletNode can be constructed.
|
||||
|
||||
return new AudioWorkletNode(audioContext, 'NoiseSuppressorWorklet');
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
// @ts-expect-error
|
||||
import { createRNNWasmModuleSync } from '@jitsi/rnnoise-wasm';
|
||||
|
||||
import { leastCommonMultiple } from '../../base/util/math';
|
||||
import RnnoiseProcessor from '../rnnoise/RnnoiseProcessor';
|
||||
|
||||
|
||||
/**
 * Audio worklet which will denoise targeted audio stream using rnnoise.
 */
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
    /**
     * RnnoiseProcessor instance.
     */
    private _denoiseProcessor: RnnoiseProcessor;

    /**
     * Audio worklets work with a predefined sample rate of 128.
     */
    private _procNodeSampleRate = 128;

    /**
     * PCM Sample size expected by the denoise processor.
     */
    private _denoiseSampleSize: number;

    /**
     * Circular buffer data used for efficient memory operations.
     */
    private _circularBufferLength: number;

    // Backing storage for the circular buffer, sized to _circularBufferLength.
    private _circularBuffer: Float32Array;

    /**
     * The circular buffer uses a couple of indexes to track data segments. Input data from the stream is
     * copied to the circular buffer as it comes in, one `procNodeSampleRate` sized sample at a time.
     * _inputBufferLength denotes the current length of all gathered raw audio segments.
     */
    private _inputBufferLength = 0;

    /**
     * Denoising is done directly on the circular buffer using subArray views, but because
     * `procNodeSampleRate` and `_denoiseSampleSize` have different sizes, denoised samples lag behind
     * the current gathered raw audio samples so we need a different index, `_denoisedBufferLength`.
     */
    private _denoisedBufferLength = 0;

    /**
     * Once enough data has been denoised (size of procNodeSampleRate) it's sent to the
     * output buffer, `_denoisedBufferIndx` indicates the start index on the circular buffer
     * of denoised data not yet sent.
     */
    private _denoisedBufferIndx = 0;

    /**
     * C'tor.
     */
    constructor() {
        super();

        /**
         * The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
         * initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
         */
        this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());

        /**
         * PCM Sample size expected by the denoise processor.
         */
        this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();

        /**
         * In order to avoid unnecessary memory related operations a circular buffer was used.
         * Because the audio worklet input array does not match the sample size required by rnnoise two cases can occur
         * 1. There is not enough data in which case we buffer it.
         * 2. There is enough data but some residue remains after the call to `processAudioFrame`, so its buffered
         * for the next call.
         * A problem arises when the circular buffer reaches the end and a rollover is required, namely
         * the residue could potentially be split between the end of buffer and the beginning and would
         * require some complicated logic to handle. Using the lcm as the size of the buffer will
         * guarantee that by the time the buffer reaches the end the residue will be a multiple of the
         * `procNodeSampleRate` and the residue won't be split.
         */
        this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
        this._circularBuffer = new Float32Array(this._circularBufferLength);
    }

    /**
     * Worklet interface process method. The inputs parameter contains PCM audio that is then sent to rnnoise.
     * Rnnoise only accepts PCM samples of 480 bytes whereas `process` handles 128 sized samples, we take this into
     * account using a circular buffer.
     *
     * @param {Float32Array[]} inputs - Array of inputs connected to the node, each of them with their associated
     * array of channels. Each channel is an array of 128 pcm samples.
     * @param {Float32Array[]} outputs - Array of outputs similar to the inputs parameter structure, expected to be
     * filled during the execution of `process`. By default each channel is zero filled.
     * @returns {boolean} - Boolean value that returns whether or not the processor should remain active. Returning
     * false will terminate it.
     */
    process(inputs: Float32Array[][], outputs: Float32Array[][]) {

        // We expect the incoming track to be mono, if a stereo track is passed only one of its channels will get
        // denoised and sent back.
        // TODO Technically we can denoise both channels however this might require a new rnnoise context, some more
        // investigation is required.
        const inData = inputs[0][0];
        const outData = outputs[0][0];

        // Exit out early if there is no input data (input node not connected/disconnected)
        // as rest of worklet will crash otherwise
        if (!inData) {
            return true;
        }

        // Append new raw PCM sample.
        this._circularBuffer.set(inData, this._inputBufferLength);
        this._inputBufferLength += inData.length;

        // New raw samples were just added, start denoising frames, _denoisedBufferLength gives us
        // the position at which the previous denoise iteration ended, basically it takes into account
        // residue data.
        for (; this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength;
            this._denoisedBufferLength += this._denoiseSampleSize) {
            // Create view of circular buffer so it can be modified in place, removing the need for
            // extra copies.

            const denoiseFrame = this._circularBuffer.subarray(
                this._denoisedBufferLength,
                this._denoisedBufferLength + this._denoiseSampleSize
            );

            this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
        }

        // Determine how much denoised audio is available. If the start index of unsent denoised samples
        // is greater than _denoisedBufferLength, the denoise index has rolled over to the start of the
        // circular buffer while unsent data still sits at the end, so only the tail segment is counted here;
        // the wrapped-around part is picked up on a subsequent iteration after _denoisedBufferIndx resets.
        let unsentDenoisedDataLength;

        if (this._denoisedBufferIndx > this._denoisedBufferLength) {
            unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
        } else {
            unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
        }

        // Only copy denoised data to output when there's enough of it to fit the exact buffer length.
        // e.g. if the buffer size is 1024 samples but we only denoised 960 (this happens on the first iteration)
        // nothing happens, then on the next iteration 1920 samples will be denoised so we send 1024 which leaves
        // 896 for the next iteration and so on.
        if (unsentDenoisedDataLength >= outData.length) {
            const denoisedFrame = this._circularBuffer.subarray(
                this._denoisedBufferIndx,
                this._denoisedBufferIndx + outData.length
            );

            outData.set(denoisedFrame, 0);
            this._denoisedBufferIndx += outData.length;
        }

        // When the end of the circular buffer has been reached, start from the beginning. By the time the index
        // starts over, the data from the beginning is stale (has already been processed) and can be safely
        // overwritten.
        if (this._denoisedBufferIndx === this._circularBufferLength) {
            this._denoisedBufferIndx = 0;
        }

        // Because the circular buffer's length is the lcm of both input size and the processor's sample size,
        // by the time we reach the end with the input index the denoise length index will be there as well.
        if (this._inputBufferLength === this._circularBufferLength) {
            this._inputBufferLength = 0;
            this._denoisedBufferLength = 0;
        }

        return true;
    }
}
|
||||
|
||||
// Register the processor under the name the main thread uses when constructing its AudioWorkletNode.
registerProcessor('NoiseSuppressorWorklet', NoiseSuppressorWorklet);
|
||||
@@ -0,0 +1,3 @@
|
||||
import { getLogger } from '../../base/logging/functions';

// Dedicated logger instance for the noise-suppression stream effect modules.
export default getLogger('features/stream-effects/noise-suppression');
|
||||
Reference in New Issue
Block a user