init
2025-09-02 14:49:16 +08:00
commit 38ba663466
2885 changed files with 391107 additions and 0 deletions


@@ -0,0 +1,113 @@
import JitsiMeetJS from '../../base/lib-jitsi-meet';
import { MEDIA_TYPE } from '../../base/media/constants';
/**
* Class implementing the effect interface expected by a JitsiLocalTrack.
* The AudioMixerEffect, as the name implies, mixes two JitsiLocalTracks containing an audio track. The first track
* is provided at the moment of creation, the second is provided through the effect interface.
*/
export class AudioMixerEffect {
/**
* JitsiLocalTrack that is going to be mixed into the track that uses this effect.
*/
_mixAudio: any;
/**
* MediaStream resulted from mixing.
*/
_mixedMediaStream: any;
/**
* MediaStreamTrack obtained from mixed stream.
*/
_mixedMediaTrack: Object;
/**
* Original MediaStream from the JitsiLocalTrack that uses this effect.
*/
_originalStream: Object;
/**
* MediaStreamTrack obtained from the original MediaStream.
*/
_originalTrack: any;
/**
* Lib-jitsi-meet AudioMixer.
*/
_audioMixer: any;
/**
* Creates AudioMixerEffect.
*
* @param {JitsiLocalTrack} mixAudio - JitsiLocalTrack which will be mixed with the original track.
*/
constructor(mixAudio: any) {
if (mixAudio.getType() !== MEDIA_TYPE.AUDIO) {
throw new Error('AudioMixerEffect only supports audio JitsiLocalTracks; effect will not work!');
}
this._mixAudio = mixAudio;
}
/**
* Checks if the JitsiLocalTrack supports this effect.
*
* @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
* @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
*/
isEnabled(sourceLocalTrack: any) {
// Both JitsiLocalTracks need to be audio i.e. contain an audio MediaStreamTrack
return sourceLocalTrack.isAudioTrack() && this._mixAudio.isAudioTrack();
}
/**
* Effect interface called by the source JitsiLocalTrack. At this point a WebAudio ChannelMergerNode is created
* and the two associated MediaStreams are connected to it; the resulting mixed MediaStream is returned.
*
* @param {MediaStream} audioStream - Audio stream which will be mixed with _mixAudio.
* @returns {MediaStream} - MediaStream containing both audio tracks mixed together.
*/
// @ts-ignore
startEffect(audioStream: MediaStream) {
this._originalStream = audioStream;
this._originalTrack = audioStream.getTracks()[0];
this._audioMixer = JitsiMeetJS.createAudioMixer();
this._audioMixer.addMediaStream(this._mixAudio.getOriginalStream());
this._audioMixer.addMediaStream(this._originalStream);
this._mixedMediaStream = this._audioMixer.start();
this._mixedMediaTrack = this._mixedMediaStream.getTracks()[0];
return this._mixedMediaStream;
}
/**
* Reset the AudioMixer, stopping it in the process.
*
* @returns {void}
*/
stopEffect() {
this._audioMixer.reset();
}
/**
* Change the muted state of the effect.
*
* @param {boolean} muted - Should effect be muted or not.
* @returns {void}
*/
setMuted(muted: boolean) {
this._originalTrack.enabled = !muted;
}
/**
* Check whether or not this effect is muted.
*
* @returns {boolean}
*/
isMuted() {
return !this._originalTrack.enabled;
}
}
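A minimal usage sketch (not part of this commit): applying the mixer to an existing local audio track. The `localAudioTrack`/`desktopAudioTrack` names and the `setEffect()` call are assumed from the lib-jitsi-meet track API; the module path is illustrative.

```ts
// Hypothetical sketch: mix desktop audio into the local microphone track.
import { AudioMixerEffect } from './AudioMixerEffect'; // illustrative path

async function mixDesktopAudio(localAudioTrack: any, desktopAudioTrack: any) {
    const effect = new AudioMixerEffect(desktopAudioTrack);

    // setEffect() invokes isEnabled()/startEffect() above and replaces the outgoing audio
    // with the mixed MediaStream returned by startEffect().
    await localAudioTrack.setEffect(effect);
}
```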


@@ -0,0 +1,277 @@
import { INoiseSuppressionConfig } from '../../base/config/configType';
import { getBaseUrl } from '../../base/util/helpers';
import logger from './logger';
interface IKrispState {
filterNode?: AudioWorkletNode;
filterNodeReady: boolean;
sdk: any;
sdkInitialized: boolean;
}
const krispState: IKrispState = {
filterNode: undefined,
filterNodeReady: false,
sdk: undefined,
sdkInitialized: false
};
let audioContext: AudioContext;
/**
* Class implementing the effect interface expected by a JitsiLocalTrack.
* Effect applies noise suppression (rnnoise or Krisp) on an audio JitsiLocalTrack.
*/
export class NoiseSuppressionEffect {
/**
* Source that will be attached to the track affected by the effect.
*/
private _audioSource: MediaStreamAudioSourceNode;
/**
* Destination that will contain denoised audio from the audio worklet.
*/
private _audioDestination: MediaStreamAudioDestinationNode;
/**
* `AudioWorkletProcessor` associated node.
*/
private _noiseSuppressorNode?: AudioWorkletNode;
/**
* Audio track extracted from the original MediaStream to which the effect is applied.
*/
private _originalMediaTrack: MediaStreamTrack;
/**
* Noise suppressed audio track extracted from the media destination node.
*/
private _outputMediaTrack: MediaStreamTrack;
/**
* Configured options for noise suppression.
*/
private _options?: INoiseSuppressionConfig;
/**
* Instantiates a noise suppressor audio effect which will use either rnnoise or krisp.
*
* @param {INoiseSuppressionConfig} options - Configured options.
*/
constructor(options?: INoiseSuppressionConfig) {
this._options = options;
const useKrisp = options?.krisp?.enabled;
logger.info(`NoiseSuppressionEffect created with ${useKrisp ? 'Krisp' : 'RNNoise'}`);
}
/**
* Effect interface called by the source JitsiLocalTrack.
* Routes the audio through an AudioWorkletNode backed by either the Krisp SDK or the {@code RnnoiseProcessor}
* worklet for denoising.
*
* @param {MediaStream} audioStream - Audio stream to be denoised.
* @returns {MediaStream} - MediaStream containing the denoised audio track.
*/
startEffect(audioStream: MediaStream): MediaStream {
this._originalMediaTrack = audioStream.getAudioTracks()[0];
if (!audioContext) {
audioContext = new AudioContext();
}
this._audioSource = audioContext.createMediaStreamSource(audioStream);
this._audioDestination = audioContext.createMediaStreamDestination();
this._outputMediaTrack = this._audioDestination.stream.getAudioTracks()[0];
let init;
if (this._options?.krisp?.enabled) {
init = _initializeKrisp(this._options, audioStream).then(filterNode => {
this._noiseSuppressorNode = filterNode;
if (krispState.filterNodeReady) {
// @ts-ignore
krispState.filterNode?.enable();
}
});
} else {
init = _initializeRnnoise().then(filterNode => {
this._noiseSuppressorNode = filterNode;
});
}
// Connect the audio processing graph MediaStream -> AudioWorkletNode -> MediaStreamAudioDestinationNode
init.then(() => {
if (this._noiseSuppressorNode) {
this._audioSource.connect(this._noiseSuppressorNode);
this._noiseSuppressorNode.connect(this._audioDestination);
}
});
// Sync the effect track muted state with the original track state.
this._outputMediaTrack.enabled = this._originalMediaTrack.enabled;
// We enable the audio on the original track because mute/unmute action will only affect the audio destination
// output track from this point on.
this._originalMediaTrack.enabled = true;
return this._audioDestination.stream;
}
/**
* Checks if the JitsiLocalTrack supports this effect.
*
* @param {JitsiLocalTrack} sourceLocalTrack - Track to which the effect will be applied.
* @returns {boolean} - Returns true if this effect can run on the specified track, false otherwise.
*/
isEnabled(sourceLocalTrack: any): boolean {
// The JitsiLocalTrack needs to be an audio track.
return sourceLocalTrack.isAudioTrack();
}
/**
* Clean up resources acquired by noise suppressor and rnnoise processor.
*
* @returns {void}
*/
stopEffect(): void {
// Sync original track muted state with effect state before removing the effect.
this._originalMediaTrack.enabled = this._outputMediaTrack.enabled;
if (this._options?.krisp?.enabled) {
// When using Krisp we'll just disable the filter which we'll keep reusing.
// @ts-ignore
this._noiseSuppressorNode?.disable();
} else {
// Technically after this the Audio Worklet along with its resources should be garbage collected,
// however on Chrome there seems to be a problem as described here:
// https://bugs.chromium.org/p/chromium/issues/detail?id=1298955
this._noiseSuppressorNode?.port?.close();
}
this._audioDestination?.disconnect();
this._noiseSuppressorNode?.disconnect();
this._audioSource?.disconnect();
audioContext.suspend();
}
}
/**
* Initializes the Krisp SDK and creates the filter node.
*
* @param {INoiseSuppressionConfig} options - Krisp options.
* @param {MediaStream} stream - Audio stream to which the Krisp noise filter will be applied.
*
* @returns {Promise<AudioWorkletNode | undefined>}
*/
async function _initializeKrisp(
options: INoiseSuppressionConfig,
stream: MediaStream
): Promise<AudioWorkletNode | undefined> {
await audioContext.resume();
if (!krispState.sdk) {
const baseUrl = `${getBaseUrl()}libs/krisp`;
const { default: KrispSDK } = await import(/* webpackIgnore: true */ `${baseUrl}/krispsdk.mjs`);
const ncParams = {
krisp: {
models: {
modelBVC: `${baseUrl}/models/${options?.krisp?.models?.modelBVC}`,
model8: `${baseUrl}/models/${options?.krisp?.models?.model8}`,
modelNC: `${baseUrl}/models/${options?.krisp?.models?.modelNC}`
},
logProcessStats: options?.krisp?.logProcessStats,
debugLogs: options?.krisp?.debugLogs,
useBVC: options?.krisp?.useBVC,
bvc: {
allowedDevices: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevices}`,
allowedDevicesExt: `${baseUrl}/assets/${options?.krisp?.bvc?.allowedDevicesExt}`
},
inboundModels: {
modelInbound8: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound8}`,
modelInbound16: `${baseUrl}/models/${options?.krisp?.inboundModels?.modelInbound16}`
},
preloadModels: {
modelBVC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelBVC}`,
model8: `${baseUrl}/models/${options?.krisp?.preloadModels?.model8}`,
modelNC: `${baseUrl}/models/${options?.krisp?.preloadModels?.modelNC}`
},
preloadInboundModels: {
modelInbound8: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound8}`,
modelInbound16: `${baseUrl}/models/${options?.krisp?.preloadInboundModels?.modelInbound16}`
}
}
};
krispState.sdk = new KrispSDK({
params: ncParams.krisp,
callbacks: {}
});
}
if (!krispState.sdkInitialized) {
// @ts-ignore
await krispState.sdk?.init();
krispState.sdkInitialized = true;
}
if (!krispState.filterNode) {
try {
// @ts-ignore
krispState.filterNode = await krispState.sdk?.createNoiseFilter(
{
audioContext,
stream
},
() => {
logger.info('Krisp audio filter ready');
// Enable audio filtering.
// @ts-ignore
krispState.filterNode?.enable();
krispState.filterNodeReady = true;
}
);
} catch (e) {
logger.error('Failed to create Krisp noise filter', e);
krispState.filterNode = undefined;
krispState.filterNodeReady = false;
}
}
return krispState.filterNode;
}
/**
* Initializes the RNNoise audio worklet and creates the filter node.
*
* @returns {Promise<AudioWorkletNode | undefined>}
*/
async function _initializeRnnoise(): Promise<AudioWorkletNode | undefined> {
await audioContext.resume();
const baseUrl = `${getBaseUrl()}libs/`;
const workletUrl = `${baseUrl}noise-suppressor-worklet.min.js`;
try {
await audioContext.audioWorklet.addModule(workletUrl);
} catch (e) {
logger.error('Error while adding audio worklet module: ', e);
return;
}
// After the resolution of module loading, an AudioWorkletNode can be constructed.
return new AudioWorkletNode(audioContext, 'NoiseSuppressorWorklet');
}
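A minimal usage sketch (not part of this commit): toggling noise suppression on a local audio track. The track object and its `setEffect()` method are assumed from lib-jitsi-meet; the module path is illustrative.

```ts
// Hypothetical sketch: enable or disable noise suppression on the local audio track.
import { NoiseSuppressionEffect } from './NoiseSuppressionEffect'; // illustrative path

async function setNoiseSuppression(localAudioTrack: any, enabled: boolean, config?: any) {
    // Passing undefined removes the current effect and triggers stopEffect() above.
    await localAudioTrack.setEffect(enabled ? new NoiseSuppressionEffect(config) : undefined);
}
```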


@@ -0,0 +1,177 @@
// @ts-expect-error
import { createRNNWasmModuleSync } from '@jitsi/rnnoise-wasm';
import { leastCommonMultiple } from '../../base/util/math';
import RnnoiseProcessor from '../rnnoise/RnnoiseProcessor';
/**
* Audio worklet which will denoise targeted audio stream using rnnoise.
*/
class NoiseSuppressorWorklet extends AudioWorkletProcessor {
/**
* RnnoiseProcessor instance.
*/
private _denoiseProcessor: RnnoiseProcessor;
/**
* Audio worklets process audio in fixed render quanta of 128 samples.
*/
private _procNodeSampleRate = 128;
/**
* PCM Sample size expected by the denoise processor.
*/
private _denoiseSampleSize: number;
/**
* Circular buffer data used for efficient memory operations.
*/
private _circularBufferLength: number;
private _circularBuffer: Float32Array;
/**
* The circular buffer uses a couple of indexes to track data segments. Input data from the stream is
* copied to the circular buffer as it comes in, one `_procNodeSampleRate` sized chunk at a time.
* _inputBufferLength denotes the current length of all gathered raw audio segments.
*/
private _inputBufferLength = 0;
/**
* Denoising is done directly on the circular buffer using subArray views, but because
* `procNodeSampleRate` and `_denoiseSampleSize` have different sizes, denoised samples lag behind
* the current gathered raw audio samples so we need a different index, `_denoisedBufferLength`.
*/
private _denoisedBufferLength = 0;
/**
* Once enough data has been denoised (size of procNodeSampleRate) it's sent to the
* output buffer, `_denoisedBufferIndx` indicates the start index on the circular buffer
* of denoised data not yet sent.
*/
private _denoisedBufferIndx = 0;
/**
* C'tor.
*/
constructor() {
super();
/**
* The wasm module needs to be compiled to load synchronously as the audio worklet `addModule()`
* initialization process does not wait for the resolution of promises in the AudioWorkletGlobalScope.
*/
this._denoiseProcessor = new RnnoiseProcessor(createRNNWasmModuleSync());
/**
* PCM Sample size expected by the denoise processor.
*/
this._denoiseSampleSize = this._denoiseProcessor.getSampleLength();
/**
* In order to avoid unnecessary memory related operations a circular buffer was used.
* Because the audio worklet input array does not match the sample size required by rnnoise, two cases can occur:
* 1. There is not enough data, in which case we buffer it.
* 2. There is enough data but some residue remains after the call to `processAudioFrame`, so it's buffered
* for the next call.
* A problem arises when the circular buffer reaches the end and a rollover is required, namely
* the residue could potentially be split between the end of buffer and the beginning and would
* require some complicated logic to handle. Using the lcm as the size of the buffer will
* guarantee that by the time the buffer reaches the end the residue will be a multiple of the
* `procNodeSampleRate` and the residue won't be split.
*/
this._circularBufferLength = leastCommonMultiple(this._procNodeSampleRate, this._denoiseSampleSize);
this._circularBuffer = new Float32Array(this._circularBufferLength);
}
/**
* Worklet interface process method. The inputs parameter contains PCM audio that is then sent to rnnoise.
* Rnnoise only accepts PCM frames of 480 samples whereas `process` handles 128 sample frames, so we take this
* into account using a circular buffer.
*
* @param {Float32Array[]} inputs - Array of inputs connected to the node, each of them with their associated
* array of channels. Each channel is an array of 128 pcm samples.
* @param {Float32Array[]} outputs - Array of outputs similar to the inputs parameter structure, expected to be
* filled during the execution of `process`. By default each channel is zero filled.
* @returns {boolean} - Boolean value that returns whether or not the processor should remain active. Returning
* false will terminate it.
*/
process(inputs: Float32Array[][], outputs: Float32Array[][]) {
// We expect the incoming track to be mono; if a stereo track is passed only one of its channels will get
// denoised and sent back.
// TODO Technically we can denoise both channels, however this might require a new rnnoise context; some more
// investigation is required.
const inData = inputs[0][0];
const outData = outputs[0][0];
// Exit out early if there is no input data (input node not connected/disconnected)
// as rest of worklet will crash otherwise
if (!inData) {
return true;
}
// Append new raw PCM sample.
this._circularBuffer.set(inData, this._inputBufferLength);
this._inputBufferLength += inData.length;
// New raw samples were just added, so start denoising frames. _denoisedBufferLength gives us
// the position at which the previous denoise iteration ended, i.e. it takes residue data
// into account.
for (; this._denoisedBufferLength + this._denoiseSampleSize <= this._inputBufferLength;
this._denoisedBufferLength += this._denoiseSampleSize) {
// Create view of circular buffer so it can be modified in place, removing the need for
// extra copies.
const denoiseFrame = this._circularBuffer.subarray(
this._denoisedBufferLength,
this._denoisedBufferLength + this._denoiseSampleSize
);
this._denoiseProcessor.processAudioFrame(denoiseFrame, true);
}
// Determine how much denoised audio is available. If the start index of unsent denoised samples is greater
// than _denoisedBufferLength, a rollover of the denoised length occurred.
let unsentDenoisedDataLength;
if (this._denoisedBufferIndx > this._denoisedBufferLength) {
unsentDenoisedDataLength = this._circularBufferLength - this._denoisedBufferIndx;
} else {
unsentDenoisedDataLength = this._denoisedBufferLength - this._denoisedBufferIndx;
}
// Only copy denoised data to output when there's enough of it to fit the exact buffer length.
// e.g. if the buffer size is 1024 samples but we only denoised 960 (this happens on the first iteration)
// nothing happens, then on the next iteration 1920 samples will be denoised so we send 1024 which leaves
// 896 for the next iteration and so on.
if (unsentDenoisedDataLength >= outData.length) {
const denoisedFrame = this._circularBuffer.subarray(
this._denoisedBufferIndx,
this._denoisedBufferIndx + outData.length
);
outData.set(denoisedFrame, 0);
this._denoisedBufferIndx += outData.length;
}
// When the end of the circular buffer has been reached, start from the beginning. By the time the index
// starts over, the data from the beginning is stale (has already been processed) and can be safely
// overwritten.
if (this._denoisedBufferIndx === this._circularBufferLength) {
this._denoisedBufferIndx = 0;
}
// Because the circular buffer's length is the lcm of both input size and the processor's sample size,
// by the time we reach the end with the input index the denoise length index will be there as well.
if (this._inputBufferLength === this._circularBufferLength) {
this._inputBufferLength = 0;
this._denoisedBufferLength = 0;
}
return true;
}
}
registerProcessor('NoiseSuppressorWorklet', NoiseSuppressorWorklet);
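For concreteness, a worked sketch of the buffer sizing used above (the 128/480 figures are the worklet render quantum and RNNOISE_SAMPLE_LENGTH):

```ts
// lcm(128, 480) = 1920, so per full buffer:
//   input side:   1920 / 128 = 15 render quanta fill the buffer exactly,
//   denoise side: 1920 / 480 =  4 rnnoise frames consume it exactly,
// which is why both indexes reach the end of the buffer together and residue is never split
// across the rollover.
import { leastCommonMultiple } from '../../base/util/math';

const circularBufferLength = leastCommonMultiple(128, 480); // 1920
```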


@@ -0,0 +1,3 @@
import { getLogger } from '../../base/logging/functions';
export default getLogger('features/stream-effects/noise-suppression');


@@ -0,0 +1,186 @@
/* eslint-disable no-bitwise */
interface IRnnoiseModule extends EmscriptenModule {
_rnnoise_create: () => number;
_rnnoise_destroy: (context: number) => void;
_rnnoise_process_frame: (context: number, input: number, output: number) => number;
}
/**
* Constant. Rnnoise default sample size, samples of different size won't work.
*/
export const RNNOISE_SAMPLE_LENGTH = 480;
/**
* Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4.
*/
const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;
/**
* Constant. Rnnoise only operates on 44.1 kHz float 32 little endian PCM.
*/
const PCM_FREQUENCY = 44100;
/**
* Constant used to scale normalized float samples to and from the 16 bit signed integer range (2^15).
*/
const SHIFT_16_BIT_NR = 32768;
/**
* Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
* memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
* detection) scores.
*/
export default class RnnoiseProcessor {
/**
* Rnnoise context object needed to perform the audio processing.
*/
private _context: number;
/**
* State flag, check if the instance was destroyed.
*/
private _destroyed = false;
/**
* WASM interface through which calls to rnnoise are made.
*/
private _wasmInterface: IRnnoiseModule;
/**
* WASM dynamic memory buffer used as input for rnnoise processing method.
*/
private _wasmPcmInput: number;
/**
* The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
*/
private _wasmPcmInputF32Index: number;
/**
* Constructor.
*
* @class
* @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
*/
constructor(wasmInterface: IRnnoiseModule) {
// Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
// i.e. in case of exception there are no side effects.
try {
this._wasmInterface = wasmInterface;
// For VAD score purposes only allocate the buffers once and reuse them
this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
this._wasmPcmInputF32Index = this._wasmPcmInput >> 2;
if (!this._wasmPcmInput) {
throw Error('Failed to create wasm input memory buffer!');
}
this._context = this._wasmInterface._rnnoise_create();
} catch (error) {
// release can be called even if not all the components were initialized.
this.destroy();
throw error;
}
}
/**
* Release resources associated with the wasm context. If something goes downhill here,
* i.e. an exception is thrown, there is not much we can do.
*
* @returns {void}
*/
_releaseWasmResources(): void {
// For VAD score purposes only allocate the buffers once and reuse them
if (this._wasmPcmInput) {
this._wasmInterface._free(this._wasmPcmInput);
}
if (this._context) {
this._wasmInterface._rnnoise_destroy(this._context);
}
}
/**
* Rnnoise can only operate on a certain PCM array size.
*
* @returns {number} - The PCM sample array size as required by rnnoise.
*/
getSampleLength(): number {
return RNNOISE_SAMPLE_LENGTH;
}
/**
* Rnnoise can only operate on a certain format of PCM sample, namely float 32 at 44.1 kHz.
*
* @returns {number} - PCM sample frequency as required by rnnoise.
*/
getRequiredPCMFrequency(): number {
return PCM_FREQUENCY;
}
/**
* Release any resources required by the rnnoise context; this needs to be called
* before destroying any context that uses the processor.
*
* @returns {void}
*/
destroy(): void {
// The processor was already destroyed, do nothing.
if (this._destroyed) {
return;
}
this._releaseWasmResources();
this._destroyed = true;
}
/**
* Calculate the Voice Activity Detection for a raw Float32 PCM sample Array.
* The size of the array must be exactly 480 samples; this constraint comes from the rnnoise library.
*
* @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
* @returns {Float} Contains VAD score in the interval 0 - 1, e.g. 0.90.
*/
calculateAudioFrameVAD(pcmFrame: Float32Array): number {
return this.processAudioFrame(pcmFrame);
}
/**
* Process an audio frame, optionally denoising the input pcmFrame and returning the Voice Activity Detection score
* for a raw Float32 PCM sample Array.
* The size of the array must be exactly 480 samples; this constraint comes from the rnnoise library.
*
* @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples. Parameter is also used as output
* when {@code shouldDenoise} is true.
* @param {boolean} shouldDenoise - Should the denoised frame be returned in pcmFrame.
* @returns {Float} Contains VAD score in the interval 0 - 1, e.g. 0.90.
*/
processAudioFrame(pcmFrame: Float32Array, shouldDenoise: boolean = false): number {
// Scale the normalized 32 bit float PCM samples to the 16 bit signed integer range that rnnoise expects as input
for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
this._wasmInterface.HEAPF32[this._wasmPcmInputF32Index + i] = pcmFrame[i] * SHIFT_16_BIT_NR;
}
// Use the same buffer for input/output, rnnoise supports this behavior
const vadScore = this._wasmInterface._rnnoise_process_frame(
this._context,
this._wasmPcmInput,
this._wasmPcmInput
);
// Rnnoise denoises the frame by default but we can avoid unnecessary operations if the calling
// client doesn't use the denoised frame.
if (shouldDenoise) {
// Scale back to normalized 32 bit float PCM
for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
pcmFrame[i] = this._wasmInterface.HEAPF32[this._wasmPcmInputF32Index + i] / SHIFT_16_BIT_NR;
}
}
return vadScore;
}
}
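A small sketch of the byte-offset arithmetic used above (Emscripten heap convention; the numbers are illustrative):

```ts
// _malloc() returns a byte offset into the wasm heap; HEAPF32 is a Float32Array view over the
// same memory, so the byte offset is divided by 4 (>> 2) to obtain the matching float index.
const byteOffset = 1024;          // e.g. a value returned by wasmInterface._malloc(RNNOISE_BUFFER_SIZE)
const f32Index = byteOffset >> 2; // 256 - index of the same location in wasmInterface.HEAPF32
```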


@@ -0,0 +1,24 @@
// The script expects to find the rnnoise WebAssembly binary in the same public path root, otherwise it won't load.
// During the build phase this needs to be taken care of manually.
// @ts-expect-error
import { createRNNWasmModule } from '@jitsi/rnnoise-wasm';
import RnnoiseProcessor from './RnnoiseProcessor';
export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';
export type { RnnoiseProcessor };
let rnnoiseModule: Promise<any> | undefined;
/**
* Creates a new instance of RnnoiseProcessor.
*
* @returns {Promise<RnnoiseProcessor>}
*/
export function createRnnoiseProcessor() {
if (!rnnoiseModule) {
rnnoiseModule = createRNNWasmModule();
}
return rnnoiseModule?.then(mod => new RnnoiseProcessor(mod));
}
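A minimal usage sketch (not part of this commit): denoising one 480-sample frame and reading its VAD score. The module path is illustrative.

```ts
// Hypothetical sketch: denoise a single PCM frame in place and return the voice activity score.
import { createRnnoiseProcessor, RNNOISE_SAMPLE_LENGTH } from './index'; // illustrative path

async function denoiseFrame(pcmFrame: Float32Array): Promise<number> {
    if (pcmFrame.length !== RNNOISE_SAMPLE_LENGTH) {
        throw new Error(`rnnoise expects frames of exactly ${RNNOISE_SAMPLE_LENGTH} samples`);
    }

    const processor = await createRnnoiseProcessor();

    if (!processor) {
        throw new Error('Failed to initialize the rnnoise wasm module');
    }

    // shouldDenoise = true denoises pcmFrame in place; the return value is the VAD score (0 - 1).
    const vadScore = processor.processAudioFrame(pcmFrame, true);

    processor.destroy();

    return vadScore;
}
```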


@@ -0,0 +1,269 @@
import { VIRTUAL_BACKGROUND_TYPE } from '../../virtual-background/constants';
import {
CLEAR_TIMEOUT,
SET_TIMEOUT,
TIMEOUT_TICK,
timerWorkerScript
} from './TimerWorker';
export interface IBackgroundEffectOptions {
height: number;
virtualBackground: {
backgroundType?: string;
blurValue?: number;
virtualSource?: string;
};
width: number;
}
/**
* Represents a modified MediaStream that adds effects to video background.
* <tt>JitsiStreamBackgroundEffect</tt> does the processing of the original
* video stream.
*/
export default class JitsiStreamBackgroundEffect {
_model: any;
_options: IBackgroundEffectOptions;
_stream: any;
_segmentationPixelCount: number;
_inputVideoElement: HTMLVideoElement;
_maskFrameTimerWorker: Worker;
_outputCanvasElement: HTMLCanvasElement;
_outputCanvasCtx: CanvasRenderingContext2D | null;
_segmentationMaskCtx: CanvasRenderingContext2D | null;
_segmentationMask: ImageData;
_segmentationMaskCanvas: HTMLCanvasElement;
_virtualImage: HTMLImageElement;
_virtualVideo: HTMLVideoElement;
/**
* Represents a modified video MediaStream track.
*
* @class
* @param {Object} model - Meet model.
* @param {Object} options - Segmentation dimensions.
*/
constructor(model: Object, options: IBackgroundEffectOptions) {
this._options = options;
if (this._options.virtualBackground.backgroundType === VIRTUAL_BACKGROUND_TYPE.IMAGE) {
this._virtualImage = document.createElement('img');
this._virtualImage.crossOrigin = 'anonymous';
this._virtualImage.src = this._options.virtualBackground.virtualSource ?? '';
}
this._model = model;
this._segmentationPixelCount = this._options.width * this._options.height;
// Bind event handler so it is only bound once for every instance.
this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
// Workaround for FF issue https://bugzilla.mozilla.org/show_bug.cgi?id=1388974
this._outputCanvasElement = document.createElement('canvas');
this._outputCanvasElement.getContext('2d');
this._inputVideoElement = document.createElement('video');
}
/**
* EventHandler onmessage for the maskFrameTimerWorker WebWorker.
*
* @private
* @param {EventHandler} response - The onmessage EventHandler parameter.
* @returns {void}
*/
_onMaskFrameTimer(response: { data: { id: number; }; }) {
if (response.data.id === TIMEOUT_TICK) {
this._renderMask();
}
}
/**
* Runs the post-processing: composites the segmentation mask, the foreground video and the background onto the output canvas.
*
* @returns {void}
*/
runPostProcessing() {
const track = this._stream.getVideoTracks()[0];
const { height, width } = track.getSettings() ?? track.getConstraints();
const { backgroundType } = this._options.virtualBackground;
if (!this._outputCanvasCtx) {
return;
}
this._outputCanvasElement.height = height;
this._outputCanvasElement.width = width;
this._outputCanvasCtx.globalCompositeOperation = 'copy';
// Draw segmentation mask.
// Smooth out the edges.
this._outputCanvasCtx.filter = backgroundType === VIRTUAL_BACKGROUND_TYPE.IMAGE ? 'blur(4px)' : 'blur(8px)';
this._outputCanvasCtx?.drawImage( // @ts-ignore
this._segmentationMaskCanvas,
0,
0,
this._options.width,
this._options.height,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height
);
this._outputCanvasCtx.globalCompositeOperation = 'source-in';
this._outputCanvasCtx.filter = 'none';
// Draw the foreground video.
// @ts-ignore
this._outputCanvasCtx?.drawImage(this._inputVideoElement, 0, 0);
// Draw the background.
this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
if (backgroundType === VIRTUAL_BACKGROUND_TYPE.IMAGE) {
this._outputCanvasCtx?.drawImage( // @ts-ignore
backgroundType === VIRTUAL_BACKGROUND_TYPE.IMAGE
? this._virtualImage : this._virtualVideo,
0,
0,
this._outputCanvasElement.width,
this._outputCanvasElement.height
);
} else {
this._outputCanvasCtx.filter = `blur(${this._options.virtualBackground.blurValue}px)`;
// @ts-ignore
this._outputCanvasCtx?.drawImage(this._inputVideoElement, 0, 0);
}
}
/**
* Runs the TensorFlow Lite inference and writes the result into the segmentation mask's alpha channel.
*
* @returns {void}
*/
runInference() {
this._model._runInference();
const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;
for (let i = 0; i < this._segmentationPixelCount; i++) {
const person = this._model.HEAPF32[outputMemoryOffset + i];
// Sets only the alpha component of each pixel.
this._segmentationMask.data[(i * 4) + 3] = 255 * person;
}
this._segmentationMaskCtx?.putImageData(this._segmentationMask, 0, 0);
}
/**
* Loop function to render the background mask.
*
* @private
* @returns {void}
*/
_renderMask() {
this.resizeSource();
this.runInference();
this.runPostProcessing();
this._maskFrameTimerWorker.postMessage({
id: SET_TIMEOUT,
timeMs: 1000 / 30
});
}
/**
* Resizes and copies the current video frame into the segmentation model's input buffer.
*
* @returns {void}
*/
resizeSource() {
this._segmentationMaskCtx?.drawImage( // @ts-ignore
this._inputVideoElement,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height,
0,
0,
this._options.width,
this._options.height
);
const imageData = this._segmentationMaskCtx?.getImageData(
0,
0,
this._options.width,
this._options.height
);
const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;
for (let i = 0; i < this._segmentationPixelCount; i++) {
this._model.HEAPF32[inputMemoryOffset + (i * 3)] = Number(imageData?.data[i * 4]) / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = Number(imageData?.data[(i * 4) + 1]) / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = Number(imageData?.data[(i * 4) + 2]) / 255;
}
}
/**
* Checks if the local track supports this effect.
*
* @param {JitsiLocalTrack} jitsiLocalTrack - Track to apply effect.
* @returns {boolean} - Returns true if this effect can run on the specified track
* false otherwise.
*/
isEnabled(jitsiLocalTrack: any) {
return jitsiLocalTrack.isVideoTrack() && jitsiLocalTrack.videoType === 'camera';
}
/**
* Starts loop to capture video frame and render the segmentation mask.
*
* @param {MediaStream} stream - Stream to be used for processing.
* @returns {MediaStream} - The stream with the applied effect.
*/
startEffect(stream: MediaStream) {
this._stream = stream;
this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;
const firstVideoTrack = this._stream.getVideoTracks()[0];
const { height, frameRate, width }
= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();
this._segmentationMask = new ImageData(this._options.width, this._options.height);
this._segmentationMaskCanvas = document.createElement('canvas');
this._segmentationMaskCanvas.width = this._options.width;
this._segmentationMaskCanvas.height = this._options.height;
this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
this._outputCanvasElement.width = parseInt(width, 10);
this._outputCanvasElement.height = parseInt(height, 10);
this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
this._inputVideoElement.width = parseInt(width, 10);
this._inputVideoElement.height = parseInt(height, 10);
this._inputVideoElement.autoplay = true;
this._inputVideoElement.srcObject = this._stream;
this._inputVideoElement.onloadeddata = () => {
this._maskFrameTimerWorker.postMessage({
id: SET_TIMEOUT,
timeMs: 1000 / 30
});
};
return this._outputCanvasElement.captureStream(parseInt(frameRate, 10));
}
/**
* Stops the capture and render loop.
*
* @returns {void}
*/
stopEffect() {
this._maskFrameTimerWorker.postMessage({
id: CLEAR_TIMEOUT
});
this._maskFrameTimerWorker.terminate();
}
}


@@ -0,0 +1,67 @@
/**
* SET_TIMEOUT constant is used to schedule a timeout and it is set in
* the id property of the request.data object. The timeMs property must
* also be set.
*
* ```
* //Request.data example:
* {
* id: SET_TIMEOUT,
* timeMs: 33
* }
* ```
*/
export const SET_TIMEOUT = 1;
/**
* CLEAR_TIMEOUT constant is used to clear the pending timeout and it is set in
* the id property of the request.data object.
*
* ```
* {
* id: CLEAR_TIMEOUT
* }
* ```
*/
export const CLEAR_TIMEOUT = 2;
/**
* TIMEOUT_TICK constant is used as the response and it is set in the id property.
*
* ```
* {
* id: TIMEOUT_TICK
* }
* ```
*/
export const TIMEOUT_TICK = 3;
/**
* The following code is kept as a string so a URL can be created from a Blob.
* The URL is then passed to a WebWorker. The reason for this is to enable the
* use of setTimeout that is not throttled when the tab is inactive.
*/
const code = `
var timer;
onmessage = function(request) {
switch (request.data.id) {
case ${SET_TIMEOUT}: {
timer = setTimeout(() => {
postMessage({ id: ${TIMEOUT_TICK} });
}, request.data.timeMs);
break;
}
case ${CLEAR_TIMEOUT}: {
if (timer) {
clearTimeout(timer);
}
break;
}
}
};
`;
// @ts-ignore
export const timerWorkerScript = URL.createObjectURL(new Blob([ code ], { type: 'application/javascript' }));
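A minimal sketch of how this script is consumed (this mirrors what JitsiStreamBackgroundEffect.startEffect() above does; the handler body is illustrative):

```ts
// Drive a ~30fps tick from a WebWorker so it is not throttled when the tab is inactive.
import { SET_TIMEOUT, TIMEOUT_TICK, timerWorkerScript } from './TimerWorker';

const worker = new Worker(timerWorkerScript, { name: 'Tick worker' });

worker.onmessage = ({ data }) => {
    if (data.id === TIMEOUT_TICK) {
        // Do one unit of work here, then schedule the next tick.
        worker.postMessage({ id: SET_TIMEOUT, timeMs: 1000 / 30 });
    }
};

// Kick off the first tick; post the CLEAR_TIMEOUT message and call worker.terminate() when done.
worker.postMessage({ id: SET_TIMEOUT, timeMs: 1000 / 30 });
```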


@@ -0,0 +1,106 @@
/* eslint-disable lines-around-comment */
import { IStore } from '../../app/types';
import { showWarningNotification } from '../../notifications/actions';
import { NOTIFICATION_TIMEOUT_TYPE } from '../../notifications/constants';
import { timeout } from '../../virtual-background/functions';
import logger from '../../virtual-background/logger';
import JitsiStreamBackgroundEffect, { IBackgroundEffectOptions } from './JitsiStreamBackgroundEffect';
// @ts-ignore
import createTFLiteModule from './vendor/tflite/tflite';
// @ts-ignore
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';
const models = {
modelLandscape: 'libs/selfie_segmentation_landscape.tflite'
};
/* eslint-enable lines-around-comment */
let modelBuffer: ArrayBuffer;
let tflite: any;
let wasmCheck;
let isWasmDisabled = false;
const segmentationDimensions = {
modelLandscape: {
height: 144,
width: 256
}
};
/**
* Creates a new instance of JitsiStreamBackgroundEffect. This loads the Meet background model that is used to
* extract person segmentation.
*
* @param {Object} virtualBackground - The virtual background options, containing the background type, the
* blur value and the virtual image source.
* @param {Function} dispatch - The Redux dispatch function.
* @returns {Promise<JitsiStreamBackgroundEffect>}
*/
export async function createVirtualBackgroundEffect(virtualBackground: IBackgroundEffectOptions['virtualBackground'],
dispatch?: IStore['dispatch']) {
if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
throw new Error('JitsiStreamBackgroundEffect not supported!');
}
if (isWasmDisabled) {
dispatch?.(showWarningNotification({
titleKey: 'virtualBackground.backgroundEffectError'
}, NOTIFICATION_TIMEOUT_TYPE.LONG));
return;
}
// Checks if WebAssembly feature is supported or enabled by/in the browser.
// Conditional import of wasm-check package is done to prevent
// the browser from crashing when the user opens the app.
if (!tflite) {
try {
wasmCheck = require('wasm-check');
const tfliteTimeout = 10000;
if (wasmCheck?.feature?.simd) {
tflite = await timeout(tfliteTimeout, createTFLiteSIMDModule());
} else {
tflite = await timeout(tfliteTimeout, createTFLiteModule());
}
} catch (err: any) {
if (err?.message === '408') {
logger.error('Failed to download tflite model!');
dispatch?.(showWarningNotification({
titleKey: 'virtualBackground.backgroundEffectError'
}, NOTIFICATION_TIMEOUT_TYPE.LONG));
} else {
isWasmDisabled = true;
logger.error('Looks like WebAssembly is disabled or not supported on this browser', err);
dispatch?.(showWarningNotification({
titleKey: 'virtualBackground.webAssemblyWarning',
descriptionKey: 'virtualBackground.webAssemblyWarningDescription'
}, NOTIFICATION_TIMEOUT_TYPE.LONG));
}
return;
}
}
if (!modelBuffer) {
const modelResponse = await fetch(models.modelLandscape);
if (!modelResponse.ok) {
throw new Error('Failed to download tflite model!');
}
modelBuffer = await modelResponse.arrayBuffer();
tflite.HEAPU8.set(new Uint8Array(modelBuffer), tflite._getModelBufferMemoryOffset());
tflite._loadModel(modelBuffer.byteLength);
}
const options = {
...segmentationDimensions.modelLandscape,
virtualBackground
};
return new JitsiStreamBackgroundEffect(tflite, options);
}
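A minimal usage sketch (not part of this commit): creating the effect and applying it to the local camera track. The `localVideoTrack` object and its `setEffect()` method are assumed from lib-jitsi-meet; the module path is illustrative.

```ts
// Hypothetical sketch: apply an image virtual background to the local camera track.
import { VIRTUAL_BACKGROUND_TYPE } from '../../virtual-background/constants';
import { createVirtualBackgroundEffect } from './index'; // illustrative path

async function applyImageBackground(localVideoTrack: any, imageUrl: string, dispatch?: any) {
    const effect = await createVirtualBackgroundEffect({
        backgroundType: VIRTUAL_BACKGROUND_TYPE.IMAGE,
        virtualSource: imageUrl
    }, dispatch);

    // createVirtualBackgroundEffect() resolves to undefined when WebAssembly is unavailable.
    if (effect) {
        await localVideoTrack.setEffect(effect);
    }
}
```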


@@ -0,0 +1,24 @@
# Virtual Background on stream effects
> From https://google.github.io/mediapipe/solutions/models.html#selfie-segmentation
#### Canvas 2D + CPU
This rendering pipeline is pretty much the same as for BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
Interactions with TFLite inference tool are executed on CPU to convert from UInt8 to Float32 for the model input and to apply softmax on the model output.
The framerate is higher and the quality looks better than with BodyPix.
#### SIMD and non-SIMD
How to test with SIMD:
1. Go to chrome://flags/
2. Search for the SIMD flag
3. Enable WebAssembly SIMD support (enables support for the WebAssembly SIMD proposal)
4. Reopen Google Chrome
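The factory code above picks the SIMD or non-SIMD TFLite build at runtime; a minimal sketch of that detection, using the same `wasm-check` package it relies on:

```ts
// Sketch: runtime detection of WebAssembly SIMD support, used to pick the SIMD TFLite build.
const wasmCheck = require('wasm-check');

// True when the browser supports the WebAssembly SIMD proposal.
export const supportsWasmSimd: boolean = Boolean(wasmCheck?.feature?.simd);
```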
More details:
- [WebAssembly](https://webassembly.org/)
- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long