Allow setting tokenChunkSize in WebGPU mode

This commit is contained in:
josc146
2024-03-02 16:41:29 +08:00
parent c90cefc453
commit d91c3c004d
6 changed files with 32 additions and 4 deletions

View File

@@ -331,7 +331,21 @@ const Configs: FC = observer(() => {
}} />
} />
}
{selectedConfig.modelParameters.device.startsWith('WebGPU') && <div />}
{
// Parallel token chunk size setting: rendered only when the selected
// device string starts with 'WebGPU'.
selectedConfig.modelParameters.device.startsWith('WebGPU') &&
<Labeled label={t('Parallel Token Chunk Size')}
desc={t('Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).')}
content={
<ValuedSlider
// Falls back to 32 when tokenChunkSize is unset (or otherwise falsy) in the config.
value={selectedConfig.modelParameters.tokenChunkSize || 32}
// Slider range is 16..256 in steps of 16.
min={16} max={256} step={16} input
onChange={(e, data) => {
// Write the new slider value into the selected config's model parameters.
setSelectedConfigModelParams({
tokenChunkSize: data.value
});
}} />
} />
}
{
selectedConfig.modelParameters.device.startsWith('WebGPU') &&
<Labeled label={t('Quantized Layers')}