Allow setting quantizedLayers in WebGPU mode; chore

This commit is contained in:
josc146
2024-03-01 14:23:05 +08:00
parent c9513822c9
commit 887ba06bd6
8 changed files with 46 additions and 10 deletions

View File

@@ -194,6 +194,8 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
case 'WebGPU':
case 'WebGPU (Python)':
strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
if (params.quantizedLayers)
strategy += ` layer${params.quantizedLayers}`;
break;
case 'CUDA':
case 'CUDA-Beta':