Allow setting tokenChunkSize for WebGPU mode

2024-03-02 16:41:29 +08:00
parent c90cefc453
commit d91c3c004d
6 changed files with 32 additions and 4 deletions
--- a/frontend/src/utils/index.tsx
+++ b/frontend/src/utils/index.tsx
@@ -196,6 +196,8 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
      strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
      if (params.quantizedLayers)
        strategy += ` layer${params.quantizedLayers}`;
+      if (params.tokenChunkSize)
+        strategy += ` chunk${params.tokenChunkSize}`;
      break;
    case 'CUDA':
    case 'CUDA-Beta':