From 48fef0235b72e9a5365f176c36a987294f94064d Mon Sep 17 00:00:00 2001
From: josc146
Date: Mon, 20 Nov 2023 21:10:10 +0800
Subject: [PATCH] add webgpu nf4

---
 frontend/src/pages/Configs.tsx | 4 +++-
 frontend/src/types/configs.ts  | 2 +-
 frontend/src/utils/index.tsx   | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/frontend/src/pages/Configs.tsx b/frontend/src/pages/Configs.tsx
index b1aa9b2..b02e189 100644
--- a/frontend/src/pages/Configs.tsx
+++ b/frontend/src/pages/Configs.tsx
@@ -340,8 +340,10 @@ const Configs: FC = observer(() => {
                 });
               }
             }}>
-            <Option>fp16</Option>
+            {selectedConfig.modelParameters.device !== 'CPU' && selectedConfig.modelParameters.device !== 'MPS' &&
+              <Option>fp16</Option>}
             <Option>int8</Option>
+            {selectedConfig.modelParameters.device === 'WebGPU' && <Option>nf4</Option>}
             {selectedConfig.modelParameters.device !== 'WebGPU' && <Option>fp32</Option>}
           </Dropdown>
         } />
diff --git a/frontend/src/types/configs.ts b/frontend/src/types/configs.ts
index a3222cf..0814f66 100644
--- a/frontend/src/types/configs.ts
+++ b/frontend/src/types/configs.ts
@@ -7,7 +7,7 @@ export type ApiParameters = {
   frequencyPenalty: number;
 }
 export type Device = 'CPU' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'MPS' | 'Custom';
-export type Precision = 'fp16' | 'int8' | 'fp32';
+export type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4';
 export type ModelParameters = {
   // different models can not have the same name
   modelName: string;
diff --git a/frontend/src/utils/index.tsx b/frontend/src/utils/index.tsx
index 41cd95e..f6d1a1e 100644
--- a/frontend/src/utils/index.tsx
+++ b/frontend/src/utils/index.tsx
@@ -178,14 +178,14 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
       strategy += params.precision === 'int8' ? 'fp32i8' : 'fp32';
       break;
     case 'WebGPU':
-      strategy += params.precision === 'int8' ? 'fp16i8' : 'fp16';
+      strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
       break;
     case 'CUDA':
     case 'CUDA-Beta':
       if (avoidOverflow)
         strategy = params.useCustomCuda ? 'cuda fp16 *1 -> ' : 'cuda fp32 *1 -> ';
       strategy += 'cuda ';
-      strategy += params.precision === 'fp16' ? 'fp16' : params.precision === 'int8' ? 'fp16i8' : 'fp32';
+      strategy += params.precision === 'int8' ? 'fp16i8' : params.precision === 'fp32' ? 'fp32' : 'fp16';
       if (params.storedLayers < params.maxStoredLayers)
         strategy += ` *${params.storedLayers}+`;
       break;
@@ -193,7 +193,7 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
       if (avoidOverflow)
         strategy = 'mps fp32 *1 -> ';
       strategy += 'mps ';
-      strategy += params.precision === 'fp16' ? 'fp16' : params.precision === 'int8' ? 'fp16i8' : 'fp32';
+      strategy += params.precision === 'int8' ? 'fp32i8' : 'fp32';
       break;
     case 'Custom':
       strategy = params.customStrategy || '';
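
Note (not part of the patch): a minimal, self-contained TypeScript sketch of the per-device precision mapping that getStrategy produces after applying this patch. The precisionToken helper and the inlined Device/Precision unions are illustrative only; the real function also handles avoidOverflow, custom CUDA kernels, stored-layer counts, and the 'Custom' strategy string.

// Simplified mirror of the patched precision handling; names here are hypothetical.
type Device = 'CPU' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'MPS';
type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4';

const precisionToken = (device: Device, precision: Precision): string => {
  switch (device) {
    case 'WebGPU':
      // New in this patch: nf4 is emitted as the 'fp16i4' strategy token.
      return precision === 'nf4' ? 'fp16i4' : precision === 'int8' ? 'fp16i8' : 'fp16';
    case 'CUDA':
    case 'CUDA-Beta':
      // Reordered ternary: anything that is not int8/fp32 (e.g. a leftover 'nf4') falls back to fp16.
      return precision === 'int8' ? 'fp16i8' : precision === 'fp32' ? 'fp32' : 'fp16';
    case 'MPS':
      // fp16 is no longer produced for MPS; int8 maps to fp32i8, everything else to fp32.
      return precision === 'int8' ? 'fp32i8' : 'fp32';
    case 'CPU':
    default:
      return precision === 'int8' ? 'fp32i8' : 'fp32';
  }
};

console.log(precisionToken('WebGPU', 'nf4')); // "fp16i4"
console.log(precisionToken('CUDA', 'nf4'));   // "fp16" (fallback)
console.log(precisionToken('MPS', 'fp32'));   // "fp32"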