add webgpu nf4

2023-11-20 21:10:10 +08:00
parent d435436525
commit 48fef0235b
3 changed files with 7 additions and 5 deletions
--- a/frontend/src/pages/Configs.tsx
+++ b/frontend/src/pages/Configs.tsx
@@ -340,8 +340,10 @@ const Configs: FC = observer(() => {
                            });
                          }
                        }}>
-                        <Option>fp16</Option>
+                        {selectedConfig.modelParameters.device !== 'CPU' && selectedConfig.modelParameters.device !== 'MPS' &&
+                          <Option>fp16</Option>}
                        <Option>int8</Option>
+                        {selectedConfig.modelParameters.device === 'WebGPU' && <Option>nf4</Option>}
                        {selectedConfig.modelParameters.device !== 'WebGPU' && <Option>fp32</Option>}
                      </Dropdown>
                    } />
--- a/frontend/src/types/configs.ts
+++ b/frontend/src/types/configs.ts
@@ -7,7 +7,7 @@ export type ApiParameters = {
  frequencyPenalty: number;
 }
 export type Device = 'CPU' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'MPS' | 'Custom';
-export type Precision = 'fp16' | 'int8' | 'fp32';
+export type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4';
 export type ModelParameters = {
  // different models can not have the same name
  modelName: string;
--- a/frontend/src/utils/index.tsx
+++ b/frontend/src/utils/index.tsx
@@ -178,14 +178,14 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
      strategy += params.precision === 'int8' ? 'fp32i8' : 'fp32';
      break;
    case 'WebGPU':
-      strategy += params.precision === 'int8' ? 'fp16i8' : 'fp16';
+      strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
      break;
    case 'CUDA':
    case 'CUDA-Beta':
      if (avoidOverflow)
        strategy = params.useCustomCuda ? 'cuda fp16 *1 -> ' : 'cuda fp32 *1 -> ';
      strategy += 'cuda ';
-      strategy += params.precision === 'fp16' ? 'fp16' : params.precision === 'int8' ? 'fp16i8' : 'fp32';
+      strategy += params.precision === 'int8' ? 'fp16i8' : params.precision === 'fp32' ? 'fp32' : 'fp16';
      if (params.storedLayers < params.maxStoredLayers)
        strategy += ` *${params.storedLayers}+`;
      break;
@@ -193,7 +193,7 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
      if (avoidOverflow)
        strategy = 'mps fp32 *1 -> ';
      strategy += 'mps ';
-      strategy += params.precision === 'fp16' ? 'fp16' : params.precision === 'int8' ? 'fp16i8' : 'fp32';
+      strategy += params.precision === 'int8' ? 'fp32i8' : 'fp32';
      break;
    case 'Custom':
      strategy = params.customStrategy || '';