useCustomCuda

josc146 2023-05-23 13:33:27 +08:00
parent 3aaf16b38b
commit 65d92d5da1
3 changed files with 25 additions and 6 deletions

View File

@@ -82,7 +82,7 @@
   "Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.": "考虑前 n% 概率质量的结果, 0.1 考虑前 10%, 质量更高, 但更保守, 1 考虑所有质量结果, 质量降低, 但更多样",
   "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.": "存在惩罚. 正值根据新token在至今的文本中是否出现过, 来对其进行惩罚, 从而增加了模型涉及新话题的可能性",
   "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.": "频率惩罚. 正值根据新token在至今的文本中出现的频率/次数, 来对其进行惩罚, 从而减少模型原封不动地重复相同句子的可能性",
-  "int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 速度更快, 但质量略微下降. fp16质量更好, fp32质量最好",
+  "int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 但质量略微下降. fp16质量更好, fp32质量最好",
   "Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.": "载入显存的神经网络层数, 载入越多, 速度越快, 但显存消耗越大",
   "Whether to use CPU to calculate the last output layer of the neural network with FP32 precision to obtain better quality.": "是否使用cpu以fp32精度计算神经网络的最后一层输出层, 以获得更好的质量",
   "Downloads": "下载",
@@ -97,5 +97,7 @@
   "This is the latest version": "已是最新版",
   "Use Tsinghua Pip Mirrors": "使用清华大学Pip镜像源",
   "Model Config Exception": "模型配置异常",
-  "Use Gitee Updates Source": "使用Gitee更新源"
+  "Use Gitee Updates Source": "使用Gitee更新源",
+  "Use Custom CUDA kernel to Accelerate": "使用自定义CUDA算子加速",
+  "Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.": "开启这个选项能大大提升推理速度，但可能存在兼容性问题，如果启动失败，请关闭此选项"
 }
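
The English source strings double as translation keys here, so the two new entries are resolved at runtime through the t() helper seen in the Configs.tsx hunk further down. A minimal sketch of that lookup, assuming an i18next-style setup (the repo's actual i18n wiring is not shown in this diff):

// Minimal sketch, assuming an i18next-style setup; the repo's actual
// i18n wiring is not part of this commit.
import i18next from 'i18next';

i18next.init({
  lng: 'zh-hans',
  // Natural-language keys contain '.' and ':', so the default
  // key/namespace separators must be disabled for lookups to work.
  keySeparator: false,
  nsSeparator: false,
  resources: {
    'zh-hans': {
      translation: {
        // The two entries added in this hunk.
        'Use Custom CUDA kernel to Accelerate': '使用自定义CUDA算子加速',
        'Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.':
          '开启这个选项能大大提升推理速度，但可能存在兼容性问题，如果启动失败，请关闭此选项'
      }
    }
  }
}).then((t) => {
  t('Use Custom CUDA kernel to Accelerate'); // -> '使用自定义CUDA算子加速'
  t('Some key with no entry');               // -> falls back to the key itself
});

Keying on the full English sentence means an untranslated locale still shows readable English, at the cost of touching every locale file whenever the source copy changes, which is exactly what the int8 description edit above illustrates.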

View File

@@ -9,7 +9,7 @@ import {
 } from '../../wailsjs/go/backend_golang/App';
 import { Button } from '@fluentui/react-components';
 import { observer } from 'mobx-react-lite';
-import { exit, readRoot, switchModel, updateConfig } from '../apis';
+import { exit, getStatus, readRoot, switchModel, updateConfig } from '../apis';
 import { toast } from 'react-toastify';
 import manifest from '../../../manifest.json';
 import { getStrategy, saveCache, toastWithButton } from '../utils';
@@ -115,9 +115,13 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
     let loading = false;
     const intervalId = setInterval(() => {
       readRoot()
-        .then(r => {
+        .then(async r => {
           if (r.ok && !loading) {
             clearInterval(intervalId);
+            await getStatus().then(status => {
+              if (status)
+                commonStore.setStatus(status);
+            });
             commonStore.setStatus({ modelStatus: ModelStatus.Loading });
             loading = true;
             toast(t('Loading Model'), { type: 'info' });
@@ -130,7 +134,8 @@
       });
     switchModel({
       model: `${manifest.localModelDir}/${modelConfig.modelParameters.modelName}`,
-      strategy: getStrategy(modelConfig)
+      strategy: getStrategy(modelConfig),
+      customCuda: !!modelConfig.modelParameters.useCustomCuda
     }).then((r) => {
       if (r.ok) {
         commonStore.setStatus({ modelStatus: ModelStatus.Working });
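
Two behavioral changes land in this file: the startup poll now pulls the backend's current status via getStatus() before overwriting it with Loading (so reconnecting to an already-running backend does not discard its real state), and switchModel() now carries a customCuda flag next to the strategy string. A rough sketch of the request this implies, assuming switchModel in ../apis is a thin JSON POST wrapper; only the three body fields are confirmed by the diff, the endpoint path and port are assumptions:

// Rough sketch of the call implied by the diff. Only `model`, `strategy`,
// and `customCuda` are confirmed; the endpoint path, port, and fetch
// plumbing here are assumptions for illustration.
type SwitchModelBody = {
  model: string;       // e.g. `${manifest.localModelDir}/${modelName}`
  strategy: string;    // rwkv strategy string produced by getStrategy()
  customCuda: boolean; // ask the backend to load its custom CUDA kernel
};

const switchModelSketch = (body: SwitchModelBody) =>
  fetch('http://127.0.0.1:8000/switch-model', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body)
  });

The !! coercion matters because useCustomCuda is optional on ModelParameters: configs saved before this commit have it undefined, and the double negation normalizes that to an honest false in the JSON payload.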

View File

@@ -39,6 +39,7 @@ export type ModelParameters = {
   storedLayers: number;
   maxStoredLayers: number;
   enableHighPrecisionForLastLayer: boolean;
+  useCustomCuda?: boolean;
 }
 
 export type ModelConfig = {
@@ -754,7 +755,7 @@ export const Configs: FC = observer(() => {
         </Dropdown>
       } />
     <Labeled label={t('Precision')}
-      desc={t('int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
+      desc={t('int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
       content={
         <Dropdown style={{ minWidth: 0 }} className="grow"
           value={selectedConfig.modelParameters.precision}
@@ -771,6 +772,7 @@
           <Option>fp32</Option>
         </Dropdown>
       } />
+    <div />
     <Labeled label={t('Stored Layers')}
       desc={t('Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.')}
       content={
@@ -792,6 +794,16 @@
             });
           }} />
       } />
+    <Labeled label={t('Use Custom CUDA kernel to Accelerate')}
+      desc={t('Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.')}
+      content={
+        <Switch checked={selectedConfig.modelParameters.useCustomCuda}
+          onChange={(e, data) => {
+            setSelectedConfigModelParams({
+              useCustomCuda: data.checked
+            });
+          }} />
+      } />
   </div>
 }
 />
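
For orientation: the parameters edited on this page are compiled by getStrategy() (imported in the RunButton hunk above) into an rwkv strategy string, while the bare <div /> added after the Precision dropdown appears to be a grid spacer that keeps the settings rows aligned. The sketch below is an illustrative assumption of that mapping, not the repo's actual helper; only the strategy syntax itself ('cuda fp16', 'fp16i8' for int8 quantization, '*N+' for partial VRAM loading with streaming, '->' for device handoff) follows the rwkv library's documented format:

// Illustrative sketch only; the real getStrategy() in ../utils covers more
// cases. The strategy syntax follows the rwkv library's documented format.
type Precision = 'fp16' | 'int8' | 'fp32';

function getStrategySketch(precision: Precision, storedLayers: number,
                           maxStoredLayers: number, highPrecisionLastLayer: boolean): string {
  // In rwkv strategy syntax, int8 quantization is spelled as a suffix on fp16.
  const dtype = precision === 'int8' ? 'fp16i8' : precision;
  // Keep only the first `storedLayers` layers resident in VRAM; '+' streams the rest.
  const layers = storedLayers < maxStoredLayers ? ` *${storedLayers}+` : '';
  // Optionally run the final output layer on CPU at fp32 for better quality.
  const tail = highPrecisionLastLayer ? ' -> cpu fp32 *1' : '';
  return `cuda ${dtype}${layers}${tail}`;
}

// e.g. getStrategySketch('int8', 20, 32, true) -> 'cuda fp16i8 *20+ -> cpu fp32 *1'

Notably, useCustomCuda is not folded into this string: the RunButton hunk sends it as a separate customCuda field, presumably so the backend can flip the kernel switch (the rwkv package reads the RWKV_CUDA_ON environment variable for this) before loading the model. That separation also matches the new warning string: a kernel that fails to compile or load is remedied by turning the option off, not by changing the strategy.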