useCustomCuda

josc146 2023-05-23 13:33:27 +08:00
parent 3aaf16b38b
commit 65d92d5da1
3 changed files with 25 additions and 6 deletions

View File

@@ -82,7 +82,7 @@
   "Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.": "考虑前 n% 概率质量的结果, 0.1 考虑前 10%, 质量更高, 但更保守, 1 考虑所有质量结果, 质量降低, 但更多样",
   "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.": "存在惩罚. 正值根据新token在至今的文本中是否出现过, 来对其进行惩罚, 从而增加了模型涉及新话题的可能性",
   "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.": "频率惩罚. 正值根据新token在至今的文本中出现的频率/次数, 来对其进行惩罚, 从而减少模型原封不动地重复相同句子的可能性",
-  "int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 速度更快, 但质量略微下降. fp16质量更好, fp32质量最好",
+  "int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 但质量略微下降. fp16质量更好, fp32质量最好",
   "Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.": "载入显存的神经网络层数, 载入越多, 速度越快, 但显存消耗越大",
   "Whether to use CPU to calculate the last output layer of the neural network with FP32 precision to obtain better quality.": "是否使用cpu以fp32精度计算神经网络的最后一层输出层, 以获得更好的质量",
   "Downloads": "下载",
@@ -97,5 +97,7 @@
   "This is the latest version": "已是最新版",
   "Use Tsinghua Pip Mirrors": "使用清华大学Pip镜像源",
   "Model Config Exception": "模型配置异常",
-  "Use Gitee Updates Source": "使用Gitee更新源"
+  "Use Gitee Updates Source": "使用Gitee更新源",
+  "Use Custom CUDA kernel to Accelerate": "使用自定义CUDA算子加速",
+  "Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.": "开启这个选项能大大提升推理速度，但可能存在兼容性问题，如果启动失败，请关闭此选项"
 }
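
The English source strings double as translation keys here, so the two new entries are resolved at runtime through the t() helper seen in the Configs.tsx hunk further down. A minimal sketch of that lookup, assuming an i18next-style setup (the repo's actual i18n wiring is not shown in this diff):

// Minimal sketch, assuming an i18next-style setup; the repo's actual
// i18n wiring is not part of this commit.
import i18next from 'i18next';

i18next.init({
  lng: 'zh-hans',
  // Natural-language keys contain '.' and ':', so the default
  // key/namespace separators must be disabled for lookups to work.
  keySeparator: false,
  nsSeparator: false,
  resources: {
    'zh-hans': {
      translation: {
        // The two entries added in this hunk.
        'Use Custom CUDA kernel to Accelerate': '使用自定义CUDA算子加速',
        'Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.':
          '开启这个选项能大大提升推理速度，但可能存在兼容性问题，如果启动失败，请关闭此选项'
      }
    }
  }
}).then((t) => {
  t('Use Custom CUDA kernel to Accelerate'); // -> '使用自定义CUDA算子加速'
  t('Some key with no entry');               // -> falls back to the key itself
});

Keying on the full English sentence means an untranslated locale still shows readable English, at the cost of touching every locale file whenever the source copy changes, which is exactly what the int8 description edit above illustrates.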

View File

@@ -9,7 +9,7 @@ import {
 } from '../../wailsjs/go/backend_golang/App';
 import { Button } from '@fluentui/react-components';
 import { observer } from 'mobx-react-lite';
-import { exit, readRoot, switchModel, updateConfig } from '../apis';
+import { exit, getStatus, readRoot, switchModel, updateConfig } from '../apis';
 import { toast } from 'react-toastify';
 import manifest from '../../../manifest.json';
 import { getStrategy, saveCache, toastWithButton } from '../utils';
@@ -115,9 +115,13 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
     let loading = false;
     const intervalId = setInterval(() => {
       readRoot()
-        .then(r => {
+        .then(async r => {
           if (r.ok && !loading) {
             clearInterval(intervalId);
+            await getStatus().then(status => {
+              if (status)
+                commonStore.setStatus(status);
+            });
             commonStore.setStatus({ modelStatus: ModelStatus.Loading });
             loading = true;
             toast(t('Loading Model'), { type: 'info' });
@@ -130,7 +134,8 @@
       });
     switchModel({
       model: `${manifest.localModelDir}/${modelConfig.modelParameters.modelName}`,
-      strategy: getStrategy(modelConfig)
+      strategy: getStrategy(modelConfig),
+      customCuda: !!modelConfig.modelParameters.useCustomCuda
     }).then((r) => {
       if (r.ok) {
         commonStore.setStatus({ modelStatus: ModelStatus.Working });
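
Two behavioral changes land in this file: the startup poll now pulls the backend's current status via getStatus() before overwriting it with Loading (so reconnecting to an already-running backend does not discard its real state), and switchModel() now carries a customCuda flag next to the strategy string. A rough sketch of the request this implies, assuming switchModel in ../apis is a thin JSON POST wrapper; only the three body fields are confirmed by the diff, the endpoint path and port are assumptions:

// Rough sketch of the call implied by the diff. Only `model`, `strategy`,
// and `customCuda` are confirmed; the endpoint path, port, and fetch
// plumbing here are assumptions for illustration.
type SwitchModelBody = {
  model: string;       // e.g. `${manifest.localModelDir}/${modelName}`
  strategy: string;    // rwkv strategy string produced by getStrategy()
  customCuda: boolean; // ask the backend to load its custom CUDA kernel
};

const switchModelSketch = (body: SwitchModelBody) =>
  fetch('http://127.0.0.1:8000/switch-model', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body)
  });

The !! coercion matters because useCustomCuda is optional on ModelParameters: configs saved before this commit have it undefined, and the double negation normalizes that to an honest false in the JSON payload.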

View File

@@ -39,6 +39,7 @@ export type ModelParameters = {
   storedLayers: number;
   maxStoredLayers: number;
   enableHighPrecisionForLastLayer: boolean;
+  useCustomCuda?: boolean;
 }
 
 export type ModelConfig = {
@@ -754,7 +755,7 @@ export const Configs: FC = observer(() => {
         </Dropdown>
       } />
     <Labeled label={t('Precision')}
-      desc={t('int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
+      desc={t('int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
       content={
         <Dropdown style={{ minWidth: 0 }} className="grow"
           value={selectedConfig.modelParameters.precision}
@@ -771,6 +772,7 @@
           <Option>fp32</Option>
         </Dropdown>
       } />
+    <div />
     <Labeled label={t('Stored Layers')}
       desc={t('Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.')}
       content={
@@ -792,6 +794,16 @@
             });
           }} />
       } />
+    <Labeled label={t('Use Custom CUDA kernel to Accelerate')}
+      desc={t('Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.')}
+      content={
+        <Switch checked={selectedConfig.modelParameters.useCustomCuda}
+          onChange={(e, data) => {
+            setSelectedConfigModelParams({
+              useCustomCuda: data.checked
+            });
+          }} />
+      } />
   </div>
 }
 />
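
For orientation: the parameters edited on this page are compiled by getStrategy() (imported in the RunButton hunk above) into an rwkv strategy string, while the bare <div /> added after the Precision dropdown appears to be a grid spacer that keeps the settings rows aligned. The sketch below is an illustrative assumption of that mapping, not the repo's actual helper; only the strategy syntax itself ('cuda fp16', 'fp16i8' for int8 quantization, '*N+' for partial VRAM loading with streaming, '->' for device handoff) follows the rwkv library's documented format:

// Illustrative sketch only; the real getStrategy() in ../utils covers more
// cases. The strategy syntax follows the rwkv library's documented format.
type Precision = 'fp16' | 'int8' | 'fp32';

function getStrategySketch(precision: Precision, storedLayers: number,
                           maxStoredLayers: number, highPrecisionLastLayer: boolean): string {
  // In rwkv strategy syntax, int8 quantization is spelled as a suffix on fp16.
  const dtype = precision === 'int8' ? 'fp16i8' : precision;
  // Keep only the first `storedLayers` layers resident in VRAM; '+' streams the rest.
  const layers = storedLayers < maxStoredLayers ? ` *${storedLayers}+` : '';
  // Optionally run the final output layer on CPU at fp32 for better quality.
  const tail = highPrecisionLastLayer ? ' -> cpu fp32 *1' : '';
  return `cuda ${dtype}${layers}${tail}`;
}

// e.g. getStrategySketch('int8', 20, 32, true) -> 'cuda fp16i8 *20+ -> cpu fp32 *1'

Notably, useCustomCuda is not folded into this string: the RunButton hunk sends it as a separate customCuda field, presumably so the backend can flip the kernel switch (the rwkv package reads the RWKV_CUDA_ON environment variable for this) before loading the model. That separation also matches the new warning string: a kernel that fails to compile or load is remedied by turning the option off, not by changing the strategy.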