useCustomCuda
commit 65d92d5da1
parent 3aaf16b38b
@@ -82,7 +82,7 @@
   "Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.": "考虑前 n% 概率质量的结果, 0.1 考虑前 10%, 质量更高, 但更保守, 1 考虑所有质量结果, 质量降低, 但更多样",
   "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.": "存在惩罚. 正值根据新token在至今的文本中是否出现过, 来对其进行惩罚, 从而增加了模型涉及新话题的可能性",
   "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.": "频率惩罚. 正值根据新token在至今的文本中出现的频率/次数, 来对其进行惩罚, 从而减少模型原封不动地重复相同句子的可能性",
-  "int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 速度更快, 但质量略微下降. fp16质量更好, fp32质量最好",
+  "int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 但质量略微下降. fp16质量更好, fp32质量最好",
   "Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.": "载入显存的神经网络层数, 载入越多, 速度越快, 但显存消耗越大",
   "Whether to use CPU to calculate the last output layer of the neural network with FP32 precision to obtain better quality.": "是否使用cpu以fp32精度计算神经网络的最后一层输出层, 以获得更好的质量",
   "Downloads": "下载",
@@ -97,5 +97,7 @@
   "This is the latest version": "已是最新版",
   "Use Tsinghua Pip Mirrors": "使用清华大学Pip镜像源",
   "Model Config Exception": "模型配置异常",
-  "Use Gitee Updates Source": "使用Gitee更新源"
+  "Use Gitee Updates Source": "使用Gitee更新源",
+  "Use Custom CUDA kernel to Accelerate": "使用自定义CUDA算子加速",
+  "Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.": "开启这个选项能大大提升推理速度,但可能存在兼容性问题,如果启动失败,请关闭此选项"
 }
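These locale entries use the English source string itself as the lookup key, so the `t(...)` calls added in the Configs component below resolve to the Chinese text with no separate key table. A minimal sketch of that lookup behavior, assuming an i18next-style setup (the library choice and init options are assumptions; only the key/value pair comes from this diff):

    import i18n from 'i18next';
    import { initReactI18next } from 'react-i18next';

    // Hypothetical minimal init: the English source string is the key,
    // so a missing translation falls back to the English text itself.
    i18n.use(initReactI18next).init({
      lng: 'zh-hans',
      fallbackLng: 'en',
      resources: {
        'zh-hans': {
          translation: {
            'Use Custom CUDA kernel to Accelerate': '使用自定义CUDA算子加速'
          }
        }
      }
    });

    i18n.t('Use Custom CUDA kernel to Accelerate'); // -> '使用自定义CUDA算子加速'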
@@ -9,7 +9,7 @@ import {
 } from '../../wailsjs/go/backend_golang/App';
 import { Button } from '@fluentui/react-components';
 import { observer } from 'mobx-react-lite';
-import { exit, readRoot, switchModel, updateConfig } from '../apis';
+import { exit, getStatus, readRoot, switchModel, updateConfig } from '../apis';
 import { toast } from 'react-toastify';
 import manifest from '../../../manifest.json';
 import { getStrategy, saveCache, toastWithButton } from '../utils';
@@ -115,9 +115,13 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
     let loading = false;
     const intervalId = setInterval(() => {
       readRoot()
-        .then(r => {
+        .then(async r => {
           if (r.ok && !loading) {
             clearInterval(intervalId);
+            await getStatus().then(status => {
+              if (status)
+                commonStore.setStatus(status);
+            });
             commonStore.setStatus({ modelStatus: ModelStatus.Loading });
             loading = true;
             toast(t('Loading Model'), { type: 'info' });
@@ -130,7 +134,8 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
     });
     switchModel({
       model: `${manifest.localModelDir}/${modelConfig.modelParameters.modelName}`,
-      strategy: getStrategy(modelConfig)
+      strategy: getStrategy(modelConfig),
+      customCuda: !!modelConfig.modelParameters.useCustomCuda
     }).then((r) => {
       if (r.ok) {
         commonStore.setStatus({ modelStatus: ModelStatus.Working });
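Two behaviors change in RunButton: the startup poll now pulls the real backend state via `getStatus()` before flagging the model as loading, and `switchModel` forwards the new `customCuda` flag. A sketch of what the `switchModel` wrapper in `../apis` plausibly sends, assuming a JSON POST to the local backend (the endpoint path and port are assumptions, not part of this diff):

    // Hypothetical payload shape; customCuda is the field this commit adds.
    type SwitchModelBody = {
      model: string;     // e.g. `${manifest.localModelDir}/<modelName>`
      strategy: string;  // produced by getStrategy(modelConfig)
      customCuda: boolean;
    };

    // Assumed wrapper: the URL is illustrative only.
    const switchModel = (body: SwitchModelBody): Promise<Response> =>
      fetch('http://127.0.0.1:8000/switch-model', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body)
      });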
@@ -39,6 +39,7 @@ export type ModelParameters = {
   storedLayers: number;
   maxStoredLayers: number;
   enableHighPrecisionForLastLayer: boolean;
+  useCustomCuda?: boolean;
 }

 export type ModelConfig = {
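`useCustomCuda` is declared optional (`?: boolean`) so configs saved before this commit still satisfy `ModelParameters`; the `!!` coercion in RunButton then maps an absent value to `false`. A one-liner illustrating the coercion (the sample object is hypothetical):

    // A config persisted before this field existed has no useCustomCuda key.
    const legacy: { useCustomCuda?: boolean } = {};
    const customCuda = !!legacy.useCustomCuda; // undefined -> false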
@@ -754,7 +755,7 @@ export const Configs: FC = observer(() => {
           </Dropdown>
         } />
       <Labeled label={t('Precision')}
-        desc={t('int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
+        desc={t('int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
         content={
           <Dropdown style={{ minWidth: 0 }} className="grow"
             value={selectedConfig.modelParameters.precision}
@@ -771,6 +772,7 @@ export const Configs: FC = observer(() => {
             <Option>fp32</Option>
           </Dropdown>
         } />
+      <div />
       <Labeled label={t('Stored Layers')}
         desc={t('Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.')}
         content={
@@ -792,6 +794,16 @@ export const Configs: FC = observer(() => {
             });
           }} />
         } />
+      <Labeled label={t('Use Custom CUDA kernel to Accelerate')}
+        desc={t('Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.')}
+        content={
+          <Switch checked={selectedConfig.modelParameters.useCustomCuda}
+            onChange={(e, data) => {
+              setSelectedConfigModelParams({
+                useCustomCuda: data.checked
+              });
+            }} />
+        } />
     </div>
   }
 />
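The new toggle follows the same controlled-component pattern as the surrounding fields: the Fluent UI `Switch` reads its checked state from the selected config and writes changes back through `setSelectedConfigModelParams`, which merges the patch into the MobX store. A stripped-down sketch of that store/setter pairing (names and shape are simplified stand-ins, not the real commonStore):

    import { makeAutoObservable } from 'mobx';

    type Params = { useCustomCuda?: boolean };

    // Hypothetical miniature of the real store: one observable params
    // object plus a merging setter, mirroring setSelectedConfigModelParams.
    class ParamsStore {
      params: Params = {};
      constructor() { makeAutoObservable(this); }
      setParams(patch: Partial<Params>) {
        this.params = { ...this.params, ...patch };
      }
    }

    const store = new ParamsStore();
    store.setParams({ useCustomCuda: true }); // what the Switch's onChange does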