useCustomCuda
parent 3aaf16b38b
commit 65d92d5da1
@@ -82,7 +82,7 @@
   "Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.": "考虑前 n% 概率质量的结果, 0.1 考虑前 10%, 质量更高, 但更保守, 1 考虑所有质量结果, 质量降低, 但更多样",
   "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.": "存在惩罚. 正值根据新token在至今的文本中是否出现过, 来对其进行惩罚, 从而增加了模型涉及新话题的可能性",
   "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.": "频率惩罚. 正值根据新token在至今的文本中出现的频率/次数, 来对其进行惩罚, 从而减少模型原封不动地重复相同句子的可能性",
-  "int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 速度更快, 但质量略微下降. fp16质量更好, fp32质量最好",
+  "int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.": "int8占用显存更低, 但质量略微下降. fp16质量更好, fp32质量最好",
   "Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.": "载入显存的神经网络层数, 载入越多, 速度越快, 但显存消耗越大",
   "Whether to use CPU to calculate the last output layer of the neural network with FP32 precision to obtain better quality.": "是否使用cpu以fp32精度计算神经网络的最后一层输出层, 以获得更好的质量",
   "Downloads": "下载",
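The tooltip strings in this hunk double as specs for the sampling knobs. For reference, a minimal TypeScript sketch of nucleus (top-p) filtering and the two penalties they describe; the function names and shapes are illustrative, not this repository's backend API:

```typescript
// Top-p (nucleus) filtering: keep the smallest set of tokens whose
// cumulative probability mass reaches topP, renormalize, drop the rest.
function topPFilter(probs: number[], topP: number): number[] {
  const byProb = probs.map((_, i) => i).sort((a, b) => probs[b] - probs[a]);
  const kept = new Set<number>();
  let mass = 0;
  for (const i of byProb) {
    kept.add(i);
    mass += probs[i];
    if (mass >= topP) break;
  }
  return probs.map((p, i) => (kept.has(i) ? p / mass : 0));
}

// Presence penalty: flat cost for any token already seen at all.
// Frequency penalty: cost that grows with how often it has appeared.
function penalize(
  logits: number[],
  counts: Map<number, number>, // token id -> occurrences so far
  presencePenalty: number,
  frequencyPenalty: number
): number[] {
  return logits.map((logit, id) => {
    const n = counts.get(id) ?? 0;
    return n > 0 ? logit - presencePenalty - frequencyPenalty * n : logit;
  });
}
```

This is the distinction the two strings draw: the presence penalty fires once a token has appeared at all (nudging the model toward new topics), while the frequency penalty scales with the count (suppressing verbatim repetition).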
@@ -97,5 +97,7 @@
   "This is the latest version": "已是最新版",
   "Use Tsinghua Pip Mirrors": "使用清华大学Pip镜像源",
   "Model Config Exception": "模型配置异常",
-  "Use Gitee Updates Source": "使用Gitee更新源"
+  "Use Gitee Updates Source": "使用Gitee更新源",
+  "Use Custom CUDA kernel to Accelerate": "使用自定义CUDA算子加速",
+  "Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.": "开启这个选项能大大提升推理速度,但可能存在兼容性问题,如果启动失败,请关闭此选项"
 }
@@ -9,7 +9,7 @@ import {
 } from '../../wailsjs/go/backend_golang/App';
 import { Button } from '@fluentui/react-components';
 import { observer } from 'mobx-react-lite';
-import { exit, readRoot, switchModel, updateConfig } from '../apis';
+import { exit, getStatus, readRoot, switchModel, updateConfig } from '../apis';
 import { toast } from 'react-toastify';
 import manifest from '../../../manifest.json';
 import { getStrategy, saveCache, toastWithButton } from '../utils';
@@ -115,9 +115,13 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
     let loading = false;
     const intervalId = setInterval(() => {
       readRoot()
-        .then(r => {
+        .then(async r => {
           if (r.ok && !loading) {
             clearInterval(intervalId);
+            await getStatus().then(status => {
+              if (status)
+                commonStore.setStatus(status);
+            });
             commonStore.setStatus({ modelStatus: ModelStatus.Loading });
             loading = true;
             toast(t('Loading Model'), { type: 'info' });
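The hunk above makes the poll callback async so the store can be hydrated from the backend's reported status before the UI flags the model as loading. The same poll-then-hydrate shape as a standalone sketch, using a plain fetch target instead of this project's readRoot/getStatus wrappers:

```typescript
// Poll until the backend answers, hydrate state exactly once, stop.
// `url` and `onReady` are placeholders for this sketch.
function waitForBackend(url: string, onReady: () => Promise<void>): void {
  let done = false; // guards against overlapping async ticks
  const intervalId = setInterval(async () => {
    try {
      const r = await fetch(url);
      if (r.ok && !done) {
        done = true;
        clearInterval(intervalId); // stop polling on first success
        await onReady();           // pull current state once
      }
    } catch {
      // backend not reachable yet; keep polling
    }
  }, 1000);
}
```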
@@ -130,7 +134,8 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
       });
       switchModel({
         model: `${manifest.localModelDir}/${modelConfig.modelParameters.modelName}`,
-        strategy: getStrategy(modelConfig)
+        strategy: getStrategy(modelConfig),
+        customCuda: !!modelConfig.modelParameters.useCustomCuda
       }).then((r) => {
         if (r.ok) {
           commonStore.setStatus({ modelStatus: ModelStatus.Working });
@@ -39,6 +39,7 @@ export type ModelParameters = {
   storedLayers: number;
   maxStoredLayers: number;
   enableHighPrecisionForLastLayer: boolean;
+  useCustomCuda?: boolean;
 }
 
 export type ModelConfig = {
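Since the new field is optional, configs saved before this commit simply lack it, and the `!!` in the `switchModel` call above coerces the resulting `undefined` to `false`, so old configs keep the previous behavior. A hypothetical fragment (values invented for illustration):

```typescript
// Hypothetical values; only the shape matters here.
const modelParameters: Partial<ModelParameters> = {
  storedLayers: 25,
  maxStoredLayers: 41,
  enableHighPrecisionForLastLayer: false
  // useCustomCuda omitted, as in configs saved by older versions
};

const customCuda = !!modelParameters.useCustomCuda; // false, not undefined
```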
@@ -754,7 +755,7 @@ export const Configs: FC = observer(() => {
           </Dropdown>
         } />
       <Labeled label={t('Precision')}
-        desc={t('int8 uses less VRAM, and is faster, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
+        desc={t('int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality, and fp32 has the best quality.')}
         content={
           <Dropdown style={{ minWidth: 0 }} className="grow"
             value={selectedConfig.modelParameters.precision}
@@ -771,6 +772,7 @@ export const Configs: FC = observer(() => {
           <Option>fp32</Option>
         </Dropdown>
       } />
+      <div />
       <Labeled label={t('Stored Layers')}
         desc={t('Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM.')}
         content={
@@ -792,6 +794,16 @@ export const Configs: FC = observer(() => {
             });
           }} />
       } />
+      <Labeled label={t('Use Custom CUDA kernel to Accelerate')}
+        desc={t('Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.')}
+        content={
+          <Switch checked={selectedConfig.modelParameters.useCustomCuda}
+            onChange={(e, data) => {
+              setSelectedConfigModelParams({
+                useCustomCuda: data.checked
+              });
+            }} />
+        } />
     </div>
   }
 />