add macOS MPS configs
This commit is contained in:
@@ -30,7 +30,7 @@ export type ApiParameters = {
|
||||
frequencyPenalty: number;
|
||||
}
|
||||
|
||||
export type Device = 'CPU' | 'CUDA' | 'Custom';
|
||||
export type Device = 'CPU' | 'CUDA' | 'MPS' | 'Custom';
|
||||
export type Precision = 'fp16' | 'int8' | 'fp32';
|
||||
|
||||
export type ModelParameters = {
|
||||
@@ -52,564 +52,6 @@ export type ModelConfig = {
|
||||
modelParameters: ModelParameters
|
||||
}
|
||||
|
||||
export const defaultModelConfigs: ModelConfig[] = [
|
||||
{
|
||||
name: 'GPU-2G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 4,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 8,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 8,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 18,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 18,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 27,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 27,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-10G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-10G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-12G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 37,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-18G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-32G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-6G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const Configs: FC = observer(() => {
|
||||
const { t } = useTranslation();
|
||||
const [selectedIndex, setSelectedIndex] = React.useState(commonStore.currentModelConfigIndex);
|
||||
@@ -836,6 +278,7 @@ export const Configs: FC = observer(() => {
|
||||
}
|
||||
}}>
|
||||
<Option value="CPU">CPU</Option>
|
||||
{commonStore.platform === 'darwin' && <Option value="MPS">MPS</Option>}
|
||||
<Option value="CUDA">CUDA</Option>
|
||||
<Option value="Custom">{t('Custom')!}</Option>
|
||||
</Dropdown>
|
||||
@@ -903,7 +346,8 @@ export const Configs: FC = observer(() => {
|
||||
onMouseEnter={() => setDisplayStrategyImg(true)}
|
||||
onMouseLeave={() => setDisplayStrategyImg(false)}
|
||||
content={
|
||||
<Input className="grow" placeholder="cuda:0 fp16 *20 -> cuda:1 fp16"
|
||||
<Input className="grow"
|
||||
placeholder={commonStore.platform != 'darwin' ? 'cuda:0 fp16 *20 -> cuda:1 fp16' : 'mps fp32'}
|
||||
value={selectedConfig.modelParameters.customStrategy}
|
||||
onChange={(e, data) => {
|
||||
setSelectedConfigModelParams({
|
||||
@@ -914,7 +358,7 @@ export const Configs: FC = observer(() => {
|
||||
}
|
||||
{selectedConfig.modelParameters.device == 'Custom' && <div />}
|
||||
{
|
||||
selectedConfig.modelParameters.device != 'CPU' &&
|
||||
selectedConfig.modelParameters.device != 'CPU' && selectedConfig.modelParameters.device != 'MPS' &&
|
||||
<Labeled label={t('Use Custom CUDA kernel to Accelerate')}
|
||||
desc={t('Enabling this option can greatly improve inference speed, but there may be compatibility issues. If it fails to start, please turn off this option.')}
|
||||
content={
|
||||
|
||||
720
frontend/src/pages/defaultModelConfigs.ts
Normal file
720
frontend/src/pages/defaultModelConfigs.ts
Normal file
@@ -0,0 +1,720 @@
|
||||
import { ModelConfig } from './Configs';
|
||||
|
||||
export const defaultModelConfigsMac: ModelConfig[] = [
|
||||
{
|
||||
name: 'MAC-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'MPS',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: false,
|
||||
customStrategy: 'mps fp32'
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'MAC-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'MPS',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: false,
|
||||
customStrategy: 'mps fp32'
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'MAC-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'MPS',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: false,
|
||||
customStrategy: 'mps fp32'
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-6G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const defaultModelConfigs: ModelConfig[] = [
|
||||
{
|
||||
name: 'GPU-2G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 4,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 8,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-4G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 8,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 18,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-6G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 18,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: true,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 27,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-8G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 27,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-10G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-10G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-12G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 24,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-16G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 37,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-18G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'int8',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'GPU-32G-14B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-14B-v12-Eng98%-Other2%-20230523-ctx8192.pth',
|
||||
device: 'CUDA',
|
||||
precision: 'fp16',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false,
|
||||
useCustomCuda: true
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-6G-1B5-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng98%-Other2%-20230520-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-12G-3B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-3B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230527-ctx4096.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-EN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'CPU-28G-7B-CN',
|
||||
apiParameters: {
|
||||
apiPort: 8000,
|
||||
maxResponseToken: 4100,
|
||||
temperature: 1.2,
|
||||
topP: 0.5,
|
||||
presencePenalty: 0.4,
|
||||
frequencyPenalty: 0.4
|
||||
},
|
||||
modelParameters: {
|
||||
modelName: 'RWKV-4-Raven-7B-v12-Eng49%-Chn49%-Jpn1%-Other1%-20230530-ctx8192.pth',
|
||||
device: 'CPU',
|
||||
precision: 'fp32',
|
||||
storedLayers: 41,
|
||||
maxStoredLayers: 41,
|
||||
enableHighPrecisionForLastLayer: false
|
||||
}
|
||||
}
|
||||
];
|
||||
Reference in New Issue
Block a user