add Penalty Decay slider to Chat page

This commit is contained in:
josc146 2024-02-03 22:40:30 +08:00
parent 843840baa0
commit 932281db0a
8 changed files with 42 additions and 9 deletions

File diff suppressed because one or more lines are too long

View File

@ -171,6 +171,10 @@
"chinese": "中文", "chinese": "中文",
"default": "默认", "default": "默认",
"japanese": "日文", "japanese": "日文",
"English": "英文",
"Chinese": "中文",
"Default": "默认",
"Japanese": "日文",
"New Preset": "新建预设", "New Preset": "新建预设",
"Import": "导入", "Import": "导入",
"Name": "名称", "Name": "名称",
@ -329,5 +333,7 @@
"Instruction 1": "指令1", "Instruction 1": "指令1",
"Instruction 2": "指令2", "Instruction 2": "指令2",
"Instruction 3": "指令3", "Instruction 3": "指令3",
"Instruction: You are an expert assistant for summarizing and extracting information from given content\nGenerate a valid JSON in the following format:\n{\n \"summary\": \"Summary of content\",\n \"keywords\": [\"content keyword 1\", \"content keyword 2\"]\n}\n\nInput: The open-source community has introduced Eagle 7B, a new RNN model, built on the RWKV-v5 architecture. This new model has been trained on 1.1 trillion tokens and supports over 100 languages. The RWKV architecture, short for Rotary Weighted Key-Value, is a type of architecture used in the field of artificial intelligence, particularly in natural language processing (NLP) and is a variation of the Recurrent Neural Network (RNN) architecture.\nEagle 7B promises lower inference cost and stands out as a leading 7B model in terms of environmental efficiency and language versatility.\nThe model, with its 7.52 billion parameters, shows excellent performance in multi-lingual benchmarks, setting a new standard in its category. It competes closely with larger models in English language evaluations and is distinctive as an “Attention-Free Transformer,” though it requires additional tuning for specific uses. This model is accessible under the Apache 2.0 license and can be downloaded from HuggingFace for both personal and commercial purposes.\nIn terms of multilingual performance, Eagle 7B has claimed to have achieved notable results in benchmarks covering 23 languages. Its English performance has also seen significant advancements, outperforming its predecessor, RWKV v4, and competing with top-tier models.\nWorking towards a more scalable architecture and use of data efficiently, Eagle 7B is a more inclusive AI technology, supporting a broader range of languages. 
This model challenges the prevailing dominance of transformer models by demonstrating the capabilities of RNNs like RWKV in achieving superior performance when trained on comparable data volumes.\nIn the RWKV model, the rotary mechanism transforms the input data in a way that helps the model better understand the position or or order of elements in a sequence. The weighted key value also makes the model efficient by retrieving the stored information from previous elements in a sequence. \nHowever, questions remain about the scalability of RWKV compared to transformers, although there is optimism regarding its potential. The team plans to include additional training, an in-depth paper on Eagle 7B, and the development of a 2T model.\n\nResponse:": "Instruction: 你是一个专业的内容分析总结助手\n根据提供的内容生成以下格式的有效JSON信息:\n{\n \"summary\": \"内容的简短摘要\",\n \"keywords\": [\"内容关键词 1\", \"内容关键词 2\"]\n}\n\nInput: 开源社区推出了基于RWKV-v5架构的Eagle 7B新的RNN模型。这个新模型以1.1万亿个token进行了训练并支持100多种语言。RWKV架构是人工智能领域中特别是自然语言处理NLP中使用的一种架构它是循环神经网络RNN架构的一种变种。\nEagle 7B承诺低推理成本并以其环境效益和语言灵活性在领先的7B模型中脱颖而出。\n该模型拥有75.2亿个参数在多语言基准测试中表现出色树立了新的行业标准。它在英语语言评估中与更大的模型竞争激烈并作为“无注意力Transformer”独具特色尽管它需要针对特定用途进行额外调整。该模型可在Apache 2.0许可下访问并可从HuggingFace下载用于个人和商业目的。\n关于多语言性能Eagle 7B声称在涵盖23种语言的基准测试中取得了显著成绩。它的英语性能也取得了重大进步超越了它的前身RWKV v4并与顶级模型竞争。\n为了实现更可扩展的架构和有效利用数据Eagle 7B是一种更包容的人工智能技术支持更广泛的语言范围。通过展示RWKV等RNNs在训练相当数据量时实现卓越性能的能力该模型挑战了Transformer模型的主导地位。\n在RWKV模型中旋转机制以一种有助于模型更好地理解序列中元素的位置或顺序的方式转换输入数据。加权关键值还通过从序列中先前元素中检索存储的信息使模型更高效。\n然而与Transformer相比人们对RWKV的可扩展性仍然存在疑问尽管对其潜力持乐观态度。团队计划包括额外的训练、对Eagle 7B进行深入论文研究以及开发一个2T模型。\n\nResponse:" "Instruction: You are an expert assistant for summarizing and extracting information from given content\nGenerate a valid JSON in the following format:\n{\n \"summary\": \"Summary of content\",\n \"keywords\": [\"content keyword 1\", \"content keyword 2\"]\n}\n\nInput: The open-source community has introduced Eagle 7B, a new RNN model, built on the RWKV-v5 architecture. 
This new model has been trained on 1.1 trillion tokens and supports over 100 languages. The RWKV architecture, short for Rotary Weighted Key-Value, is a type of architecture used in the field of artificial intelligence, particularly in natural language processing (NLP) and is a variation of the Recurrent Neural Network (RNN) architecture.\nEagle 7B promises lower inference cost and stands out as a leading 7B model in terms of environmental efficiency and language versatility.\nThe model, with its 7.52 billion parameters, shows excellent performance in multi-lingual benchmarks, setting a new standard in its category. It competes closely with larger models in English language evaluations and is distinctive as an “Attention-Free Transformer,” though it requires additional tuning for specific uses. This model is accessible under the Apache 2.0 license and can be downloaded from HuggingFace for both personal and commercial purposes.\nIn terms of multilingual performance, Eagle 7B has claimed to have achieved notable results in benchmarks covering 23 languages. Its English performance has also seen significant advancements, outperforming its predecessor, RWKV v4, and competing with top-tier models.\nWorking towards a more scalable architecture and use of data efficiently, Eagle 7B is a more inclusive AI technology, supporting a broader range of languages. This model challenges the prevailing dominance of transformer models by demonstrating the capabilities of RNNs like RWKV in achieving superior performance when trained on comparable data volumes.\nIn the RWKV model, the rotary mechanism transforms the input data in a way that helps the model better understand the position or or order of elements in a sequence. The weighted key value also makes the model efficient by retrieving the stored information from previous elements in a sequence. \nHowever, questions remain about the scalability of RWKV compared to transformers, although there is optimism regarding its potential. 
The team plans to include additional training, an in-depth paper on Eagle 7B, and the development of a 2T model.\n\nResponse:": "Instruction: 你是一个专业的内容分析总结助手\n根据提供的内容生成以下格式的有效JSON信息:\n{\n \"summary\": \"内容的简短摘要\",\n \"keywords\": [\"内容关键词 1\", \"内容关键词 2\"]\n}\n\nInput: 开源社区推出了基于RWKV-v5架构的Eagle 7B新的RNN模型。这个新模型以1.1万亿个token进行了训练并支持100多种语言。RWKV架构是人工智能领域中特别是自然语言处理NLP中使用的一种架构它是循环神经网络RNN架构的一种变种。\nEagle 7B承诺低推理成本并以其环境效益和语言灵活性在领先的7B模型中脱颖而出。\n该模型拥有75.2亿个参数在多语言基准测试中表现出色树立了新的行业标准。它在英语语言评估中与更大的模型竞争激烈并作为“无注意力Transformer”独具特色尽管它需要针对特定用途进行额外调整。该模型可在Apache 2.0许可下访问并可从HuggingFace下载用于个人和商业目的。\n关于多语言性能Eagle 7B声称在涵盖23种语言的基准测试中取得了显著成绩。它的英语性能也取得了重大进步超越了它的前身RWKV v4并与顶级模型竞争。\n为了实现更可扩展的架构和有效利用数据Eagle 7B是一种更包容的人工智能技术支持更广泛的语言范围。通过展示RWKV等RNNs在训练相当数据量时实现卓越性能的能力该模型挑战了Transformer模型的主导地位。\n在RWKV模型中旋转机制以一种有助于模型更好地理解序列中元素的位置或顺序的方式转换输入数据。加权关键值还通过从序列中先前元素中检索存储的信息使模型更高效。\n然而与Transformer相比人们对RWKV的可扩展性仍然存在疑问尽管对其潜力持乐观态度。团队计划包括额外的训练、对Eagle 7B进行深入论文研究以及开发一个2T模型。\n\nResponse:",
"Penalty Decay": "惩罚衰减",
"If you don't know what it is, keep it default.": "如果你不知道这是什么,保持默认"
} }

View File

@ -8,8 +8,9 @@ export const NumberInput: FC<{
max: number, max: number,
step?: number, step?: number,
onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void
style?: CSSProperties style?: CSSProperties,
}> = ({ value, min, max, step, onChange, style }) => { toFixed?: number
}> = ({ value, min, max, step, onChange, style, toFixed = 2 }) => {
return ( return (
<Input type="number" style={style} value={value.toString()} min={min} max={max} step={step} <Input type="number" style={style} value={value.toString()} min={min} max={max} step={step}
onChange={(e, data) => { onChange={(e, data) => {
@ -22,7 +23,7 @@ export const NumberInput: FC<{
value = Number((( value = Number(((
Math.round((value - offset) / step) * step) Math.round((value - offset) / step) * step)
+ offset) + offset)
.toFixed(2)); // avoid precision issues .toFixed(toFixed)); // avoid precision issues
} }
onChange(e, { value: Math.max(Math.min(value, max), min) }); onChange(e, { value: Math.max(Math.min(value, max), min) });
} }

View File

@ -9,8 +9,9 @@ export const ValuedSlider: FC<{
max: number, max: number,
step?: number, step?: number,
input?: boolean input?: boolean
onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void,
}> = ({ value, min, max, step, input, onChange }) => { toFixed?: number
}> = ({ value, min, max, step, input, onChange, toFixed }) => {
const sliderRef = useRef<HTMLInputElement>(null); const sliderRef = useRef<HTMLInputElement>(null);
useEffect(() => { useEffect(() => {
if (step && sliderRef.current && sliderRef.current.parentElement) { if (step && sliderRef.current && sliderRef.current.parentElement) {
@ -25,7 +26,8 @@ export const ValuedSlider: FC<{
max={max} step={step} max={max} step={step}
onChange={onChange} /> onChange={onChange} />
{input {input
? <NumberInput style={{ minWidth: 0 }} value={value} min={min} max={max} step={step} onChange={onChange} /> ? <NumberInput style={{ minWidth: 0 }} value={value} min={min} max={max} step={step} onChange={onChange}
toFixed={toFixed} />
: <Text>{value}</Text>} : <Text>{value}</Text>}
</div> </div>
); );

View File

@ -35,6 +35,7 @@ import { Labeled } from '../components/Labeled';
import { ValuedSlider } from '../components/ValuedSlider'; import { ValuedSlider } from '../components/ValuedSlider';
import { PresetsButton } from './PresetsManager/PresetsButton'; import { PresetsButton } from './PresetsManager/PresetsButton';
import { webOpenOpenFileDialog } from '../utils/web-file-operations'; import { webOpenOpenFileDialog } from '../utils/web-file-operations';
import { defaultPenaltyDecay } from './defaultConfigs';
let chatSseControllers: { let chatSseControllers: {
[id: string]: AbortController [id: string]: AbortController
@ -268,6 +269,18 @@ const SidePanel: FC = observer(() => {
}); });
}} /> }} />
} /> } />
<Labeled flex breakline
label={t('Penalty Decay') + (params.penaltyDecay === defaultPenaltyDecay ? ` (${t('Default')})` : '')}
desc={t('If you don\'t know what it is, keep it default.')}
content={
<ValuedSlider value={params.penaltyDecay!} min={0.99} max={0.999}
step={0.001} toFixed={3} input
onChange={(e, data) => {
commonStore.setChatParams({
penaltyDecay: data.value
});
}} />
} />
</div> </div>
<div className="grow" /> <div className="grow" />
{/*<Button*/} {/*<Button*/}
@ -451,6 +464,7 @@ const ChatPanel: FC = observer(() => {
top_p: commonStore.chatParams.topP, top_p: commonStore.chatParams.topP,
presence_penalty: commonStore.chatParams.presencePenalty, presence_penalty: commonStore.chatParams.presencePenalty,
frequency_penalty: commonStore.chatParams.frequencyPenalty, frequency_penalty: commonStore.chatParams.frequencyPenalty,
penalty_decay: commonStore.chatParams.penaltyDecay === defaultPenaltyDecay ? undefined : commonStore.chatParams.penaltyDecay,
user_name: commonStore.activePreset?.userName || undefined, user_name: commonStore.activePreset?.userName || undefined,
assistant_name: commonStore.activePreset?.assistantName || undefined, assistant_name: commonStore.activePreset?.assistantName || undefined,
presystem: commonStore.activePreset?.presystem && undefined presystem: commonStore.activePreset?.presystem && undefined

View File

@ -1,6 +1,8 @@
import { CompletionPreset } from '../types/completion'; import { CompletionPreset } from '../types/completion';
import { ModelConfig } from '../types/configs'; import { ModelConfig } from '../types/configs';
/**
 * Default value of the chat "Penalty Decay" parameter.
 *
 * The chat page compares the current setting against this constant with `===`:
 * when they are equal, the slider label gets a "(Default)" suffix and
 * `penalty_decay` is sent as `undefined` in the request body instead of the
 * numeric value.
 *
 * NOTE(review): the store's initial `chatParams.penaltyDecay` is written as the
 * literal 0.996 rather than referencing this constant — keep the two in sync.
 */
export const defaultPenaltyDecay = 0.996;
export const defaultCompositionPrompt = '<pad>'; export const defaultCompositionPrompt = '<pad>';
export const defaultCompositionABCPrompt = 'S:3\n' + export const defaultCompositionABCPrompt = 'S:3\n' +
'B:9\n' + 'B:9\n' +

View File

@ -79,7 +79,8 @@ class CommonStore {
temperature: 1, temperature: 1,
topP: 0.3, topP: 0.3,
presencePenalty: 0, presencePenalty: 0,
frequencyPenalty: 1 frequencyPenalty: 1,
penaltyDecay: 0.996
}; };
sidePanelCollapsed: boolean | 'auto' = 'auto'; sidePanelCollapsed: boolean | 'auto' = 'auto';
// completion // completion

View File

@ -5,6 +5,7 @@ export type ApiParameters = {
topP: number; topP: number;
presencePenalty: number; presencePenalty: number;
frequencyPenalty: number; frequencyPenalty: number;
penaltyDecay?: number;
} }
export type Device = 'CPU' | 'CPU (rwkv.cpp)' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'WebGPU (Python)' | 'MPS' | 'Custom'; export type Device = 'CPU' | 'CPU (rwkv.cpp)' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'WebGPU (Python)' | 'MPS' | 'Custom';
export type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4' | 'Q5_1'; export type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4' | 'Q5_1';