add support for MIDI RWKV

2023-07-25 16:09:31 +08:00
parent 211ae342af
commit 05b9b42b56
9 changed files with 20373 additions and 100 deletions
--- a/backend-python/dep_check.py
+++ b/backend-python/dep_check.py
@@ -1,3 +1,5 @@
 import midi2audio
 import mido
 import lm_dataformat
 import ftfy
 import tqdm
--- a/backend-python/requirements.txt
+++ b/backend-python/requirements.txt
--- a/backend-python/requirements_versions.txt
+++ b/backend-python/requirements_versions.txt
--- a/backend-python/requirements_without_cyac.txt
+++ b/backend-python/requirements_without_cyac.txt
--- a/backend-python/routes/completion.py
+++ b/backend-python/routes/completion.py
@@ -72,7 +72,7 @@ requests_num = 0
 async def eval_rwkv(
-    model: RWKV,
+    model: AbstractRWKV,
    request: Request,
    body: ModelConfigBody,
    prompt: str,
@@ -209,7 +209,7 @@ async def eval_rwkv(
@router.post("/v1/chat/completions")
@router.post("/chat/completions")
 async def chat_completions(body: ChatCompletionBody, request: Request):
-    model: RWKV = global_var.get(global_var.Model)
+    model: TextRWKV = global_var.get(global_var.Model)
    if model is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "model not loaded")
@@ -302,7 +302,7 @@ The following is a coherent verbose detailed conversation between a girl named {
@router.post("/v1/completions")
@router.post("/completions")
 async def completions(body: CompletionBody, request: Request):
-    model: RWKV = global_var.get(global_var.Model)
+    model: AbstractRWKV = global_var.get(global_var.Model)
    if model is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "model not loaded")
@@ -351,7 +351,7 @@ def embedding_base64(embedding: List[float]) -> str:
@router.post("/v1/engines/text-embedding-ada-002/embeddings")
@router.post("/engines/text-embedding-ada-002/embeddings")
 async def embeddings(body: EmbeddingsBody, request: Request):
-    model: RWKV = global_var.get(global_var.Model)
+    model: AbstractRWKV = global_var.get(global_var.Model)
    if model is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "model not loaded")
--- a/backend-python/routes/config.py
+++ b/backend-python/routes/config.py
@@ -13,13 +13,16 @@ router = APIRouter()
 def get_tokens_path(model_path: str):
    model_path = model_path.lower()
-    default_tokens_path = (
+    tokenizer_dir = f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/"
-        f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/20B_tokenizer.json"
+
-    )
+    default_tokens_path = tokenizer_dir + "20B_tokenizer.json"
    if "raven" in model_path:
        return default_tokens_path
    elif "world" in model_path:
        return "rwkv_vocab_v20230424"
    elif "midi" in model_path:
        return tokenizer_dir + "tokenizer-midi.json"
    else:
        return default_tokens_path
@@ -66,7 +69,13 @@ def switch_model(body: SwitchModelBody, response: Response, request: Request):
    try:
        global_var.set(
            global_var.Model,
-            RWKV(
+            TextRWKV(
                model=body.model,
                strategy=body.strategy,
                tokens_path=get_tokens_path(body.model),
            )
            if "midi" not in body.model.lower()
            else MusicRWKV(
                model=body.model,
                strategy=body.strategy,
                tokens_path=get_tokens_path(body.model),
--- a/backend-python/rwkv_pip/tokenizer-midi.json
+++ b/backend-python/rwkv_pip/tokenizer-midi.json
--- a/backend-python/utils/rwkv.py
+++ b/backend-python/utils/rwkv.py
@@ -1,3 +1,4 @@
 from abc import ABC, abstractmethod
 import os
 import pathlib
 import copy
@@ -18,8 +19,8 @@ END_OF_LINE_DOUBLE = 535
 os.environ["TORCH_EXTENSIONS_DIR"] = f"{pathlib.Path(__file__).parent.parent.resolve()}"
-class RWKV:
+class AbstractRWKV(ABC):
-    def __init__(self, model: str, strategy: str, tokens_path: str) -> None:
+    def __init__(self, model: str, strategy: str, tokens_path: str):
        from rwkv.model import RWKV as Model  # dynamic import to make RWKV_CUDA_ON work
        filename, _ = os.path.splitext(os.path.basename(model))
@@ -29,90 +30,39 @@ class RWKV:
        self.model_state = None
        self.model_tokens = []
        self.CHUNK_LEN = 256
        self.max_tokens_per_generation = 500
        self.temperature = 1
-        self.top_p = 0.5
+        self.top_p = 0.3
-        self.penalty_alpha_presence = 0.4
+        self.top_k = 0
-        self.penalty_alpha_frequency = 0.4
+        self.penalty_alpha_presence = 0
        self.penalty_alpha_frequency = 1
-        self.interface = ":"
+    @abstractmethod
-        if "world" in self.name.lower():
+    def adjust_occurrence(self, occurrence: Dict, token: int):
-            self.user = "Question"
+        pass
            self.bot = "Answer"
            self.END_OF_LINE = 11
        else:
            self.user = "Bob"
            self.bot = "Alice"
            self.END_OF_LINE = 187
-        self.AVOID_REPEAT_TOKENS = []
+    @abstractmethod
-        AVOID_REPEAT = "，：？！"
+    def adjust_forward_logits(self, logits: List[float], occurrence: Dict, i: int):
        for i in AVOID_REPEAT:
            dd = self.pipeline.encode(i)
            assert len(dd) == 1
            self.AVOID_REPEAT_TOKENS += dd
        self.preload()
    def preload(self):
        interface = self.interface
        user = self.user
        bot = self.bot
        preset_system = (
            f"""
 The following is a coherent verbose detailed conversation between a girl named {bot} and her friend {user}. \
 {bot} is very intelligent, creative and friendly. \
 {bot} is unlikely to disagree with {user}, and {bot} doesn't like to ask {user} questions. \
 {bot} likes to tell {user} a lot about herself and her opinions. \
 {bot} usually gives {user} kind, helpful and informative advices.\n
 """
            if self.user == "Bob"
            else f"{user}{interface} hi\n\n{bot}{interface} Hi. "
            + "I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.\n\n"
        )
        logits, _ = self.run_rnn(self.fix_tokens(self.pipeline.encode(preset_system)))
        try:
            state_cache.add_state(
                state_cache.AddStateBody(
                    prompt=preset_system,
                    tokens=self.model_tokens,
                    state=self.model_state,
                    logits=logits,
                )
            )
        except HTTPException:
        pass
    # Model only saw '\n\n' as [187, 187] before, but the tokenizer outputs [535] for it at the end
-    def fix_tokens(self, tokens):
+    @abstractmethod
-        if "world" in self.name.lower():
+    def fix_tokens(self, tokens) -> List[int]:
-            return tokens
+        pass
        if len(tokens) > 0 and tokens[-1] == END_OF_LINE_DOUBLE:
            tokens = tokens[:-1] + [self.END_OF_LINE, self.END_OF_LINE]
        return tokens
-    def run_rnn(self, _tokens: List[str], newline_adj: int = 0):
+    @abstractmethod
-        tokens = [int(x) for x in _tokens]
+    def run_rnn(
-        token_len = len(tokens)
+        self, _tokens: List[str], newline_adj: int = 0
-        self.model_tokens += tokens
+    ) -> Tuple[List[float], int]:
        pass
-        while len(tokens) > 0:
+    @abstractmethod
-            out, self.model_state = self.model.forward(
+    def delta_postprocess(self, delta: str) -> str:
-                tokens[: self.CHUNK_LEN], self.model_state
+        pass
            )
            tokens = tokens[self.CHUNK_LEN :]
        out[self.END_OF_LINE] += newline_adj  # adjust \n probability
        if self.model_tokens[-1] in self.AVOID_REPEAT_TOKENS:
            out[self.model_tokens[-1]] = -999999999
        return out, token_len
    def get_embedding(self, input: str, fast_mode: bool) -> Tuple[List[float], int]:
        if fast_mode:
-            embedding, token_len = self.fast_embedding(
+            embedding, token_len = self.__fast_embedding(
                self.fix_tokens(self.pipeline.encode(input)), None
            )
        else:
@@ -123,7 +73,7 @@ The following is a coherent verbose detailed conversation between a girl named {
        embedding = (embedding / np.linalg.norm(embedding)).tolist()
        return embedding, token_len
-    def fast_embedding(self, tokens: List[str], state):
+    def __fast_embedding(self, tokens: List[str], state):
        tokens = [int(x) for x in tokens]
        token_len = len(tokens)
        self = self.model
@@ -260,7 +210,9 @@ The following is a coherent verbose detailed conversation between a girl named {
                return state[0].tolist(), token_len
-    def generate(self, prompt: str, stop: str = None):
+    def generate(
        self, prompt: str, stop: str | List[str] = None
    ) -> Iterable[Tuple[str, str, int, int]]:
        quick_log(None, None, "Generation Prompt:\n" + prompt)
        cache = None
        delta_prompt = prompt
@@ -304,28 +256,23 @@ The following is a coherent verbose detailed conversation between a girl named {
        completion_token_len = 0
        response = ""
        for i in range(self.max_tokens_per_generation):
-            for n in occurrence:
+            self.adjust_forward_logits(logits, occurrence, i)
-                logits[n] -= (
+
                    self.penalty_alpha_presence
                    + occurrence[n] * self.penalty_alpha_frequency
                )
            token = self.pipeline.sample_logits(
-                logits, temperature=self.temperature, top_p=self.top_p
+                logits, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k
            )
            if token == END_OF_TEXT:
                yield response, "", prompt_token_len, completion_token_len
                break
-            for xxx in occurrence:
+
-                occurrence[xxx] *= 0.996
+            self.adjust_occurrence(occurrence, token)
            if token not in occurrence:
                occurrence[token] = 1
            else:
                occurrence[token] += 1
            logits, _ = self.run_rnn([token])
            completion_token_len = completion_token_len + 1
-            delta: str = self.pipeline.decode(self.model_tokens[out_last:])
+            delta: str = self.delta_postprocess(
                self.pipeline.decode(self.model_tokens[out_last:])
            )
            if "\ufffd" not in delta:  # avoid utf-8 display issues
                response += delta
                if stop is not None:
@@ -360,6 +307,153 @@ The following is a coherent verbose detailed conversation between a girl named {
                yield response, delta, prompt_token_len, completion_token_len
 class TextRWKV(AbstractRWKV):
    """RWKV wrapper specialised for text (chat / completion) generation.

    Supplies the text-specific hooks declared by ``AbstractRWKV``: repetition
    penalties, trailing-newline token fix-up, chunked forwarding, and a
    preloaded system prompt that is registered with ``state_cache``.
    """

    def __init__(self, model: str, strategy: str, tokens_path: str) -> None:
        """Set text sampling defaults, speaker names, and preload the prompt.

        Relies on ``self.name`` and ``self.pipeline``, which are presumably
        provided by ``AbstractRWKV.__init__`` (not visible here) - confirm.
        """
        super().__init__(model, strategy, tokens_path)

        # Sampling / generation defaults for text models.
        self.CHUNK_LEN = 256
        self.max_tokens_per_generation = 500
        self.temperature = 1
        self.top_p = 0.3
        self.top_k = 0
        self.penalty_alpha_presence = 0
        self.penalty_alpha_frequency = 1

        self.interface = ":"

        # "world" models use different speaker names and a different
        # end-of-line token id than the other text models.
        is_world = "world" in self.name.lower()
        self.user, self.bot, self.END_OF_LINE = (
            ("Question", "Answer", 11) if is_world else ("Bob", "Alice", 187)
        )

        # Tokens that must not be emitted twice in a row; each punctuation
        # mark is required to encode to exactly one token.
        self.AVOID_REPEAT_TOKENS = []
        for mark in "，：？！":
            encoded = self.pipeline.encode(mark)
            assert len(encoded) == 1
            self.AVOID_REPEAT_TOKENS += encoded

        self.__preload()

    def adjust_occurrence(self, occurrence: Dict, token: int):
        """Decay every stored count by 0.996, then count ``token`` once more."""
        for seen in occurrence:
            occurrence[seen] *= 0.996
        occurrence[token] = occurrence.get(token, 0) + 1

    def adjust_forward_logits(self, logits: List[float], occurrence: Dict, i: int):
        """Subtract the presence + frequency penalty from each seen token's logit.

        ``i`` (the generation step) is not used by the text model.
        """
        presence = self.penalty_alpha_presence
        frequency = self.penalty_alpha_frequency
        for token_id, count in occurrence.items():
            logits[token_id] -= presence + count * frequency

    # Model only saw '\n\n' as [187, 187] before, but the tokenizer outputs [535] for it at the end
    def fix_tokens(self, tokens) -> List[int]:
        """Replace a trailing END_OF_LINE_DOUBLE token with two END_OF_LINE tokens.

        "world" models, empty inputs, and inputs that do not end with
        END_OF_LINE_DOUBLE are returned unchanged.
        """
        is_world = "world" in self.name.lower()
        if is_world or len(tokens) == 0 or tokens[-1] != END_OF_LINE_DOUBLE:
            return tokens
        newline = self.END_OF_LINE
        return tokens[:-1] + [newline, newline]

    def run_rnn(
        self, _tokens: List[str], newline_adj: int = 0
    ) -> Tuple[List[float], int]:
        """Feed tokens through the model in CHUNK_LEN-sized slices.

        Appends the tokens to ``self.model_tokens``, updates
        ``self.model_state``, and returns ``(out, token_len)`` where ``out``
        is the output of the last forward call after the adjustments below.
        """
        ids = list(map(int, _tokens))
        token_len = len(ids)
        self.model_tokens += ids

        chunk = self.CHUNK_LEN
        for start in range(0, token_len, chunk):
            out, self.model_state = self.model.forward(
                ids[start : start + chunk], self.model_state
            )

        out[self.END_OF_LINE] += newline_adj  # adjust \n probability

        # Suppress an immediate repeat of the listed punctuation tokens.
        last_token = self.model_tokens[-1]
        if last_token in self.AVOID_REPEAT_TOKENS:
            out[last_token] = -999999999
        return out, token_len

    def delta_postprocess(self, delta: str) -> str:
        """Return the decoded text delta unchanged."""
        return delta

    def __preload(self):
        """Run the preset system prompt through the model and cache the state.

        The resulting tokens, state and logits are handed to
        ``state_cache.add_state``; an ``HTTPException`` raised there is ignored.
        """
        interface = self.interface
        user = self.user
        bot = self.bot

        if self.user == "Bob":
            preset_system = f"""
 The following is a coherent verbose detailed conversation between a girl named {bot} and her friend {user}. \
 {bot} is very intelligent, creative and friendly. \
 {bot} is unlikely to disagree with {user}, and {bot} doesn't like to ask {user} questions. \
 {bot} likes to tell {user} a lot about herself and her opinions. \
 {bot} usually gives {user} kind, helpful and informative advices.\n
 """
        else:
            preset_system = (
                f"{user}{interface} hi\n\n{bot}{interface} Hi. "
                + "I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.\n\n"
            )

        prompt_tokens = self.fix_tokens(self.pipeline.encode(preset_system))
        logits, _ = self.run_rnn(prompt_tokens)

        try:
            snapshot = state_cache.AddStateBody(
                prompt=preset_system,
                tokens=self.model_tokens,
                state=self.model_state,
                logits=logits,
            )
            state_cache.add_state(snapshot)
        except HTTPException:
            # Best effort: failing to cache the preset must not block loading.
            pass
 class MusicRWKV(AbstractRWKV):
    """RWKV wrapper specialised for MIDI (music) token generation.

    Supplies the music-specific hooks declared by ``AbstractRWKV``; tokens are
    passed to the model as-is and in a single forward call.
    """

    def __init__(self, model: str, strategy: str, tokens_path: str):
        """Initialise the base model and apply music sampling defaults."""
        super().__init__(model, strategy, tokens_path)
        self.max_tokens_per_generation = 500
        self.temperature = 1
        self.top_p = 0.8
        self.top_k = 8

    def adjust_occurrence(self, occurrence: Dict, token: int):
        """Decay all stored counts by 0.997 and add weight for ``token``.

        Tokens >= 127 add a weight of 1; all lower token ids add 0.3.
        """
        for seen in occurrence:
            occurrence[seen] *= 0.997  # decay repetition penalty

        weight = 1 if (token >= 128 or token == 127) else 0.3
        occurrence[token] = weight + occurrence.get(token, 0)

    def adjust_forward_logits(self, logits: List[float], occurrence: Dict, i: int):
        """Apply the repetition penalty and the fixed per-step logit biases.

        ``i`` is the index of the current generation step.
        """
        # Fixed penalty coefficients: no presence term, 0.5 per unit of count.
        presence, frequency = 0, 0.5
        for token_id, count in occurrence.items():
            logits[token_id] -= presence + count * frequency

        logits[0] += (i - 2000) / 500  # try not to be too short or too long
        logits[127] -= 1  # avoid "t125"

    def fix_tokens(self, tokens) -> List[int]:
        """Return the tokens unchanged; no fix-up is applied for music models."""
        return tokens

    def run_rnn(
        self, _tokens: List[str], newline_adj: int = 0
    ) -> Tuple[List[float], int]:
        """Forward all tokens in one call and return ``(out, token_len)``.

        Appends the tokens to ``self.model_tokens`` and updates
        ``self.model_state``. ``newline_adj`` is accepted for interface
        compatibility and is not used here.
        """
        ids = list(map(int, _tokens))
        token_len = len(ids)
        self.model_tokens += ids
        out, self.model_state = self.model.forward(ids, self.model_state)
        return out, token_len

    def delta_postprocess(self, delta: str) -> str:
        """Prefix the decoded delta with a single space."""
        return " " + delta
 class ModelConfigBody(BaseModel):
    max_tokens: int = Field(default=None, gt=0, le=102400)
    temperature: float = Field(default=None, ge=0, le=2)
@@ -379,7 +473,7 @@ class ModelConfigBody(BaseModel):
        }
-def set_rwkv_config(model: RWKV, body: ModelConfigBody):
+def set_rwkv_config(model: AbstractRWKV, body: ModelConfigBody):
    if body.max_tokens is not None:
        model.max_tokens_per_generation = body.max_tokens
    if body.temperature is not None:
@@ -395,7 +489,7 @@ def set_rwkv_config(model: RWKV, body: ModelConfigBody):
        model.penalty_alpha_frequency = body.frequency_penalty
-def get_rwkv_config(model: RWKV) -> ModelConfigBody:
+def get_rwkv_config(model: AbstractRWKV) -> ModelConfigBody:
    return ModelConfigBody(
        max_tokens=model.max_tokens_per_generation,
        temperature=model.temperature,
--- a/manifest.json
+++ b/manifest.json
@@ -526,6 +526,30 @@
      "lastUpdated": "2023-05-23T11:22:41",
      "url": "https://huggingface.co/BlinkDL/rwkv-4-raven/blob/main/RWKV-4-Raven-14B-v12-Eng98%25-Other2%25-20230523-ctx8192.pth",
      "downloadUrl": "https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-14B-v12-Eng98%25-Other2%25-20230523-ctx8192.pth"
    },
    {
      "name": "RWKV-4-MIDI-120M-v1-20230714-ctx4096.pth",
      "desc": {
        "en": "Music 120M v1",
        "zh": "作曲 120M v1"
      },
      "size": 239224753,
      "SHA256": "161d27dcf50d0958d230601ba1e0f8e7dd9c236105e92d2b833496412ace430c",
      "lastUpdated": "2023-07-15T08:03:36",
      "url": "https://huggingface.co/BlinkDL/rwkv-4-music/blob/main/RWKV-4-MIDI-120M-v1-20230714-ctx4096.pth",
      "downloadUrl": "https://huggingface.co/BlinkDL/rwkv-4-music/resolve/main/RWKV-4-MIDI-120M-v1-20230714-ctx4096.pth"
    },
    {
      "name": "RWKV-4-MIDI-560M-v1-20230717-ctx4096.pth",
      "desc": {
        "en": "Music 560M v1",
        "zh": "作曲 560M v1"
      },
      "size": 1130577457,
      "SHA256": "62b21841b24af38ef176e9e9d895d9fff730cea8aa0623f53a1784d74ce828d6",
      "lastUpdated": "2023-07-17T15:02:08",
      "url": "https://huggingface.co/BlinkDL/rwkv-4-music/blob/main/RWKV-4-MIDI-560M-v1-20230717-ctx4096.pth",
      "downloadUrl": "https://huggingface.co/BlinkDL/rwkv-4-music/resolve/main/RWKV-4-MIDI-560M-v1-20230717-ctx4096.pth"
    }
  ]
 }