feat: use model state cache to achieve 5x - 50x faster preparation time for generation

2023-05-28 23:52:38 +08:00
parent 822f2d729c
commit 3e11128c9d
7 changed files with 160 additions and 5 deletions
--- a/backend-python/routes/config.py
+++ b/backend-python/routes/config.py
@@ -14,7 +14,7 @@ router = APIRouter()
 def get_tokens_path(model_path: str):
    model_path = model_path.lower()
    default_tokens_path = (
-        f"{pathlib.Path(__file__).parent.parent.resolve()}/20B_tokenizer.json"
+        f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/20B_tokenizer.json"
    )
    if "raven" in model_path:
        return default_tokens_path