feat: use model state cache to achieve 5x - 50x faster preparation time for generation

This commit is contained in:
josc146
2023-05-28 23:52:38 +08:00
parent 822f2d729c
commit 3e11128c9d
7 changed files with 160 additions and 5 deletions

View File

@@ -14,7 +14,7 @@ router = APIRouter()
def get_tokens_path(model_path: str):
model_path = model_path.lower()
default_tokens_path = (
-f"{pathlib.Path(__file__).parent.parent.resolve()}/20B_tokenizer.json"
+f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/20B_tokenizer.json"
)
if "raven" in model_path:
return default_tokens_path