diff --git a/backend-python/routes/config.py b/backend-python/routes/config.py
index a28c4a9..e478aa2 100644
--- a/backend-python/routes/config.py
+++ b/backend-python/routes/config.py
@@ -1,6 +1,7 @@
 import pathlib
+from utils.log import quick_log
 
-from fastapi import APIRouter, HTTPException, Response, status as Status
+from fastapi import APIRouter, HTTPException, Request, Response, status as Status
 from pydantic import BaseModel
 from utils.rwkv import *
 from utils.torch import *
@@ -30,7 +31,7 @@ class SwitchModelBody(BaseModel):
 
 
 @router.post("/switch-model")
-def switch_model(body: SwitchModelBody, response: Response):
+def switch_model(body: SwitchModelBody, response: Response, request: Request):
     if global_var.get(global_var.Model_Status) is global_var.ModelStatus.Loading:
         response.status_code = Status.HTTP_304_NOT_MODIFIED
         return
@@ -53,6 +54,7 @@ def switch_model(body: SwitchModelBody, response: Response):
         )
     except Exception as e:
         print(e)
+        quick_log(request, body, f"Exception: {e}")
         global_var.set(global_var.Model_Status, global_var.ModelStatus.Offline)
         raise HTTPException(Status.HTTP_500_INTERNAL_SERVER_ERROR, "failed to load")
 
diff --git a/backend-python/routes/state_cache.py b/backend-python/routes/state_cache.py
index deb1047..b8a836f 100644
--- a/backend-python/routes/state_cache.py
+++ b/backend-python/routes/state_cache.py
@@ -1,5 +1,6 @@
 from typing import Any, Dict
-from fastapi import APIRouter, HTTPException, Response, status
+from utils.log import quick_log
+from fastapi import APIRouter, HTTPException, Request, Response, status
 from pydantic import BaseModel
 import gc
 import copy
@@ -72,7 +73,7 @@ class LongestPrefixStateBody(BaseModel):
 
 
 @router.post("/longest-prefix-state")
-def longest_prefix_state(body: LongestPrefixStateBody):
+def longest_prefix_state(body: LongestPrefixStateBody, request: Request):
     global trie
     if trie is None:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")
@@ -83,8 +84,10 @@ def longest_prefix_state(body: LongestPrefixStateBody):
     if id != -1:
         v = dtrie[id]
         device = v["device"]
+        prompt = trie[id]
+        quick_log(request, body, "Hit: " + prompt)
         return {
-            "prompt": trie[id],
+            "prompt": prompt,
             "tokens": v["tokens"],
             "state": [tensor.to(device) for tensor in v["state"]]
             if device != torch.device("cpu")
diff --git a/backend-python/utils/log.py b/backend-python/utils/log.py
index cacdda2..498fbd8 100644
--- a/backend-python/utils/log.py
+++ b/backend-python/utils/log.py
@@ -16,7 +16,7 @@ logger.addHandler(fh)
 
 def quick_log(request: Request, body: Any, response: str):
     logger.info(
-        f"Client: {request.client}\nUrl: {request.url}\n"
+        f"Client: {request.client if request else ''}\nUrl: {request.url if request else ''}\n"
         + (
            f"Body: {json.dumps(body.__dict__, default=vars, ensure_ascii=False)}\n"
            if body
diff --git a/backend-python/utils/rwkv.py b/backend-python/utils/rwkv.py
index 6c94369..340608b 100644
--- a/backend-python/utils/rwkv.py
+++ b/backend-python/utils/rwkv.py
@@ -105,7 +105,7 @@ The following is a coherent verbose detailed conversation between a girl named {
     delta_prompt = prompt
     try:
         cache = state_cache.longest_prefix_state(
-            state_cache.LongestPrefixStateBody(prompt=prompt)
+            state_cache.LongestPrefixStateBody(prompt=prompt), None
         )
     except HTTPException:
         pass
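
Note for reviewers: the `None` second argument passed from `utils/rwkv.py` only works because `quick_log` now guards both `request.client` and `request.url`. A minimal runnable sketch of that guarded pattern follows; the logger setup and the `Response:` tail are illustrative assumptions for self-containment, not the exact contents of `utils/log.py`:

```python
# Sketch of the guarded quick_log pattern introduced by this patch.
# The logger configuration and the "Response:" tail are assumptions
# made to keep the example runnable, not the real utils/log.py code.
import json
import logging
from typing import Any, Optional

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("quick_log_sketch")


def quick_log(request: Optional[Any], body: Any, response: str) -> None:
    # With request=None (internal callers such as utils/rwkv.py), both
    # conditional expressions fall back to empty strings instead of
    # raising AttributeError on request.client / request.url.
    logger.info(
        f"Client: {request.client if request else ''}\nUrl: {request.url if request else ''}\n"
        + (
            f"Body: {json.dumps(body.__dict__, default=vars, ensure_ascii=False)}\n"
            if body
            else ""
        )
        + f"Response: {response}"
    )


class _Body:
    """Stand-in for a pydantic model; only __dict__ is used here."""

    def __init__(self, prompt: str):
        self.prompt = prompt


# Internal call path: no FastAPI Request object is available.
quick_log(None, _Body("hello"), "Hit: hello")
```

Passing `None` keeps `longest_prefix_state` reusable as a plain function from `utils/rwkv.py` without threading a `Request` through every internal call, at the cost of the empty `Client:`/`Url:` fields in those log entries.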