add logs for state cache and switch-model
This commit is contained in:
parent
b7c34b0d42
commit
cea1d8b4d1
@ -1,6 +1,7 @@
|
|||||||
import pathlib
|
import pathlib
|
||||||
|
from utils.log import quick_log
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Response, status as Status
|
from fastapi import APIRouter, HTTPException, Request, Response, status as Status
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from utils.rwkv import *
|
from utils.rwkv import *
|
||||||
from utils.torch import *
|
from utils.torch import *
|
||||||
@ -30,7 +31,7 @@ class SwitchModelBody(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/switch-model")
|
@router.post("/switch-model")
|
||||||
def switch_model(body: SwitchModelBody, response: Response):
|
def switch_model(body: SwitchModelBody, response: Response, request: Request):
|
||||||
if global_var.get(global_var.Model_Status) is global_var.ModelStatus.Loading:
|
if global_var.get(global_var.Model_Status) is global_var.ModelStatus.Loading:
|
||||||
response.status_code = Status.HTTP_304_NOT_MODIFIED
|
response.status_code = Status.HTTP_304_NOT_MODIFIED
|
||||||
return
|
return
|
||||||
@ -53,6 +54,7 @@ def switch_model(body: SwitchModelBody, response: Response):
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
quick_log(request, body, f"Exception: {e}")
|
||||||
global_var.set(global_var.Model_Status, global_var.ModelStatus.Offline)
|
global_var.set(global_var.Model_Status, global_var.ModelStatus.Offline)
|
||||||
raise HTTPException(Status.HTTP_500_INTERNAL_SERVER_ERROR, "failed to load")
|
raise HTTPException(Status.HTTP_500_INTERNAL_SERVER_ERROR, "failed to load")
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
from fastapi import APIRouter, HTTPException, Response, status
|
from utils.log import quick_log
|
||||||
|
from fastapi import APIRouter, HTTPException, Request, Response, status
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import gc
|
import gc
|
||||||
import copy
|
import copy
|
||||||
@ -72,7 +73,7 @@ class LongestPrefixStateBody(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/longest-prefix-state")
|
@router.post("/longest-prefix-state")
|
||||||
def longest_prefix_state(body: LongestPrefixStateBody):
|
def longest_prefix_state(body: LongestPrefixStateBody, request: Request):
|
||||||
global trie
|
global trie
|
||||||
if trie is None:
|
if trie is None:
|
||||||
raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")
|
raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")
|
||||||
@ -83,8 +84,10 @@ def longest_prefix_state(body: LongestPrefixStateBody):
|
|||||||
if id != -1:
|
if id != -1:
|
||||||
v = dtrie[id]
|
v = dtrie[id]
|
||||||
device = v["device"]
|
device = v["device"]
|
||||||
|
prompt = trie[id]
|
||||||
|
quick_log(request, body, "Hit: " + prompt)
|
||||||
return {
|
return {
|
||||||
"prompt": trie[id],
|
"prompt": prompt,
|
||||||
"tokens": v["tokens"],
|
"tokens": v["tokens"],
|
||||||
"state": [tensor.to(device) for tensor in v["state"]]
|
"state": [tensor.to(device) for tensor in v["state"]]
|
||||||
if device != torch.device("cpu")
|
if device != torch.device("cpu")
|
||||||
|
@ -16,7 +16,7 @@ logger.addHandler(fh)
|
|||||||
|
|
||||||
def quick_log(request: Request, body: Any, response: str):
|
def quick_log(request: Request, body: Any, response: str):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Client: {request.client}\nUrl: {request.url}\n"
|
f"Client: {request.client if request else ''}\nUrl: {request.url if request else ''}\n"
|
||||||
+ (
|
+ (
|
||||||
f"Body: {json.dumps(body.__dict__, default=vars, ensure_ascii=False)}\n"
|
f"Body: {json.dumps(body.__dict__, default=vars, ensure_ascii=False)}\n"
|
||||||
if body
|
if body
|
||||||
|
@ -105,7 +105,7 @@ The following is a coherent verbose detailed conversation between a girl named {
|
|||||||
delta_prompt = prompt
|
delta_prompt = prompt
|
||||||
try:
|
try:
|
||||||
cache = state_cache.longest_prefix_state(
|
cache = state_cache.longest_prefix_state(
|
||||||
state_cache.LongestPrefixStateBody(prompt=prompt)
|
state_cache.LongestPrefixStateBody(prompt=prompt), None
|
||||||
)
|
)
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user