diff --git a/.gitignore b/.gitignore
index 1515b8e..44368b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,5 @@ __pycache__
 *.exe
 *.old
 .DS_Store
+*.log.*
+*.log
\ No newline at end of file
diff --git a/backend-python/main.py b/backend-python/main.py
index 311c042..44ad7e7 100644
--- a/backend-python/main.py
+++ b/backend-python/main.py
@@ -4,17 +4,18 @@ import sys
 sys.path.append(os.path.dirname(os.path.realpath(__file__)))
 
 import psutil
-from fastapi import FastAPI
+from fastapi import Depends, FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 import uvicorn
 from utils.rwkv import *
 from utils.torch import *
 from utils.ngrok import *
+from utils.log import log_middleware
 from routes import completion, config, state_cache
 import global_var
 
 
-app = FastAPI()
+app = FastAPI(dependencies=[Depends(log_middleware)])
 
 app.add_middleware(
     CORSMiddleware,
@@ -42,7 +43,7 @@ def init():
 
 @app.get("/")
 def read_root():
-    return {"Hello": "World!", "pid": os.getpid()}
+    return {"Hello": "World!"}
 
 
 @app.post("/exit")
@@ -60,7 +61,7 @@ def debug():
         strategy="cuda fp16",
         tokens_path="20B_tokenizer.json",
     )
-    d = model.tokenizer.decode([])
+    d = model.pipeline.decode([])
     print(d)
 
 
diff --git a/backend-python/routes/completion.py b/backend-python/routes/completion.py
index 7f6a1ac..f130440 100644
--- a/backend-python/routes/completion.py
+++ b/backend-python/routes/completion.py
@@ -7,6 +7,7 @@ from fastapi import APIRouter, Request, status, HTTPException
 from sse_starlette.sse import EventSourceResponse
 from pydantic import BaseModel
 from utils.rwkv import *
+from utils.log import quick_log
 import global_var
 
 router = APIRouter()
@@ -26,6 +27,8 @@ class ChatCompletionBody(ModelConfigBody):
 
 completion_lock = Lock()
 
+requests_num = 0
+
 
 @router.post("/v1/chat/completions")
 @router.post("/chat/completions")
@@ -106,8 +109,15 @@ The following is a coherent verbose detailed conversation between a girl named {
     completion_text += f"{bot}{interface}"
 
     async def eval_rwkv():
+        global requests_num
+        requests_num = requests_num + 1
+        quick_log(request, None, "Start Waiting. RequestsNum: " + str(requests_num))
         while completion_lock.locked():
             if await request.is_disconnected():
+                requests_num = requests_num - 1
+                quick_log(
+                    request, None, "Stop Waiting. RequestsNum: " + str(requests_num)
+                )
                 return
             await asyncio.sleep(0.1)
         else:
@@ -135,9 +145,21 @@ The following is a coherent verbose detailed conversation between a girl named {
                         }
                     )
                 # torch_gc()
+                requests_num = requests_num - 1
                 completion_lock.release()
                 if await request.is_disconnected():
+                    print(f"{request.client} Stop Waiting")
+                    quick_log(
+                        request,
+                        body,
+                        response + "\nStop Waiting. RequestsNum: " + str(requests_num),
+                    )
                     return
+                quick_log(
+                    request,
+                    body,
+                    response + "\nFinished. RequestsNum: " + str(requests_num),
+                )
                 yield json.dumps(
                     {
                         "response": response,
@@ -161,6 +183,12 @@ The following is a coherent verbose detailed conversation between a girl named {
                     if await request.is_disconnected():
                         break
                 # torch_gc()
+                requests_num = requests_num - 1
+                quick_log(
+                    request,
+                    body,
+                    response + "\nFinished. RequestsNum: " + str(requests_num),
RequestsNum: " + str(requests_num), + ) completion_lock.release() if await request.is_disconnected(): return @@ -182,7 +210,11 @@ The following is a coherent verbose detailed conversation between a girl named { if body.stream: return EventSourceResponse(eval_rwkv()) else: - return await eval_rwkv().__anext__() + try: + return await eval_rwkv().__anext__() + except StopAsyncIteration: + print(f"{request.client} Stop Waiting") + return None class CompletionBody(ModelConfigBody): @@ -203,8 +235,15 @@ async def completions(body: CompletionBody, request: Request): raise HTTPException(status.HTTP_400_BAD_REQUEST, "prompt not found") async def eval_rwkv(): + global requests_num + requests_num = requests_num + 1 + quick_log(request, None, "Start Waiting. RequestsNum: " + str(requests_num)) while completion_lock.locked(): if await request.is_disconnected(): + requests_num = requests_num - 1 + quick_log( + request, None, "Stop Waiting. RequestsNum: " + str(requests_num) + ) return await asyncio.sleep(0.1) else: @@ -229,9 +268,21 @@ async def completions(body: CompletionBody, request: Request): } ) # torch_gc() + requests_num = requests_num - 1 completion_lock.release() if await request.is_disconnected(): + print(f"{request.client} Stop Waiting") + quick_log( + request, + body, + response + "\nStop Waiting. RequestsNum: " + str(requests_num), + ) return + quick_log( + request, + body, + response + "\nFinished. RequestsNum: " + str(requests_num), + ) yield json.dumps( { "response": response, @@ -252,6 +303,12 @@ async def completions(body: CompletionBody, request: Request): if await request.is_disconnected(): break # torch_gc() + requests_num = requests_num - 1 + quick_log( + request, + body, + response + "\nFinished. RequestsNum: " + str(requests_num), + ) completion_lock.release() if await request.is_disconnected(): return @@ -270,4 +327,8 @@ async def completions(body: CompletionBody, request: Request): if body.stream: return EventSourceResponse(eval_rwkv()) else: - return await eval_rwkv().__anext__() + try: + return await eval_rwkv().__anext__() + except StopAsyncIteration: + print(f"{request.client} Stop Waiting") + return None diff --git a/backend-python/utils/log.py b/backend-python/utils/log.py new file mode 100644 index 0000000..cacdda2 --- /dev/null +++ b/backend-python/utils/log.py @@ -0,0 +1,32 @@ +import json +import logging +from typing import Any +from fastapi import Request + + +logger = logging.getLogger() +logger.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(levelname)s\n%(message)s") +fh = logging.handlers.RotatingFileHandler( + "api.log", mode="a", maxBytes=3 * 1024 * 1024, backupCount=3 +) +fh.setFormatter(formatter) +logger.addHandler(fh) + + +def quick_log(request: Request, body: Any, response: str): + logger.info( + f"Client: {request.client}\nUrl: {request.url}\n" + + ( + f"Body: {json.dumps(body.__dict__, default=vars, ensure_ascii=False)}\n" + if body + else "" + ) + + (f"Response:\n{response}\n" if response else "") + ) + + +async def log_middleware(request: Request): + logger.info( + f"Client: {request.client}\nUrl: {request.url}\nBody: {await request.body()}\n" + )