RWKV-Runner/backend-python/main.py

75 lines
1.6 KiB
Python
Raw Permalink Normal View History

2023-05-06 20:17:39 +08:00
import os
2023-05-17 11:39:00 +08:00
import sys
2023-05-06 20:17:39 +08:00
2023-05-20 23:34:33 +08:00
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
import psutil
2023-06-03 17:12:59 +08:00
from fastapi import Depends, FastAPI
2023-05-06 20:17:39 +08:00
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
2023-05-07 17:27:54 +08:00
from utils.rwkv import *
from utils.torch import *
from utils.ngrok import *
2023-06-03 17:12:59 +08:00
from utils.log import log_middleware
from routes import completion, config, state_cache
2023-05-07 17:27:54 +08:00
import global_var
2023-05-06 20:17:39 +08:00
2023-06-03 17:12:59 +08:00
# Application object; log_middleware is attached as a dependency of every route.
app = FastAPI(dependencies=[Depends(log_middleware)])

# CORS is configured with wildcards for origins, methods and headers, with
# credentials allowed.
# NOTE(review): this lets any web origin call the API (including POST /exit);
# presumably intentional for a locally hosted backend -- confirm.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register the routers exposed by the modules in the `routes` package.
app.include_router(completion.router)
app.include_router(config.router)
app.include_router(state_cache.router)
2023-05-07 17:27:54 +08:00
2023-05-06 20:17:39 +08:00
2023-05-17 11:39:00 +08:00
@app.on_event("startup")
def init():
    """Startup hook: initialise shared state and optionally open an ngrok tunnel.

    Runs ``global_var.init()``, ``state_cache.init()`` and ``set_torch()`` in
    that order. ``ngrok_connect()`` is then called only when the
    ``ngrok_token`` environment variable is set.
    """
    global_var.init()
    state_cache.init()
    set_torch()

    # Tunnelling is opt-in: without a token in the environment there is nothing to do.
    token = os.environ.get("ngrok_token")
    if token is None:
        return
    ngrok_connect()
@app.get("/")
def read_root():
    """Handle ``GET /`` by returning a fixed greeting payload."""
    greeting = {"Hello": "World!"}
    return greeting
2023-05-06 20:17:39 +08:00
2023-05-07 17:27:54 +08:00
@app.post("/exit")
def exit():
    """Handle ``POST /exit`` by killing this process and all of its descendants.

    Descendants are enumerated recursively with psutil and killed first; the
    current process is killed last, so this handler never returns a response.

    A descendant that has already exited by the time ``kill()`` is called is
    skipped. Otherwise ``psutil.NoSuchProcess`` would abort the loop and leave
    the remaining children and this process alive.
    """
    # NOTE(review): the function name shadows the builtin ``exit`` inside this
    # module; it is kept unchanged so the registered route handler name stays
    # the same.
    current = psutil.Process(os.getpid())
    for child in current.children(recursive=True):
        try:
            child.kill()
        except psutil.NoSuchProcess:
            # The child terminated between enumeration and kill(); that is
            # already the desired end state.
            continue
    current.kill()
2023-05-06 20:17:39 +08:00
2023-05-21 23:25:58 +08:00
def debug():
    """Manual check: build an ``RWKV`` model and print the decoding of an empty token list.

    The model path, strategy and tokenizer path are hard-coded. Nothing in the
    visible part of this file calls this function apart from a commented-out
    call under the ``__main__`` guard.
    """
    rwkv_model = RWKV(
        model="../models/RWKV-4-Raven-7B-v11-Eng49%-Chn49%-Jpn1%-Other1%-20230430-ctx8192.pth",
        strategy="cuda fp16",
        tokens_path="20B_tokenizer.json",
    )
    print(rwkv_model.pipeline.decode([]))
2023-05-06 20:17:39 +08:00
if __name__ == "__main__":
    # Optional positional CLI arguments:
    #   argv[1] -> port (converted with int(); defaults to 8000)
    #   argv[2] -> host (defaults to "127.0.0.1")
    uvicorn.run(
        "main:app",
        port=int(sys.argv[1]) if len(sys.argv) >= 2 else 8000,
        host=sys.argv[2] if len(sys.argv) >= 3 else "127.0.0.1",
    )
2023-05-21 23:25:58 +08:00
# debug()