RWKV-Runner/backend-python/routes/completion.py

266 lines
8.9 KiB
Python
Raw Normal View History

2023-05-17 11:39:00 +08:00
import asyncio
2023-05-07 17:27:54 +08:00
import json
2023-05-17 11:39:00 +08:00
from threading import Lock
2023-05-07 17:27:54 +08:00
from typing import List
from fastapi import APIRouter, Request, status, HTTPException
from sse_starlette.sse import EventSourceResponse
from pydantic import BaseModel
from utils.rwkv import *
import global_var
# Router on which the chat-completion and text-completion endpoints in this
# module are registered.
router = APIRouter()
class Message(BaseModel):
    """A single entry of a chat conversation.

    ``chat_completions`` distinguishes the roles ``"system"``, ``"user"``
    and ``"assistant"``; messages with any other role are ignored when the
    prompt is assembled.
    """

    # Who authored the message.
    role: str
    # Raw text of the message.
    content: str
2023-05-22 11:18:37 +08:00
class ChatCompletionBody(ModelConfigBody):
    """Request payload accepted by the chat-completion endpoints.

    Extends ``ModelConfigBody`` (declared outside this module); the whole
    body is also handed to ``set_rwkv_config`` so that per-request settings
    override the stored model configuration.
    """

    # Conversation history; the last entry must have role "user".
    messages: List[Message]
    # Model name supplied by the client; the handler always reports "rwkv".
    model: str = "rwkv"
    # When true the answer is delivered as server-sent events.
    stream: bool = False
    # Custom stop sequence; when omitted the chat handler stops at
    # "\n\n" followed by the model's user name.
    stop: str = None
2023-05-17 11:39:00 +08:00
# Shared by both endpoints in this module: a request acquires it before
# configuring the model and generating, so only one generation runs at a time.
completion_lock = Lock()
2023-05-07 17:27:54 +08:00
def _normalize_message_text(text: str) -> str:
    """Return *text* with literal ``\\n`` sequences turned into newlines,
    CRLF unified to LF, doubled newlines collapsed once, and outer
    whitespace stripped."""
    return (
        text.replace("\\n", "\n")
        .replace("\r\n", "\n")
        .replace("\n\n", "\n")
        .strip()
    )


def _to_third_person(text: str, bot: str) -> str:
    """Rewrite second-person wording in a system prompt so it refers to *bot*.

    The rules are applied in order, longest phrases first. In particular the
    possessives ("Your"/"your") must be handled before the bare pronouns
    ("You"/"you"); otherwise "Your" would already have become "<bot>r".
    """
    rules = (
        ("You are", f"{bot} is"),
        ("you are", f"{bot} is"),
        ("You're", f"{bot} is"),
        ("you're", f"{bot} is"),
        ("Your", f"{bot}'s"),
        ("your", f"{bot}'s"),
        ("You", bot),
        ("you", bot),
        # Chinese second-person pronoun. An empty pattern must never be used
        # here: str.replace("", x) inserts x between every character.
        ("你", bot),
    )
    for old, new in rules:
        text = text.replace(old, new)
    return text


def _default_chat_preamble(bot: str, user: str) -> str:
    """Return the built-in persona description that opens the prompt when the
    model's user name is "Bob"; for any other user name return ""."""
    if user != "Bob":
        return ""
    return (
        "\n"
        f"The following is a coherent verbose detailed conversation between a girl named {bot} and her friend {user}. "
        f"{bot} is very intelligent, creative and friendly. "
        f"{bot} is unlikely to disagree with {user}, and {bot} doesn't like to ask {user} questions. "
        f"{bot} likes to tell {user} a lot about herself and her opinions. "
        f"{bot} usually gives {user} kind, helpful and informative advices.\n\n"
    )


@router.post("/v1/chat/completions")
@router.post("/chat/completions")
async def chat_completions(body: ChatCompletionBody, request: Request):
    """Answer a chat conversation with the currently loaded model.

    The message list is flattened into a single text prompt
    (``<user><interface> text`` / ``<bot><interface> text`` turns separated by
    blank lines) that ends with ``<bot><interface>`` so the model continues
    as the bot. A ``system`` message replaces everything accumulated so far
    with a third-person rewrite of its content.

    Args:
        body: Conversation plus per-request model settings.
        request: Incoming request, used to detect client disconnects.

    Returns:
        An ``EventSourceResponse`` streaming JSON chunks followed by
        ``"[DONE]"`` when ``body.stream`` is true, otherwise a dict holding
        the complete answer.

    Raises:
        HTTPException: 400 if no model is loaded, or if the message list is
            empty or does not end with a ``user`` message.
    """
    model: RWKV = global_var.get(global_var.Model)
    if model is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "model not loaded")

    # An empty list is rejected explicitly instead of failing with IndexError.
    if not body.messages or body.messages[-1].role != "user":
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "no question found")

    interface = model.interface
    user = model.user
    bot = model.bot

    completion_text = _default_chat_preamble(bot, user)
    for message in body.messages:
        if message.role == "system":
            # The conditional is evaluated on its own; written inline with
            # "+" it would swallow the message content whenever it is true.
            intro = (
                f"The following is a coherent verbose detailed conversation between a girl named {bot} and her friend {user}. "
                if user == "Bob"
                else ""
            )
            single_line = _normalize_message_text(message.content).replace(
                "\n", " "
            )
            # Assignment, not "+=": a system message resets the prompt.
            completion_text = (
                intro + _to_third_person(single_line, bot) + "\n\n"
            )
        elif message.role == "user":
            completion_text += (
                f"{user}{interface} "
                + _normalize_message_text(message.content)
                + "\n\n"
            )
        elif message.role == "assistant":
            completion_text += (
                f"{bot}{interface} "
                + _normalize_message_text(message.content)
                + "\n\n"
            )
    completion_text += f"{bot}{interface}"

    # By default stop as soon as the model starts writing the user's turn.
    stop = f"\n\n{user}" if body.stop is None else body.stop

    async def eval_rwkv():
        # Poll for the lock without blocking the event loop; give up if the
        # client goes away while waiting.
        while not completion_lock.acquire(blocking=False):
            if await request.is_disconnected():
                return
            await asyncio.sleep(0.1)

        # Pre-bound so the final chunk is well defined even if the model
        # produces no tokens.
        response = None
        try:
            set_rwkv_config(model, global_var.get(global_var.Model_Config))
            set_rwkv_config(model, body)
            for response, delta in model.generate(completion_text, stop=stop):
                if await request.is_disconnected():
                    break
                if body.stream:
                    yield json.dumps(
                        {
                            "response": response,
                            "model": "rwkv",
                            "choices": [
                                {
                                    "delta": {"content": delta},
                                    "index": 0,
                                    "finish_reason": None,
                                }
                            ],
                        }
                    )
        finally:
            # Runs on normal completion, on errors raised during generation,
            # and when the generator is closed at a yield, so the lock can
            # never stay held and block later requests.
            completion_lock.release()

        if await request.is_disconnected():
            return
        if body.stream:
            yield json.dumps(
                {
                    "response": response,
                    "model": "rwkv",
                    "choices": [
                        {
                            "delta": {},
                            "index": 0,
                            "finish_reason": "stop",
                        }
                    ],
                }
            )
            yield "[DONE]"
        else:
            yield {
                "response": response,
                "model": "rwkv",
                "choices": [
                    {
                        "message": {
                            "role": "assistant",
                            "content": response,
                        },
                        "index": 0,
                        "finish_reason": "stop",
                    }
                ],
            }

    if body.stream:
        return EventSourceResponse(eval_rwkv())
    else:
        # Non-streaming: the generator yields exactly one item, the full answer.
        return await eval_rwkv().__anext__()
2023-05-22 11:18:37 +08:00
class CompletionBody(ModelConfigBody):
    """Request payload accepted by the plain text-completion endpoints.

    Extends ``ModelConfigBody`` (declared outside this module); the whole
    body is also handed to ``set_rwkv_config`` so that per-request settings
    override the stored model configuration.
    """

    # Text the model should continue; the handler rejects an empty prompt.
    prompt: str
    # Model name supplied by the client; the handler always reports "rwkv".
    model: str = "rwkv"
    # When true the completion is delivered as server-sent events.
    stream: bool = False
    # Stop sequence, forwarded to the model's generate call unchanged.
    stop: str = None
@router.post("/v1/completions")
@router.post("/completions")
async def completions(body: CompletionBody, request: Request):
    """Continue ``body.prompt`` with the currently loaded model.

    Args:
        body: Prompt plus per-request model settings.
        request: Incoming request, used to detect client disconnects.

    Returns:
        An ``EventSourceResponse`` streaming JSON chunks followed by
        ``"[DONE]"`` when ``body.stream`` is true, otherwise a dict holding
        the complete generated text.

    Raises:
        HTTPException: 400 if no model is loaded or the prompt is empty.
    """
    model: RWKV = global_var.get(global_var.Model)
    if model is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "model not loaded")

    if not body.prompt:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "prompt not found")

    async def eval_rwkv():
        # Poll for the lock without blocking the event loop; give up if the
        # client goes away while waiting.
        while not completion_lock.acquire(blocking=False):
            if await request.is_disconnected():
                return
            await asyncio.sleep(0.1)

        # Pre-bound so the final chunk is well defined even if the model
        # produces no tokens.
        response = None
        try:
            set_rwkv_config(model, global_var.get(global_var.Model_Config))
            set_rwkv_config(model, body)
            for response, delta in model.generate(body.prompt, stop=body.stop):
                if await request.is_disconnected():
                    break
                if body.stream:
                    yield json.dumps(
                        {
                            "response": response,
                            "model": "rwkv",
                            "choices": [
                                {
                                    "text": delta,
                                    "index": 0,
                                    "finish_reason": None,
                                }
                            ],
                        }
                    )
        finally:
            # Runs on normal completion, on errors raised during generation,
            # and when the generator is closed at a yield, so the lock can
            # never stay held and block later requests.
            completion_lock.release()

        if await request.is_disconnected():
            return
        if body.stream:
            yield json.dumps(
                {
                    "response": response,
                    "model": "rwkv",
                    "choices": [
                        {
                            "text": "",
                            "index": 0,
                            "finish_reason": "stop",
                        }
                    ],
                }
            )
            yield "[DONE]"
        else:
            yield {
                "response": response,
                "model": "rwkv",
                "choices": [
                    {
                        "text": response,
                        "index": 0,
                        "finish_reason": "stop",
                    }
                ],
            }

    if body.stream:
        return EventSourceResponse(eval_rwkv())
    else:
        # Non-streaming: the generator yields exactly one item, the full text.
        return await eval_rwkv().__anext__()