diff --git a/backend-python/routes/state_cache.py b/backend-python/routes/state_cache.py
index ec6d81b..8c646b1 100644
--- a/backend-python/routes/state_cache.py
+++ b/backend-python/routes/state_cache.py
@@ -4,8 +4,6 @@ from fastapi import APIRouter, HTTPException, Request, Response, status
 from pydantic import BaseModel
 import gc
 import copy
-import sys
-import torch
 
 router = APIRouter()
 
@@ -73,6 +71,8 @@ def add_state(body: AddStateBody):
     if trie is None:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")
 
+    import torch
+
     try:
         id: int = trie.insert(body.prompt)
         device: torch.device = body.state[0].device
@@ -147,6 +147,8 @@ def longest_prefix_state(body: LongestPrefixStateBody, request: Request):
     if trie is None:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")
 
+    import torch
+
     id = -1
     try:
         for id, len in trie.prefix(body.prompt):
diff --git a/backend-python/utils/rwkv.py b/backend-python/utils/rwkv.py
index ca54e1c..83a4c0b 100644
--- a/backend-python/utils/rwkv.py
+++ b/backend-python/utils/rwkv.py
@@ -7,9 +7,7 @@ from typing import Dict, Iterable, List, Tuple
 from utils.log import quick_log
 from fastapi import HTTPException
 from pydantic import BaseModel, Field
-import torch
 import numpy as np
-from rwkv_pip.utils import PIPELINE
 from routes import state_cache
 
 
@@ -23,6 +21,7 @@ os.environ["TORCH_EXTENSIONS_DIR"] = f"{pathlib.Path(__file__).parent.parent.res
 class AbstractRWKV(ABC):
     def __init__(self, model: str, strategy: str, tokens_path: str):
         from rwkv.model import RWKV as Model  # dynamic import to make RWKV_CUDA_ON work
+        from rwkv_pip.utils import PIPELINE
 
         filename, _ = os.path.splitext(os.path.basename(model))
         self.name = filename
@@ -75,6 +74,8 @@ class AbstractRWKV(ABC):
         return embedding, token_len
 
     def __fast_embedding(self, tokens: List[str], state):
+        import torch
+
         tokens = [int(x) for x in tokens]
         token_len = len(tokens)
         self = self.model