diff --git a/backend-python/routes/state_cache.py b/backend-python/routes/state_cache.py index 33bf74f..647ffac 100644 --- a/backend-python/routes/state_cache.py +++ b/backend-python/routes/state_cache.py @@ -109,7 +109,7 @@ def add_state(body: AddStateBody): else copy.deepcopy(body.state) ) else: - pass # WebGPU + state = body.state.back() # WebGPU id: int = trie.insert(body.prompt) dtrie[id] = { diff --git a/backend-python/rwkv_pip/webgpu/model.py b/backend-python/rwkv_pip/webgpu/model.py index 5d65344..07df831 100644 --- a/backend-python/rwkv_pip/webgpu/model.py +++ b/backend-python/rwkv_pip/webgpu/model.py @@ -23,4 +23,9 @@ class RWKV: self.w["emb.weight"] = [0] * wrp.peek_info(model_path).num_vocab def forward(self, tokens: List[int], state: Union[Any, None] = None): - return wrp.v5.run_one(self.model, tokens, state) + if type(state).__name__ == "BackedState": # memory state + gpu_state = wrp.v5.ModelState(self.model, 1) + gpu_state.load(state) + else: + gpu_state = state + return wrp.v5.run_one(self.model, tokens, gpu_state) diff --git a/backend-python/utils/rwkv.py b/backend-python/utils/rwkv.py index 8414ed3..9109c47 100644 --- a/backend-python/utils/rwkv.py +++ b/backend-python/utils/rwkv.py @@ -239,7 +239,12 @@ class AbstractRWKV(ABC): self.model_tokens = [] else: delta_prompt = prompt[len(cache["prompt"]) :] - self.model_state = copy.deepcopy(cache["state"]) + state = cache["state"] + self.model_state = ( + copy.deepcopy(state) + if type(state) == list or type(state) == np.ndarray + else state + ) self.model_tokens = copy.deepcopy(cache["tokens"]) logits = copy.deepcopy(cache["logits"])