Allow setting tokenChunkSize for WebGPU mode (default token_chunk_size changed from 128 to 32)

2024-03-02 16:41:29 +08:00
parent c90cefc453
commit d91c3c004d
6 changed files with 32 additions and 4 deletions
--- a/backend-python/rwkv_pip/webgpu/model.py
+++ b/backend-python/rwkv_pip/webgpu/model.py
@@ -26,12 +26,19 @@ class RWKV:
            if s.startswith("layer")
        )

+        chunk_size = (
+            int(s.lstrip("chunk"))
+            for s in strategy.split()
+            for s in s.split(",")
+            if s.startswith("chunk")
+        )
+
        args = {
            "file": model_path,
            "turbo": True,
            "quant": next(layer, 31) if "i8" in strategy else 0,
            "quant_nf4": next(layer, 26) if "i4" in strategy else 0,
-            "token_chunk_size": 128,
+            "token_chunk_size": next(chunk_size, 32),
            "lora": None,
        }
        self.model = self.wrp.Model(**args)