allow setting tokenChunkSize of WebGPU mode

josc146
2024-03-02 16:41:29 +08:00
parent c90cefc453
commit d91c3c004d
6 changed files with 32 additions and 4 deletions


@@ -26,12 +26,19 @@ class RWKV:
             if s.startswith("layer")
         )
+        chunk_size = (
+            int(s.lstrip("chunk"))
+            for s in strategy.split()
+            for s in s.split(",")
+            if s.startswith("chunk")
+        )
         args = {
             "file": model_path,
             "turbo": True,
             "quant": next(layer, 31) if "i8" in strategy else 0,
             "quant_nf4": next(layer, 26) if "i4" in strategy else 0,
-            "token_chunk_size": 128,
+            "token_chunk_size": next(chunk_size, 32),
             "lora": None,
         }
         self.model = self.wrp.Model(**args)
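
For reference, a minimal standalone sketch of how the new parsing behaves, assuming a hypothetical strategy string such as "fp16 layer31,chunk256" (the "layer"/"chunk" token convention comes from the code above; the concrete values are illustrative, not from the commit):

    # Sketch of the chunk_size generator added in this commit; the strategy
    # value below is an illustrative example.
    strategy = "fp16 layer31,chunk256"

    chunk_size = (
        int(s.lstrip("chunk"))        # "chunk256" -> 256
        for s in strategy.split()     # split on whitespace
        for s in s.split(",")         # then on commas within each token
        if s.startswith("chunk")
    )

    # next() falls back to the default of 32 when no "chunk..." token is present.
    print(next(chunk_size, 32))  # prints 256 for the string above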