diff --git a/backend-python/rwkv_pip/webgpu/model.py b/backend-python/rwkv_pip/webgpu/model.py
index 0c59528..96078c1 100644
--- a/backend-python/rwkv_pip/webgpu/model.py
+++ b/backend-python/rwkv_pip/webgpu/model.py
@@ -26,12 +26,19 @@ class RWKV:
             if s.startswith("layer")
         )
 
+        chunk_size = (  # lazily yields the N of every "chunkN" token in strategy
+            int(token[len("chunk") :])  # slice the prefix; lstrip() strips a char set
+            for part in strategy.split()
+            for token in part.split(",")
+            if token.startswith("chunk")
+        )
+
         args = {
             "file": model_path,
             "turbo": True,
             "quant": next(layer, 31) if "i8" in strategy else 0,
             "quant_nf4": next(layer, 26) if "i4" in strategy else 0,
-            "token_chunk_size": 128,
+            "token_chunk_size": next(chunk_size, 32),
             "lora": None,
         }
         self.model = self.wrp.Model(**args)
diff --git a/frontend/src/_locales/ja/main.json b/frontend/src/_locales/ja/main.json
index dc70b63..419c625 100644
--- a/frontend/src/_locales/ja/main.json
+++ b/frontend/src/_locales/ja/main.json
@@ -343,5 +343,7 @@
   "History Message Number": "履歴メッセージ数",
   "Send All Message": "すべてのメッセージを送信",
   "Quantized Layers": "量子化されたレイヤー",
-  "Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.": "現在の精度で量子化されたニューラルネットワークのレイヤーの数、量子化するほどVRAMの使用量が低くなりますが、品質も相応に低下します。"
+  "Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.": "現在の精度で量子化されたニューラルネットワークのレイヤーの数、量子化するほどVRAMの使用量が低くなりますが、品質も相応に低下します。",
+  "Parallel Token Chunk Size": "並列トークンチャンクサイズ",
+  "Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).": "一度に並列で処理される最大トークン数。高性能なGPUの場合、64または128に設定できます（より高速）。"
 }
\ No newline at end of file
diff --git a/frontend/src/_locales/zh-hans/main.json b/frontend/src/_locales/zh-hans/main.json
index 93afe52..32d7929 100644
--- a/frontend/src/_locales/zh-hans/main.json
+++ b/frontend/src/_locales/zh-hans/main.json
@@ -343,5 +343,7 @@
   "History Message Number": "历史消息数量",
   "Send All Message": "发送所有消息",
   "Quantized Layers": "量化层数",
-  "Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.": "神经网络以当前精度量化的层数, 量化越多, 占用显存越低, 但质量相应下降"
+  "Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.": "神经网络以当前精度量化的层数, 量化越多, 占用显存越低, 但质量相应下降",
+  "Parallel Token Chunk Size": "并行Token块大小",
+  "Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).": "一次最多可以并行处理的token数量. 对于高端显卡, 这可以是64或128 (更快)"
 }
\ No newline at end of file
diff --git a/frontend/src/pages/Configs.tsx b/frontend/src/pages/Configs.tsx
index 6a503df..e95a09f 100644
--- a/frontend/src/pages/Configs.tsx
+++ b/frontend/src/pages/Configs.tsx
@@ -331,7 +331,21 @@ const Configs: FC = observer(() => {
                         }} />
                     } />
                 }
-                {selectedConfig.modelParameters.device.startsWith('WebGPU') && <div />}
+                {
+                  selectedConfig.modelParameters.device.startsWith('WebGPU') &&
+                  <Labeled label={t('Parallel Token Chunk Size')}
+                    desc={t('Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).')}
+                    content={
+                      <ValuedSlider
+                        value={selectedConfig.modelParameters.tokenChunkSize || 32}
+                        min={16} max={256} step={16} input
+                        onChange={(e, data) => {
+                          setSelectedConfigModelParams({
+                            tokenChunkSize: data.value
+                          });
+                        }} />
+                    } />
+                }
                 {
                   selectedConfig.modelParameters.device.startsWith('WebGPU') &&
                   <Labeled label={t('Quantized Layers')}
diff --git a/frontend/src/types/configs.ts b/frontend/src/types/configs.ts
index 2b2015c..852e58a 100644
--- a/frontend/src/types/configs.ts
+++ b/frontend/src/types/configs.ts
@@ -17,6 +17,7 @@ export type ModelParameters = {
   storedLayers: number;
   maxStoredLayers: number;
   quantizedLayers?: number;
+  tokenChunkSize?: number;
   useCustomCuda?: boolean;
   customStrategy?: string;
   useCustomTokenizer?: boolean;
diff --git a/frontend/src/utils/index.tsx b/frontend/src/utils/index.tsx
index 1575150..5e70b06 100644
--- a/frontend/src/utils/index.tsx
+++ b/frontend/src/utils/index.tsx
@@ -196,6 +196,8 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
       strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
       if (params.quantizedLayers)
         strategy += ` layer${params.quantizedLayers}`;
+      if (params.tokenChunkSize)
+        strategy += ` chunk${params.tokenChunkSize}`;
       break;
     case 'CUDA':
     case 'CUDA-Beta':