From 6e6136586c16dc31335b8491e2e00998b4f482e6 Mon Sep 17 00:00:00 2001
From: mi804 <1576993271@qq.com>
Date: Wed, 13 Aug 2025 13:36:26 +0800
Subject: [PATCH] support controlnet depth

---
 README.md                                     |  2 ++
 README_zh.md                                  |  2 ++
 examples/qwen_image/README.md                 |  1 +
 examples/qwen_image/README_zh.md              |  1 +
 .../Qwen-Image-Blockwise-ControlNet-Canny.py  |  4 +--
 .../Qwen-Image-Blockwise-ControlNet-Depth.py  | 32 +++++++++++++++++++
 6 files changed, 40 insertions(+), 2 deletions(-)
 create mode 100644 examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py
diff --git a/README.md b/README.md
index 80e70bc..ead17a9 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,7 @@ image.save("image.jpg")
 |[DiffSynth-Studio/Qwen-Image-Distill-LoRA](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-LoRA)|[code](./examples/qwen_image/model_inference/Qwen-Image-Distill-LoRA.py)|-|-|-|-|
 |[DiffSynth-Studio/Qwen-Image-EliGen](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-EliGen)|[code](./examples/qwen_image/model_inference/Qwen-Image-EliGen.py)|-|-|[code](./examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh)|[code](./examples/qwen_image/model_training/validate_lora/Qwen-Image-EliGen.py)|
 |[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny)|[code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py)|-|-|-|-|
+|[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth)|[code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py)|-|-|-|-|
 
 </details>
 
@@ -366,6 +367,7 @@ https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/59fb2f7b-8de0-44
 
 
 ## Update History
+- **August 13, 2025**: We trained and open-sourced the ControlNet model for Qwen-Image, [DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth), which adopts a lightweight architectural design. Please refer to [our sample code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py).
 
 - **August 12, 2025**: We trained and open-sourced the ControlNet model for Qwen-Image, [DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny), which adopts a lightweight architectural design. Please refer to [our sample code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py).
 
diff --git a/README_zh.md b/README_zh.md
index 22a28b3..f6a5e23 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -96,6 +96,7 @@ image.save("image.jpg")
 |[DiffSynth-Studio/Qwen-Image-Distill-LoRA](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-LoRA)|[code](./examples/qwen_image/model_inference/Qwen-Image-Distill-LoRA.py)|-|-|-|-|
 |[DiffSynth-Studio/Qwen-Image-EliGen](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-EliGen)|[code](./examples/qwen_image/model_inference/Qwen-Image-EliGen.py)|-|-|[code](./examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh)|[code](./examples/qwen_image/model_training/validate_lora/Qwen-Image-EliGen.py)|
 |[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny)|[code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py)|-|-|-|-|
+|[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth)|[code](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py)|-|-|-|-|
 
 </details>
 
@@ -382,6 +383,7 @@ https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/59fb2f7b-8de0-44
 
 
 ## 更新历史
+- **2025年8月13日** 我们训练并开源了 Qwen-Image 的 ControlNet 模型 [DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth)，模型结构采用了轻量化的设计，请参考[我们的示例代码](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py)。
 
 - **2025年8月12日** 我们训练并开源了 Qwen-Image 的 ControlNet 模型 [DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny)，模型结构采用了轻量化的设计，请参考[我们的示例代码](./examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py)。
 
diff --git a/examples/qwen_image/README.md b/examples/qwen_image/README.md
index bfc1126..7940013 100644
--- a/examples/qwen_image/README.md
+++ b/examples/qwen_image/README.md
@@ -47,6 +47,7 @@ image.save("image.jpg")
 |[DiffSynth-Studio/Qwen-Image-Distill-LoRA](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-LoRA)|[code](./model_inference/Qwen-Image-Distill-LoRA.py)|-|-|-|-|
 |[DiffSynth-Studio/Qwen-Image-EliGen](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-EliGen)|[code](./model_inference/Qwen-Image-EliGen.py)|-|-|[code](./model_training/lora/Qwen-Image-EliGen.sh)|[code](./model_training/validate_lora/Qwen-Image-EliGen.py)|
 |[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny)|[code](./model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py)|-|-|-|-|
+|[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth)|[code](./model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py)|-|-|-|-|
 
 ## Model Inference
 
diff --git a/examples/qwen_image/README_zh.md b/examples/qwen_image/README_zh.md
index 70534ef..0440aae 100644
--- a/examples/qwen_image/README_zh.md
+++ b/examples/qwen_image/README_zh.md
@@ -47,6 +47,7 @@ image.save("image.jpg")
 |[DiffSynth-Studio/Qwen-Image-Distill-LoRA](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-LoRA)|[code](./model_inference/Qwen-Image-Distill-LoRA.py)|-|-|-|-|
 |[DiffSynth-Studio/Qwen-Image-EliGen](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-EliGen)|[code](./model_inference/Qwen-Image-EliGen.py)|-|-|[code](./model_training/lora/Qwen-Image-EliGen.sh)|[code](./model_training/validate_lora/Qwen-Image-EliGen.py)|
 |[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Canny)|[code](./model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py)|-|-|-|-|
+|[DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth)|[code](./model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py)|-|-|-|-|
 
 ## 模型推理
 
diff --git a/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py b/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py
index 5ae223a..85b9b96 100644
--- a/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py
+++ b/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py
@@ -18,8 +18,8 @@ pipe = QwenImagePipeline.from_pretrained(
 
 dataset_snapshot_download(
     dataset_id="DiffSynth-Studio/example_image_dataset",
-    local_dir="./",
-    allow_file_pattern="data/example_image_dataset/canny/image_1.jpg"
+    local_dir="./data/example_image_dataset",
+    allow_file_pattern="canny/image_1.jpg"
 )
 controlnet_image = Image.open("data/example_image_dataset/canny/image_1.jpg").resize((1328, 1328))
 
diff --git a/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py b/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py
new file mode 100644
index 0000000..6676868
--- /dev/null
+++ b/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py
@@ -0,0 +1,32 @@
+from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig, ControlNetInput
+from PIL import Image
+import torch
+from modelscope import dataset_snapshot_download
+
+
+pipe = QwenImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
+        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
+        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+        ModelConfig(model_id="DiffSynth-Studio/Qwen-Image-Blockwise-ControlNet-Depth", origin_file_pattern="model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
+)
+
+dataset_snapshot_download(
+    dataset_id="DiffSynth-Studio/example_image_dataset",
+    local_dir="./data/example_image_dataset",
+    allow_file_pattern="depth/image_1.jpg"
+)
+
+controlnet_image = Image.open("data/example_image_dataset/depth/image_1.jpg").resize((1328, 1328))
+
+prompt = "精致肖像，水下少女，蓝裙飘逸，发丝轻扬，光影透澈，气泡环绕，面容恬静，细节精致，梦幻唯美。"
+image = pipe(
+    prompt, seed=0,
+    blockwise_controlnet_inputs=[ControlNetInput(image=controlnet_image)]
+)
+image.save("image.jpg")