From 6a9d875d65c8851c43a24e7b8d1f56966faebb15 Mon Sep 17 00:00:00 2001
From: mi804 <1576993271@qq.com>
Date: Fri, 15 Aug 2025 17:54:52 +0800
Subject: [PATCH] Fix training timestep boundaries for Wan2.2 A14B

---
 examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py   |  1 +
 .../wanvideo/model_training/full/Wan2.2-I2V-A14B.sh    | 10 ++++++----
 .../wanvideo/model_training/full/Wan2.2-T2V-A14B.sh    | 10 ++++++----
 .../wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh    | 10 ++++++----
 .../wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh    | 10 ++++++----
 5 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py b/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py
index 0c1be54..1d795f5 100644
--- a/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py
+++ b/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py
@@ -28,5 +28,6 @@ video = pipe(
     negative_prompt="色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走",
     seed=0, tiled=True,
     input_image=input_image,
+    switch_DiT_boundary=0.9,
 )
 save_video(video, "video1.mp4", fps=15, quality=5)
diff --git a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
index 2f531e7..10fb02f 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
@@ -13,8 +13,9 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
   --trainable_models "dit" \
   --extra_inputs "input_image" \
   --use_gradient_checkpointing_offload \
-  --max_timestep_boundary 1 \
-  --min_timestep_boundary 0.875
+  --max_timestep_boundary 0.358 \
+  --min_timestep_boundary 0
+# The min/max timestep boundaries above correspond to timesteps [900, 1000].
 
 accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
   --dataset_base_path data/example_video_dataset \
@@ -31,5 +32,6 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
   --trainable_models "dit" \
   --extra_inputs "input_image" \
   --use_gradient_checkpointing_offload \
-  --max_timestep_boundary 0.875 \
-  --min_timestep_boundary 0
+  --max_timestep_boundary 1 \
+  --min_timestep_boundary 0.358
+# The min/max timestep boundaries above correspond to timesteps [0, 900).
\ No newline at end of file
diff --git a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
index f634117..89c0704 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
@@ -11,8 +11,9 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
   --remove_prefix_in_ckpt "pipe.dit." \
   --output_path "./models/train/Wan2.2-T2V-A14B_high_noise_full" \
   --trainable_models "dit" \
-  --max_timestep_boundary 1 \
-  --min_timestep_boundary 0.875
+  --max_timestep_boundary 0.417 \
+  --min_timestep_boundary 0
+# The min/max timestep boundaries above correspond to timesteps [875, 1000].
 
 accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
   --dataset_base_path data/example_video_dataset \
@@ -27,5 +28,6 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
   --remove_prefix_in_ckpt "pipe.dit." \
   --output_path "./models/train/Wan2.2-T2V-A14B_low_noise_full" \
   --trainable_models "dit" \
-  --max_timestep_boundary 0.875 \
-  --min_timestep_boundary 0
+  --max_timestep_boundary 1 \
+  --min_timestep_boundary 0.417
+# The min/max timestep boundaries above correspond to timesteps [0, 875).
\ No newline at end of file
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
index 4201b47..1d9eba0 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
@@ -14,8 +14,9 @@ accelerate launch examples/wanvideo/model_training/train.py \
   --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
   --lora_rank 32 \
   --extra_inputs "input_image" \
-  --max_timestep_boundary 1 \
-  --min_timestep_boundary 0.875
+  --max_timestep_boundary 0.358 \
+  --min_timestep_boundary 0
+# The min/max timestep boundaries above correspond to timesteps [900, 1000].
 
 accelerate launch examples/wanvideo/model_training/train.py \
   --dataset_base_path data/example_video_dataset \
@@ -33,5 +34,6 @@ accelerate launch examples/wanvideo/model_training/train.py \
   --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
   --lora_rank 32 \
   --extra_inputs "input_image" \
-  --max_timestep_boundary 0.875 \
-  --min_timestep_boundary 0
+  --max_timestep_boundary 1 \
+  --min_timestep_boundary 0.358
+# The min/max timestep boundaries above correspond to timesteps [0, 900).
\ No newline at end of file
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
index 737896c..f47c96b 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
@@ -13,8 +13,9 @@ accelerate launch examples/wanvideo/model_training/train.py \
   --lora_base_model "dit" \
   --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
   --lora_rank 32 \
-  --max_timestep_boundary 1 \
-  --min_timestep_boundary 0.875
+  --max_timestep_boundary 0.417 \
+  --min_timestep_boundary 0
+# The min/max timestep boundaries above correspond to timesteps [875, 1000].
 
 
 accelerate launch examples/wanvideo/model_training/train.py \
@@ -32,5 +33,6 @@ accelerate launch examples/wanvideo/model_training/train.py \
   --lora_base_model "dit" \
   --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
   --lora_rank 32 \
-  --max_timestep_boundary 0.875 \
-  --min_timestep_boundary 0
+  --max_timestep_boundary 1 \
+  --min_timestep_boundary 0.417
+# The min/max timestep boundaries above correspond to timesteps [0, 875).
\ No newline at end of file