From 6a9d875d65c8851c43a24e7b8d1f56966faebb15 Mon Sep 17 00:00:00 2001 From: mi804 <1576993271@qq.com> Date: Fri, 15 Aug 2025 17:54:52 +0800 Subject: [PATCH] fix training boundary for wan2.2 A14B --- examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py | 1 + .../wanvideo/model_training/full/Wan2.2-I2V-A14B.sh | 10 ++++++---- .../wanvideo/model_training/full/Wan2.2-T2V-A14B.sh | 10 ++++++---- .../wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh | 10 ++++++---- .../wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh | 10 ++++++---- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py b/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py index 0c1be54..1d795f5 100644 --- a/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py +++ b/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py @@ -28,5 +28,6 @@ video = pipe( negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走", seed=0, tiled=True, input_image=input_image, + switch_DiT_boundary=0.9, ) save_video(video, "video1.mp4", fps=15, quality=5) diff --git a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh index 2f531e7..10fb02f 100644 --- a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh +++ b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh @@ -13,8 +13,9 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --trainable_models "dit" \ --extra_inputs "input_image" \ --use_gradient_checkpointing_offload \ - --max_timestep_boundary 1 \ - --min_timestep_boundary 0.875 + --max_timestep_boundary 0.358 \ + --min_timestep_boundary 0 +# boundary corresponds to timesteps [900, 1000] accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \ --dataset_base_path data/example_video_dataset \ @@ -31,5 +32,6 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --trainable_models "dit" \ --extra_inputs "input_image" \ --use_gradient_checkpointing_offload \ - --max_timestep_boundary 0.875 \ - --min_timestep_boundary 0 + --max_timestep_boundary 1 \ + --min_timestep_boundary 0.358 +# boundary corresponds to timesteps [0, 900) \ No newline at end of file diff --git a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh index f634117..89c0704 100644 --- a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh +++ b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh @@ -11,8 +11,9 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --remove_prefix_in_ckpt "pipe.dit." \ --output_path "./models/train/Wan2.2-T2V-A14B_high_noise_full" \ --trainable_models "dit" \ - --max_timestep_boundary 1 \ - --min_timestep_boundary 0.875 + --max_timestep_boundary 0.417 \ + --min_timestep_boundary 0 +# boundary corresponds to timesteps [875, 1000] accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \ --dataset_base_path data/example_video_dataset \ @@ -27,5 +28,6 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --remove_prefix_in_ckpt "pipe.dit." \ --output_path "./models/train/Wan2.2-T2V-A14B_low_noise_full" \ --trainable_models "dit" \ - --max_timestep_boundary 0.875 \ - --min_timestep_boundary 0 + --max_timestep_boundary 1 \ + --min_timestep_boundary 0.417 +# boundary corresponds to timesteps [0, 875) \ No newline at end of file diff --git a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh index 4201b47..1d9eba0 100644 --- a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh +++ b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh @@ -14,8 +14,9 @@ accelerate launch examples/wanvideo/model_training/train.py \ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \ --lora_rank 32 \ --extra_inputs "input_image" \ - --max_timestep_boundary 1 \ - --min_timestep_boundary 0.875 + --max_timestep_boundary 0.358 \ + --min_timestep_boundary 0 +# boundary corresponds to timesteps [900, 1000] accelerate launch examples/wanvideo/model_training/train.py \ --dataset_base_path data/example_video_dataset \ @@ -33,5 +34,6 @@ accelerate launch examples/wanvideo/model_training/train.py \ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \ --lora_rank 32 \ --extra_inputs "input_image" \ - --max_timestep_boundary 0.875 \ - --min_timestep_boundary 0 + --max_timestep_boundary 1 \ + --min_timestep_boundary 0.358 +# boundary corresponds to timesteps [0, 900) \ No newline at end of file diff --git a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh index 737896c..f47c96b 100644 --- a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh +++ b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh @@ -13,8 +13,9 @@ accelerate launch examples/wanvideo/model_training/train.py \ --lora_base_model "dit" \ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \ --lora_rank 32 \ - --max_timestep_boundary 1 \ - --min_timestep_boundary 0.875 + --max_timestep_boundary 0.417 \ + --min_timestep_boundary 0 +# boundary corresponds to timesteps [875, 1000] accelerate launch examples/wanvideo/model_training/train.py \ @@ -32,5 +33,6 @@ accelerate launch examples/wanvideo/model_training/train.py \ --lora_base_model "dit" \ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \ --lora_rank 32 \ - --max_timestep_boundary 0.875 \ - --min_timestep_boundary 0 + --max_timestep_boundary 1 \ + --min_timestep_boundary 0.417 +# boundary corresponds to timesteps [0, 875) \ No newline at end of file