diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh index b348874..19b6ecb 100644 --- a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh +++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh @@ -7,10 +7,11 @@ accelerate launch examples/wanvideo/model_training/train.py \ --num_frames 49 \ --dataset_repeat 100 \ --model_id_with_origin_paths "iic/VACE-Wan2.1-1.3B-Preview:diffusion_pytorch_model*.safetensors,iic/VACE-Wan2.1-1.3B-Preview:models_t5_umt5-xxl-enc-bf16.pth,iic/VACE-Wan2.1-1.3B-Preview:Wan2.1_VAE.pth" \ - --learning_rate 1e-4 \ + --learning_rate 5e-5 \ --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.vace." \ --output_path "./models/train/Wan2.1-VACE-1.3B-Preview_full" \ --trainable_models "vace" \ --extra_inputs "vace_video,vace_reference_image" \ - --use_gradient_checkpointing_offload \ No newline at end of file + --use_gradient_checkpointing_offload +# The learning rate is kept consistent with the settings in the original paper \ No newline at end of file diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh index 763252e..f9768c6 100644 --- a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh +++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh @@ -7,10 +7,11 @@ accelerate launch examples/wanvideo/model_training/train.py \ --num_frames 49 \ --dataset_repeat 100 \ --model_id_with_origin_paths "Wan-AI/Wan2.1-VACE-1.3B:diffusion_pytorch_model*.safetensors,Wan-AI/Wan2.1-VACE-1.3B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.1-VACE-1.3B:Wan2.1_VAE.pth" \ - --learning_rate 1e-4 \ + --learning_rate 5e-5 \ --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.vace." \ --output_path "./models/train/Wan2.1-VACE-1.3B_full" \ --trainable_models "vace" \ --extra_inputs "vace_video,vace_reference_image" \ - --use_gradient_checkpointing_offload \ No newline at end of file + --use_gradient_checkpointing_offload +# The learning rate is kept consistent with the settings in the original paper \ No newline at end of file diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh index c549263..401a647 100644 --- a/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh +++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh @@ -7,10 +7,11 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --num_frames 17 \ --dataset_repeat 100 \ --model_id_with_origin_paths "Wan-AI/Wan2.1-VACE-14B:diffusion_pytorch_model*.safetensors,Wan-AI/Wan2.1-VACE-14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.1-VACE-14B:Wan2.1_VAE.pth" \ - --learning_rate 1e-4 \ + --learning_rate 5e-5 \ --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.vace." \ --output_path "./models/train/Wan2.1-VACE-14B_full" \ --trainable_models "vace" \ --extra_inputs "vace_video,vace_reference_image" \ - --use_gradient_checkpointing_offload \ No newline at end of file + --use_gradient_checkpointing_offload +# The learning rate is kept consistent with the settings in the original paper \ No newline at end of file diff --git a/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh index ecfef32..ba3e875 100644 --- a/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh +++ b/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh @@ -7,7 +7,7 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --num_frames 17 \ --dataset_repeat 100 \ --model_id_with_origin_paths "PAI/Wan2.2-VACE-Fun-A14B:high_noise_model/diffusion_pytorch_model*.safetensors,PAI/Wan2.2-VACE-Fun-A14B:models_t5_umt5-xxl-enc-bf16.pth,PAI/Wan2.2-VACE-Fun-A14B:Wan2.1_VAE.pth" \ - --learning_rate 1e-4 \ + --learning_rate 5e-5 \ --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.vace." \ --output_path "./models/train/Wan2.2-VACE-Fun-A14B_high_noise_full" \ @@ -18,6 +18,7 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --min_timestep_boundary 0 \ --initialize_model_on_cpu # boundary corresponds to timesteps [900, 1000] +# The learning rate is kept consistent with the settings in the original paper accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \ @@ -29,7 +30,7 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --num_frames 17 \ --dataset_repeat 100 \ --model_id_with_origin_paths "PAI/Wan2.2-VACE-Fun-A14B:low_noise_model/diffusion_pytorch_model*.safetensors,PAI/Wan2.2-VACE-Fun-A14B:models_t5_umt5-xxl-enc-bf16.pth,PAI/Wan2.2-VACE-Fun-A14B:Wan2.1_VAE.pth" \ - --learning_rate 1e-4 \ + --learning_rate 5e-5 \ --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.vace." \ --output_path "./models/train/Wan2.2-VACE-Fun-A14B_low_noise_full" \ @@ -39,4 +40,5 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate --max_timestep_boundary 1 \ --min_timestep_boundary 0.358 \ --initialize_model_on_cpu -# boundary corresponds to timesteps [0, 900] \ No newline at end of file +# boundary corresponds to timesteps [0, 900] +# The learning rate is kept consistent with the settings in the original paper \ No newline at end of file