add example_dataset in training scripts (#1358)

* add example_dataset in training scripts

* fix example datasets
This commit is contained in:
Zhongjie Duan
2026-03-18 15:37:03 +08:00
committed by GitHub
parent 4ec4d9c20a
commit ba0626e38f
196 changed files with 911 additions and 567 deletions

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/LongCat-Video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/LongCat-Video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/LongCat-Video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/LongCat-Video_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Video-As-Prompt-Wan2.1-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vap.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B/metadata.csv \
--data_file_keys "video,vap_video" \
--height 480 \
--width 832 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-1.3b-speedcontrol-v1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_motion_bucket_id.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1 \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.motion_controller." \
--output_path "./models/train/Wan2.1-1.3b-speedcontrol-v1_full" \
--trainable_models "motion_controller" \
--extra_inputs "motion_bucket_id"
--extra_inputs "motion_bucket_id"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-FLF2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-FLF2V-14B-720P_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image" \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control-Camera_full" \
--trainable_models "dit" \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control-Camera_full" \
--trainable_models "dit" \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-I2V-14B-480P_full" \
--trainable_models "dit" \
--extra_inputs "input_image" \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P/metadata.csv \
--height 720 \
--width 1280 \
--num_frames 49 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--extra_inputs "input_image" \
--use_gradient_checkpointing_offload \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-1.3B_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-14B_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B-Preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Animate-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_animate.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B/metadata.csv \
--data_file_keys "video,animate_pose_video,animate_face_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.2-Animate-14B_full" \
--trainable_models "animate_adapter" \
--extra_inputs "input_image,animate_pose_video,animate_face_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,camera_control_direction,camera_control_speed" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "control_video,reference_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,end_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-I2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -18,8 +20,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -34,4 +36,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--use_gradient_checkpointing_offload \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-S2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_s2v.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B/metadata.csv \
--data_file_keys "video,input_audio,s2v_pose_video" \
--height 448 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-S2V-14B_full" \
--extra_inputs "input_image,input_audio,s2v_pose_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [875, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.417
# boundary corresponds to timesteps [0, 875)
# boundary corresponds to timesteps [0, 875)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-TI2V-5B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-TI2V-5B_full" \
--trainable_models "dit" \
--extra_inputs "input_image"
--extra_inputs "input_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -22,8 +24,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -41,4 +43,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--min_timestep_boundary 0.358 \
--initialize_model_on_cpu
# boundary corresponds to timesteps [0, 900)
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/krea-realtime-video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/krea-realtime-video_full" \
--trainable_models "dit"
--trainable_models "dit"