add example_dataset in training scripts (#1358)

* add example_dataset in training scripts

* fix example datasets
This commit is contained in:
Zhongjie Duan
2026-03-18 15:37:03 +08:00
committed by GitHub
parent 4ec4d9c20a
commit ba0626e38f
196 changed files with 911 additions and 567 deletions

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/LongCat-Video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/LongCat-Video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/LongCat-Video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/LongCat-Video_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Video-As-Prompt-Wan2.1-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vap.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B/metadata.csv \
--data_file_keys "video,vap_video" \
--height 480 \
--width 832 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-1.3b-speedcontrol-v1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_motion_bucket_id.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1 \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.motion_controller." \
--output_path "./models/train/Wan2.1-1.3b-speedcontrol-v1_full" \
--trainable_models "motion_controller" \
--extra_inputs "motion_bucket_id"
--extra_inputs "motion_bucket_id"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-FLF2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-FLF2V-14B-720P_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image" \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control-Camera_full" \
--trainable_models "dit" \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control-Camera_full" \
--trainable_models "dit" \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control_full" \
--trainable_models "dit" \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-InP_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-I2V-14B-480P_full" \
--trainable_models "dit" \
--extra_inputs "input_image" \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P/metadata.csv \
--height 720 \
--width 1280 \
--num_frames 49 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--extra_inputs "input_image" \
--use_gradient_checkpointing_offload \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-1.3B_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-14B_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B-Preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Animate-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_animate.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B/metadata.csv \
--data_file_keys "video,animate_pose_video,animate_face_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.2-Animate-14B_full" \
--trainable_models "animate_adapter" \
--extra_inputs "input_image,animate_pose_video,animate_face_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,camera_control_direction,camera_control_speed" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "control_video,reference_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,end_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-I2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -18,8 +20,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -34,4 +36,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--use_gradient_checkpointing_offload \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-S2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_s2v.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B/metadata.csv \
--data_file_keys "video,input_audio,s2v_pose_video" \
--height 448 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-S2V-14B_full" \
--extra_inputs "input_image,input_audio,s2v_pose_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [875, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.417
# boundary corresponds to timesteps [0, 875)
# boundary corresponds to timesteps [0, 875)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-TI2V-5B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-TI2V-5B_full" \
--trainable_models "dit" \
--extra_inputs "input_image"
--extra_inputs "input_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -22,8 +24,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -41,4 +43,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--min_timestep_boundary 0.358 \
--initialize_model_on_cpu
# boundary corresponds to timesteps [0, 900]
# The learning rate is kept consistent with the settings in the original paper
# The learning rate is kept consistent with the settings in the original paper

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/krea-realtime-video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/krea-realtime-video_full" \
--trainable_models "dit"
--trainable_models "dit"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/LongCat-Video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/LongCat-Video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/LongCat-Video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/LongCat-Video_lora" \
--lora_base_model "dit" \
--lora_target_modules "adaLN_modulation.1,attn.qkv,attn.proj,cross_attn.q_linear,cross_attn.kv_linear,cross_attn.proj,ffn.w1,ffn.w2,ffn.w3" \
--lora_rank 32
--lora_rank 32

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Video-As-Prompt-Wan2.1-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vap.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B/metadata.csv \
--data_file_keys "video,vap_video" \
--height 480 \
--width 832 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-1.3b-speedcontrol-v1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_motion_bucket_id.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1 \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "motion_bucket_id"
--extra_inputs "motion_bucket_id"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-FLF2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "control_video"
--extra_inputs "control_video"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,camera_control_direction,camera_control_speed"
--extra_inputs "input_image,camera_control_direction,camera_control_speed"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "control_video,reference_image"
--extra_inputs "control_video,reference_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,end_image"
--extra_inputs "input_image,end_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image"
--extra_inputs "input_image"

View File

@@ -1,8 +1,10 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
# 1*80G GPU cannot train Wan2.2-Animate-14B LoRA
# We tested on 8*80G GPUs
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P/metadata.csv \
--height 720 \
--width 1280 \
--dataset_repeat 100 \
@@ -16,4 +18,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_rank 32 \
--extra_inputs "input_image" \
--use_gradient_checkpointing_offload \
--initialize_model_on_cpu
--initialize_model_on_cpu

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/Wan2.1-T2V-1.3B_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32
--lora_rank 32

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/Wan2.1-T2V-14B_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32
--lora_rank 32

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B-Preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -15,4 +17,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,8 +1,10 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Animate-14B/*" --local_dir ./data/diffsynth_example_dataset
# 1*80G GPU cannot train Wan2.2-Animate-14B LoRA
# We tested on 8*80G GPUs
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_animate.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B/metadata.csv \
--data_file_keys "video,animate_pose_video,animate_face_video" \
--height 480 \
--width 832 \
@@ -17,4 +19,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,animate_pose_video,animate_face_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image,camera_control_direction,camera_control_speed" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "control_video,reference_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-InP/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -18,8 +20,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -34,4 +36,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image,end_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-I2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900)
# boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-S2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_s2v.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B/metadata.csv \
--data_file_keys "video,input_audio,s2v_pose_video" \
--height 448 \
--width 832 \
@@ -16,4 +18,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,input_audio,s2v_pose_video" \
--use_gradient_checkpointing_offload
--use_gradient_checkpointing_offload

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -35,4 +37,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_rank 32 \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.417
# boundary corresponds to timesteps [0, 875)
# boundary corresponds to timesteps [0, 875)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-TI2V-5B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image"
--extra_inputs "input_image"

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -21,8 +23,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -40,4 +42,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--use_gradient_checkpointing_offload \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
# boundary corresponds to timesteps [0, 900]
# boundary corresponds to timesteps [0, 900]

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/krea-realtime-video/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/krea-realtime-video_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32
--lora_rank 32

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B_direct_distill/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_distill.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B_direct_distill \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B_direct_distill/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 160 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 1 \

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -20,8 +22,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [875, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -24,8 +26,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/wanvideo/model_training/train.py \
--dataset_base_path data/example_video_dataset \
--dataset_metadata_path data/example_video_dataset/metadata.csv \
--dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
--dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 1 \