diff --git a/.gitignore b/.gitignore
index 6fd0d8e..a511cf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
/models
/scripts
/diffusers
+/.vscode
*.pkl
*.safetensors
*.pth
diff --git a/docs/en/Model_Details/Anima.md b/docs/en/Model_Details/Anima.md
index 91ecd89..0bdf80e 100644
--- a/docs/en/Model_Details/Anima.md
+++ b/docs/en/Model_Details/Anima.md
@@ -133,7 +133,7 @@ Anima models are trained through [`examples/anima/model_training/train.py`](http
We provide a sample image dataset for testing:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
For training script details, refer to [Model Training](../Pipeline_Usage/Model_Training.md). For advanced training techniques, see [Training Framework Documentation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/).
\ No newline at end of file
diff --git a/docs/en/Model_Details/FLUX.md b/docs/en/Model_Details/FLUX.md
index 283f895..6607e63 100644
--- a/docs/en/Model_Details/FLUX.md
+++ b/docs/en/Model_Details/FLUX.md
@@ -195,7 +195,7 @@ FLUX series models are uniformly trained through [`examples/flux/model_training/
We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md
index f3bb020..63f7e48 100644
--- a/docs/en/Model_Details/FLUX2.md
+++ b/docs/en/Model_Details/FLUX2.md
@@ -145,7 +145,7 @@ FLUX.2 series models are uniformly trained through [`examples/flux2/model_traini
We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Model_Details/LTX-2.md b/docs/en/Model_Details/LTX-2.md
index 18047f9..1a05848 100644
--- a/docs/en/Model_Details/LTX-2.md
+++ b/docs/en/Model_Details/LTX-2.md
@@ -217,7 +217,7 @@ LTX-2 series models are uniformly trained through [`examples/ltx2/model_training
We have built a sample video dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Model_Details/Qwen-Image.md b/docs/en/Model_Details/Qwen-Image.md
index 01d5ea5..39548e1 100644
--- a/docs/en/Model_Details/Qwen-Image.md
+++ b/docs/en/Model_Details/Qwen-Image.md
@@ -199,7 +199,7 @@ Qwen-Image series models are uniformly trained through [`examples/qwen_image/mod
We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Model_Details/Wan.md b/docs/en/Model_Details/Wan.md
index 805a069..73e4d52 100644
--- a/docs/en/Model_Details/Wan.md
+++ b/docs/en/Model_Details/Wan.md
@@ -253,7 +253,7 @@ Wan series models are uniformly trained through [`examples/wanvideo/model_traini
We have built a sample video dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Model_Details/Z-Image.md b/docs/en/Model_Details/Z-Image.md
index 38075cc..92d2f25 100644
--- a/docs/en/Model_Details/Z-Image.md
+++ b/docs/en/Model_Details/Z-Image.md
@@ -134,7 +134,7 @@ Z-Image series models are uniformly trained through [`examples/z_image/model_tra
We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).
diff --git a/docs/en/Pipeline_Usage/Model_Training.md b/docs/en/Pipeline_Usage/Model_Training.md
index e206347..7e196b7 100644
--- a/docs/en/Pipeline_Usage/Model_Training.md
+++ b/docs/en/Pipeline_Usage/Model_Training.md
@@ -69,25 +69,11 @@ We have built sample datasets for your testing. To understand how the universal
-Sample Image Dataset
+Sample Dataset
> ```shell
-> modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+> modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
> ```
->
-> Applicable to training of image generation models such as Qwen-Image and FLUX.
-
-
-
-
-
-Sample Video Dataset
-
-> ```shell
-> modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
-> ```
->
-> Applicable to training of video generation models such as Wan.
diff --git a/docs/en/Training/Direct_Distill.md b/docs/en/Training/Direct_Distill.md
index 34cfabb..e989c42 100644
--- a/docs/en/Training/Direct_Distill.md
+++ b/docs/en/Training/Direct_Distill.md
@@ -77,7 +77,7 @@ distill_qwen/image.jpg,"精致肖像,水下少女,蓝裙飘逸,发丝轻
This sample dataset can be downloaded directly:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
Then start LoRA distillation accelerated training:
diff --git a/docs/zh/Model_Details/Anima.md b/docs/zh/Model_Details/Anima.md
index 0d5576b..5900067 100644
--- a/docs/zh/Model_Details/Anima.md
+++ b/docs/zh/Model_Details/Anima.md
@@ -133,7 +133,7 @@ Anima 系列模型统一通过 [`examples/anima/model_training/train.py`](https:
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/FLUX.md b/docs/zh/Model_Details/FLUX.md
index 77828ef..72c8623 100644
--- a/docs/zh/Model_Details/FLUX.md
+++ b/docs/zh/Model_Details/FLUX.md
@@ -195,7 +195,7 @@ FLUX 系列模型统一通过 [`examples/flux/model_training/train.py`](https://
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md
index 66725e6..1d35f94 100644
--- a/docs/zh/Model_Details/FLUX2.md
+++ b/docs/zh/Model_Details/FLUX2.md
@@ -145,7 +145,7 @@ FLUX.2 系列模型统一通过 [`examples/flux2/model_training/train.py`](https
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/LTX-2.md b/docs/zh/Model_Details/LTX-2.md
index e2592dd..3e1b541 100644
--- a/docs/zh/Model_Details/LTX-2.md
+++ b/docs/zh/Model_Details/LTX-2.md
@@ -217,7 +217,7 @@ LTX-2 系列模型统一通过 [`examples/ltx2/model_training/train.py`](https:/
我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/Qwen-Image.md b/docs/zh/Model_Details/Qwen-Image.md
index 79b2126..74de8c3 100644
--- a/docs/zh/Model_Details/Qwen-Image.md
+++ b/docs/zh/Model_Details/Qwen-Image.md
@@ -199,7 +199,7 @@ Qwen-Image 系列模型统一通过 [`examples/qwen_image/model_training/train.p
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文“模型总览”中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/Wan.md b/docs/zh/Model_Details/Wan.md
index 611a38c..7924e40 100644
--- a/docs/zh/Model_Details/Wan.md
+++ b/docs/zh/Model_Details/Wan.md
@@ -254,7 +254,7 @@ Wan 系列模型统一通过 [`examples/wanvideo/model_training/train.py`](https
我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Model_Details/Z-Image.md b/docs/zh/Model_Details/Z-Image.md
index 4e77360..7a3a249 100644
--- a/docs/zh/Model_Details/Z-Image.md
+++ b/docs/zh/Model_Details/Z-Image.md
@@ -134,7 +134,7 @@ Z-Image 系列模型统一通过 [`examples/z_image/model_training/train.py`](ht
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。
diff --git a/docs/zh/Pipeline_Usage/Model_Training.md b/docs/zh/Pipeline_Usage/Model_Training.md
index c92b8b1..c72af89 100644
--- a/docs/zh/Pipeline_Usage/Model_Training.md
+++ b/docs/zh/Pipeline_Usage/Model_Training.md
@@ -69,28 +69,14 @@ image_2.jpg,"a cat"
-样例图像数据集
+样例数据集
> ```shell
-> modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+> modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
> ```
->
-> 适用于 Qwen-Image、FLUX 等图像生成模型的训练。
-
-
-样例视频数据集
-
-> ```shell
-> modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
-> ```
->
-> 适用于 Wan 等视频生成模型的训练。
-
-
-
## 加载模型
类似于[推理时的模型加载](../Pipeline_Usage/Model_Inference.md#加载模型),我们支持多种方式配置模型路径,两种方式是可以混用的。
diff --git a/docs/zh/Training/Direct_Distill.md b/docs/zh/Training/Direct_Distill.md
index 4a9ae79..48f25c7 100644
--- a/docs/zh/Training/Direct_Distill.md
+++ b/docs/zh/Training/Direct_Distill.md
@@ -77,7 +77,7 @@ distill_qwen/image.jpg,"精致肖像,水下少女,蓝裙飘逸,发丝轻
这个样例数据集可以直接下载:
```shell
-modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
```
然后开始 LoRA 蒸馏加速训练:
diff --git a/examples/anima/model_training/full/anima-preview.sh b/examples/anima/model_training/full/anima-preview.sh
index 58bf844..fa7778e 100644
--- a/examples/anima/model_training/full/anima-preview.sh
+++ b/examples/anima/model_training/full/anima-preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "anima/anima-preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/anima/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/anima/anima-preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/anima/anima-preview/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \
@@ -11,4 +13,4 @@ accelerate launch examples/anima/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/anima-preview_full" \
--trainable_models "dit" \
- --use_gradient_checkpointing
\ No newline at end of file
+ --use_gradient_checkpointing
diff --git a/examples/anima/model_training/lora/anima-preview.sh b/examples/anima/model_training/lora/anima-preview.sh
index 462a844..cb8f0b0 100644
--- a/examples/anima/model_training/lora/anima-preview.sh
+++ b/examples/anima/model_training/lora/anima-preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "anima/anima-preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/anima/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/anima/anima-preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/anima/anima-preview/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \
@@ -13,4 +15,4 @@ accelerate launch examples/anima/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "" \
--lora_rank 32 \
- --use_gradient_checkpointing
\ No newline at end of file
+ --use_gradient_checkpointing
diff --git a/examples/flux/model_training/full/FLEX.2-preview.sh b/examples/flux/model_training/full/FLEX.2-preview.sh
index fffe929..0e8116e 100644
--- a/examples/flux/model_training/full/FLEX.2-preview.sh
+++ b/examples/flux/model_training/full/FLEX.2-preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLEX.2-preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLEX.2-preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLEX.2-preview/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 200 \
--model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/full/FLUX.1-Kontext-dev.sh b/examples/flux/model_training/full/FLUX.1-Kontext-dev.sh
index 4938f10..dd7cff7 100644
--- a/examples/flux/model_training/full/FLUX.1-Kontext-dev.sh
+++ b/examples/flux/model_training/full/FLUX.1-Kontext-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/FLUX.1-Krea-dev.sh b/examples/flux/model_training/full/FLUX.1-Krea-dev.sh
index 053b0fa..d9d685a 100644
--- a/examples/flux/model_training/full/FLUX.1-Krea-dev.sh
+++ b/examples/flux/model_training/full/FLUX.1-Krea-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Krea-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-AttriCtrl.sh b/examples/flux/model_training/full/FLUX.1-dev-AttriCtrl.sh
index ba620fd..1825ac2 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-AttriCtrl.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-AttriCtrl.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-AttriCtrl/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_attrictrl.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl/metadata.csv \
--data_file_keys "image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Inpainting-Beta.sh b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
index d362313..6524156 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Inpainting-Beta/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_inpaint.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta/metadata.csv \
--data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Union-alpha.sh b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Union-alpha.sh
index f0a56af..d5c9c54 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Union-alpha.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Union-alpha.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Union-alpha/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Upscaler.sh b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Upscaler.sh
index 85a0228..99f7e22 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Upscaler.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-Controlnet-Upscaler.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Upscaler/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-IP-Adapter.sh b/examples/flux/model_training/full/FLUX.1-dev-IP-Adapter.sh
index 6db5e79..5600dcb 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-IP-Adapter.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-IP-Adapter.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-IP-Adapter/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_ipadapter.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter/metadata.csv \
--data_file_keys "image,ipadapter_images" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-InfiniteYou.sh b/examples/flux/model_training/full/FLUX.1-dev-InfiniteYou.sh
index 7898795..a7d6c46 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-InfiniteYou.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-InfiniteYou.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-InfiniteYou/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_infiniteyou.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou/metadata.csv \
--data_file_keys "image,controlnet_image,infinityou_id_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev-LoRA-Encoder.sh b/examples/flux/model_training/full/FLUX.1-dev-LoRA-Encoder.sh
index b788434..2240fd6 100644
--- a/examples/flux/model_training/full/FLUX.1-dev-LoRA-Encoder.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev-LoRA-Encoder.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-LoRA-Encoder/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_lora_encoder.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-LoRA-Encoder \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-LoRA-Encoder/metadata.csv \
--data_file_keys "image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/full/FLUX.1-dev.sh b/examples/flux/model_training/full/FLUX.1-dev.sh
index a9f5820..6f126a0 100644
--- a/examples/flux/model_training/full/FLUX.1-dev.sh
+++ b/examples/flux/model_training/full/FLUX.1-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/full/Nexus-Gen.sh b/examples/flux/model_training/full/Nexus-Gen.sh
index 6f2960d..2c8242c 100644
--- a/examples/flux/model_training/full/Nexus-Gen.sh
+++ b/examples/flux/model_training/full/Nexus-Gen.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Nexus-Gen/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_nexusgen_edit.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/Nexus-Gen \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/Nexus-Gen/metadata.csv \
--data_file_keys "image,nexus_gen_reference_image" \
--max_pixels 262144 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/full/Step1X-Edit.sh b/examples/flux/model_training/full/Step1X-Edit.sh
index 03ddfda..b482b97 100644
--- a/examples/flux/model_training/full/Step1X-Edit.sh
+++ b/examples/flux/model_training/full/Step1X-Edit.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Step1X-Edit/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_step1x.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/Step1X-Edit \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/Step1X-Edit/metadata.csv \
--data_file_keys "image,step1x_reference_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/lora/FLEX.2-preview.sh b/examples/flux/model_training/lora/FLEX.2-preview.sh
index 444e91c..ff91b3a 100644
--- a/examples/flux/model_training/lora/FLEX.2-preview.sh
+++ b/examples/flux/model_training/lora/FLEX.2-preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLEX.2-preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLEX.2-preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLEX.2-preview/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/lora/FLUX.1-Kontext-dev.sh b/examples/flux/model_training/lora/FLUX.1-Kontext-dev.sh
index f45707e..e92818d 100644
--- a/examples/flux/model_training/lora/FLUX.1-Kontext-dev.sh
+++ b/examples/flux/model_training/lora/FLUX.1-Kontext-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/lora/FLUX.1-Krea-dev.sh b/examples/flux/model_training/lora/FLUX.1-Krea-dev.sh
index cea0009..c32ef0b 100644
--- a/examples/flux/model_training/lora/FLUX.1-Krea-dev.sh
+++ b/examples/flux/model_training/lora/FLUX.1-Krea-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Krea-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-AttriCtrl.sh b/examples/flux/model_training/lora/FLUX.1-dev-AttriCtrl.sh
index 8e6d8c9..a09a3dc 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-AttriCtrl.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-AttriCtrl.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-AttriCtrl/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_attrictrl.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl/metadata.csv \
--data_file_keys "image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Inpainting-Beta.sh b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
index 1d7afc6..ecb441d 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Inpainting-Beta.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Inpainting-Beta/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_inpaint.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta/metadata.csv \
--data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Union-alpha.sh b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Union-alpha.sh
index e1272c2..9c1a099 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Union-alpha.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Union-alpha.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Union-alpha/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Upscaler.sh b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Upscaler.sh
index 398e270..066b21d 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Upscaler.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-Controlnet-Upscaler.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Upscaler/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-EliGen.sh b/examples/flux/model_training/lora/FLUX.1-dev-EliGen.sh
index 0579cd2..e96b24f 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-EliGen.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-EliGen.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-EliGen/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_eligen.json \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-EliGen \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-EliGen/metadata.json \
--data_file_keys "image,eligen_entity_masks" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-IP-Adapter.sh b/examples/flux/model_training/lora/FLUX.1-dev-IP-Adapter.sh
index e110075..091a6d5 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-IP-Adapter.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-IP-Adapter.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-IP-Adapter/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_ipadapter.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter/metadata.csv \
--data_file_keys "image,ipadapter_images" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev-InfiniteYou.sh b/examples/flux/model_training/lora/FLUX.1-dev-InfiniteYou.sh
index 4938301..bfc2a0b 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev-InfiniteYou.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev-InfiniteYou.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-InfiniteYou/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_infiniteyou.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou/metadata.csv \
--data_file_keys "image,controlnet_image,infinityou_id_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/flux/model_training/lora/FLUX.1-dev.sh b/examples/flux/model_training/lora/FLUX.1-dev.sh
index 5118857..3d7a73c 100644
--- a/examples/flux/model_training/lora/FLUX.1-dev.sh
+++ b/examples/flux/model_training/lora/FLUX.1-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux/model_training/lora/Nexus-Gen.sh b/examples/flux/model_training/lora/Nexus-Gen.sh
index b98bd58..ba56604 100644
--- a/examples/flux/model_training/lora/Nexus-Gen.sh
+++ b/examples/flux/model_training/lora/Nexus-Gen.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Nexus-Gen/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_nexusgen_edit.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/Nexus-Gen \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/Nexus-Gen/metadata.csv \
--data_file_keys "image,nexus_gen_reference_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/lora/Step1X-Edit.sh b/examples/flux/model_training/lora/Step1X-Edit.sh
index a7f1d8f..a60e165 100644
--- a/examples/flux/model_training/lora/Step1X-Edit.sh
+++ b/examples/flux/model_training/lora/Step1X-Edit.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Step1X-Edit/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_step1x.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/Step1X-Edit \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/Step1X-Edit/metadata.csv \
--data_file_keys "image,step1x_reference_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/flux/model_training/special/npu_training/FLUX.1-Kontext-dev-NPU.sh b/examples/flux/model_training/special/npu_training/FLUX.1-Kontext-dev-NPU.sh
index 7ec976d..897ecfb 100644
--- a/examples/flux/model_training/special/npu_training/FLUX.1-Kontext-dev-NPU.sh
+++ b/examples/flux/model_training/special/npu_training/FLUX.1-Kontext-dev-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \
--max_pixels 1048576 \
--dataset_repeat 400 \
diff --git a/examples/flux/model_training/special/npu_training/FLUX.1-dev-NPU.sh b/examples/flux/model_training/special/npu_training/FLUX.1-dev-NPU.sh
index 8133594..fa5d5d1 100644
--- a/examples/flux/model_training/special/npu_training/FLUX.1-dev-NPU.sh
+++ b/examples/flux/model_training/special/npu_training/FLUX.1-dev-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
diff --git a/examples/flux2/model_training/full/FLUX.2-klein-4B.sh b/examples/flux2/model_training/full/FLUX.2-klein-4B.sh
index 9f9a206..e201724 100644
--- a/examples/flux2/model_training/full/FLUX.2-klein-4B.sh
+++ b/examples/flux2/model_training/full/FLUX.2-klein-4B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-4B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -13,9 +15,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/full/FLUX.2-klein-9B.sh b/examples/flux2/model_training/full/FLUX.2-klein-9B.sh
index 2cc439b..e34053e 100644
--- a/examples/flux2/model_training/full/FLUX.2-klein-9B.sh
+++ b/examples/flux2/model_training/full/FLUX.2-klein-9B.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
+
# This script is tested on 8*A100
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -14,9 +16,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh b/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh
index 6590806..fe9e81f 100644
--- a/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh
+++ b/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -13,9 +15,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
@@ -27,4 +32,4 @@ accelerate launch examples/flux2/model_training/train.py \
# --remove_prefix_in_ckpt "pipe.dit." \
# --output_path "./models/train/FLUX.2-klein-base-4B_full" \
# --trainable_models "dit" \
-# --use_gradient_checkpointing
\ No newline at end of file
+# --use_gradient_checkpointing
diff --git a/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh b/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh
index 4fb8064..d7457c5 100644
--- a/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh
+++ b/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-9B/*" --local_dir ./data/diffsynth_example_dataset
+
# This script is tested on 8*A100
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -14,9 +16,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/lora/FLUX.2-dev.sh b/examples/flux2/model_training/lora/FLUX.2-dev.sh
index 4b1e74b..7a6c08e 100644
--- a/examples/flux2/model_training/lora/FLUX.2-dev.sh
+++ b/examples/flux2/model_training/lora/FLUX.2-dev.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh b/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh
index 0e89205..6f4f3f4 100644
--- a/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh
+++ b/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-4B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh b/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh
index 26265f1..36c1802 100644
--- a/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh
+++ b/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh b/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh
index f23e71f..45ef893 100644
--- a/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh
+++ b/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh b/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh
index d714b0e..aa8a321 100644
--- a/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh
+++ b/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-9B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
@@ -31,4 +36,4 @@ accelerate launch examples/flux2/model_training/train.py \
# --lora_base_model "dit" \
# --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out,single_transformer_blocks.20.attn.to_out,single_transformer_blocks.21.attn.to_out,single_transformer_blocks.22.attn.to_out,single_transformer_blocks.23.attn.to_out" \
# --lora_rank 32 \
-# --use_gradient_checkpointing
\ No newline at end of file
+# --use_gradient_checkpointing
diff --git a/examples/flux2/model_training/special/npu_training/FLUX.2-dev-Lora-NPU.sh b/examples/flux2/model_training/special/npu_training/FLUX.2-dev-Lora-NPU.sh
index ed678f2..a7b4559 100644
--- a/examples/flux2/model_training/special/npu_training/FLUX.2-dev-Lora-NPU.sh
+++ b/examples/flux2/model_training/special/npu_training/FLUX.2-dev-Lora-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-dev/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-dev \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-dev/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/flux2/model_training/special/npu_training/FLUX.2-klein-9B-NPU.sh b/examples/flux2/model_training/special/npu_training/FLUX.2-klein-9B-NPU.sh
index 57755ac..7b5c917 100644
--- a/examples/flux2/model_training/special/npu_training/FLUX.2-klein-9B-NPU.sh
+++ b/examples/flux2/model_training/special/npu_training/FLUX.2-klein-9B-NPU.sh
@@ -2,9 +2,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -17,9 +19,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing
# Edit
+
+# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/ltx2/model_training/full/LTX-2-T2AV-splited.sh b/examples/ltx2/model_training/full/LTX-2-T2AV-splited.sh
index 2d37718..47ed269 100644
--- a/examples/ltx2/model_training/full/LTX-2-T2AV-splited.sh
+++ b/examples/ltx2/model_training/full/LTX-2-T2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \
--height 512 \
diff --git a/examples/ltx2/model_training/full/LTX-2.3-I2AV-splited.sh b/examples/ltx2/model_training/full/LTX-2.3-I2AV-splited.sh
index 7c83a01..b688b2e 100644
--- a/examples/ltx2/model_training/full/LTX-2.3-I2AV-splited.sh
+++ b/examples/ltx2/model_training/full/LTX-2.3-I2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-I2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 512 \
diff --git a/examples/ltx2/model_training/full/LTX-2.3-T2AV-splited.sh b/examples/ltx2/model_training/full/LTX-2.3-T2AV-splited.sh
index 4d02da2..24028d7 100644
--- a/examples/ltx2/model_training/full/LTX-2.3-T2AV-splited.sh
+++ b/examples/ltx2/model_training/full/LTX-2.3-T2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \
--height 512 \
diff --git a/examples/ltx2/model_training/lora/LTX-2-T2AV-IC-LoRA-splited.sh b/examples/ltx2/model_training/lora/LTX-2-T2AV-IC-LoRA-splited.sh
index c4fdd86..fa4d23c 100644
--- a/examples/ltx2/model_training/lora/LTX-2-T2AV-IC-LoRA-splited.sh
+++ b/examples/ltx2/model_training/lora/LTX-2-T2AV-IC-LoRA-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-IC-LoRA-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av_iclora.json \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-IC-LoRA-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-IC-LoRA-splited/metadata.json \
--data_file_keys "video,input_audio,in_context_videos" \
--extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \
--height 512 \
diff --git a/examples/ltx2/model_training/lora/LTX-2-T2AV-noaudio.sh b/examples/ltx2/model_training/lora/LTX-2-T2AV-noaudio.sh
index f7362af..ca91f26 100644
--- a/examples/ltx2/model_training/lora/LTX-2-T2AV-noaudio.sh
+++ b/examples/ltx2/model_training/lora/LTX-2-T2AV-noaudio.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-noaudio/*" --local_dir ./data/diffsynth_example_dataset
+
# single stage training
# accelerate launch examples/ltx2/model_training/train.py \
-# --dataset_base_path data/example_video_dataset/ltx2 \
-# --dataset_metadata_path data/example_video_dataset/ltx2_t2v.csv \
+# --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio \
+# --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio/metadata.csv \
# --height 256 \
# --width 384 \
# --num_frames 25\
@@ -20,8 +22,8 @@
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio/metadata.csv \
--height 512 \
--width 768 \
--num_frames 121\
diff --git a/examples/ltx2/model_training/lora/LTX-2-T2AV-splited.sh b/examples/ltx2/model_training/lora/LTX-2-T2AV-splited.sh
index ebee83d..eefc17b 100644
--- a/examples/ltx2/model_training/lora/LTX-2-T2AV-splited.sh
+++ b/examples/ltx2/model_training/lora/LTX-2-T2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Single Stage Training not recommended for T2AV due to the large memory consumption. Please use the Splited Training instead.
# accelerate launch examples/ltx2/model_training/train.py \
-# --dataset_base_path data/example_video_dataset/ltx2 \
-# --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+# --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
+# --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
# --data_file_keys "video,input_audio" \
# --extra_inputs "input_audio" \
# --height 256 \
@@ -21,8 +23,8 @@
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \
--height 512 \
diff --git a/examples/ltx2/model_training/lora/LTX-2.3-I2AV-splited.sh b/examples/ltx2/model_training/lora/LTX-2.3-I2AV-splited.sh
index 939eff8..35239b0 100644
--- a/examples/ltx2/model_training/lora/LTX-2.3-I2AV-splited.sh
+++ b/examples/ltx2/model_training/lora/LTX-2.3-I2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-I2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 512 \
diff --git a/examples/ltx2/model_training/lora/LTX-2.3-T2AV-IC-LoRA-splited.sh b/examples/ltx2/model_training/lora/LTX-2.3-T2AV-IC-LoRA-splited.sh
index 917c77c..7cfb82a 100644
--- a/examples/ltx2/model_training/lora/LTX-2.3-T2AV-IC-LoRA-splited.sh
+++ b/examples/ltx2/model_training/lora/LTX-2.3-T2AV-IC-LoRA-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-IC-LoRA-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av_iclora.json \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-IC-LoRA-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-IC-LoRA-splited/metadata.json \
--data_file_keys "video,input_audio,in_context_videos" \
--extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \
--height 512 \
diff --git a/examples/ltx2/model_training/lora/LTX-2.3-T2AV-splited.sh b/examples/ltx2/model_training/lora/LTX-2.3-T2AV-splited.sh
index 038d660..ff13a9d 100644
--- a/examples/ltx2/model_training/lora/LTX-2.3-T2AV-splited.sh
+++ b/examples/ltx2/model_training/lora/LTX-2.3-T2AV-splited.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
+
# Splited Training
accelerate launch examples/ltx2/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited \
+ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \
--height 512 \
diff --git a/examples/mova/model_training/full/MOVA-360P-I2AV.sh b/examples/mova/model_training/full/MOVA-360P-I2AV.sh
index fa7c18c..d102653 100644
--- a/examples/mova/model_training/full/MOVA-360P-I2AV.sh
+++ b/examples/mova/model_training/full/MOVA-360P-I2AV.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-360P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 352 \
@@ -19,8 +21,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 352 \
@@ -36,4 +38,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \
--use_gradient_checkpointing
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/mova/model_training/full/MOVA-720P-I2AV.sh b/examples/mova/model_training/full/MOVA-720P-I2AV.sh
index 955efb1..378b292 100644
--- a/examples/mova/model_training/full/MOVA-720P-I2AV.sh
+++ b/examples/mova/model_training/full/MOVA-720P-I2AV.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-720P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 720 \
@@ -19,8 +21,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 720 \
@@ -36,4 +38,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \
--use_gradient_checkpointing
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/mova/model_training/lora/MOVA-360P-I2AV.sh b/examples/mova/model_training/lora/MOVA-360P-I2AV.sh
index 0485968..10d46ba 100644
--- a/examples/mova/model_training/lora/MOVA-360P-I2AV.sh
+++ b/examples/mova/model_training/lora/MOVA-360P-I2AV.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-360P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 352 \
@@ -20,24 +22,24 @@ accelerate launch examples/mova/model_training/train.py \
--use_gradient_checkpointing
# boundary corresponds to timesteps [900, 1000]
-# accelerate launch examples/mova/model_training/train.py \
-# --dataset_base_path data/example_video_dataset/ltx2 \
-# --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
-# --data_file_keys "video,input_audio" \
-# --extra_inputs "input_audio,input_image" \
-# --height 352 \
-# --width 640 \
-# --num_frames 121 \
-# --dataset_repeat 100 \
-# --model_id_with_origin_paths "openmoss/MOVA-360p:video_dit_2/diffusion_pytorch_model-*.safetensors,openmoss/MOVA-360p:audio_dit/diffusion_pytorch_model.safetensors,openmoss/MOVA-360p:dual_tower_bridge/diffusion_pytorch_model.safetensors,openmoss/MOVA-720p:audio_vae/diffusion_pytorch_model.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:Wan2.1_VAE.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:models_t5_umt5-xxl-enc-bf16.safetensors" \
-# --learning_rate 1e-4 \
-# --num_epochs 5 \
-# --remove_prefix_in_ckpt "pipe.video_dit." \
-# --output_path "./models/train/MOVA-360p-I2AV_low_noise_lora" \
-# --lora_base_model "video_dit" \
-# --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
-# --lora_rank 32 \
-# --max_timestep_boundary 1 \
-# --min_timestep_boundary 0.358 \
-# --use_gradient_checkpointing
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+accelerate launch examples/mova/model_training/train.py \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
+ --data_file_keys "video,input_audio" \
+ --extra_inputs "input_audio,input_image" \
+ --height 352 \
+ --width 640 \
+ --num_frames 121 \
+ --dataset_repeat 100 \
+ --model_id_with_origin_paths "openmoss/MOVA-360p:video_dit_2/diffusion_pytorch_model-*.safetensors,openmoss/MOVA-360p:audio_dit/diffusion_pytorch_model.safetensors,openmoss/MOVA-360p:dual_tower_bridge/diffusion_pytorch_model.safetensors,openmoss/MOVA-720p:audio_vae/diffusion_pytorch_model.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:Wan2.1_VAE.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:models_t5_umt5-xxl-enc-bf16.safetensors" \
+ --learning_rate 1e-4 \
+ --num_epochs 5 \
+ --remove_prefix_in_ckpt "pipe.video_dit." \
+ --output_path "./models/train/MOVA-360p-I2AV_low_noise_lora" \
+ --lora_base_model "video_dit" \
+ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
+ --lora_rank 32 \
+ --max_timestep_boundary 1 \
+ --min_timestep_boundary 0.358 \
+ --use_gradient_checkpointing
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/mova/model_training/lora/MOVA-720P-I2AV.sh b/examples/mova/model_training/lora/MOVA-720P-I2AV.sh
index ae3dae1..02ca92e 100644
--- a/examples/mova/model_training/lora/MOVA-720P-I2AV.sh
+++ b/examples/mova/model_training/lora/MOVA-720P-I2AV.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-720P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 720 \
@@ -21,8 +23,8 @@ accelerate launch examples/mova/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/mova/model_training/train.py \
- --dataset_base_path data/example_video_dataset/ltx2 \
- --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \
+ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
+ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \
--height 720 \
@@ -40,4 +42,4 @@ accelerate launch examples/mova/model_training/train.py \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \
--use_gradient_checkpointing
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/mova/model_training/train.py b/examples/mova/model_training/train.py
index 24f08b1..d31cc2a 100644
--- a/examples/mova/model_training/train.py
+++ b/examples/mova/model_training/train.py
@@ -148,11 +148,11 @@ if __name__ == "__main__":
max_pixels=args.max_pixels,
height=args.height,
width=args.width,
- height_division_factor=model.pipe.height_division_factor,
- width_division_factor=model.pipe.width_division_factor,
+ height_division_factor=16,
+ width_division_factor=16,
num_frames=args.num_frames,
- time_division_factor=model.pipe.time_division_factor,
- time_division_remainder=model.pipe.time_division_remainder,
+ time_division_factor=4,
+ time_division_remainder=1,
frame_rate=args.frame_rate,
fix_frame_rate=True,
)
@@ -166,8 +166,8 @@ if __name__ == "__main__":
"input_audio":
ToAbsolutePath(args.dataset_base_path) >> LoadAudioWithTorchaudio(
num_frames=args.num_frames,
- time_division_factor=model.pipe.time_division_factor,
- time_division_remainder=model.pipe.time_division_remainder,
+ time_division_factor=4,
+ time_division_remainder=1,
frame_rate=args.frame_rate,
),
"in_context_videos":
diff --git a/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.0.sh b/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.0.sh
index 2f1b458..b538f58 100644
--- a/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.0.sh
+++ b/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.0.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.0/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.1.sh b/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.1.sh
index e0cae83..d91cad8 100644
--- a/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.1.sh
+++ b/examples/qwen_image/model_training/full/FireRed-Image-Edit-1.1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-2512.sh b/examples/qwen_image/model_training/full/Qwen-Image-2512.sh
index 1ce0798..836219c 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-2512.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-2512.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-2512/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Canny.sh b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Canny.sh
index e369223..937f189 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Canny.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Canny.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch examples/qwen_image/model_training/train.py \
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py
# accelerate launch examples/qwen_image/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image" \
# --max_pixels 1048576 \
# --dataset_repeat 50 \
@@ -35,4 +37,4 @@ accelerate launch examples/qwen_image/model_training/train.py \
# --trainable_models "blockwise_controlnet" \
# --extra_inputs "blockwise_controlnet_image" \
# --use_gradient_checkpointing \
-# --find_unused_parameters
\ No newline at end of file
+# --find_unused_parameters
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Depth.sh b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Depth.sh
index 93313ec..1c8c03f 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Depth.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Depth.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch examples/qwen_image/model_training/train.py \
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py
# accelerate launch examples/qwen_image/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image" \
# --max_pixels 1048576 \
# --dataset_repeat 50 \
@@ -35,4 +37,4 @@ accelerate launch examples/qwen_image/model_training/train.py \
# --trainable_models "blockwise_controlnet" \
# --extra_inputs "blockwise_controlnet_image" \
# --use_gradient_checkpointing \
-# --find_unused_parameters
\ No newline at end of file
+# --find_unused_parameters
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Inpaint.sh b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
index 99b25ad..610c319 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
--max_pixels 1048576 \
--dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch --config_file examples/qwen_image/model_training/full/accelera
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Inpaint-Initialize.py
# accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \
+# --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
+# --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
# --max_pixels 1048576 \
# --dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Distill-Full.sh b/examples/qwen_image/model_training/full/Qwen-Image-Distill-Full.sh
index a56fe9d..5b76c34 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Distill-Full.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Distill-Full.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-Full/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Edit-2509.sh b/examples/qwen_image/model_training/full/Qwen-Image-Edit-2509.sh
index 7fda7b7..78f8701 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Edit-2509.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Edit-2509.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Edit-2511.sh b/examples/qwen_image/model_training/full/Qwen-Image-Edit-2511.sh
index de55646..f524921 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Edit-2511.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Edit-2511.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Edit.sh b/examples/qwen_image/model_training/full/Qwen-Image-Edit.sh
index ec25765..a091c6e 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Edit.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Edit.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_edit.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit/metadata.csv \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Layered-Control.sh b/examples/qwen_image/model_training/full/Qwen-Image-Layered-Control.sh
index 14a3cb4..b297601 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Layered-Control.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Layered-Control.sh
@@ -1,8 +1,8 @@
-# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset/layer \
- --dataset_metadata_path data/example_image_dataset/layer/metadata_layered_control.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control/metadata.json \
--data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image-Layered.sh b/examples/qwen_image/model_training/full/Qwen-Image-Layered.sh
index 91cdb5e..5c72b10 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image-Layered.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image-Layered.sh
@@ -1,8 +1,8 @@
-# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset/layer \
- --dataset_metadata_path data/example_image_dataset/layer/metadata_layered.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered/metadata.json \
--data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/full/Qwen-Image.sh b/examples/qwen_image/model_training/full/Qwen-Image.sh
index 979101e..32b0b20 100644
--- a/examples/qwen_image/model_training/full/Qwen-Image.sh
+++ b/examples/qwen_image/model_training/full/Qwen-Image.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.0.sh b/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.0.sh
index 6d7aaff..5796366 100644
--- a/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.0.sh
+++ b/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.0.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.0/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.1.sh b/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.1.sh
index 0e5ba05..a7a24bb 100644
--- a/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.1.sh
+++ b/examples/qwen_image/model_training/lora/FireRed-Image-Edit-1.1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-2512.sh b/examples/qwen_image/model_training/lora/Qwen-Image-2512.sh
index 00ff802..fb96cd1 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-2512.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-2512.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-2512/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Canny.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Canny.sh
index 2263134..3f4ef4a 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Canny.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Canny.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Depth.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Depth.sh
index 60d3ca3..99b1b78 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Depth.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Depth.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Inpaint.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
index 853ffe2..cc4b95e 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Blockwise-ControlNet-Inpaint.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Distill-Full.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Distill-Full.sh
index 79d7c37..4e10c2b 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Distill-Full.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Distill-Full.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-Full/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Distill-LoRA.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Distill-LoRA.sh
index 061bebb..1efd471 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Distill-LoRA.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Distill-LoRA.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-LoRA/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_distill_qwen_image.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-LoRA \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-LoRA/metadata.csv \
--data_file_keys "image" \
--extra_inputs "seed,rand_device,num_inference_steps,cfg_scale" \
--height 1328 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2509.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2509.sh
index 7fc0cf9..d932bf8 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2509.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2509.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2511.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2511.sh
index bd9e60b..111d75b 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2511.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Edit-2511.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Edit.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Edit.sh
index 0662b1e..00c12a5 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Edit.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Edit.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_edit.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit/metadata.csv \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-EliGen-Poster.sh b/examples/qwen_image/model_training/lora/Qwen-Image-EliGen-Poster.sh
index b042671..b30e0fa 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-EliGen-Poster.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-EliGen-Poster.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-EliGen-Poster/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path "data/example_image_dataset" \
- --dataset_metadata_path data/example_image_dataset/metadata_eligen.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-EliGen-Poster \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-EliGen-Poster/metadata.json \
--data_file_keys "image,eligen_entity_masks" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh b/examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh
index af861e6..25a61e9 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-EliGen.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-EliGen/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path "data/example_image_dataset" \
- --dataset_metadata_path data/example_image_dataset/metadata_eligen.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-EliGen \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-EliGen/metadata.json \
--data_file_keys "image,eligen_entity_masks" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-In-Context-Control-Union.sh b/examples/qwen_image/model_training/lora/Qwen-Image-In-Context-Control-Union.sh
index d241ad0..609d7fa 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-In-Context-Control-Union.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-In-Context-Control-Union.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-In-Context-Control-Union/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path "data/example_image_dataset" \
- --dataset_metadata_path data/example_image_dataset/metadata_qwenimage_context.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-In-Context-Control-Union \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-In-Context-Control-Union/metadata.csv \
--data_file_keys "image,context_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control-V2.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control-V2.sh
index 3cce14b..8531a87 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control-V2.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control-V2.sh
@@ -1,8 +1,8 @@
-# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered-Control-V2/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset/layer_v2 \
- --dataset_metadata_path data/example_image_dataset/layer_v2/metadata_layered_control_v2.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control-V2 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control-V2/metadata.json \
--data_file_keys "image,layer_input_image,context_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control.sh
index 397c975..bb078aa 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Layered-Control.sh
@@ -1,8 +1,8 @@
-# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset/layer \
- --dataset_metadata_path data/example_image_dataset/layer/metadata_layered_control.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control/metadata.json \
--data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image-Layered.sh b/examples/qwen_image/model_training/lora/Qwen-Image-Layered.sh
index 75a23f9..7b9be65 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image-Layered.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image-Layered.sh
@@ -1,8 +1,8 @@
-# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset/layer \
- --dataset_metadata_path data/example_image_dataset/layer/metadata_layered.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered/metadata.json \
--data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \
--dataset_repeat 50 \
diff --git a/examples/qwen_image/model_training/lora/Qwen-Image.sh b/examples/qwen_image/model_training/lora/Qwen-Image.sh
index f1198a5..dd15c54 100644
--- a/examples/qwen_image/model_training/lora/Qwen-Image.sh
+++ b/examples/qwen_image/model_training/lora/Qwen-Image.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/differential_training/Qwen-Image-LoRA.sh b/examples/qwen_image/model_training/special/differential_training/Qwen-Image-LoRA.sh
index 19191dd..d8eb7f5 100644
--- a/examples/qwen_image/model_training/special/differential_training/Qwen-Image-LoRA.sh
+++ b/examples/qwen_image/model_training/special/differential_training/Qwen-Image-LoRA.sh
@@ -3,9 +3,11 @@
# the first training dataset should contain content you do not want to generate,
# and the second training dataset should contain content you do want to generate.
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
@@ -21,8 +23,8 @@ accelerate launch examples/qwen_image/model_training/train.py \
--find_unused_parameters
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/fp8_training/Qwen-Image-LoRA.sh b/examples/qwen_image/model_training/special/fp8_training/Qwen-Image-LoRA.sh
index 133279b..c45c4e4 100644
--- a/examples/qwen_image/model_training/special/fp8_training/Qwen-Image-LoRA.sh
+++ b/examples/qwen_image/model_training/special/fp8_training/Qwen-Image-LoRA.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/low_vram_training/Qwen-Image-LoRA.sh b/examples/qwen_image/model_training/special/low_vram_training/Qwen-Image-LoRA.sh
index d96fcea..9aa16f9 100644
--- a/examples/qwen_image/model_training/special/low_vram_training/Qwen-Image-LoRA.sh
+++ b/examples/qwen_image/model_training/special/low_vram_training/Qwen-Image-LoRA.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-LoRA-NPU.sh b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-LoRA-NPU.sh
index 9c3f02c..97ae664 100644
--- a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-LoRA-NPU.sh
+++ b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-LoRA-NPU.sh
@@ -2,9 +2,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image-Edit-2509:text_encoder/model*.safetensors,Qwen/Qwen-Image-Edit-2509:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-NPU.sh b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-NPU.sh
index 02de9e9..ce77b0d 100644
--- a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-NPU.sh
+++ b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-Edit-2509-NPU.sh
@@ -2,9 +2,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero3.yaml examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \
--max_pixels 1048576 \
diff --git a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-LoRA-NPU.sh b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-LoRA-NPU.sh
index 08978c0..c2d8366 100644
--- a/examples/qwen_image/model_training/special/npu_training/Qwen-Image-LoRA-NPU.sh
+++ b/examples/qwen_image/model_training/special/npu_training/Qwen-Image-LoRA-NPU.sh
@@ -2,9 +2,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/qwen_image/model_training/special/split_training/Qwen-Image-LoRA.sh b/examples/qwen_image/model_training/special/split_training/Qwen-Image-LoRA.sh
index 84e7267..e08b2bc 100644
--- a/examples/qwen_image/model_training/special/split_training/Qwen-Image-LoRA.sh
+++ b/examples/qwen_image/model_training/special/split_training/Qwen-Image-LoRA.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/qwen_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/wanvideo/model_training/full/LongCat-Video.sh b/examples/wanvideo/model_training/full/LongCat-Video.sh
index 2d8902e..70526e3 100644
--- a/examples/wanvideo/model_training/full/LongCat-Video.sh
+++ b/examples/wanvideo/model_training/full/LongCat-Video.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/LongCat-Video/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/LongCat-Video \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/LongCat-Video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/LongCat-Video_full" \
- --trainable_models "dit"
\ No newline at end of file
+ --trainable_models "dit"
diff --git a/examples/wanvideo/model_training/full/Video-As-Prompt-Wan2.1-14B.sh b/examples/wanvideo/model_training/full/Video-As-Prompt-Wan2.1-14B.sh
index 6be4d12..b61fff2 100644
--- a/examples/wanvideo/model_training/full/Video-As-Prompt-Wan2.1-14B.sh
+++ b/examples/wanvideo/model_training/full/Video-As-Prompt-Wan2.1-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Video-As-Prompt-Wan2.1-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vap.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B/metadata.csv \
--data_file_keys "video,vap_video" \
--height 480 \
--width 832 \
diff --git a/examples/wanvideo/model_training/full/Wan2.1-1.3b-speedcontrol-v1.sh b/examples/wanvideo/model_training/full/Wan2.1-1.3b-speedcontrol-v1.sh
index 3d580ab..dd20a41 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-1.3b-speedcontrol-v1.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-1.3b-speedcontrol-v1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-1.3b-speedcontrol-v1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_motion_bucket_id.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.motion_controller." \
--output_path "./models/train/Wan2.1-1.3b-speedcontrol-v1_full" \
--trainable_models "motion_controller" \
- --extra_inputs "motion_bucket_id"
\ No newline at end of file
+ --extra_inputs "motion_bucket_id"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-FLF2V-14B-720P.sh b/examples/wanvideo/model_training/full/Wan2.1-FLF2V-14B-720P.sh
index baf98a9..927c4b2 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-FLF2V-14B-720P.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-FLF2V-14B-720P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-FLF2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-FLF2V-14B-720P_full" \
--trainable_models "dit" \
--extra_inputs "input_image,end_image" \
- --initialize_model_on_cpu
\ No newline at end of file
+ --initialize_model_on_cpu
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-Control.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-Control.sh
index 45a99de..b1d8aa0 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-Control.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-Control_full" \
--trainable_models "dit" \
- --extra_inputs "control_video"
\ No newline at end of file
+ --extra_inputs "control_video"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-InP.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-InP.sh
index a202bf9..01849ed 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-InP.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-1.3B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-1.3B-InP_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-Control.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-Control.sh
index 8a17c3f..d4b9e6d 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-Control.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-Control_full" \
--trainable_models "dit" \
- --extra_inputs "control_video"
\ No newline at end of file
+ --extra_inputs "control_video"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-InP.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-InP.sh
index 86feae7..178d346 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-InP.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-14B-InP_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
index b59ed32..b01f664 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control-Camera_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,camera_control_direction,camera_control_speed"
\ No newline at end of file
+ --extra_inputs "input_image,camera_control_direction,camera_control_speed"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control.sh
index 34273c1..f4aff51 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-Control_full" \
--trainable_models "dit" \
- --extra_inputs "control_video,reference_image"
\ No newline at end of file
+ --extra_inputs "control_video,reference_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-InP.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-InP.sh
index f6eed97..adc0796 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-InP.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-1.3B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-1.3B-InP_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control-Camera.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
index 41b87e9..71e4c01 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control-Camera_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,camera_control_direction,camera_control_speed"
\ No newline at end of file
+ --extra_inputs "input_image,camera_control_direction,camera_control_speed"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh
index ce6640e..45e9e83 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-Control_full" \
--trainable_models "dit" \
- --extra_inputs "control_video,reference_image"
\ No newline at end of file
+ --extra_inputs "control_video,reference_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-InP.sh b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-InP.sh
index afb5d3d..91f0355 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-InP.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-Fun-V1.1-14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -10,4 +12,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-Fun-V1.1-14B-InP_full" \
--trainable_models "dit" \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-480P.sh b/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-480P.sh
index 492898b..e6ef563 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-480P.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-480P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.1-I2V-14B-480P_full" \
--trainable_models "dit" \
--extra_inputs "input_image" \
- --initialize_model_on_cpu
\ No newline at end of file
+ --initialize_model_on_cpu
diff --git a/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-720P.sh b/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-720P.sh
index 1d91359..1efff94 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-720P.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-I2V-14B-720P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P/metadata.csv \
--height 720 \
--width 1280 \
--num_frames 49 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--extra_inputs "input_image" \
--use_gradient_checkpointing_offload \
- --initialize_model_on_cpu
\ No newline at end of file
+ --initialize_model_on_cpu
diff --git a/examples/wanvideo/model_training/full/Wan2.1-T2V-1.3B.sh b/examples/wanvideo/model_training/full/Wan2.1-T2V-1.3B.sh
index e0d6e84..91b7539 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-T2V-1.3B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-T2V-1.3B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-1.3B_full" \
- --trainable_models "dit"
\ No newline at end of file
+ --trainable_models "dit"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-T2V-14B.sh b/examples/wanvideo/model_training/full/Wan2.1-T2V-14B.sh
index ae804b0..c108f6c 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-T2V-14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-T2V-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.1-T2V-14B_full" \
- --trainable_models "dit"
\ No newline at end of file
+ --trainable_models "dit"
diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh
index 19b6ecb..1554a81 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B-Preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B-Preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
-# The learning rate is kept consistent with the settings in the original paper
\ No newline at end of file
+# The learning rate is kept consistent with the settings in the original paper
diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh
index f9768c6..5ac3cc7 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-1.3B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
-# The learning rate is kept consistent with the settings in the original paper
\ No newline at end of file
+# The learning rate is kept consistent with the settings in the original paper
diff --git a/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh b/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh
index 401a647..043ede5 100644
--- a/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.1-VACE-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "vace" \
--extra_inputs "vace_video,vace_reference_image" \
--use_gradient_checkpointing_offload
-# The learning rate is kept consistent with the settings in the original paper
\ No newline at end of file
+# The learning rate is kept consistent with the settings in the original paper
diff --git a/examples/wanvideo/model_training/full/Wan2.2-Animate-14B.sh b/examples/wanvideo/model_training/full/Wan2.2-Animate-14B.sh
index ab09a78..95474e7 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-Animate-14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-Animate-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Animate-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_animate.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B/metadata.csv \
--data_file_keys "video,animate_pose_video,animate_face_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--output_path "./models/train/Wan2.2-Animate-14B_full" \
--trainable_models "animate_adapter" \
--extra_inputs "input_image,animate_pose_video,animate_face_video" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control-Camera.sh b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control-Camera.sh
index fe85ca8..eaa6d27 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,camera_control_direction,camera_control_speed" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control.sh b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control.sh
index 6f5ac87..ff836b3 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -32,4 +34,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "control_video,reference_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-InP.sh b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-InP.sh
index 7c623a0..b88b6fd 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-InP.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-Fun-A14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--extra_inputs "input_image,end_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
index 10fb02f..872cf73 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-I2V-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-I2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -18,8 +20,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -34,4 +36,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--use_gradient_checkpointing_offload \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/wanvideo/model_training/full/Wan2.2-S2V-14B.sh b/examples/wanvideo/model_training/full/Wan2.2-S2V-14B.sh
index 9eec871..d7be0f8 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-S2V-14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-S2V-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-S2V-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_s2v.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B/metadata.csv \
--data_file_keys "video,input_audio,s2v_pose_video" \
--height 448 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-S2V-14B_full" \
--extra_inputs "input_image,input_audio,s2v_pose_video" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
index 89c0704..c336cfe 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-T2V-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -16,8 +18,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [875, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -30,4 +32,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--trainable_models "dit" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.417
-# boundary corresponds to timesteps [0, 875)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 875)
diff --git a/examples/wanvideo/model_training/full/Wan2.2-TI2V-5B.sh b/examples/wanvideo/model_training/full/Wan2.2-TI2V-5B.sh
index def9f89..4cdcddf 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-TI2V-5B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-TI2V-5B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-TI2V-5B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Wan2.2-TI2V-5B_full" \
--trainable_models "dit" \
- --extra_inputs "input_image"
\ No newline at end of file
+ --extra_inputs "input_image"
diff --git a/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh b/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh
index ba3e875..da834b1 100644
--- a/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh
+++ b/examples/wanvideo/model_training/full/Wan2.2-VACE-Fun-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -22,8 +24,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -41,4 +43,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--min_timestep_boundary 0.358 \
--initialize_model_on_cpu
# boundary corresponds to timesteps [0, 900]
-# The learning rate is kept consistent with the settings in the original paper
\ No newline at end of file
+# The learning rate is kept consistent with the settings in the original paper
diff --git a/examples/wanvideo/model_training/full/krea-realtime-video.sh b/examples/wanvideo/model_training/full/krea-realtime-video.sh
index f0c4c85..00a7027 100644
--- a/examples/wanvideo/model_training/full/krea-realtime-video.sh
+++ b/examples/wanvideo/model_training/full/krea-realtime-video.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/krea-realtime-video/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -9,4 +11,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/krea-realtime-video_full" \
- --trainable_models "dit"
\ No newline at end of file
+ --trainable_models "dit"
diff --git a/examples/wanvideo/model_training/lora/LongCat-Video.sh b/examples/wanvideo/model_training/lora/LongCat-Video.sh
index 022048c..b507570 100644
--- a/examples/wanvideo/model_training/lora/LongCat-Video.sh
+++ b/examples/wanvideo/model_training/lora/LongCat-Video.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/LongCat-Video/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/LongCat-Video \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/LongCat-Video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/LongCat-Video_lora" \
--lora_base_model "dit" \
--lora_target_modules "adaLN_modulation.1,attn.qkv,attn.proj,cross_attn.q_linear,cross_attn.kv_linear,cross_attn.proj,ffn.w1,ffn.w2,ffn.w3" \
- --lora_rank 32
\ No newline at end of file
+ --lora_rank 32
diff --git a/examples/wanvideo/model_training/lora/Video-As-Prompt-Wan2.1-14B.sh b/examples/wanvideo/model_training/lora/Video-As-Prompt-Wan2.1-14B.sh
index c2c609a..4a8b4f4 100644
--- a/examples/wanvideo/model_training/lora/Video-As-Prompt-Wan2.1-14B.sh
+++ b/examples/wanvideo/model_training/lora/Video-As-Prompt-Wan2.1-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Video-As-Prompt-Wan2.1-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vap.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Video-As-Prompt-Wan2.1-14B/metadata.csv \
--data_file_keys "video,vap_video" \
--height 480 \
--width 832 \
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-1.3b-speedcontrol-v1.sh b/examples/wanvideo/model_training/lora/Wan2.1-1.3b-speedcontrol-v1.sh
index 51ebfe4..eb0ffa7 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-1.3b-speedcontrol-v1.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-1.3b-speedcontrol-v1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-1.3b-speedcontrol-v1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_motion_bucket_id.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-1.3b-speedcontrol-v1/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "motion_bucket_id"
\ No newline at end of file
+ --extra_inputs "motion_bucket_id"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-FLF2V-14B-720P.sh b/examples/wanvideo/model_training/lora/Wan2.1-FLF2V-14B-720P.sh
index 9a9622d..b6f9f28 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-FLF2V-14B-720P.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-FLF2V-14B-720P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-FLF2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-FLF2V-14B-720P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-Control.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-Control.sh
index 03c1f45..d6f38b7 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-Control.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "control_video"
\ No newline at end of file
+ --extra_inputs "control_video"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-InP.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-InP.sh
index d5f509b..067071a 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-InP.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-1.3B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-Control.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-Control.sh
index 608df5f..05cc176 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-Control.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-Control/metadata.csv \
--data_file_keys "video,control_video" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "control_video"
\ No newline at end of file
+ --extra_inputs "control_video"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-InP.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-InP.sh
index 37b2518..c9a35a4 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-InP.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
index 2f809a4..f6cfe4f 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,camera_control_direction,camera_control_speed"
\ No newline at end of file
+ --extra_inputs "input_image,camera_control_direction,camera_control_speed"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control.sh
index 1e7156d..f73e2bd 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "control_video,reference_image"
\ No newline at end of file
+ --extra_inputs "control_video,reference_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-InP.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-InP.sh
index 5879f59..42b2e50 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-InP.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-1.3B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-1.3B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control-Camera.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
index 176a05f..ae70d87 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control-Camera/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,camera_control_direction,camera_control_speed"
\ No newline at end of file
+ --extra_inputs "input_image,camera_control_direction,camera_control_speed"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh
index 3ead12c..11a17b0 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "control_video,reference_image"
\ No newline at end of file
+ --extra_inputs "control_video,reference_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-InP.sh b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-InP.sh
index 40a8ad0..8767df9 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-InP.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-Fun-V1.1-14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-Fun-V1.1-14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-Fun-V1.1-14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image,end_image"
\ No newline at end of file
+ --extra_inputs "input_image,end_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-480P.sh b/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-480P.sh
index 473d519..eef3800 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-480P.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-480P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -12,4 +14,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image"
\ No newline at end of file
+ --extra_inputs "input_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-720P.sh b/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-720P.sh
index 52b72bd..1841080 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-720P.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-I2V-14B-720P.sh
@@ -1,8 +1,10 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-720P/*" --local_dir ./data/diffsynth_example_dataset
+
-# 1*80G GPU cannot train Wan2.2-Animate-14B LoRA
+# 1*80G GPU cannot train Wan2.1-I2V-14B-720P LoRA
# We tested on 8*80G GPUs
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-720P/metadata.csv \
--height 720 \
--width 1280 \
--dataset_repeat 100 \
@@ -16,4 +18,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_rank 32 \
--extra_inputs "input_image" \
--use_gradient_checkpointing_offload \
- --initialize_model_on_cpu
\ No newline at end of file
+ --initialize_model_on_cpu
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-T2V-1.3B.sh b/examples/wanvideo/model_training/lora/Wan2.1-T2V-1.3B.sh
index d16a287..8b1a2a6 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-T2V-1.3B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-T2V-1.3B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/Wan2.1-T2V-1.3B_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
- --lora_rank 32
\ No newline at end of file
+ --lora_rank 32
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-T2V-14B.sh b/examples/wanvideo/model_training/lora/Wan2.1-T2V-14B.sh
index 1fb55ac..f8c5c52 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-T2V-14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-T2V-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/Wan2.1-T2V-14B_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
- --lora_rank 32
\ No newline at end of file
+ --lora_rank 32
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B-Preview.sh b/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B-Preview.sh
index 2bcb55b..1040a17 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B-Preview.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B-Preview.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B-Preview/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B-Preview/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B.sh b/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B.sh
index b565078..f6e9891 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-VACE-1.3B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-1.3B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-1.3B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -14,4 +16,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/lora/Wan2.1-VACE-14B.sh b/examples/wanvideo/model_training/lora/Wan2.1-VACE-14B.sh
index 633ea0e..6c37a3b 100644
--- a/examples/wanvideo/model_training/lora/Wan2.1-VACE-14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.1-VACE-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-VACE-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-VACE-14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -15,4 +17,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "vace_video,vace_reference_image" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-Animate-14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-Animate-14B.sh
index 0b6e571..4e5d6ec 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-Animate-14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-Animate-14B.sh
@@ -1,8 +1,10 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Animate-14B/*" --local_dir ./data/diffsynth_example_dataset
+
# 1*80G GPU cannot train Wan2.2-Animate-14B LoRA
# We tested on 8*80G GPUs
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_animate.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Animate-14B/metadata.csv \
--data_file_keys "video,animate_pose_video,animate_face_video" \
--height 480 \
--width 832 \
@@ -17,4 +19,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,animate_pose_video,animate_face_video" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control-Camera.sh b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control-Camera.sh
index 1a9983b..b982d39 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control-Camera.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control-Camera.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control-Camera/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_camera_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control-Camera/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image,camera_control_direction,camera_control_speed" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control.sh b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control.sh
index 571ae54..1841aaa 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-Control.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-Control/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_reference_control.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-Control/metadata.csv \
--data_file_keys "video,control_video,reference_image" \
--height 480 \
--width 832 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "control_video,reference_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-InP.sh b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-InP.sh
index 491351c..ccf5dbb 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-InP.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-Fun-A14B-InP.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-Fun-A14B-InP/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -18,8 +20,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-Fun-A14B-InP/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -34,4 +36,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image,end_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
index 1d9eba0..d487d25 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-I2V-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-I2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-I2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -36,4 +38,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--extra_inputs "input_image" \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900)
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-S2V-14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-S2V-14B.sh
index ec5bb87..1f819f5 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-S2V-14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-S2V-14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-S2V-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_s2v.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-S2V-14B/metadata.csv \
--data_file_keys "video,input_audio,s2v_pose_video" \
--height 448 \
--width 832 \
@@ -16,4 +18,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
--extra_inputs "input_image,input_audio,s2v_pose_video" \
- --use_gradient_checkpointing_offload
\ No newline at end of file
+ --use_gradient_checkpointing_offload
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
index f47c96b..ed1b4a3 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-T2V-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -19,8 +21,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -35,4 +37,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_rank 32 \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.417
-# boundary corresponds to timesteps [0, 875)
\ No newline at end of file
+# boundary corresponds to timesteps [0, 875)
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-TI2V-5B.sh b/examples/wanvideo/model_training/lora/Wan2.2-TI2V-5B.sh
index 6a33b57..fe733a2 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-TI2V-5B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-TI2V-5B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-TI2V-5B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-TI2V-5B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -13,4 +15,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
--lora_rank 32 \
- --extra_inputs "input_image"
\ No newline at end of file
+ --extra_inputs "input_image"
diff --git a/examples/wanvideo/model_training/lora/Wan2.2-VACE-Fun-A14B.sh b/examples/wanvideo/model_training/lora/Wan2.2-VACE-Fun-A14B.sh
index 93b38cf..dba43e5 100644
--- a/examples/wanvideo/model_training/lora/Wan2.2-VACE-Fun-A14B.sh
+++ b/examples/wanvideo/model_training/lora/Wan2.2-VACE-Fun-A14B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -21,8 +23,8 @@ accelerate launch examples/wanvideo/model_training/train.py \
# boundary corresponds to timesteps [900, 1000]
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -40,4 +42,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--use_gradient_checkpointing_offload \
--max_timestep_boundary 1 \
--min_timestep_boundary 0.358
-# boundary corresponds to timesteps [0, 900]
\ No newline at end of file
+# boundary corresponds to timesteps [0, 900]
diff --git a/examples/wanvideo/model_training/lora/krea-realtime-video.sh b/examples/wanvideo/model_training/lora/krea-realtime-video.sh
index 94c64d1..b452612 100644
--- a/examples/wanvideo/model_training/lora/krea-realtime-video.sh
+++ b/examples/wanvideo/model_training/lora/krea-realtime-video.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/krea-realtime-video/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/krea-realtime-video/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
@@ -11,4 +13,4 @@ accelerate launch examples/wanvideo/model_training/train.py \
--output_path "./models/train/krea-realtime-video_lora" \
--lora_base_model "dit" \
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
- --lora_rank 32
\ No newline at end of file
+ --lora_rank 32
diff --git a/examples/wanvideo/model_training/special/direct_distill/Wan2.1-T2V-1.3B.sh b/examples/wanvideo/model_training/special/direct_distill/Wan2.1-T2V-1.3B.sh
index 73e85f3..e01b85a 100644
--- a/examples/wanvideo/model_training/special/direct_distill/Wan2.1-T2V-1.3B.sh
+++ b/examples/wanvideo/model_training/special/direct_distill/Wan2.1-T2V-1.3B.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-1.3B_direct_distill/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_distill.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B_direct_distill \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-1.3B_direct_distill/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 160 \
diff --git a/examples/wanvideo/model_training/special/fp8_training/Wan2.1-I2V-14B-480P.sh b/examples/wanvideo/model_training/special/fp8_training/Wan2.1-I2V-14B-480P.sh
index 58863b8..4432875 100644
--- a/examples/wanvideo/model_training/special/fp8_training/Wan2.1-I2V-14B-480P.sh
+++ b/examples/wanvideo/model_training/special/fp8_training/Wan2.1-I2V-14B-480P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
diff --git a/examples/wanvideo/model_training/special/low_vram_training/Wan2.1-I2V-14B-480P.sh b/examples/wanvideo/model_training/special/low_vram_training/Wan2.1-I2V-14B-480P.sh
index 4bb8249..bdfa802 100644
--- a/examples/wanvideo/model_training/special/low_vram_training/Wan2.1-I2V-14B-480P.sh
+++ b/examples/wanvideo/model_training/special/low_vram_training/Wan2.1-I2V-14B-480P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 1 \
diff --git a/examples/wanvideo/model_training/special/npu_training/Wan2.1-T2V-14B-NPU.sh b/examples/wanvideo/model_training/special/npu_training/Wan2.1-T2V-14B-NPU.sh
index ac2d9dd..8be7469 100644
--- a/examples/wanvideo/model_training/special/npu_training/Wan2.1-T2V-14B-NPU.sh
+++ b/examples/wanvideo/model_training/special/npu_training/Wan2.1-T2V-14B-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-T2V-14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-T2V-14B/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 100 \
diff --git a/examples/wanvideo/model_training/special/npu_training/Wan2.2-T2V-A14B-NPU.sh b/examples/wanvideo/model_training/special/npu_training/Wan2.2-T2V-A14B-NPU.sh
index 4748f87..989bd4e 100644
--- a/examples/wanvideo/model_training/special/npu_training/Wan2.2-T2V-A14B-NPU.sh
+++ b/examples/wanvideo/model_training/special/npu_training/Wan2.2-T2V-A14B-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-T2V-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
@@ -20,8 +22,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [875, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-T2V-A14B/metadata.csv \
--height 480 \
--width 832 \
--num_frames 49 \
diff --git a/examples/wanvideo/model_training/special/npu_training/Wan2.2-VACE-Fun-A14B-NPU.sh b/examples/wanvideo/model_training/special/npu_training/Wan2.2-VACE-Fun-A14B-NPU.sh
index 304d53d..b72fdfc 100644
--- a/examples/wanvideo/model_training/special/npu_training/Wan2.2-VACE-Fun-A14B-NPU.sh
+++ b/examples/wanvideo/model_training/special/npu_training/Wan2.2-VACE-Fun-A14B-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.2-VACE-Fun-A14B/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
@@ -24,8 +26,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.2-VACE-Fun-A14B/metadata.csv \
--data_file_keys "video,vace_video,vace_reference_image" \
--height 480 \
--width 832 \
diff --git a/examples/wanvideo/model_training/special/split_training/Wan2.1-I2V-14B-480P.sh b/examples/wanvideo/model_training/special/split_training/Wan2.1-I2V-14B-480P.sh
index 6ea109f..36ff935 100644
--- a/examples/wanvideo/model_training/special/split_training/Wan2.1-I2V-14B-480P.sh
+++ b/examples/wanvideo/model_training/special/split_training/Wan2.1-I2V-14B-480P.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "wanvideo/Wan2.1-I2V-14B-480P/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/wanvideo/model_training/train.py \
- --dataset_base_path data/example_video_dataset \
- --dataset_metadata_path data/example_video_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P \
+ --dataset_metadata_path data/diffsynth_example_dataset/wanvideo/Wan2.1-I2V-14B-480P/metadata.csv \
--height 480 \
--width 832 \
--dataset_repeat 1 \
diff --git a/examples/z_image/model_training/full/Z-Image-Omni-Base.sh b/examples/z_image/model_training/full/Z-Image-Omni-Base.sh
index 4ed3c18..f677d59 100644
--- a/examples/z_image/model_training/full/Z-Image-Omni-Base.sh
+++ b/examples/z_image/model_training/full/Z-Image-Omni-Base.sh
@@ -1,8 +1,10 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Omni-Base/*" --local_dir ./data/diffsynth_example_dataset
+
# This example is tested on 8*A100
# Text to image training
accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Omni-Base:transformer/*.safetensors,Tongyi-MAI/Z-Image-Omni-Base:siglip/model.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
@@ -17,8 +19,8 @@ accelerate launch --config_file examples/z_image/model_training/full/accelerate_
# Image(s) to image training
# accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base \
+# --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base/metadata.csv \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
index 1f0f928..0f38f4b 100644
--- a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
+++ b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
index 69d0958..2b334d5 100644
--- a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
+++ b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
index c56e735..fce6edf 100644
--- a/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
+++ b/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/full/Z-Image-Turbo.sh b/examples/z_image/model_training/full/Z-Image-Turbo.sh
index 7bbe909..524c647 100644
--- a/examples/z_image/model_training/full/Z-Image-Turbo.sh
+++ b/examples/z_image/model_training/full/Z-Image-Turbo.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo/*" --local_dir ./data/diffsynth_example_dataset
+
# This example is tested on 8*A100
accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/full/Z-Image.sh b/examples/z_image/model_training/full/Z-Image.sh
index 2136324..dae69c8 100644
--- a/examples/z_image/model_training/full/Z-Image.sh
+++ b/examples/z_image/model_training/full/Z-Image.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image/*" --local_dir ./data/diffsynth_example_dataset
+
# This example is tested on 8*A100
accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/lora/Z-Image-Omni-Base.sh b/examples/z_image/model_training/lora/Z-Image-Omni-Base.sh
index d8f522a..9429a85 100644
--- a/examples/z_image/model_training/lora/Z-Image-Omni-Base.sh
+++ b/examples/z_image/model_training/lora/Z-Image-Omni-Base.sh
@@ -1,7 +1,9 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Omni-Base/*" --local_dir ./data/diffsynth_example_dataset
+
# Text to image training
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Omni-Base:transformer/*.safetensors,Tongyi-MAI/Z-Image-Omni-Base:siglip/model.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
@@ -18,8 +20,8 @@ accelerate launch examples/z_image/model_training/train.py \
# Image(s) to image training
# accelerate launch examples/z_image/model_training/train.py \
-# --dataset_base_path data/example_image_dataset \
-# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \
+# --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base \
+# --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Omni-Base/metadata.csv \
# --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \
# --max_pixels 1048576 \
diff --git a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
index 9f2032f..7a650f7 100644
--- a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
+++ b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
index 22c46ce..cdd9310 100644
--- a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
+++ b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
index 97de2a0..7bb1bb5 100644
--- a/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
+++ b/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1 \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo-Fun-Controlnet-Union-2.1/metadata.csv \
--data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \
--dataset_repeat 100 \
diff --git a/examples/z_image/model_training/lora/Z-Image-Turbo.sh b/examples/z_image/model_training/lora/Z-Image-Turbo.sh
index a00d57e..0a45709 100644
--- a/examples/z_image/model_training/lora/Z-Image-Turbo.sh
+++ b/examples/z_image/model_training/lora/Z-Image-Turbo.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/lora/Z-Image.sh b/examples/z_image/model_training/lora/Z-Image.sh
index b660eef..8c0038b 100644
--- a/examples/z_image/model_training/lora/Z-Image.sh
+++ b/examples/z_image/model_training/lora/Z-Image.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/special/differential_training/Z-Image-Turbo.sh b/examples/z_image/model_training/special/differential_training/Z-Image-Turbo.sh
index 1751ec7..60f3c86 100644
--- a/examples/z_image/model_training/special/differential_training/Z-Image-Turbo.sh
+++ b/examples/z_image/model_training/special/differential_training/Z-Image-Turbo.sh
@@ -4,9 +4,11 @@
# This issue can be mitigated by using a pre-trained LoRA model to assist the training process.
# https://www.modelscope.cn/models/ostris/zimage_turbo_training_adapter
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/special/npu_training/Z-Image-Turbo-NPU.sh b/examples/z_image/model_training/special/npu_training/Z-Image-Turbo-NPU.sh
index 75938bf..e0313ab 100644
--- a/examples/z_image/model_training/special/npu_training/Z-Image-Turbo-NPU.sh
+++ b/examples/z_image/model_training/special/npu_training/Z-Image-Turbo-NPU.sh
@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 400 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
diff --git a/examples/z_image/model_training/special/trajectory_imitation/Z-Image-Turbo.sh b/examples/z_image/model_training/special/trajectory_imitation/Z-Image-Turbo.sh
index c4ec8de..0fea6be 100644
--- a/examples/z_image/model_training/special/trajectory_imitation/Z-Image-Turbo.sh
+++ b/examples/z_image/model_training/special/trajectory_imitation/Z-Image-Turbo.sh
@@ -1,6 +1,8 @@
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/Z-Image-Turbo/*" --local_dir ./data/diffsynth_example_dataset
+
accelerate launch examples/z_image/model_training/train.py \
- --dataset_base_path data/example_image_dataset \
- --dataset_metadata_path data/example_image_dataset/metadata.csv \
+ --dataset_base_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo \
+ --dataset_metadata_path data/diffsynth_example_dataset/z_image/Z-Image-Turbo/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \