Compare commits

...

2 Commits
dev ... main

Author SHA1 Message Date
Zhongjie Duan
ba0626e38f add example_dataset in training scripts (#1358)
* add example_dataset in training scripts

* fix example datasets
2026-03-18 15:37:03 +08:00
Hong Zhang
4ec4d9c20a Merge pull request #1354 from mi804/low_vram_training_ds
low vram training with deepspeed zero3
2026-03-17 16:09:52 +08:00
200 changed files with 1257 additions and 581 deletions

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@
/models /models
/scripts /scripts
/diffusers /diffusers
/.vscode
*.pkl *.pkl
*.safetensors *.safetensors
*.pth *.pth

View File

@@ -1,12 +1,32 @@
import torch import torch
# Optional dependency: DeepSpeed is only required for ZeRO-3 / activation
# checkpointing support; the rest of this module works without it.
try:
    import deepspeed  # noqa: F401
    _HAS_DEEPSPEED = True
except ImportError:
    # ImportError (not just ModuleNotFoundError) also covers a present-but-broken
    # install (e.g. a failing native extension), so the module still imports and
    # simply disables the DeepSpeed checkpointing path.
    _HAS_DEEPSPEED = False
def create_custom_forward(module): def create_custom_forward(module):
def custom_forward(*inputs, **kwargs): def custom_forward(*inputs, **kwargs):
return module(*inputs, **kwargs) return module(*inputs, **kwargs)
return custom_forward return custom_forward
def create_custom_forward_use_reentrant(module):
    """Wrap ``module`` so it is invoked with positional arguments only.

    DeepSpeed's reentrant ``checkpointing.checkpoint`` API forwards positional
    arguments exclusively, hence the wrapper accepts no keyword arguments
    (unlike ``create_custom_forward``).
    """
    return lambda *inputs: module(*inputs)
def judge_args_requires_grad(*args):
    """Return ``True`` if any argument is a ``torch.Tensor`` with ``requires_grad`` set.

    Non-tensor arguments are ignored; an empty argument list yields ``False``.
    """
    return any(
        isinstance(candidate, torch.Tensor) and candidate.requires_grad
        for candidate in args
    )
def gradient_checkpoint_forward( def gradient_checkpoint_forward(
model, model,
use_gradient_checkpointing, use_gradient_checkpointing,
@@ -14,6 +34,17 @@ def gradient_checkpoint_forward(
*args, *args,
**kwargs, **kwargs,
): ):
if use_gradient_checkpointing and _HAS_DEEPSPEED and deepspeed.checkpointing.is_configured():
all_args = args + tuple(kwargs.values())
if not judge_args_requires_grad(*all_args):
# get the first grad_enabled tensor from un_checkpointed forward
model_output = model(*args, **kwargs)
else:
model_output = deepspeed.checkpointing.checkpoint(
create_custom_forward_use_reentrant(model),
*all_args,
)
return model_output
if use_gradient_checkpointing_offload: if use_gradient_checkpointing_offload:
with torch.autograd.graph.save_on_cpu(): with torch.autograd.graph.save_on_cpu():
model_output = torch.utils.checkpoint.checkpoint( model_output = torch.utils.checkpoint.checkpoint(

View File

@@ -29,7 +29,7 @@ def launch_training_task(
dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, collate_fn=lambda x: x[0], num_workers=num_workers) dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, collate_fn=lambda x: x[0], num_workers=num_workers)
model.to(device=accelerator.device) model.to(device=accelerator.device)
model, optimizer, dataloader, scheduler = accelerator.prepare(model, optimizer, dataloader, scheduler) model, optimizer, dataloader, scheduler = accelerator.prepare(model, optimizer, dataloader, scheduler)
initialize_deepspeed_gradient_checkpointing(accelerator)
for epoch_id in range(num_epochs): for epoch_id in range(num_epochs):
for data in tqdm(dataloader): for data in tqdm(dataloader):
with accelerator.accumulate(model): with accelerator.accumulate(model):
@@ -70,3 +70,19 @@ def launch_data_process_task(
save_path = os.path.join(model_logger.output_path, str(accelerator.process_index), f"{data_id}.pth") save_path = os.path.join(model_logger.output_path, str(accelerator.process_index), f"{data_id}.pth")
data = model(data) data = model(data)
torch.save(data, save_path) torch.save(data, save_path)
def initialize_deepspeed_gradient_checkpointing(accelerator: Accelerator):
    """Configure DeepSpeed's activation-checkpointing engine from the accelerator's config.

    Reads the ``activation_checkpointing`` section of the DeepSpeed config (if a
    DeepSpeed plugin is active) and forwards it to ``deepspeed.checkpointing.configure``
    so that ``deepspeed.checkpointing.checkpoint`` can be used in place of PyTorch's
    native gradient checkpointing. No-op (with a console notice) when no
    ``activation_checkpointing`` section is present; silent no-op when accelerate is
    not running under DeepSpeed at all.
    """
    # getattr guard: accelerator.state only has a deepspeed_plugin when launched
    # with distributed_type DEEPSPEED.
    if getattr(accelerator.state, "deepspeed_plugin", None) is not None:
        ds_config = accelerator.state.deepspeed_plugin.deepspeed_config
        if "activation_checkpointing" in ds_config:
            # Imported lazily so this module does not hard-depend on deepspeed.
            import deepspeed
            act_config = ds_config["activation_checkpointing"]
            # Map DeepSpeed JSON config keys to configure() kwargs; note the key
            # names differ from the kwarg names (e.g. "cpu_checkpointing" ->
            # checkpoint_in_cpu). Missing keys default to False.
            deepspeed.checkpointing.configure(
                mpu_=None,  # no model-parallel unit in this training setup
                partition_activations=act_config.get("partition_activations", False),
                checkpoint_in_cpu=act_config.get("cpu_checkpointing", False),
                contiguous_checkpointing=act_config.get("contiguous_memory_optimization", False)
            )
        else:
            print("Do not find activation_checkpointing config in deepspeed config, skip initializing deepspeed gradient checkpointing.")

View File

@@ -133,7 +133,7 @@ Anima models are trained through [`examples/anima/model_training/train.py`](http
We provide a sample image dataset for testing: We provide a sample image dataset for testing:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
For training script details, refer to [Model Training](../Pipeline_Usage/Model_Training.md). For advanced training techniques, see [Training Framework Documentation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/). For training script details, refer to [Model Training](../Pipeline_Usage/Model_Training.md). For advanced training techniques, see [Training Framework Documentation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/).

View File

@@ -195,7 +195,7 @@ FLUX series models are uniformly trained through [`examples/flux/model_training/
We have built a sample image dataset for your testing. You can download this dataset with the following command: We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -145,7 +145,7 @@ FLUX.2 series models are uniformly trained through [`examples/flux2/model_traini
We have built a sample image dataset for your testing. You can download this dataset with the following command: We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -217,7 +217,7 @@ LTX-2 series models are uniformly trained through [`examples/ltx2/model_training
We have built a sample video dataset for your testing. You can download this dataset with the following command: We have built a sample video dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -199,7 +199,7 @@ Qwen-Image series models are uniformly trained through [`examples/qwen_image/mod
We have built a sample image dataset for your testing. You can download this dataset with the following command: We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -253,7 +253,7 @@ Wan series models are uniformly trained through [`examples/wanvideo/model_traini
We have built a sample video dataset for your testing. You can download this dataset with the following command: We have built a sample video dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -134,7 +134,7 @@ Z-Image series models are uniformly trained through [`examples/z_image/model_tra
We have built a sample image dataset for your testing. You can download this dataset with the following command: We have built a sample image dataset for your testing. You can download this dataset with the following command:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/). We have written recommended training scripts for each model, please refer to the table in the "Model Overview" section above. For how to write model training scripts, please refer to [Model Training](../Pipeline_Usage/Model_Training.md); for more advanced training algorithms, please refer to [Training Framework Detailed Explanation](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/en/Training/).

View File

@@ -69,25 +69,11 @@ We have built sample datasets for your testing. To understand how the universal
<details> <details>
<summary>Sample Image Dataset</summary> <summary>Sample Dataset</summary>
> ```shell > ```shell
> modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset > modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
> ``` > ```
>
> Applicable to training of image generation models such as Qwen-Image and FLUX.
</details>
<details>
<summary>Sample Video Dataset</summary>
> ```shell
> modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
> ```
>
> Applicable to training of video generation models such as Wan.
</details> </details>
@@ -123,7 +109,6 @@ Similar to [model loading during inference](../Pipeline_Usage/Model_Inference.md
<details> <details>
<details>
<summary>Load models from local file paths</summary> <summary>Load models from local file paths</summary>
@@ -244,4 +229,119 @@ accelerate launch --config_file examples/qwen_image/model_training/full/accelera
* The training framework does not support batch size > 1. The reasons are complex. See [Q&A: Why doesn't the training framework support batch size > 1?](../QA.md#why-doesnt-the-training-framework-support-batch-size--1) * The training framework does not support batch size > 1. The reasons are complex. See [Q&A: Why doesn't the training framework support batch size > 1?](../QA.md#why-doesnt-the-training-framework-support-batch-size--1)
* Some models contain redundant parameters. For example, the text encoding part of the last layer of Qwen-Image's DiT part. When training these models, `--find_unused_parameters` needs to be set to avoid errors in multi-GPU training. For compatibility with community models, we do not intend to remove these redundant parameters. * Some models contain redundant parameters. For example, the text encoding part of the last layer of Qwen-Image's DiT part. When training these models, `--find_unused_parameters` needs to be set to avoid errors in multi-GPU training. For compatibility with community models, we do not intend to remove these redundant parameters.
* The loss function value of Diffusion models has little relationship with actual effects. Therefore, we do not record loss function values during training. We recommend setting `--num_epochs` to a sufficiently large value, testing while training, and manually closing the training program after the effect converges. * The loss function value of Diffusion models has little relationship with actual effects. Therefore, we do not record loss function values during training. We recommend setting `--num_epochs` to a sufficiently large value, testing while training, and manually closing the training program after the effect converges.
* `--use_gradient_checkpointing` is usually enabled unless GPU VRAM is sufficient; `--use_gradient_checkpointing_offload` is enabled as needed. See [`diffsynth.core.gradient`](../API_Reference/core/gradient.md) for details. * `--use_gradient_checkpointing` is usually enabled unless GPU VRAM is sufficient; `--use_gradient_checkpointing_offload` is enabled as needed. See [`diffsynth.core.gradient`](../API_Reference/core/gradient.md) for details.
## Low VRAM Training
If you want to complete LoRA model training on a GPU with low VRAM, you can combine [Two-Stage Split Training](../Training/Split_Training.md) with `deepspeed_zero3_offload` training. First, split the preprocessing steps into the first stage and store the computed results on the hard disk. Second, read these results from the disk and train the denoising model. By using `deepspeed_zero3_offload`, the training parameters and optimizer states are offloaded to the CPU or disk. We provide examples for some models, primarily by specifying the `deepspeed` configuration via `--config_file`.
Please note that the `deepspeed_zero3_offload` mode is incompatible with PyTorch's native gradient checkpointing mechanism. To address this, we have adapted the `checkpointing` interface of `deepspeed`. Users need to fill the `activation_checkpointing` field in the `deepspeed` configuration to enable gradient checkpointing.
Below is the script for low VRAM model training for the Qwen-Image model:
```shell
accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \
--dataset_metadata_path data/example_image_dataset/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Qwen-Image_lora-splited-cache" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
--lora_rank 32 \
--task "sft:data_process" \
--use_gradient_checkpointing \
--dataset_num_workers 8 \
--find_unused_parameters
accelerate launch --config_file examples/qwen_image/model_training/special/low_vram_training/deepspeed_zero3_cpuoffload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path "./models/train/Qwen-Image_lora-splited-cache" \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Qwen-Image_lora" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
--lora_rank 32 \
--task "sft:train" \
--use_gradient_checkpointing \
--dataset_num_workers 8 \
--find_unused_parameters \
--initialize_model_on_cpu
```
The configurations for `accelerate` and `deepspeed` are as follows:
```yaml
compute_environment: LOCAL_MACHINE
debug: true
deepspeed_config:
deepspeed_config_file: examples/qwen_image/model_training/special/low_vram_training/ds_z3_cpuoffload.json
zero3_init_flag: true
distributed_type: DEEPSPEED
downcast_bf16: 'no'
enable_cpu_affinity: false
machine_rank: 0
main_training_function: main
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
```json
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 16,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"zero_optimization": {
"stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"offload_param": {
"device": "cpu",
"pin_memory": true
},
"overlap_comm": false,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": 5e7,
"stage3_prefetch_bucket_size": 5e7,
"stage3_param_persistence_threshold": 1e5,
"stage3_max_live_parameters": 1e8,
"stage3_max_reuse_distance": 1e8,
"stage3_gather_16bit_weights_on_model_save": true
},
"activation_checkpointing": {
"partition_activations": false,
"cpu_checkpointing": false,
"contiguous_memory_optimization": false
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
```

View File

@@ -77,7 +77,7 @@ distill_qwen/image.jpg,"精致肖像,水下少女,蓝裙飘逸,发丝轻
This sample dataset can be downloaded directly: This sample dataset can be downloaded directly:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
Then start LoRA distillation accelerated training: Then start LoRA distillation accelerated training:

View File

@@ -133,7 +133,7 @@ Anima 系列模型统一通过 [`examples/anima/model_training/train.py`](https:
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -195,7 +195,7 @@ FLUX 系列模型统一通过 [`examples/flux/model_training/train.py`](https://
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -145,7 +145,7 @@ FLUX.2 系列模型统一通过 [`examples/flux2/model_training/train.py`](https
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -217,7 +217,7 @@ LTX-2 系列模型统一通过 [`examples/ltx2/model_training/train.py`](https:/
我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -199,7 +199,7 @@ Qwen-Image 系列模型统一通过 [`examples/qwen_image/model_training/train.p
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文“模型总览”中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文“模型总览”中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -254,7 +254,7 @@ Wan 系列模型统一通过 [`examples/wanvideo/model_training/train.py`](https
我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例视频数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -134,7 +134,7 @@ Z-Image 系列模型统一通过 [`examples/z_image/model_training/train.py`](ht
我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集: 我们构建了一个样例图像数据集,以方便您进行测试,通过以下命令可以下载这个数据集:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。 我们为每个模型编写了推荐的训练脚本,请参考前文"模型总览"中的表格。关于如何编写模型训练脚本,请参考[模型训练](../Pipeline_Usage/Model_Training.md);更多高阶训练算法,请参考[训练框架详解](https://github.com/modelscope/DiffSynth-Studio/tree/main/docs/zh/Training/)。

View File

@@ -69,28 +69,16 @@ image_2.jpg,"a cat"
<details> <details>
<summary>样例图像数据集</summary> <summary>样例数据集</summary>
> ```shell > ```shell
> modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset > modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
> ``` > ```
> >
> 适用于 Qwen-Image、FLUX 等图像生成模型的训练。 > 适用于 Qwen-Image、FLUX 等图像生成模型的训练。
</details> </details>
<details>
<summary>样例视频数据集</summary>
> ```shell
> modelscope download --dataset DiffSynth-Studio/example_video_dataset --local_dir ./data/example_video_dataset
> ```
>
> 适用于 Wan 等视频生成模型的训练。
</details>
## 加载模型 ## 加载模型
类似于[推理时的模型加载](../Pipeline_Usage/Model_Inference.md#加载模型),我们支持多种方式配置模型路径,两种方式是可以混用的。 类似于[推理时的模型加载](../Pipeline_Usage/Model_Inference.md#加载模型),我们支持多种方式配置模型路径,两种方式是可以混用的。
@@ -243,3 +231,116 @@ accelerate launch --config_file examples/qwen_image/model_training/full/accelera
* 少数模型包含冗余参数,例如 Qwen-Image 的 DiT 部分最后一层的文本编码部分,在训练这些模型时,需设置 `--find_unused_parameters` 避免在多 GPU 训练中报错。出于对开源社区模型兼容性的考虑,我们不打算删除这些冗余参数。 * 少数模型包含冗余参数,例如 Qwen-Image 的 DiT 部分最后一层的文本编码部分,在训练这些模型时,需设置 `--find_unused_parameters` 避免在多 GPU 训练中报错。出于对开源社区模型兼容性的考虑,我们不打算删除这些冗余参数。
* Diffusion 模型的损失函数值与实际效果的关系不大,因此我们在训练过程中不会记录损失函数值。我们建议把 `--num_epochs` 设置为足够大的数值,边训边测,直至效果收敛后手动关闭训练程序。 * Diffusion 模型的损失函数值与实际效果的关系不大,因此我们在训练过程中不会记录损失函数值。我们建议把 `--num_epochs` 设置为足够大的数值,边训边测,直至效果收敛后手动关闭训练程序。
* `--use_gradient_checkpointing` 通常是开启的,除非 GPU 显存足够;`--use_gradient_checkpointing_offload` 则按需开启,详见 [`diffsynth.core.gradient`](../API_Reference/core/gradient.md)。 * `--use_gradient_checkpointing` 通常是开启的,除非 GPU 显存足够;`--use_gradient_checkpointing_offload` 则按需开启,详见 [`diffsynth.core.gradient`](../API_Reference/core/gradient.md)。
## 低显存训练
如果想在低显存显卡上完成 LoRA 模型训练,可以同时采用 [两阶段拆分训练](../Training/Split_Training.md) 和 `deepspeed_zero3_offload` 训练。 首先,将前处理过程拆分到第一阶段,将计算结果存储到硬盘中。其次,在第二阶段从硬盘中读取这些结果并进行去噪模型的训练,训练通过采用 `deepspeed_zero3_offload`,将训练参数和优化器状态 offload 到 cpu 或者 disk 上。我们为部分模型提供了样例,主要是通过 `--config_file` 指定 `deepspeed` 配置。
需要注意的是,`deepspeed_zero3_offload` 模式与 `pytorch` 原生的梯度检查点机制不兼容,我们为此对 `deepspeed` 的 `checkpointing` 接口做了适配。用户需要在 `deepspeed` 配置中填写 `activation_checkpointing` 字段以启用梯度检查点。
以下为 Qwen-Image 模型的低显存模型训练脚本:
```shell
accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \
--dataset_metadata_path data/example_image_dataset/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Qwen-Image_lora-splited-cache" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
--lora_rank 32 \
--task "sft:data_process" \
--use_gradient_checkpointing \
--dataset_num_workers 8 \
--find_unused_parameters
accelerate launch --config_file examples/qwen_image/model_training/special/low_vram_training/deepspeed_zero3_cpuoffload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path "./models/train/Qwen-Image_lora-splited-cache" \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/Qwen-Image_lora" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
--lora_rank 32 \
--task "sft:train" \
--use_gradient_checkpointing \
--dataset_num_workers 8 \
--find_unused_parameters \
--initialize_model_on_cpu
```
其中,`accelerate` 和 `deepspeed` 的配置文件如下:
```yaml
compute_environment: LOCAL_MACHINE
debug: true
deepspeed_config:
deepspeed_config_file: examples/qwen_image/model_training/special/low_vram_training/ds_z3_cpuoffload.json
zero3_init_flag: true
distributed_type: DEEPSPEED
downcast_bf16: 'no'
enable_cpu_affinity: false
machine_rank: 0
main_training_function: main
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
```json
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 16,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"zero_optimization": {
"stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"offload_param": {
"device": "cpu",
"pin_memory": true
},
"overlap_comm": false,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": 5e7,
"stage3_prefetch_bucket_size": 5e7,
"stage3_param_persistence_threshold": 1e5,
"stage3_max_live_parameters": 1e8,
"stage3_max_reuse_distance": 1e8,
"stage3_gather_16bit_weights_on_model_save": true
},
"activation_checkpointing": {
"partition_activations": false,
"cpu_checkpointing": false,
"contiguous_memory_optimization": false
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
```

View File

@@ -77,7 +77,7 @@ distill_qwen/image.jpg,"精致肖像,水下少女,蓝裙飘逸,发丝轻
这个样例数据集可以直接下载: 这个样例数据集可以直接下载:
```shell ```shell
modelscope download --dataset DiffSynth-Studio/example_image_dataset --local_dir ./data/example_image_dataset modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --local_dir ./data/diffsynth_example_dataset
``` ```
然后开始 LoRA 蒸馏加速训练: 然后开始 LoRA 蒸馏加速训练:

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "anima/anima-preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/anima/model_training/train.py \ accelerate launch examples/anima/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/anima/anima-preview \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/anima/anima-preview/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \ --model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \
@@ -11,4 +13,4 @@ accelerate launch examples/anima/model_training/train.py \
--remove_prefix_in_ckpt "pipe.dit." \ --remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/anima-preview_full" \ --output_path "./models/train/anima-preview_full" \
--trainable_models "dit" \ --trainable_models "dit" \
--use_gradient_checkpointing --use_gradient_checkpointing

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "anima/anima-preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/anima/model_training/train.py \ accelerate launch examples/anima/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/anima/anima-preview \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/anima/anima-preview/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \ --model_id_with_origin_paths "circlestone-labs/Anima:split_files/diffusion_models/anima-preview.safetensors,circlestone-labs/Anima:split_files/text_encoders/qwen_3_06b_base.safetensors,circlestone-labs/Anima:split_files/vae/qwen_image_vae.safetensors" \
@@ -13,4 +15,4 @@ accelerate launch examples/anima/model_training/train.py \
--lora_base_model "dit" \ --lora_base_model "dit" \
--lora_target_modules "" \ --lora_target_modules "" \
--lora_rank 32 \ --lora_rank 32 \
--use_gradient_checkpointing --use_gradient_checkpointing

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLEX.2-preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLEX.2-preview \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLEX.2-preview/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 200 \ --dataset_repeat 200 \
--model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
--dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \ --data_file_keys "image,kontext_images" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Krea-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-AttriCtrl/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl \
--dataset_metadata_path data/example_image_dataset/metadata_attrictrl.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl/metadata.csv \
--data_file_keys "image" \ --data_file_keys "image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Inpainting-Beta/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_inpaint.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta/metadata.csv \
--data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \ --data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Union-alpha/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha/metadata.csv \
--data_file_keys "image,controlnet_image" \ --data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Upscaler/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler/metadata.csv \
--data_file_keys "image,controlnet_image" \ --data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-IP-Adapter/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter \
--dataset_metadata_path data/example_image_dataset/metadata_ipadapter.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter/metadata.csv \
--data_file_keys "image,ipadapter_images" \ --data_file_keys "image,ipadapter_images" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-InfiniteYou/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou \
--dataset_metadata_path data/example_image_dataset/metadata_infiniteyou.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou/metadata.csv \
--data_file_keys "image,controlnet_image,infinityou_id_image" \ --data_file_keys "image,controlnet_image,infinityou_id_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-LoRA-Encoder/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-LoRA-Encoder \
--dataset_metadata_path data/example_image_dataset/metadata_lora_encoder.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-LoRA-Encoder/metadata.csv \
--data_file_keys "image" \ --data_file_keys "image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Nexus-Gen/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/Nexus-Gen \
--dataset_metadata_path data/example_image_dataset/metadata_nexusgen_edit.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/Nexus-Gen/metadata.csv \
--data_file_keys "image,nexus_gen_reference_image" \ --data_file_keys "image,nexus_gen_reference_image" \
--max_pixels 262144 \ --max_pixels 262144 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Step1X-Edit/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/Step1X-Edit \
--dataset_metadata_path data/example_image_dataset/metadata_step1x.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/Step1X-Edit/metadata.csv \
--data_file_keys "image,step1x_reference_image" \ --data_file_keys "image,step1x_reference_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLEX.2-preview/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLEX.2-preview \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLEX.2-preview/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "ostris/Flex.2-preview:Flex.2-preview.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
--dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \ --data_file_keys "image,kontext_images" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Krea-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Krea-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.1-Krea-dev:flux1-krea-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-AttriCtrl/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl \
--dataset_metadata_path data/example_image_dataset/metadata_attrictrl.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-AttriCtrl/metadata.csv \
--data_file_keys "image" \ --data_file_keys "image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Inpainting-Beta/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_inpaint.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Inpainting-Beta/metadata.csv \
--data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \ --data_file_keys "image,controlnet_image,controlnet_inpaint_mask" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Union-alpha/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_canny.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Union-alpha/metadata.csv \
--data_file_keys "image,controlnet_image" \ --data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-Controlnet-Upscaler/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler \
--dataset_metadata_path data/example_image_dataset/metadata_controlnet_upscale.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-Controlnet-Upscaler/metadata.csv \
--data_file_keys "image,controlnet_image" \ --data_file_keys "image,controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-EliGen/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-EliGen \
--dataset_metadata_path data/example_image_dataset/metadata_eligen.json \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-EliGen/metadata.json \
--data_file_keys "image,eligen_entity_masks" \ --data_file_keys "image,eligen_entity_masks" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-IP-Adapter/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter \
--dataset_metadata_path data/example_image_dataset/metadata_ipadapter.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-IP-Adapter/metadata.csv \
--data_file_keys "image,ipadapter_images" \ --data_file_keys "image,ipadapter_images" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev-InfiniteYou/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou \
--dataset_metadata_path data/example_image_dataset/metadata_infiniteyou.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev-InfiniteYou/metadata.csv \
--data_file_keys "image,controlnet_image,infinityou_id_image" \ --data_file_keys "image,controlnet_image,infinityou_id_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 100 \ --dataset_repeat 100 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Nexus-Gen/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/Nexus-Gen \
--dataset_metadata_path data/example_image_dataset/metadata_nexusgen_edit.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/Nexus-Gen/metadata.csv \
--data_file_keys "image,nexus_gen_reference_image" \ --data_file_keys "image,nexus_gen_reference_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/Step1X-Edit/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux/model_training/train.py \ accelerate launch examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/Step1X-Edit \
--dataset_metadata_path data/example_image_dataset/metadata_step1x.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/Step1X-Edit/metadata.csv \
--data_file_keys "image,step1x_reference_image" \ --data_file_keys "image,step1x_reference_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1 export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-Kontext-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev \
--dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-Kontext-dev/metadata.csv \
--data_file_keys "image,kontext_images" \ --data_file_keys "image,kontext_images" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1 export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux/FLUX.1-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \ accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux/FLUX.1-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux/FLUX.1-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-4B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -13,9 +15,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
# This script is tested on 8*A100 # This script is tested on 8*A100
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -14,9 +16,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ # accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -13,9 +15,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \
@@ -27,4 +32,4 @@ accelerate launch examples/flux2/model_training/train.py \
# --remove_prefix_in_ckpt "pipe.dit." \ # --remove_prefix_in_ckpt "pipe.dit." \
# --output_path "./models/train/FLUX.2-klein-base-4B_full" \ # --output_path "./models/train/FLUX.2-klein-base-4B_full" \
# --trainable_models "dit" \ # --trainable_models "dit" \
# --use_gradient_checkpointing # --use_gradient_checkpointing

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-9B/*" --local_dir ./data/diffsynth_example_dataset
# This script is tested on 8*A100 # This script is tested on 8*A100
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -14,9 +16,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ # accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 1 \ --dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-4B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-4B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-9B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-9B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -15,9 +17,12 @@ accelerate launch examples/flux2/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch examples/flux2/model_training/train.py \ # accelerate launch examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \
@@ -31,4 +36,4 @@ accelerate launch examples/flux2/model_training/train.py \
# --lora_base_model "dit" \ # --lora_base_model "dit" \
# --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out,single_transformer_blocks.20.attn.to_out,single_transformer_blocks.21.attn.to_out,single_transformer_blocks.22.attn.to_out,single_transformer_blocks.23.attn.to_out" \ # --lora_target_modules 
"to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out,single_transformer_blocks.20.attn.to_out,single_transformer_blocks.21.attn.to_out,single_transformer_blocks.22.attn.to_out,single_transformer_blocks.23.attn.to_out" \
# --lora_rank 32 \ # --lora_rank 32 \
# --use_gradient_checkpointing # --use_gradient_checkpointing

View File

@@ -1,9 +1,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1 export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-dev/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/flux2/model_training/train.py \ accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-dev \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-dev/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 1 \ --dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-dev:text_encoder/*.safetensors,black-forest-labs/FLUX.2-dev:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -2,9 +2,11 @@
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export CPU_AFFINITY_CONF=1 export CPU_AFFINITY_CONF=1
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-9B/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-9B/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-9B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-9B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-9B:vae/diffusion_pytorch_model.safetensors" \
@@ -17,9 +19,12 @@ accelerate launch --config_file examples/flux2/model_training/full/accelerate_co
--use_gradient_checkpointing --use_gradient_checkpointing
# Edit # Edit
# modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
# accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \ # accelerate launch --config_file examples/flux2/model_training/full/accelerate_config.yaml examples/flux2/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
# --dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
# --data_file_keys "image,edit_image" \ # --data_file_keys "image,edit_image" \
# --extra_inputs "edit_image" \ # --extra_inputs "edit_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \ --extra_inputs "input_audio" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-I2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \ --extra_inputs "input_audio" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-IC-LoRA-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-IC-LoRA-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av_iclora.json \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-IC-LoRA-splited/metadata.json \
--data_file_keys "video,input_audio,in_context_videos" \ --data_file_keys "video,input_audio,in_context_videos" \
--extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \ --extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-noaudio/*" --local_dir ./data/diffsynth_example_dataset
# single stage training # single stage training
# accelerate launch examples/ltx2/model_training/train.py \ # accelerate launch examples/ltx2/model_training/train.py \
# --dataset_base_path data/example_video_dataset/ltx2 \ # --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio \
# --dataset_metadata_path data/example_video_dataset/ltx2_t2v.csv \ # --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio/metadata.csv \
# --height 256 \ # --height 256 \
# --width 384 \ # --width 384 \
# --num_frames 25\ # --num_frames 25\
@@ -20,8 +22,8 @@
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-noaudio/metadata.csv \
--height 512 \ --height 512 \
--width 768 \ --width 768 \
--num_frames 121\ --num_frames 121\

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Single Stage Training not recommended for T2AV due to the large memory consumption. Please use the Splited Training instead. # Single Stage Training not recommended for T2AV due to the large memory consumption. Please use the Splited Training instead.
# accelerate launch examples/ltx2/model_training/train.py \ # accelerate launch examples/ltx2/model_training/train.py \
# --dataset_base_path data/example_video_dataset/ltx2 \ # --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
# --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ # --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
# --data_file_keys "video,input_audio" \ # --data_file_keys "video,input_audio" \
# --extra_inputs "input_audio" \ # --extra_inputs "input_audio" \
# --height 256 \ # --height 256 \
@@ -21,8 +23,8 @@
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \ --extra_inputs "input_audio" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-I2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-I2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-IC-LoRA-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-IC-LoRA-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av_iclora.json \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-IC-LoRA-splited/metadata.json \
--data_file_keys "video,input_audio,in_context_videos" \ --data_file_keys "video,input_audio,in_context_videos" \
--extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \ --extra_inputs "input_audio,in_context_videos,in_context_downsample_factor,frame_rate" \
--height 512 \ --height 512 \

View File

@@ -1,7 +1,9 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ltx2/LTX-2.3-T2AV-splited/*" --local_dir ./data/diffsynth_example_dataset
# Splited Training # Splited Training
accelerate launch examples/ltx2/model_training/train.py \ accelerate launch examples/ltx2/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/ltx2/LTX-2.3-T2AV-splited/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio" \ --extra_inputs "input_audio" \
--height 512 \ --height 512 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-360P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 352 \ --height 352 \
@@ -19,8 +21,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000] # boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 352 \ --height 352 \
@@ -36,4 +38,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--max_timestep_boundary 1 \ --max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \ --min_timestep_boundary 0.358 \
--use_gradient_checkpointing --use_gradient_checkpointing
# boundary corresponds to timesteps [0, 900) # boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-720P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 720 \ --height 720 \
@@ -19,8 +21,8 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
# boundary corresponds to timesteps [900, 1000] # boundary corresponds to timesteps [900, 1000]
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 720 \ --height 720 \
@@ -36,4 +38,4 @@ accelerate launch --config_file examples/wanvideo/model_training/full/accelerate
--max_timestep_boundary 1 \ --max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \ --min_timestep_boundary 0.358 \
--use_gradient_checkpointing --use_gradient_checkpointing
# boundary corresponds to timesteps [0, 900) # boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-360P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/mova/model_training/train.py \ accelerate launch examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 352 \ --height 352 \
@@ -20,24 +22,24 @@ accelerate launch examples/mova/model_training/train.py \
--use_gradient_checkpointing --use_gradient_checkpointing
# boundary corresponds to timesteps [900, 1000] # boundary corresponds to timesteps [900, 1000]
# accelerate launch examples/mova/model_training/train.py \ accelerate launch examples/mova/model_training/train.py \
# --dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV \
# --dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-360P-I2AV/metadata.csv \
# --data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
# --extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
# --height 352 \ --height 352 \
# --width 640 \ --width 640 \
# --num_frames 121 \ --num_frames 121 \
# --dataset_repeat 100 \ --dataset_repeat 100 \
# --model_id_with_origin_paths "openmoss/MOVA-360p:video_dit_2/diffusion_pytorch_model-*.safetensors,openmoss/MOVA-360p:audio_dit/diffusion_pytorch_model.safetensors,openmoss/MOVA-360p:dual_tower_bridge/diffusion_pytorch_model.safetensors,openmoss/MOVA-720p:audio_vae/diffusion_pytorch_model.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:Wan2.1_VAE.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:models_t5_umt5-xxl-enc-bf16.safetensors" \ --model_id_with_origin_paths "openmoss/MOVA-360p:video_dit_2/diffusion_pytorch_model-*.safetensors,openmoss/MOVA-360p:audio_dit/diffusion_pytorch_model.safetensors,openmoss/MOVA-360p:dual_tower_bridge/diffusion_pytorch_model.safetensors,openmoss/MOVA-720p:audio_vae/diffusion_pytorch_model.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:Wan2.1_VAE.safetensors,DiffSynth-Studio/Wan-Series-Converted-Safetensors:models_t5_umt5-xxl-enc-bf16.safetensors" \
# --learning_rate 1e-4 \ --learning_rate 1e-4 \
# --num_epochs 5 \ --num_epochs 5 \
# --remove_prefix_in_ckpt "pipe.video_dit." \ --remove_prefix_in_ckpt "pipe.video_dit." \
# --output_path "./models/train/MOVA-360p-I2AV_low_noise_lora" \ --output_path "./models/train/MOVA-360p-I2AV_low_noise_lora" \
# --lora_base_model "video_dit" \ --lora_base_model "video_dit" \
# --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \ --lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
# --lora_rank 32 \ --lora_rank 32 \
# --max_timestep_boundary 1 \ --max_timestep_boundary 1 \
# --min_timestep_boundary 0.358 \ --min_timestep_boundary 0.358 \
# --use_gradient_checkpointing --use_gradient_checkpointing
# boundary corresponds to timesteps [0, 900) # boundary corresponds to timesteps [0, 900)

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "mova/MOVA-720P-I2AV/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/mova/model_training/train.py \ accelerate launch examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 720 \ --height 720 \
@@ -21,8 +23,8 @@ accelerate launch examples/mova/model_training/train.py \
# boundary corresponds to timesteps [900, 1000] # boundary corresponds to timesteps [900, 1000]
accelerate launch examples/mova/model_training/train.py \ accelerate launch examples/mova/model_training/train.py \
--dataset_base_path data/example_video_dataset/ltx2 \ --dataset_base_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV \
--dataset_metadata_path data/example_video_dataset/ltx2_t2av.csv \ --dataset_metadata_path data/diffsynth_example_dataset/mova/MOVA-720P-I2AV/metadata.csv \
--data_file_keys "video,input_audio" \ --data_file_keys "video,input_audio" \
--extra_inputs "input_audio,input_image" \ --extra_inputs "input_audio,input_image" \
--height 720 \ --height 720 \
@@ -40,4 +42,4 @@ accelerate launch examples/mova/model_training/train.py \
--max_timestep_boundary 1 \ --max_timestep_boundary 1 \
--min_timestep_boundary 0.358 \ --min_timestep_boundary 0.358 \
--use_gradient_checkpointing --use_gradient_checkpointing
# boundary corresponds to timesteps [0, 900) # boundary corresponds to timesteps [0, 900)

View File

@@ -148,11 +148,11 @@ if __name__ == "__main__":
max_pixels=args.max_pixels, max_pixels=args.max_pixels,
height=args.height, height=args.height,
width=args.width, width=args.width,
height_division_factor=model.pipe.height_division_factor, height_division_factor=16,
width_division_factor=model.pipe.width_division_factor, width_division_factor=16,
num_frames=args.num_frames, num_frames=args.num_frames,
time_division_factor=model.pipe.time_division_factor, time_division_factor=4,
time_division_remainder=model.pipe.time_division_remainder, time_division_remainder=1,
frame_rate=args.frame_rate, frame_rate=args.frame_rate,
fix_frame_rate=True, fix_frame_rate=True,
) )
@@ -166,8 +166,8 @@ if __name__ == "__main__":
"input_audio": "input_audio":
ToAbsolutePath(args.dataset_base_path) >> LoadAudioWithTorchaudio( ToAbsolutePath(args.dataset_base_path) >> LoadAudioWithTorchaudio(
num_frames=args.num_frames, num_frames=args.num_frames,
time_division_factor=model.pipe.time_division_factor, time_division_factor=4,
time_division_remainder=model.pipe.time_division_remainder, time_division_remainder=1,
frame_rate=args.frame_rate, frame_rate=args.frame_rate,
), ),
"in_context_videos": "in_context_videos":

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.0/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-2512/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512 \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \ --data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch examples/qwen_image/model_training/train.py \
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py # python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py
# accelerate launch examples/qwen_image/model_training/train.py \ # accelerate launch examples/qwen_image/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image" \ # --data_file_keys "image,blockwise_controlnet_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \
# --dataset_repeat 50 \ # --dataset_repeat 50 \
@@ -35,4 +37,4 @@ accelerate launch examples/qwen_image/model_training/train.py \
# --trainable_models "blockwise_controlnet" \ # --trainable_models "blockwise_controlnet" \
# --extra_inputs "blockwise_controlnet_image" \ # --extra_inputs "blockwise_controlnet_image" \
# --use_gradient_checkpointing \ # --use_gradient_checkpointing \
# --find_unused_parameters # --find_unused_parameters

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \ --data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch examples/qwen_image/model_training/train.py \
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py # python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py
# accelerate launch examples/qwen_image/model_training/train.py \ # accelerate launch examples/qwen_image/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image" \ # --data_file_keys "image,blockwise_controlnet_image" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \
# --dataset_repeat 50 \ # --dataset_repeat 50 \
@@ -35,4 +37,4 @@ accelerate launch examples/qwen_image/model_training/train.py \
# --trainable_models "blockwise_controlnet" \ # --trainable_models "blockwise_controlnet" \
# --extra_inputs "blockwise_controlnet_image" \ # --extra_inputs "blockwise_controlnet_image" \
# --use_gradient_checkpointing \ # --use_gradient_checkpointing \
# --find_unused_parameters # --find_unused_parameters

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \ --data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 400 \ --dataset_repeat 400 \
@@ -21,8 +23,8 @@ accelerate launch --config_file examples/qwen_image/model_training/full/accelera
# python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Inpaint-Initialize.py # python examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Inpaint-Initialize.py
# accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \ # accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config.yaml examples/qwen_image/model_training/train.py \
# --dataset_base_path data/example_image_dataset \ # --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
# --dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \ # --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
# --data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \ # --data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
# --max_pixels 1048576 \ # --max_pixels 1048576 \
# --dataset_repeat 50 \ # --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-Full/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit \
--dataset_metadata_path data/example_image_dataset/metadata_edit.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit/metadata.csv \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,8 +1,8 @@
# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered-Control/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset/layer \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control \
--dataset_metadata_path data/example_image_dataset/layer/metadata_layered_control.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered-Control/metadata.json \
--data_file_keys "image,layer_input_image" \ --data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,8 +1,8 @@
# Example Dataset: https://modelscope.cn/datasets/DiffSynth-Studio/example_image_dataset/tree/master/layer modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Layered/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset/layer \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered \
--dataset_metadata_path data/example_image_dataset/layer/metadata_layered.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Layered/metadata.json \
--data_file_keys "image,layer_input_image" \ --data_file_keys "image,layer_input_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \ accelerate launch --config_file examples/qwen_image/model_training/full/accelerate_config_zero2offload.yaml examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.0/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.0/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/FireRed-Image-Edit-1.1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/FireRed-Image-Edit-1.1/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-2512/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512 \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-2512/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "Qwen/Qwen-Image-2512:transformer/diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_canny.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Canny/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \ --data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_depth.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Depth/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image" \ --data_file_keys "image,blockwise_controlnet_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint \
--dataset_metadata_path data/example_image_dataset/metadata_blockwise_controlnet_inpaint.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Blockwise-ControlNet-Inpaint/metadata.csv \
--data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \ --data_file_keys "image,blockwise_controlnet_image,blockwise_controlnet_inpaint_mask" \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-Full/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full \
--dataset_metadata_path data/example_image_dataset/metadata.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-Full/metadata.csv \
--max_pixels 1048576 \ --max_pixels 1048576 \
--dataset_repeat 50 \ --dataset_repeat 50 \
--model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \ --model_id_with_origin_paths "DiffSynth-Studio/Qwen-Image-Distill-Full:diffusion_pytorch_model*.safetensors,Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Distill-LoRA/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-LoRA \
--dataset_metadata_path data/example_image_dataset/metadata_distill_qwen_image.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Distill-LoRA/metadata.csv \
--data_file_keys "image" \ --data_file_keys "image" \
--extra_inputs "seed,rand_device,num_inference_steps,cfg_scale" \ --extra_inputs "seed,rand_device,num_inference_steps,cfg_scale" \
--height 1328 \ --height 1328 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2509/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2509/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit-2511/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511 \
--dataset_metadata_path data/example_image_dataset/metadata_qwen_imgae_edit_multi.json \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit-2511/metadata.json \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

View File

@@ -1,6 +1,8 @@
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "qwen_image/Qwen-Image-Edit/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/qwen_image/model_training/train.py \ accelerate launch examples/qwen_image/model_training/train.py \
--dataset_base_path data/example_image_dataset \ --dataset_base_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit \
--dataset_metadata_path data/example_image_dataset/metadata_edit.csv \ --dataset_metadata_path data/diffsynth_example_dataset/qwen_image/Qwen-Image-Edit/metadata.csv \
--data_file_keys "image,edit_image" \ --data_file_keys "image,edit_image" \
--extra_inputs "edit_image" \ --extra_inputs "edit_image" \
--max_pixels 1048576 \ --max_pixels 1048576 \

Some files were not shown because too many files have changed in this diff Show More