mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
Merge branch 'modelscope:main' into wan_rope
This commit is contained in:
@@ -1 +1,2 @@
|
||||
from .npu_compatible_device import parse_device_type, parse_nccl_backend, get_available_device_type
|
||||
from .npu_compatible_device import parse_device_type, parse_nccl_backend, get_available_device_type, get_device_name
|
||||
from .npu_compatible_device import IS_NPU_AVAILABLE
|
||||
|
||||
@@ -2,7 +2,7 @@ import torch, copy
|
||||
from typing import Union
|
||||
from .initialization import skip_model_initialization
|
||||
from .disk_map import DiskMap
|
||||
from ..device import parse_device_type
|
||||
from ..device import parse_device_type, get_device_name, IS_NPU_AVAILABLE
|
||||
|
||||
|
||||
class AutoTorchModule(torch.nn.Module):
|
||||
@@ -63,7 +63,7 @@ class AutoTorchModule(torch.nn.Module):
|
||||
return r
|
||||
|
||||
def check_free_vram(self):
|
||||
device = self.computation_device if self.computation_device != "npu" else "npu:0"
|
||||
device = self.computation_device if not IS_NPU_AVAILABLE else get_device_name()
|
||||
gpu_mem_state = getattr(torch, self.computation_device_type).mem_get_info(device)
|
||||
used_memory = (gpu_mem_state[1] - gpu_mem_state[0]) / (1024**3)
|
||||
return used_memory < self.vram_limit
|
||||
|
||||
@@ -7,6 +7,7 @@ from ..core import AutoTorchModule, AutoWrappedLinear, load_state_dict, ModelCon
|
||||
from ..utils.lora import GeneralLoRALoader
|
||||
from ..models.model_loader import ModelPool
|
||||
from ..utils.controlnet import ControlNetInput
|
||||
from ..core.device import get_device_name, IS_NPU_AVAILABLE
|
||||
|
||||
|
||||
class PipelineUnit:
|
||||
@@ -177,7 +178,7 @@ class BasePipeline(torch.nn.Module):
|
||||
|
||||
|
||||
def get_vram(self):
|
||||
device = self.device if self.device != "npu" else "npu:0"
|
||||
device = self.device if not IS_NPU_AVAILABLE else get_device_name()
|
||||
return getattr(torch, self.device_type).mem_get_info(device)[1] / (1024 ** 3)
|
||||
|
||||
def get_module(self, model, name):
|
||||
|
||||
@@ -8,6 +8,7 @@ from torch.nn.utils.rnn import pad_sequence
|
||||
|
||||
from torch.nn import RMSNorm
|
||||
from ..core.attention import attention_forward
|
||||
from ..core.device.npu_compatible_device import IS_NPU_AVAILABLE
|
||||
from ..core.gradient import gradient_checkpoint_forward
|
||||
|
||||
|
||||
@@ -315,7 +316,10 @@ class RopeEmbedder:
|
||||
result = []
|
||||
for i in range(len(self.axes_dims)):
|
||||
index = ids[:, i]
|
||||
result.append(self.freqs_cis[i][index])
|
||||
if IS_NPU_AVAILABLE:
|
||||
result.append(torch.index_select(self.freqs_cis[i], 0, index))
|
||||
else:
|
||||
result.append(self.freqs_cis[i][index])
|
||||
return torch.cat(result, dim=-1)
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ All sample code provided by this project supports NVIDIA GPUs by default, requir
|
||||
AMD provides PyTorch packages based on ROCm, so most models can run without code changes. A small number of models may not be compatible due to their reliance on CUDA-specific instructions.
|
||||
|
||||
## Ascend NPU
|
||||
|
||||
### Inference
|
||||
When using Ascend NPU, you need to replace `"cuda"` with `"npu"` in your code.
|
||||
|
||||
For example, here is the inference code for **Wan2.1-T2V-1.3B**, modified for Ascend NPU:
|
||||
@@ -22,6 +22,7 @@ For example, here is the inference code for **Wan2.1-T2V-1.3B**, modified for As
|
||||
import torch
|
||||
from diffsynth.utils.data import save_video, VideoData
|
||||
from diffsynth.pipelines.wan_video import WanVideoPipeline, ModelConfig
|
||||
from diffsynth.core.device.npu_compatible_device import get_device_name
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
@@ -46,7 +47,7 @@ pipe = WanVideoPipeline.from_pretrained(
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="Wan-AI/Wan2.1-T2V-1.3B", origin_file_pattern="google/umt5-xxl/"),
|
||||
- vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 2,
|
||||
+ vram_limit=torch.npu.mem_get_info("npu:0")[1] / (1024 ** 3) - 2,
|
||||
+ vram_limit=torch.npu.mem_get_info(get_device_name())[1] / (1024 ** 3) - 2,
|
||||
)
|
||||
|
||||
video = pipe(
|
||||
@@ -56,3 +57,28 @@ video = pipe(
|
||||
)
|
||||
save_video(video, "video.mp4", fps=15, quality=5)
|
||||
```
|
||||
|
||||
### Training
|
||||
Sample NPU launch scripts have been added for each model family. They are stored under `examples/xxx/model_training/special/npu_scripts`, for example `examples/wanvideo/model_training/special/npu_scripts/Wan2.2-T2V-A14B-NPU.sh`.
|
||||
|
||||
The NPU training scripts set NPU-specific environment variables that improve performance, and enable the parameters required by specific models.
|
||||
|
||||
#### Environment variables
|
||||
```shell
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
```
|
||||
`expandable_segments:<value>`: Enables the memory pool's expandable segments feature, i.e. the virtual memory feature.
|
||||
|
||||
```shell
|
||||
export CPU_AFFINITY_CONF=1
|
||||
```
|
||||
0 or unset: CPU core binding is disabled
|
||||
|
||||
1: coarse-grained CPU core binding is enabled
|
||||
|
||||
2: fine-grained CPU core binding is enabled
|
||||
|
||||
#### Parameters for specific models
|
||||
| Model | Parameter | Note |
|
||||
|----------------|---------------------------|-------------------|
|
||||
| Wan 14B series | --initialize_model_on_cpu | The 14B model needs to be initialized on the CPU |
|
||||
@@ -30,11 +30,16 @@ pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6
|
||||
|
||||
* **Ascend NPU**
|
||||
|
||||
Ascend NPU support is provided via the `torch-npu` package. Taking version `2.1.0.post17` (as of the article update date: December 15, 2025) as an example, run the following command:
|
||||
1. Install [CANN](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/softwareinst/instg/instg_quick.html?Mode=PmIns&InstallType=local&OS=openEuler&Software=cannToolKit) by following the official documentation.
|
||||
|
||||
```shell
|
||||
pip install torch-npu==2.1.0.post17
|
||||
```
|
||||
2. Install from source
|
||||
```shell
|
||||
git clone https://github.com/modelscope/DiffSynth-Studio.git
|
||||
cd DiffSynth-Studio
|
||||
# aarch64/ARM
|
||||
pip install -e .[npu_aarch64] --extra-index-url "https://download.pytorch.org/whl/cpu"
|
||||
# x86
|
||||
pip install -e .[npu]
|
||||
|
||||
When using Ascend NPU, please replace `"cuda"` with `"npu"` in your Python code. For details, see [NPU Support](/docs/en/Pipeline_Usage/GPU_support.md#ascend-npu).
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
AMD 提供了基于 ROCm 的 torch 包,所以大多数模型无需修改代码即可运行,少数模型由于依赖特定的 cuda 指令无法运行。
|
||||
|
||||
## Ascend NPU
|
||||
|
||||
### 推理
|
||||
使用 Ascend NPU 时,需把代码中的 `"cuda"` 改为 `"npu"`。
|
||||
|
||||
例如,Wan2.1-T2V-1.3B 的推理代码:
|
||||
@@ -22,6 +22,7 @@ AMD 提供了基于 ROCm 的 torch 包,所以大多数模型无需修改代码
|
||||
import torch
|
||||
from diffsynth.utils.data import save_video, VideoData
|
||||
from diffsynth.pipelines.wan_video import WanVideoPipeline, ModelConfig
|
||||
from diffsynth.core.device.npu_compatible_device import get_device_name
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
@@ -33,7 +34,7 @@ vram_config = {
|
||||
+ "preparing_device": "npu",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
- "computation_device": "cuda",
|
||||
+ "preparing_device": "npu",
|
||||
+ "computation_device": "npu",
|
||||
}
|
||||
pipe = WanVideoPipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
@@ -46,7 +47,7 @@ pipe = WanVideoPipeline.from_pretrained(
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="Wan-AI/Wan2.1-T2V-1.3B", origin_file_pattern="google/umt5-xxl/"),
|
||||
- vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 2,
|
||||
+ vram_limit=torch.npu.mem_get_info("npu:0")[1] / (1024 ** 3) - 2,
|
||||
+ vram_limit=torch.npu.mem_get_info(get_device_name())[1] / (1024 ** 3) - 2,
|
||||
)
|
||||
|
||||
video = pipe(
|
||||
@@ -56,3 +57,28 @@ video = pipe(
|
||||
)
|
||||
save_video(video, "video.mp4", fps=15, quality=5)
|
||||
```
|
||||
|
||||
### 训练
|
||||
当前已为每类模型添加 NPU 的启动脚本样例,脚本存放在 `examples/xxx/model_training/special/npu_scripts` 目录下,例如 `examples/wanvideo/model_training/special/npu_scripts/Wan2.2-T2V-A14B-NPU.sh`。
|
||||
|
||||
在NPU训练脚本中,添加了可以优化性能的NPU特有环境变量,并针对特定模型开启了相关参数。
|
||||
|
||||
#### 环境变量
|
||||
```shell
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
```
|
||||
`expandable_segments:<value>`: 使能内存池扩展段功能,即虚拟内存特征。
|
||||
|
||||
```shell
|
||||
export CPU_AFFINITY_CONF=1
|
||||
```
|
||||
设置0或未设置: 表示不启用绑核功能
|
||||
|
||||
1: 表示开启粗粒度绑核
|
||||
|
||||
2: 表示开启细粒度绑核
|
||||
|
||||
#### 特定模型需要开启的参数
|
||||
| 模型 | 参数 | 备注 |
|
||||
|-----------|------|-------------------|
|
||||
| Wan 14B系列 | --initialize_model_on_cpu | 14B模型需要在cpu上进行初始化 |
|
||||
@@ -30,11 +30,16 @@ pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6
|
||||
|
||||
* Ascend NPU
|
||||
|
||||
Ascend NPU 通过 `torch-npu` 包提供支持,以 `2.1.0.post17` 版本(本文更新于 2025 年 12 月 15 日)为例,请运行以下命令
|
||||
1. 通过官方文档安装[CANN](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/softwareinst/instg/instg_quick.html?Mode=PmIns&InstallType=local&OS=openEuler&Software=cannToolKit)
|
||||
|
||||
```shell
|
||||
pip install torch-npu==2.1.0.post17
|
||||
```
|
||||
2. 从源码安装
|
||||
```shell
|
||||
git clone https://github.com/modelscope/DiffSynth-Studio.git
|
||||
cd DiffSynth-Studio
|
||||
# aarch64/ARM
|
||||
pip install -e .[npu_aarch64] --extra-index-url "https://download.pytorch.org/whl/cpu"
|
||||
# x86
|
||||
pip install -e .[npu]
|
||||
|
||||
使用 Ascend NPU 时,请将 Python 代码中的 `"cuda"` 改为 `"npu"`,详见[NPU 支持](/docs/zh/Pipeline_Usage/GPU_support.md#ascend-npu)。
|
||||
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
|
||||
--dataset_base_path data/example_image_dataset \
|
||||
--dataset_metadata_path data/example_image_dataset/metadata_kontext.csv \
|
||||
--data_file_keys "image,kontext_images" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 400 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.1-Kontext-dev:flux1-kontext-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 1 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/FLUX.1-Kontext-dev_full" \
|
||||
--trainable_models "dit" \
|
||||
--extra_inputs "kontext_images" \
|
||||
--use_gradient_checkpointing
|
||||
@@ -0,0 +1,15 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/flux/model_training/full/accelerate_config_zero2offload.yaml examples/flux/model_training/train.py \
|
||||
--dataset_base_path data/example_image_dataset \
|
||||
--dataset_metadata_path data/example_image_dataset/metadata.csv \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 400 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.1-dev:flux1-dev.safetensors,black-forest-labs/FLUX.1-dev:text_encoder/model.safetensors,black-forest-labs/FLUX.1-dev:text_encoder_2/*.safetensors,black-forest-labs/FLUX.1-dev:ae.safetensors" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 1 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/FLUX.1-dev_full" \
|
||||
--trainable_models "dit" \
|
||||
--use_gradient_checkpointing
|
||||
@@ -0,0 +1,38 @@
|
||||
# Due to memory limitations, split training is required to train the model on NPU
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch examples/qwen_image/model_training/train.py \
|
||||
--dataset_base_path data/example_image_dataset \
|
||||
--dataset_metadata_path data/example_image_dataset/metadata.csv \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 1 \
|
||||
--model_id_with_origin_paths "Qwen/Qwen-Image-Edit-2509:text_encoder/model*.safetensors,Qwen/Qwen-Image-Edit-2509:vae/diffusion_pytorch_model.safetensors" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 5 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Qwen-Image-Edit-2509-LoRA-splited-cache" \
|
||||
--lora_base_model "dit" \
|
||||
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
|
||||
--lora_rank 32 \
|
||||
--use_gradient_checkpointing \
|
||||
--dataset_num_workers 8 \
|
||||
--find_unused_parameters \
|
||||
--task "sft:data_process"
|
||||
|
||||
accelerate launch examples/qwen_image/model_training/train.py \
|
||||
--dataset_base_path "./models/train/Qwen-Image-Edit-2509-LoRA-splited-cache" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "Qwen/Qwen-Image-Edit-2509:transformer/diffusion_pytorch_model*.safetensors" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 5 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Qwen-Image-Edit-2509-LoRA-splited" \
|
||||
--lora_base_model "dit" \
|
||||
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
|
||||
--lora_rank 32 \
|
||||
--use_gradient_checkpointing \
|
||||
--dataset_num_workers 8 \
|
||||
--find_unused_parameters \
|
||||
--task "sft:train"
|
||||
@@ -0,0 +1,38 @@
|
||||
# Due to memory limitations, split training is required to train the model on NPU
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch examples/qwen_image/model_training/train.py \
|
||||
--dataset_base_path data/example_image_dataset \
|
||||
--dataset_metadata_path data/example_image_dataset/metadata.csv \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 1 \
|
||||
--model_id_with_origin_paths "Qwen/Qwen-Image:text_encoder/model*.safetensors,Qwen/Qwen-Image:vae/diffusion_pytorch_model.safetensors" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 5 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Qwen-Image-LoRA-splited-cache" \
|
||||
--lora_base_model "dit" \
|
||||
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
|
||||
--lora_rank 32 \
|
||||
--use_gradient_checkpointing \
|
||||
--dataset_num_workers 8 \
|
||||
--find_unused_parameters \
|
||||
--task "sft:data_process"
|
||||
|
||||
accelerate launch examples/qwen_image/model_training/train.py \
|
||||
--dataset_base_path "./models/train/Qwen-Image-LoRA-splited-cache" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "Qwen/Qwen-Image:transformer/diffusion_pytorch_model*.safetensors" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 5 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Qwen-Image-LoRA-splited" \
|
||||
--lora_base_model "dit" \
|
||||
--lora_target_modules "to_q,to_k,to_v,add_q_proj,add_k_proj,add_v_proj,to_out.0,to_add_out,img_mlp.net.2,img_mod.1,txt_mlp.net.2,txt_mod.1" \
|
||||
--lora_rank 32 \
|
||||
--use_gradient_checkpointing \
|
||||
--dataset_num_workers 8 \
|
||||
--find_unused_parameters \
|
||||
--task "sft:train"
|
||||
@@ -0,0 +1,16 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata.csv \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "Wan-AI/Wan2.1-T2V-14B:diffusion_pytorch_model*.safetensors,Wan-AI/Wan2.1-T2V-14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.1-T2V-14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Wan2.1-T2V-14B_full" \
|
||||
--trainable_models "dit" \
|
||||
--initialize_model_on_cpu
|
||||
@@ -0,0 +1,38 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata.csv \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--num_frames 49 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "Wan-AI/Wan2.2-T2V-A14B:high_noise_model/diffusion_pytorch_model*.safetensors,Wan-AI/Wan2.2-T2V-A14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.2-T2V-A14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Wan2.2-T2V-A14B_high_noise_full" \
|
||||
--trainable_models "dit" \
|
||||
--max_timestep_boundary 0.417 \
|
||||
--min_timestep_boundary 0 \
|
||||
--initialize_model_on_cpu
|
||||
# boundary corresponds to timesteps [875, 1000]
|
||||
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata.csv \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--num_frames 49 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "Wan-AI/Wan2.2-T2V-A14B:low_noise_model/diffusion_pytorch_model*.safetensors,Wan-AI/Wan2.2-T2V-A14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.2-T2V-A14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Wan2.2-T2V-A14B_low_noise_full" \
|
||||
--trainable_models "dit" \
|
||||
--max_timestep_boundary 1 \
|
||||
--min_timestep_boundary 0.417 \
|
||||
--initialize_model_on_cpu
|
||||
# boundary corresponds to timesteps [0, 875)
|
||||
@@ -0,0 +1,45 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
|
||||
--data_file_keys "video,vace_video,vace_reference_image" \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--num_frames 17 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "PAI/Wan2.2-VACE-Fun-A14B:high_noise_model/diffusion_pytorch_model*.safetensors,PAI/Wan2.2-VACE-Fun-A14B:models_t5_umt5-xxl-enc-bf16.pth,PAI/Wan2.2-VACE-Fun-A14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.vace." \
|
||||
--output_path "./models/train/Wan2.2-VACE-Fun-A14B_high_noise_full" \
|
||||
--trainable_models "vace" \
|
||||
--extra_inputs "vace_video,vace_reference_image" \
|
||||
--use_gradient_checkpointing_offload \
|
||||
--max_timestep_boundary 0.358 \
|
||||
--min_timestep_boundary 0 \
|
||||
--initialize_model_on_cpu
|
||||
# boundary corresponds to timesteps [900, 1000]
|
||||
|
||||
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata_vace.csv \
|
||||
--data_file_keys "video,vace_video,vace_reference_image" \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--num_frames 17 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "PAI/Wan2.2-VACE-Fun-A14B:low_noise_model/diffusion_pytorch_model*.safetensors,PAI/Wan2.2-VACE-Fun-A14B:models_t5_umt5-xxl-enc-bf16.pth,PAI/Wan2.2-VACE-Fun-A14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.vace." \
|
||||
--output_path "./models/train/Wan2.2-VACE-Fun-A14B_low_noise_full" \
|
||||
--trainable_models "vace" \
|
||||
--extra_inputs "vace_video,vace_reference_image" \
|
||||
--use_gradient_checkpointing_offload \
|
||||
--max_timestep_boundary 1 \
|
||||
--min_timestep_boundary 0.358 \
|
||||
--initialize_model_on_cpu
|
||||
# boundary corresponds to timesteps [0, 900)
|
||||
@@ -0,0 +1,16 @@
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
export CPU_AFFINITY_CONF=1
|
||||
|
||||
accelerate launch --config_file examples/z_image/model_training/full/accelerate_config.yaml examples/z_image/model_training/train.py \
|
||||
--dataset_base_path data/example_image_dataset \
|
||||
--dataset_metadata_path data/example_image_dataset/metadata.csv \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 400 \
|
||||
--model_id_with_origin_paths "Tongyi-MAI/Z-Image-Turbo:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/Z-Image-Turbo_full" \
|
||||
--trainable_models "dit" \
|
||||
--use_gradient_checkpointing \
|
||||
--dataset_num_workers 8
|
||||
@@ -34,7 +34,19 @@ classifiers = [
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["./"]
|
||||
include = ["diffsynth"]
|
||||
include = ["diffsynth", "diffsynth.*"]
|
||||
|
||||
[project.optional-dependencies]
|
||||
npu_aarch64 = [
|
||||
"torch==2.7.1",
|
||||
"torch-npu==2.7.1",
|
||||
"torchvision==0.22.1"
|
||||
]
|
||||
npu = [
|
||||
"torch==2.7.1+cpu",
|
||||
"torch-npu==2.7.1",
|
||||
"torchvision==0.22.1+cpu"
|
||||
]
|
||||
|
||||
[tool.setuptools]
|
||||
include-package-data = true
|
||||
|
||||
Reference in New Issue
Block a user