support wan full training

Zhongjie Duan, 2025-02-26 11:38:51 +08:00 (committed by GitHub)
2 changed files with 67 additions and 12 deletions

Changed file: Wan-Video README (Train section)

@@ -42,7 +42,7 @@ https://github.com/user-attachments/assets/c0bdd5ca-292f-45ed-b9bc-afe193156e75
 ## Train
-We support Wan-Video LoRA training. Here is a tutorial.
+We support Wan-Video LoRA training and full training. Here is a tutorial.
 Step 1: Install additional packages
@@ -99,9 +99,12 @@ data/example_dataset/
 Step 4: Train
+LoRA training:
 ```shell
 CUDA_VISIBLE_DEVICES="0" python examples/wanvideo/train_wan_t2v.py \
   --task train \
+  --train_architecture lora \
   --dataset_path data/example_dataset \
   --output_path ./models \
   --dit_path "models/Wan-AI/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors" \
@@ -115,8 +118,26 @@ CUDA_VISIBLE_DEVICES="0" python examples/wanvideo/train_wan_t2v.py \
   --use_gradient_checkpointing
 ```
+Full training:
+```shell
+CUDA_VISIBLE_DEVICES="0" python examples/wanvideo/train_wan_t2v.py \
+  --task train \
+  --train_architecture full \
+  --dataset_path data/example_dataset \
+  --output_path ./models \
+  --dit_path "models/Wan-AI/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors" \
+  --steps_per_epoch 500 \
+  --max_epochs 10 \
+  --learning_rate 1e-4 \
+  --accumulate_grad_batches 1 \
+  --use_gradient_checkpointing
+```
 Step 5: Test
+Test LoRA:
 ```python
 import torch
 from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData
@@ -129,16 +150,39 @@ model_manager.load_models([
"models/Wan-AI/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth", "models/Wan-AI/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth",
]) ])
model_manager.load_lora("models/lightning_logs/version_1/checkpoints/epoch=0-step=500.ckpt", lora_alpha=1.0) model_manager.load_lora("models/lightning_logs/version_1/checkpoints/epoch=0-step=500.ckpt", lora_alpha=1.0)
pipe = WanVideoPipeline.from_model_manager(model_manager, device="cuda") pipe = WanVideoPipeline.from_model_manager(model_manager, device="cuda")
pipe.enable_vram_management(num_persistent_param_in_dit=None) pipe.enable_vram_management(num_persistent_param_in_dit=None)
# Text-to-video
video = pipe( video = pipe(
prompt="...", prompt="...",
negative_prompt="...", negative_prompt="...",
num_inference_steps=50, num_inference_steps=50,
seed=0, tiled=True seed=0, tiled=True
) )
save_video(video, "video_with_lora.mp4", fps=30, quality=5) save_video(video, "video.mp4", fps=30, quality=5)
```
Test fine-tuned base model:
```python
import torch
from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData
model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
model_manager.load_models([
"models/lightning_logs/version_1/checkpoints/epoch=0-step=500.ckpt",
"models/Wan-AI/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth",
"models/Wan-AI/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth",
])
pipe = WanVideoPipeline.from_model_manager(model_manager, device="cuda")
pipe.enable_vram_management(num_persistent_param_in_dit=None)
video = pipe(
prompt="...",
negative_prompt="...",
num_inference_steps=50,
seed=0, tiled=True
)
save_video(video, "video.mp4", fps=30, quality=5)
``` ```
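Note the difference between the two test snippets above: for LoRA, the base DiT weights stay in the `model_manager.load_models([...])` list and the checkpoint is applied on top via `model_manager.load_lora(...)`; for the fine-tuned base model, the Lightning checkpoint replaces `diffusion_pytorch_model.safetensors` in the `load_models` list. As a quick sanity check that the intended parameters are actually trainable before launching a run, something like the following sketch can be used (`count_trainable_parameters` is a hypothetical helper, not part of this commit):

```python
import torch.nn as nn

def count_trainable_parameters(module: nn.Module) -> tuple[int, int]:
    # Returns (trainable, total) parameter counts. With --train_architecture
    # full, the two counts should match for the denoising model; with LoRA,
    # only the injected adapter weights require gradients, so trainable << total.
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    total = sum(p.numel() for p in module.parameters())
    return trainable, total
```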

Changed file: examples/wanvideo/train_wan_t2v.py

@@ -134,7 +134,7 @@ class TensorDataset(torch.utils.data.Dataset):
 class LightningModelForTrain(pl.LightningModule):
-    def __init__(self, dit_path, learning_rate=1e-5, lora_rank=4, lora_alpha=4, lora_target_modules="q,k,v,o,ffn.0,ffn.2", init_lora_weights="kaiming", use_gradient_checkpointing=True):
+    def __init__(self, dit_path, learning_rate=1e-5, lora_rank=4, lora_alpha=4, train_architecture="lora", lora_target_modules="q,k,v,o,ffn.0,ffn.2", init_lora_weights="kaiming", use_gradient_checkpointing=True):
         super().__init__()
         model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
         model_manager.load_models([dit_path])
@@ -142,13 +142,16 @@ class LightningModelForTrain(pl.LightningModule):
         self.pipe = WanVideoPipeline.from_model_manager(model_manager)
         self.pipe.scheduler.set_timesteps(1000, training=True)
         self.freeze_parameters()
-        self.add_lora_to_model(
-            self.pipe.denoising_model(),
-            lora_rank=lora_rank,
-            lora_alpha=lora_alpha,
-            lora_target_modules=lora_target_modules,
-            init_lora_weights=init_lora_weights,
-        )
+        if train_architecture == "lora":
+            self.add_lora_to_model(
+                self.pipe.denoising_model(),
+                lora_rank=lora_rank,
+                lora_alpha=lora_alpha,
+                lora_target_modules=lora_target_modules,
+                init_lora_weights=init_lora_weights,
+            )
+        else:
+            self.pipe.denoising_model().requires_grad_(True)
         self.learning_rate = learning_rate
         self.use_gradient_checkpointing = use_gradient_checkpointing
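The branch above relies on `freeze_parameters()` having already disabled gradients on everything, so full training only has to switch them back on. A minimal sketch of that freeze-then-unfreeze pattern, under the assumption that `freeze_parameters()` simply calls `requires_grad_(False)` on all parameters (illustration only, not code from this commit):

```python
import torch.nn as nn

def setup_trainable(denoising_model: nn.Module, train_architecture: str) -> None:
    # Assumed equivalent of self.freeze_parameters(): disable all gradients.
    for p in denoising_model.parameters():
        p.requires_grad_(False)
    if train_architecture == "lora":
        # LoRA path: adapters injected afterwards (add_lora_to_model) are
        # freshly created modules, so their weights are trainable by default.
        pass
    else:
        # Full-training path: re-enable gradients on the whole model, matching
        # self.pipe.denoising_model().requires_grad_(True) in the diff.
        denoising_model.requires_grad_(True)
```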
@@ -384,6 +387,13 @@ def parse_args():
action="store_true", action="store_true",
help="Whether to use gradient checkpointing.", help="Whether to use gradient checkpointing.",
) )
parser.add_argument(
"--train_architecture",
type=str,
default="lora",
choices=["lora", "full"],
help="Model structure to train. LoRA training or full training.",
)
args = parser.parse_args() args = parser.parse_args()
return args return args
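Because the new flag declares `choices=["lora", "full"]`, argparse rejects any other value at parse time rather than failing later in training. A standalone snippet showing that behavior (illustrative only):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--train_architecture", type=str, default="lora", choices=["lora", "full"]
)

assert parser.parse_args([]).train_architecture == "lora"  # default value
assert parser.parse_args(["--train_architecture", "full"]).train_architecture == "full"
# parser.parse_args(["--train_architecture", "qlora"]) would exit with an
# "invalid choice: 'qlora'" error.
```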
@@ -434,6 +444,7 @@ def train(args):
     model = LightningModelForTrain(
         dit_path=args.dit_path,
         learning_rate=args.learning_rate,
+        train_architecture=args.train_architecture,
         lora_rank=args.lora_rank,
         lora_alpha=args.lora_alpha,
         lora_target_modules=args.lora_target_modules,
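One consequence of this wiring that the diff does not show: if the optimizer is built from only the parameters that require gradients, the same `configure_optimizers` logic serves both modes. A hedged sketch of that pattern (assumed, not taken from this commit; `build_optimizer` is a hypothetical helper):

```python
import torch

def build_optimizer(denoising_model: torch.nn.Module, learning_rate: float) -> torch.optim.Optimizer:
    # Filtering on requires_grad keeps the optimizer agnostic to the training
    # architecture: LoRA adapter weights in one mode, the full DiT in the other.
    trainable_params = [p for p in denoising_model.parameters() if p.requires_grad]
    return torch.optim.AdamW(trainable_params, lr=learning_rate)
```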