mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-24 01:48:13 +00:00
Krea realtime video (#1011)
* krea-realtime-video * Add Krea real-time video inference and training support * Delete .gitignore * update README * update README --------- Co-authored-by: Artiprocher <wangye87v5@hotmail.com> Co-authored-by: Jintao Huang <huangjintao.hjt@alibaba-inc.com> Co-authored-by: Zhongjie Duan <35051019+Artiprocher@users.noreply.github.com>
This commit is contained in:
12
examples/wanvideo/model_training/full/krea-realtime-video.sh
Normal file
12
examples/wanvideo/model_training/full/krea-realtime-video.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
accelerate launch --config_file examples/wanvideo/model_training/full/accelerate_config_14B.yaml examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata.csv \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "krea/krea-realtime-video:krea-realtime-video-14b.safetensors,Wan-AI/Wan2.1-T2V-14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.1-T2V-14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-5 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/krea-realtime-video_full" \
|
||||
--trainable_models "dit"
|
||||
14
examples/wanvideo/model_training/lora/krea-realtime-video.sh
Normal file
14
examples/wanvideo/model_training/lora/krea-realtime-video.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
accelerate launch examples/wanvideo/model_training/train.py \
|
||||
--dataset_base_path data/example_video_dataset \
|
||||
--dataset_metadata_path data/example_video_dataset/metadata.csv \
|
||||
--height 480 \
|
||||
--width 832 \
|
||||
--dataset_repeat 100 \
|
||||
--model_id_with_origin_paths "krea/krea-realtime-video:krea-realtime-video-14b.safetensors,Wan-AI/Wan2.1-T2V-14B:models_t5_umt5-xxl-enc-bf16.pth,Wan-AI/Wan2.1-T2V-14B:Wan2.1_VAE.pth" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 5 \
|
||||
--remove_prefix_in_ckpt "pipe.dit." \
|
||||
--output_path "./models/train/krea-realtime-video_lora" \
|
||||
--lora_base_model "dit" \
|
||||
--lora_target_modules "q,k,v,o,ffn.0,ffn.2" \
|
||||
--lora_rank 32
|
||||
@@ -0,0 +1,28 @@
|
||||
import torch
|
||||
from PIL import Image
|
||||
from diffsynth import save_video, VideoData, load_state_dict
|
||||
from diffsynth.pipelines.wan_video_new import WanVideoPipeline, ModelConfig
|
||||
|
||||
|
||||
pipe = WanVideoPipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="krea/krea-realtime-video", origin_file_pattern="krea-realtime-video-14b.safetensors", offload_device="cpu"),
|
||||
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="models_t5_umt5-xxl-enc-bf16.pth", offload_device="cpu"),
|
||||
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="Wan2.1_VAE.pth", offload_device="cpu"),
|
||||
],
|
||||
)
|
||||
state_dict = load_state_dict("models/train/krea-realtime-video_full/epoch-1.safetensors")
|
||||
pipe.dit.load_state_dict(state_dict)
|
||||
pipe.enable_vram_management()
|
||||
|
||||
# Text-to-video
|
||||
video = pipe(
|
||||
prompt="a cat sitting on a boat",
|
||||
num_inference_steps=6, num_frames=81,
|
||||
seed=0, tiled=True,
|
||||
cfg_scale=1,
|
||||
sigma_shift=20,
|
||||
)
|
||||
save_video(video, "output.mp4", fps=15, quality=5)
|
||||
@@ -0,0 +1,28 @@
|
||||
import torch
|
||||
from PIL import Image
|
||||
from diffsynth import save_video, VideoData, load_state_dict
|
||||
from diffsynth.pipelines.wan_video_new import WanVideoPipeline, ModelConfig
|
||||
|
||||
|
||||
pipe = WanVideoPipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="krea/krea-realtime-video", origin_file_pattern="krea-realtime-video-14b.safetensors", offload_device="cpu"),
|
||||
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="models_t5_umt5-xxl-enc-bf16.pth", offload_device="cpu"),
|
||||
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="Wan2.1_VAE.pth", offload_device="cpu"),
|
||||
],
|
||||
)
|
||||
|
||||
pipe.load_lora(pipe.dit, "models/train/krea-realtime-video_lora/epoch-4.safetensors", alpha=1)
|
||||
pipe.enable_vram_management()
|
||||
|
||||
# Text-to-video
|
||||
video = pipe(
|
||||
prompt="a cat sitting on a boat",
|
||||
num_inference_steps=6, num_frames=81,
|
||||
seed=0, tiled=True,
|
||||
cfg_scale=1,
|
||||
sigma_shift=20,
|
||||
)
|
||||
save_video(video, "output.mp4", fps=15, quality=5)
|
||||
Reference in New Issue
Block a user