mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 13:58:15 +00:00
36 lines
2.9 KiB
Python
36 lines
2.9 KiB
Python
import torch
|
|
from diffsynth.utils.data import save_video, VideoData
|
|
from diffsynth.pipelines.wan_video import WanVideoPipeline, ModelConfig
|
|
|
|
|
|
pipe = WanVideoPipeline.from_pretrained(
|
|
torch_dtype=torch.bfloat16,
|
|
device="cuda",
|
|
model_configs=[
|
|
ModelConfig(model_id="meituan-longcat/LongCat-Video", origin_file_pattern="dit/diffusion_pytorch_model*.safetensors"),
|
|
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="models_t5_umt5-xxl-enc-bf16.pth"),
|
|
ModelConfig(model_id="Wan-AI/Wan2.1-T2V-14B", origin_file_pattern="Wan2.1_VAE.pth"),
|
|
],
|
|
tokenizer_config=ModelConfig(model_id="Wan-AI/Wan2.1-T2V-1.3B", origin_file_pattern="google/umt5-xxl/"),
|
|
)
|
|
|
|
# Text-to-video
|
|
video = pipe(
|
|
prompt="In a realistic photography style, a white boy around seven or eight years old sits on a park bench, wearing a light blue T-shirt, denim shorts, and white sneakers. He holds an ice cream cone with vanilla and chocolate flavors, and beside him is a medium-sized golden Labrador. Smiling, the boy offers the ice cream to the dog, who eagerly licks it with its tongue. The sun is shining brightly, and the background features a green lawn and several tall trees, creating a warm and loving scene.",
|
|
negative_prompt="Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
|
|
seed=0, tiled=True, num_frames=93,
|
|
cfg_scale=2, sigma_shift=1,
|
|
)
|
|
save_video(video, "video_1_LongCat-Video.mp4", fps=15, quality=5)
|
|
|
|
# Video-continuation (The number of frames in `longcat_video` should be 4n+1.)
|
|
longcat_video = video[-17:]
|
|
video = pipe(
|
|
prompt="In a realistic photography style, a white boy around seven or eight years old sits on a park bench, wearing a light blue T-shirt, denim shorts, and white sneakers. He holds an ice cream cone with vanilla and chocolate flavors, and beside him is a medium-sized golden Labrador. Smiling, the boy offers the ice cream to the dog, who eagerly licks it with its tongue. The sun is shining brightly, and the background features a green lawn and several tall trees, creating a warm and loving scene.",
|
|
negative_prompt="Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
|
|
seed=1, tiled=True, num_frames=93,
|
|
cfg_scale=2, sigma_shift=1,
|
|
longcat_video=longcat_video,
|
|
)
|
|
save_video(video, "video_2_LongCat-Video.mp4", fps=15, quality=5)
|