mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-19 06:48:12 +00:00
hunyuanvideo examples
This commit is contained in:
19
examples/HunyuanVideo/README.md
Normal file
19
examples/HunyuanVideo/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# HunyuanVideo
|
||||
|
||||
HunyuanVideo is a video generation model trained by Tencent. We provide advanced VRAM management for this model, including three stages:
|
||||
|
||||
|VRAM required|Example script|Frames|Resolution|Note|
|
||||
|-|-|-|-|-|
|
||||
|80G|[hunyuanvideo_80G.py](hunyuanvideo_80G.py)|129|720*1280|No VRAM management.|
|
||||
|24G|[hunyuanvideo_24G.py](hunyuanvideo_24G.py)|129|720*1280|The video is consistent with the original implementation, but it requires 5%~10% more time than [hunyuanvideo_80G.py](hunyuanvideo_80G.py)|
|
||||
|6G|[hunyuanvideo_6G.py](hunyuanvideo_6G.py)|129|512*384|The base model doesn't support low resolutions. We recommend users to use some LoRA ([example](https://civitai.com/models/1032126/walking-animation-hunyuan-video)) trained using low resolutions.|
|
||||
|
||||
## Gallery
|
||||
|
||||
Video generated by [hunyuanvideo_80G.py](hunyuanvideo_80G.py) and [hunyuanvideo_24G.py](hunyuanvideo_24G.py):
|
||||
|
||||
https://github.com/user-attachments/assets/48dd24bb-0cc6-40d2-88c3-10feed3267e9
|
||||
|
||||
Video generated by [hunyuanvideo_6G.py](hunyuanvideo_6G.py) using [this LoRA](https://civitai.com/models/1032126/walking-animation-hunyuan-video):
|
||||
|
||||
https://github.com/user-attachments/assets/2997f107-d02d-4ecb-89bb-5ce1a7f93817
|
||||
@@ -39,4 +39,4 @@ pipe = HunyuanVideoPipeline.from_model_manager(
|
||||
# Enjoy!
|
||||
prompt = "CG, masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait. The girl's flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her."
|
||||
video = pipe(prompt, seed=0)
|
||||
save_video(video, "video.mp4", fps=30, quality=5)
|
||||
save_video(video, "video_girl.mp4", fps=30, quality=6)
|
||||
|
||||
47
examples/HunyuanVideo/hunyuanvideo_6G.py
Normal file
47
examples/HunyuanVideo/hunyuanvideo_6G.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import torch
|
||||
torch.cuda.set_per_process_memory_fraction(1.0, 0)
|
||||
from diffsynth import ModelManager, HunyuanVideoPipeline, download_models, save_video, FlowMatchScheduler
|
||||
|
||||
|
||||
download_models(["HunyuanVideo"])
|
||||
model_manager = ModelManager()
|
||||
|
||||
# The DiT model is loaded in bfloat16.
|
||||
model_manager.load_models(
|
||||
[
|
||||
"models/HunyuanVideo/transformers/mp_rank_00_model_states.pt"
|
||||
],
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cpu"
|
||||
)
|
||||
|
||||
# The other modules are loaded in float16.
|
||||
model_manager.load_models(
|
||||
[
|
||||
"models/HunyuanVideo/text_encoder/model.safetensors",
|
||||
"models/HunyuanVideo/text_encoder_2",
|
||||
"models/HunyuanVideo/vae/pytorch_model.pt",
|
||||
],
|
||||
torch_dtype=torch.float16,
|
||||
device="cpu"
|
||||
)
|
||||
|
||||
# We support LoRA inference. You can use the following code to load your LoRA model.
|
||||
# Example LoRA: https://civitai.com/models/1032126/walking-animation-hunyuan-video
|
||||
model_manager.load_lora("models/lora/kxsr_walking_anim_v1-5.safetensors", lora_alpha=1.0)
|
||||
|
||||
# The computation device is "cuda".
|
||||
pipe = HunyuanVideoPipeline.from_model_manager(
|
||||
model_manager,
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda"
|
||||
)
|
||||
# This LoRA requires shift=9.0.
|
||||
pipe.scheduler = FlowMatchScheduler(shift=9.0, sigma_min=0.0, extra_one_step=True)
|
||||
|
||||
# Enjoy!
|
||||
for clothes_up in ["white t-shirt", "black t-shirt", "orange t-shirt"]:
|
||||
for clothes_down in ["blue sports skirt", "red sports skirt", "white sports skirt"]:
|
||||
prompt = f"kxsr, full body, no crop, A 3D-rendered CG animation video featuring a Gorgeous, mature, curvaceous, fair-skinned female girl with long silver hair and blue eyes. She wears a {clothes_up} and a {clothes_down}, walking offering a sense of fluid movement and vivid animation."
|
||||
video = pipe(prompt, seed=0, height=512, width=384, num_frames=129, num_inference_steps=18, tile_size=(17, 16, 16), tile_stride=(12, 12, 12))
|
||||
save_video(video, f"video-{clothes_up}-{clothes_down}.mp4", fps=30, quality=6)
|
||||
@@ -12,7 +12,7 @@ model_manager.load_models(
|
||||
"models/HunyuanVideo/transformers/mp_rank_00_model_states.pt"
|
||||
],
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cpu"
|
||||
device="cuda"
|
||||
)
|
||||
|
||||
# The other modules are loaded in float16.
|
||||
@@ -23,7 +23,7 @@ model_manager.load_models(
|
||||
"models/HunyuanVideo/vae/pytorch_model.pt",
|
||||
],
|
||||
torch_dtype=torch.float16,
|
||||
device="cpu"
|
||||
device="cuda"
|
||||
)
|
||||
|
||||
# We support LoRA inference. You can use the following code to load your LoRA model.
|
||||
@@ -33,10 +33,13 @@ model_manager.load_models(
|
||||
pipe = HunyuanVideoPipeline.from_model_manager(
|
||||
model_manager,
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda"
|
||||
device="cuda",
|
||||
enable_vram_management=False
|
||||
)
|
||||
# Although you have enough VRAM, we still recommend you to enable offload.
|
||||
pipe.enable_cpu_offload()
|
||||
|
||||
# Enjoy!
|
||||
prompt = "CG, masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait. The girl's flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her."
|
||||
video = pipe(prompt, seed=0, height=720, width=960)
|
||||
save_video(video, "video.mp4", fps=30, quality=5)
|
||||
video = pipe(prompt, seed=0)
|
||||
save_video(video, "video.mp4", fps=30, quality=6)
|
||||
@@ -1,42 +0,0 @@
|
||||
import torch
|
||||
torch.cuda.set_per_process_memory_fraction(1.0, 0)
|
||||
from diffsynth import ModelManager, HunyuanVideoPipeline, download_models, save_video
|
||||
|
||||
|
||||
download_models(["HunyuanVideo"])
|
||||
model_manager = ModelManager()
|
||||
|
||||
# The DiT model is loaded in bfloat16.
|
||||
model_manager.load_models(
|
||||
[
|
||||
"models/HunyuanVideo/transformers/mp_rank_00_model_states.pt"
|
||||
],
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cpu"
|
||||
)
|
||||
|
||||
# The other modules are loaded in float16.
|
||||
model_manager.load_models(
|
||||
[
|
||||
"models/HunyuanVideo/text_encoder/model.safetensors",
|
||||
"models/HunyuanVideo/text_encoder_2",
|
||||
"models/HunyuanVideo/vae/pytorch_model.pt",
|
||||
],
|
||||
torch_dtype=torch.float16,
|
||||
device="cpu"
|
||||
)
|
||||
|
||||
# We support LoRA inference. You can use the following code to load your LoRA model.
|
||||
model_manager.load_lora("models/lora/Rem_hunyuan_video_v3.safetensors", lora_alpha=1.0)
|
||||
|
||||
# The computation device is "cuda".
|
||||
pipe = HunyuanVideoPipeline.from_model_manager(
|
||||
model_manager,
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda"
|
||||
)
|
||||
|
||||
# Enjoy!
|
||||
prompt = "a woman with blue hair wearing a white and black dress, sitting on a bed with a white wall in the background. she is wearing a re:zero starting life in another world rem cosplay costume, complete with a black and white dress, black gloves, and a black bow tie."
|
||||
video = pipe(prompt, seed=0, height=512, width=512, tile_size=(17, 16, 16), tile_stride=(12, 12, 12))
|
||||
save_video(video, "video.mp4", fps=30, quality=5)
|
||||
Reference in New Issue
Block a user