mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
44 lines
1.6 KiB
Python
44 lines
1.6 KiB
Python
import torch
|
|
from diffsynth import ModelManager, HunyuanVideoPipeline, download_models, save_video
|
|
from modelscope import dataset_snapshot_download
|
|
from PIL import Image
|
|
|
|
|
|
# Download the "HunyuanVideoI2V" model files before anything is loaded.
download_models(["HunyuanVideoI2V"])

# One manager instance receives every checkpoint this script loads.
model_manager = ModelManager()
|
|
|
|
# DiT checkpoint: loaded on the CPU in bfloat16.
model_manager.load_models(
    ["models/HunyuanVideoI2V/transformers/mp_rank_00_model_states.pt"],
    torch_dtype=torch.bfloat16,
    device="cpu",
)
|
|
|
|
# Remaining modules (both text encoders and the VAE): loaded on the CPU in
# float16 rather than the bfloat16 used for the DiT.
model_manager.load_models(
    [
        "models/HunyuanVideoI2V/text_encoder/model.safetensors",
        "models/HunyuanVideoI2V/text_encoder_2",
        "models/HunyuanVideoI2V/vae/pytorch_model.pt",
    ],
    torch_dtype=torch.float16,
    device="cpu",
)
|
|
# Build the pipeline from the loaded models; computation runs on "cuda" in
# bfloat16.
# NOTE(review): enable_vram_management presumably switches on the pipeline's
# reduced-VRAM mode (the output file is tagged "low_vram") -- confirm against
# the HunyuanVideoPipeline implementation.
pipe = HunyuanVideoPipeline.from_model_manager(
    model_manager,
    torch_dtype=torch.bfloat16,
    device="cuda",
    enable_vram_management=True,
)
|
|
|
|
# Download the example inputs into the current directory; only files matching
# the pattern are requested from the dataset repository.
dataset_snapshot_download(
    dataset_id="DiffSynth-Studio/examples_in_diffsynth",
    local_dir="./",
    # Plain literal: the former f-string prefix had no placeholders to format.
    allow_file_pattern="data/examples/hunyuanvideo/*",
)
|
|
|
|
# Settings for the example run.
i2v_resolution = "720p"
prompt = "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick."

# The input image is converted to RGB and passed as a one-element list.
images = [Image.open("data/examples/hunyuanvideo/0.jpg").convert("RGB")]

video = pipe(
    prompt,
    input_images=images,
    num_inference_steps=50,
    seed=0,
    i2v_resolution=i2v_resolution,
)

# The resolution tag is embedded in the output file name.
save_video(video, f"video_{i2v_resolution}_low_vram.mp4", fps=30, quality=6)
|