Files
DiffSynth-Studio/examples/WanX/test_vae.py
2025-02-21 11:27:30 +08:00

48 lines
1.3 KiB
Python

import torch
import torchvision
import imageio
from diffsynth import ModelManager
def save_video(tensor,
               save_file=None,
               fps=30,
               nrow=8,
               normalize=True,
               value_range=(-1, 1)):
    """Tile a batch of videos into one grid per frame and write it as a video.

    Args:
        tensor: Video batch. Frames are obtained by unbinding dim 2, and each
            per-frame slice is passed to ``torchvision.utils.make_grid``
            (presumably laid out as batch, channel, frame, height, width --
            confirm against the caller).
        save_file: Output path handed to ``imageio.get_writer``. Required,
            even though the default is ``None``.
        fps: Frame rate of the written video.
        nrow: Forwarded to ``make_grid``.
        normalize: Forwarded to ``make_grid``.
        value_range: Pair of bounds; its min/max are used to clamp the input
            and the pair is forwarded to ``make_grid``.

    Raises:
        ValueError: If ``save_file`` is ``None``.
    """
    # Fail fast with a clear message instead of an opaque writer error after
    # all the tensor work has already been done.
    if save_file is None:
        raise ValueError("save_file must be a path to the output video file")

    tensor = tensor.clamp(min(value_range), max(value_range))
    frames = torch.stack(
        [
            torchvision.utils.make_grid(
                u, nrow=nrow, normalize=normalize, value_range=value_range)
            for u in tensor.unbind(2)
        ],
        dim=1,
    ).permute(1, 2, 3, 0)  # frame, h, w, 3
    frames = (frames * 255).type(torch.uint8).cpu()

    # The context manager closes the writer even if appending a frame raises,
    # so the output file handle / encoder process is never leaked.
    with imageio.get_writer(
            save_file, fps=fps, codec='libx264', quality=8) as writer:
        for frame in frames.numpy():
            writer.append_data(frame)
# Start recording CUDA memory history so that a snapshot can be dumped once
# decoding has finished (private torch API -- may change between releases).
torch.cuda.memory._record_memory_history()

# Build the model manager with float32 dtype on "cuda" and load the VAE weights.
model_manager = ModelManager(torch_dtype=torch.float, device="cuda")
model_manager.load_models(["models/WanX/vae.pth"])
vae = model_manager.fetch_model("wanxvideo_vae")

# NOTE(review): torch.load unpickles the file; only use a trusted sample.pt.
latents = [torch.load("sample.pt")]

# Decode on whatever device the loaded latent tensor already lives on.
decode_device = latents[0].device
videos = vae.decode(latents, device=decode_device, tiled=True)
# Optional round trip, currently disabled:
# back_encode = vae.encode(videos)

# Write the recorded memory history to disk for offline inspection.
torch.cuda.memory._dump_snapshot("my_snapshot.pickle")

# Re-add a leading dimension to the first decoded video before saving it.
first_video = videos[0][None]
save_video(first_video, save_file="example3.mp4", fps=16, nrow=8)

print(latents)
print(videos)
# print(back_encode)