From d381c7b18608bc8ab951c1675fd012bb8b132b21 Mon Sep 17 00:00:00 2001 From: Qianyi Zhao <49068354+Qing112@users.noreply.github.com> Date: Wed, 23 Oct 2024 03:27:59 -0500 Subject: [PATCH] Update svd_video.py --- diffsynth/pipelines/svd_video.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffsynth/pipelines/svd_video.py b/diffsynth/pipelines/svd_video.py index 8c93b1f..01b358b 100644 --- a/diffsynth/pipelines/svd_video.py +++ b/diffsynth/pipelines/svd_video.py @@ -49,7 +49,7 @@ class SVDVideoPipeline(BasePipeline): return image_emb - def encode_image_with_vae(self, image, noise_aug_strength): + def encode_image_with_vae(self, image, noise_aug_strength, seed): image = self.preprocess_image(image).to(device=self.device, dtype=self.torch_dtype) noise = self.generate_noise(image.shape, seed=seed, device=self.device, dtype=self.torch_dtype) image = image + noise_aug_strength * noise @@ -148,7 +148,7 @@ class SVDVideoPipeline(BasePipeline): # Encode image image_emb_clip_posi = self.encode_image_with_clip(input_image) image_emb_clip_nega = torch.zeros_like(image_emb_clip_posi) - image_emb_vae_posi = repeat(self.encode_image_with_vae(input_image, noise_aug_strength), "B C H W -> (B T) C H W", T=num_frames) + image_emb_vae_posi = repeat(self.encode_image_with_vae(input_image, noise_aug_strength, seed=seed), "B C H W -> (B T) C H W", T=num_frames) image_emb_vae_nega = torch.zeros_like(image_emb_vae_posi) # Prepare classifier-free guidance