mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-24 01:48:13 +00:00
support wan-fun-inp generating
This commit is contained in:
@@ -163,16 +163,22 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
return {"context": prompt_emb}
|
return {"context": prompt_emb}
|
||||||
|
|
||||||
|
|
||||||
def encode_image(self, image, num_frames, height, width):
|
def encode_image(self, image, end_image, num_frames, height, width):
|
||||||
image = self.preprocess_image(image.resize((width, height))).to(self.device)
|
image = self.preprocess_image(image.resize((width, height))).to(self.device)
|
||||||
clip_context = self.image_encoder.encode_image([image])
|
clip_context = self.image_encoder.encode_image([image])
|
||||||
msk = torch.ones(1, num_frames, height//8, width//8, device=self.device)
|
msk = torch.ones(1, num_frames, height//8, width//8, device=self.device)
|
||||||
msk[:, 1:] = 0
|
msk[:, 1:] = 0
|
||||||
|
if end_image is not None:
|
||||||
|
end_image = self.preprocess_image(end_image.resize((width, height))).to(self.device)
|
||||||
|
vae_input = torch.concat([image.transpose(0,1), torch.zeros(3, num_frames-2, height, width).to(image.device), end_image.transpose(0,1)],dim=1)
|
||||||
|
msk[:, -1:] = 1
|
||||||
|
else:
|
||||||
|
vae_input = torch.concat([image.transpose(0, 1), torch.zeros(3, num_frames-1, height, width).to(image.device)], dim=1)
|
||||||
|
|
||||||
msk = torch.concat([torch.repeat_interleave(msk[:, 0:1], repeats=4, dim=1), msk[:, 1:]], dim=1)
|
msk = torch.concat([torch.repeat_interleave(msk[:, 0:1], repeats=4, dim=1), msk[:, 1:]], dim=1)
|
||||||
msk = msk.view(1, msk.shape[1] // 4, 4, height//8, width//8)
|
msk = msk.view(1, msk.shape[1] // 4, 4, height//8, width//8)
|
||||||
msk = msk.transpose(1, 2)[0]
|
msk = msk.transpose(1, 2)[0]
|
||||||
|
|
||||||
vae_input = torch.concat([image.transpose(0, 1), torch.zeros(3, num_frames-1, height, width).to(image.device)], dim=1)
|
|
||||||
y = self.vae.encode([vae_input.to(dtype=self.torch_dtype, device=self.device)], device=self.device)[0]
|
y = self.vae.encode([vae_input.to(dtype=self.torch_dtype, device=self.device)], device=self.device)[0]
|
||||||
y = torch.concat([msk, y])
|
y = torch.concat([msk, y])
|
||||||
y = y.unsqueeze(0)
|
y = y.unsqueeze(0)
|
||||||
@@ -212,6 +218,7 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
prompt,
|
prompt,
|
||||||
negative_prompt="",
|
negative_prompt="",
|
||||||
input_image=None,
|
input_image=None,
|
||||||
|
end_image=None,
|
||||||
input_video=None,
|
input_video=None,
|
||||||
denoising_strength=1.0,
|
denoising_strength=1.0,
|
||||||
seed=None,
|
seed=None,
|
||||||
@@ -263,7 +270,7 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
# Encode image
|
# Encode image
|
||||||
if input_image is not None and self.image_encoder is not None:
|
if input_image is not None and self.image_encoder is not None:
|
||||||
self.load_models_to_device(["image_encoder", "vae"])
|
self.load_models_to_device(["image_encoder", "vae"])
|
||||||
image_emb = self.encode_image(input_image, num_frames, height, width)
|
image_emb = self.encode_image(input_image, end_image, num_frames, height, width)
|
||||||
else:
|
else:
|
||||||
image_emb = {}
|
image_emb = {}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user