mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
Fix num_frames in i2v (#339)
* Fix num_frames in i2v
* Remove print in flash_attention
This commit is contained in:
@@ -150,16 +150,16 @@ class WanVideoPipeline(BasePipeline):
|
||||
return {"context": prompt_emb}
|
||||
|
||||
|
||||
def encode_image(self, image, num_frames, height, width):
    """Encode the conditioning image for image-to-video generation.

    Produces the CLIP image feature and the VAE-latent conditioning tensor
    expected by the Wan video DiT. The first frame is the given image; the
    remaining ``num_frames - 1`` frames are zero-padded. (This is the fixed
    version: the frame count was previously hard-coded to 81/80 — see #339.)

    Args:
        image: PIL-style image (must support ``.resize((width, height))``).
        num_frames: Total number of video frames to condition on.
        height: Target frame height in pixels (must be divisible by 8 —
            latent spatial size is ``height//8 x width//8``).
        width: Target frame width in pixels (must be divisible by 8).

    Returns:
        dict with:
            "clip_fea": CLIP image-encoder features for the image.
            "y": single-element list holding the concatenated
                 [mask, VAE latent] conditioning tensor.
    """
    # Run encoders under bfloat16 autocast on the pipeline's device.
    with torch.amp.autocast(dtype=torch.bfloat16, device_type=torch.device(self.device).type):
        image = self.preprocess_image(image.resize((width, height))).to(self.device)
        clip_context = self.image_encoder.encode_image([image])
        # Frame-validity mask over latent spatial grid: only frame 0 is real.
        msk = torch.ones(1, num_frames, height//8, width//8, device=self.device)
        msk[:, 1:] = 0
        # Repeat the first frame's mask 4x to match the VAE's temporal
        # compression (4 pixel frames per latent frame), then regroup.
        msk = torch.concat([torch.repeat_interleave(msk[:, 0:1], repeats=4, dim=1), msk[:, 1:]], dim=1)
        msk = msk.view(1, msk.shape[1] // 4, 4, height//8, width//8)
        msk = msk.transpose(1, 2)[0]
        # VAE-encode [real first frame, zeros for the remaining frames].
        # num_frames-1 (not a hard-coded 80) keeps the latent length in
        # sync with the requested video length.
        y = self.vae.encode([torch.concat([image.transpose(0, 1), torch.zeros(3, num_frames-1, height, width).to(image.device)], dim=1)], device=self.device)[0]
        y = torch.concat([msk, y])
    return {"clip_fea": clip_context, "y": [y]}
@@ -234,7 +234,7 @@ class WanVideoPipeline(BasePipeline):
|
||||
# Encode image
|
||||
if input_image is not None and self.image_encoder is not None:
|
||||
self.load_models_to_device(["image_encoder", "vae"])
|
||||
image_emb = self.encode_image(input_image, height, width)
|
||||
image_emb = self.encode_image(input_image, num_frames, height, width)
|
||||
else:
|
||||
image_emb = {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user