mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
fix bugs
This commit is contained in:
@@ -257,10 +257,10 @@ class SDVideoPipeline(SDImagePipeline):
|
||||
progress_bar_st.progress(progress_id / len(self.scheduler.timesteps))
|
||||
|
||||
# Decode image
|
||||
image = self.decode_video(latents, **tiler_kwargs)
|
||||
output_frames = self.decode_video(latents, **tiler_kwargs)
|
||||
|
||||
# Post-process
|
||||
if smoother is not None and (num_inference_steps in smoother_progress_ids or -1 in smoother_progress_ids):
|
||||
output_frames = smoother(output_frames, original_frames=input_frames)
|
||||
|
||||
return image
|
||||
return output_frames
|
||||
|
||||
@@ -214,10 +214,10 @@ class SDXLVideoPipeline(SDXLImagePipeline):
|
||||
progress_bar_st.progress(progress_id / len(self.scheduler.timesteps))
|
||||
|
||||
# Decode image
|
||||
image = self.decode_video(latents, **tiler_kwargs)
|
||||
output_frames = self.decode_video(latents, **tiler_kwargs)
|
||||
|
||||
# Post-process
|
||||
if smoother is not None and (num_inference_steps in smoother_progress_ids or -1 in smoother_progress_ids):
|
||||
output_frames = smoother(output_frames, original_frames=input_frames)
|
||||
|
||||
return image
|
||||
return output_frames
|
||||
|
||||
@@ -4,7 +4,7 @@ import numpy as np
|
||||
from einops import rearrange, repeat
|
||||
import lightning as pl
|
||||
from diffsynth import ModelManager, SVDImageEncoder, SVDUNet, SVDVAEEncoder, ContinuousODEScheduler, load_state_dict
|
||||
from diffsynth.pipelines.stable_video_diffusion import SVDCLIPImageProcessor
|
||||
from diffsynth.pipelines.svd_video import SVDCLIPImageProcessor
|
||||
from diffsynth.models.svd_unet import TemporalAttentionBlock
|
||||
|
||||
|
||||
@@ -131,14 +131,14 @@ class LightningModel(pl.LightningModule):
|
||||
self.image_encoder.requires_grad_(False)
|
||||
|
||||
self.unet = SVDUNet(add_positional_conv=add_positional_conv).to(dtype=torch.float16, device=self.device)
|
||||
self.unet.load_state_dict(SVDUNet.state_dict_converter().from_civitai(state_dict), strict=False)
|
||||
self.unet.load_state_dict(SVDUNet.state_dict_converter().from_civitai(state_dict, add_positional_conv=add_positional_conv), strict=False)
|
||||
self.unet.train()
|
||||
self.unet.requires_grad_(False)
|
||||
for block in self.unet.blocks:
|
||||
if isinstance(block, TemporalAttentionBlock):
|
||||
block.requires_grad_(True)
|
||||
|
||||
self.vae_encoder = SVDVAEEncoder.to(dtype=torch.float16, device=self.device)
|
||||
self.vae_encoder = SVDVAEEncoder().to(dtype=torch.float16, device=self.device)
|
||||
self.vae_encoder.load_state_dict(SVDVAEEncoder.state_dict_converter().from_civitai(state_dict))
|
||||
self.vae_encoder.eval()
|
||||
self.vae_encoder.requires_grad_(False)
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
from diffsynth import ModelManager, SD3ImagePipeline, download_models, load_state_dict
|
||||
import torch
|
||||
|
||||
|
||||
# Download models (automatically)
|
||||
# `models/stable_diffusion_3/sd3_medium_incl_clips.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips.safetensors)
|
||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||
download_models(["StableDiffusion3_without_T5", "TextualInversion_VeryBadImageNegative_v1.3"])
|
||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||
model_manager.load_textual_inversions("models/textual_inversion")
|
||||
model_manager.load_models(["models/stable_diffusion_3/sd3_medium_incl_clips.safetensors"])
|
||||
pipe = SD3ImagePipeline.from_model_manager(model_manager)
|
||||
|
||||
|
||||
for seed in range(4):
|
||||
torch.manual_seed(seed)
|
||||
image = pipe(
|
||||
prompt="a girl, highly detailed, absurd res, perfect image",
|
||||
negative_prompt="verybadimagenegative_v1.3",
|
||||
cfg_scale=4.5,
|
||||
num_inference_steps=50, width=1024, height=1024,
|
||||
)
|
||||
image.save(f"image_with_textual_inversion_{seed}.jpg")
|
||||
|
||||
torch.manual_seed(seed)
|
||||
image = pipe(
|
||||
prompt="a girl, highly detailed, absurd res, perfect image",
|
||||
negative_prompt="",
|
||||
cfg_scale=4.5,
|
||||
num_inference_steps=50, width=1024, height=1024,
|
||||
)
|
||||
image.save(f"image_without_textual_inversion_{seed}.jpg")
|
||||
Reference in New Issue
Block a user