Mirror of https://github.com/modelscope/DiffSynth-Studio.git (synced 2026-04-14 13:48:20 +00:00)

Compare commits

1 commit
| Author | SHA1 | Date |
|---|---|---|
| | 9654ec3c65 | |
@@ -83,7 +83,7 @@ class Flux2ImagePipeline(BasePipeline):
         input_image: Image.Image = None,
         denoising_strength: float = 1.0,
         # Edit
-        edit_image: Union[Image.Image, List[Image.Image]] = None,
+        edit_image: List[Image.Image] = None,
         edit_image_auto_resize: bool = True,
         # Shape
         height: int = 1024,
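The hunk above narrows `edit_image` from `Union[Image.Image, List[Image.Image]]` to `List[Image.Image]`, so callers should now pass a list even for a single edit reference. A minimal call sketch, assuming a `Flux2ImagePipeline` instance `pipe` is already loaded; the image path and prompt are illustrative only:

```python
from PIL import Image

# Hypothetical edit reference; the path and prompt are illustrative only.
edit_ref = Image.open("edit_reference.png").convert("RGB")

result = pipe(
    prompt="replace the background with a snowy mountain range",  # assumed standard prompt argument
    edit_image=[edit_ref],        # now always a list, even for one image
    edit_image_auto_resize=True,  # default shown in the signature above
    height=1024,
)
```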
@@ -200,9 +200,9 @@ class FluxImagePipeline(BasePipeline):
         # Steps
         num_inference_steps: int = 30,
         # local prompts
-        multidiffusion_prompts=(),
-        multidiffusion_masks=(),
-        multidiffusion_scales=(),
+        multidiffusion_prompts:tuple[str] =(),
+        multidiffusion_masks:tuple[str]=(),
+        multidiffusion_scales:tuple[str]=(),
         # Kontext
         kontext_images: Union[list[Image.Image], Image.Image] = None,
         # ControlNet
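The regional-prompt arguments gain `tuple` annotations here without changing their defaults. The context lines also show the Kontext entry point; a hedged sketch of a Kontext-style edit call, assuming a `FluxImagePipeline` instance `pipe` and an illustrative reference image:

```python
from PIL import Image

# Illustrative reference image for Kontext-style editing.
kontext_ref = Image.open("reference.png").convert("RGB")

result = pipe(
    prompt="make the jacket red, keep everything else unchanged",  # assumed standard prompt argument
    kontext_images=kontext_ref,   # a single image or a list, per the Union annotation above
    num_inference_steps=30,       # default shown in the signature above
)
```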
@@ -170,45 +170,45 @@ class LTX2AudioVideoPipeline(BasePipeline):
         self,
         # Prompt
         prompt: str,
-        negative_prompt: Optional[str] = "",
+        negative_prompt: str = "",
         denoising_strength: float = 1.0,
         # Image-to-video
-        input_images: Optional[list[Image.Image]] = None,
-        input_images_indexes: Optional[list[int]] = [0],
-        input_images_strength: Optional[float] = 1.0,
+        input_images: list[Image.Image] = None,
+        input_images_indexes: list[int] = [0],
+        input_images_strength: float = 1.0,
         # In-Context Video Control
-        in_context_videos: Optional[list[list[Image.Image]]] = None,
-        in_context_downsample_factor: Optional[int] = 2,
+        in_context_videos: list[list[Image.Image]] = None,
+        in_context_downsample_factor: int = 2,
         # Video-to-video
-        retake_video: Optional[list[Image.Image]] = None,
-        retake_video_regions: Optional[list[tuple[float, float]]] = None,
+        retake_video: list[Image.Image] = None,
+        retake_video_regions: list[tuple[float, float]] = None,
         # Audio-to-video
-        retake_audio: Optional[torch.Tensor] = None,
-        audio_sample_rate: Optional[int] = 48000,
-        retake_audio_regions: Optional[list[tuple[float, float]]] = None,
+        retake_audio: torch.Tensor = None,
+        audio_sample_rate: int = 48000,
+        retake_audio_regions: list[tuple[float, float]] = None,
         # Randomness
-        seed: Optional[int] = None,
-        rand_device: Optional[str] = "cpu",
+        seed: int = None,
+        rand_device: str = "cpu",
         # Shape
-        height: Optional[int] = 512,
-        width: Optional[int] = 768,
-        num_frames: Optional[int] = 121,
-        frame_rate: Optional[int] = 24,
+        height: int = 512,
+        width: int = 768,
+        num_frames: int = 121,
+        frame_rate: int = 24,
         # Classifier-free guidance
-        cfg_scale: Optional[float] = 3.0,
+        cfg_scale: float = 3.0,
         # Scheduler
-        num_inference_steps: Optional[int] = 30,
+        num_inference_steps: int = 30,
         # VAE tiling
-        tiled: Optional[bool] = True,
-        tile_size_in_pixels: Optional[int] = 512,
-        tile_overlap_in_pixels: Optional[int] = 128,
-        tile_size_in_frames: Optional[int] = 128,
-        tile_overlap_in_frames: Optional[int] = 24,
+        tiled: bool = True,
+        tile_size_in_pixels: int = 512,
+        tile_overlap_in_pixels: int = 128,
+        tile_size_in_frames: int = 128,
+        tile_overlap_in_frames: int = 24,
         # Special Pipelines
-        use_two_stage_pipeline: Optional[bool] = False,
-        stage2_spatial_upsample_factor: Optional[int] = 2,
-        clear_lora_before_state_two: Optional[bool] = False,
-        use_distilled_pipeline: Optional[bool] = False,
+        use_two_stage_pipeline: bool = False,
+        stage2_spatial_upsample_factor: int = 2,
+        clear_lora_before_state_two: bool = False,
+        use_distilled_pipeline: bool = False,
         # progress_bar
         progress_bar_cmd=tqdm,
     ):
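Because this hunk only drops the `Optional[...]` wrappers and leaves every default untouched, existing call sites are unaffected. A minimal text-to-video sketch against the signature above, assuming an `LTX2AudioVideoPipeline` instance `pipe`; the prompt and seed are invented, and what the call returns is not shown in this diff:

```python
# Every keyword below appears in the signature above with the same default value.
result = pipe(
    prompt="a slow aerial shot over a foggy pine forest at sunrise",
    seed=0,
    height=512,
    width=768,
    num_frames=121,
    frame_rate=24,
    cfg_scale=3.0,
    num_inference_steps=30,
    tiled=True,
)
```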
@@ -116,32 +116,32 @@ class MovaAudioVideoPipeline(BasePipeline):
         self,
         # Prompt
         prompt: str,
-        negative_prompt: Optional[str] = "",
+        negative_prompt: str = "",
         # Image-to-video
-        input_image: Optional[Image.Image] = None,
+        input_image: Image.Image = None,
         # First-last-frame-to-video
-        end_image: Optional[Image.Image] = None,
+        end_image: Image.Image = None,
         # Video-to-video
-        denoising_strength: Optional[float] = 1.0,
+        denoising_strength: float = 1.0,
         # Randomness
-        seed: Optional[int] = None,
-        rand_device: Optional[str] = "cpu",
+        seed: int = None,
+        rand_device: str = "cpu",
         # Shape
-        height: Optional[int] = 352,
-        width: Optional[int] = 640,
-        num_frames: Optional[int] = 81,
-        frame_rate: Optional[int] = 24,
+        height: int = 352,
+        width: int = 640,
+        num_frames: int = 81,
+        frame_rate: int = 24,
         # Classifier-free guidance
-        cfg_scale: Optional[float] = 5.0,
+        cfg_scale: float = 5.0,
         # Boundary
-        switch_DiT_boundary: Optional[float] = 0.9,
+        switch_DiT_boundary: float = 0.9,
         # Scheduler
-        num_inference_steps: Optional[int] = 50,
-        sigma_shift: Optional[float] = 5.0,
+        num_inference_steps: int = 50,
+        sigma_shift: float = 5.0,
         # VAE tiling
-        tiled: Optional[bool] = True,
-        tile_size: Optional[tuple[int, int]] = (30, 52),
-        tile_stride: Optional[tuple[int, int]] = (15, 26),
+        tiled: bool = True,
+        tile_size: tuple[int, int] = (30, 52),
+        tile_stride: tuple[int, int] = (15, 26),
         # progress_bar
         progress_bar_cmd=tqdm,
     ):
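Same pattern for `MovaAudioVideoPipeline`. A short first-and-last-frame sketch using the `input_image`/`end_image` arguments above, assuming a pipeline instance `pipe`; paths and prompt are illustrative:

```python
from PIL import Image

first = Image.open("first_frame.png").convert("RGB")  # illustrative paths
last = Image.open("last_frame.png").convert("RGB")

result = pipe(
    prompt="a paper boat drifting down a rain-soaked gutter",
    input_image=first,   # image-to-video branch
    end_image=last,      # first-last-frame-to-video branch
    num_frames=81,
    frame_rate=24,
    cfg_scale=5.0,
    num_inference_steps=50,
)
```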
@@ -191,81 +191,81 @@ class WanVideoPipeline(BasePipeline):
         self,
         # Prompt
         prompt: str,
-        negative_prompt: Optional[str] = "",
+        negative_prompt: str = "",
         # Image-to-video
-        input_image: Optional[Image.Image] = None,
+        input_image: Image.Image = None,
         # First-last-frame-to-video
-        end_image: Optional[Image.Image] = None,
+        end_image: Image.Image = None,
         # Video-to-video
-        input_video: Optional[list[Image.Image]] = None,
-        denoising_strength: Optional[float] = 1.0,
+        input_video: list[Image.Image] = None,
+        denoising_strength: float = 1.0,
         # Speech-to-video
-        input_audio: Optional[np.array] = None,
-        audio_embeds: Optional[torch.Tensor] = None,
-        audio_sample_rate: Optional[int] = 16000,
-        s2v_pose_video: Optional[list[Image.Image]] = None,
-        s2v_pose_latents: Optional[torch.Tensor] = None,
-        motion_video: Optional[list[Image.Image]] = None,
+        input_audio: np.array = None,
+        audio_embeds: torch.Tensor = None,
+        audio_sample_rate: int = 16000,
+        s2v_pose_video: list[Image.Image] = None,
+        s2v_pose_latents: torch.Tensor = None,
+        motion_video: list[Image.Image] = None,
         # ControlNet
-        control_video: Optional[list[Image.Image]] = None,
-        reference_image: Optional[Image.Image] = None,
+        control_video: list[Image.Image] = None,
+        reference_image: Image.Image = None,
         # Camera control
-        camera_control_direction: Optional[Literal["Left", "Right", "Up", "Down", "LeftUp", "LeftDown", "RightUp", "RightDown"]] = None,
-        camera_control_speed: Optional[float] = 1/54,
-        camera_control_origin: Optional[tuple] = (0, 0.532139961, 0.946026558, 0.5, 0.5, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0),
+        camera_control_direction: Literal["Left", "Right", "Up", "Down", "LeftUp", "LeftDown", "RightUp", "RightDown"] = None,
+        camera_control_speed: float = 1/54,
+        camera_control_origin: tuple = (0, 0.532139961, 0.946026558, 0.5, 0.5, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0),
         # VACE
-        vace_video: Optional[list[Image.Image]] = None,
-        vace_video_mask: Optional[Image.Image] = None,
-        vace_reference_image: Optional[Image.Image] = None,
-        vace_scale: Optional[float] = 1.0,
+        vace_video: list[Image.Image] = None,
+        vace_video_mask: Image.Image = None,
+        vace_reference_image: Image.Image = None,
+        vace_scale: float = 1.0,
         # Animate
-        animate_pose_video: Optional[list[Image.Image]] = None,
-        animate_face_video: Optional[list[Image.Image]] = None,
-        animate_inpaint_video: Optional[list[Image.Image]] = None,
-        animate_mask_video: Optional[list[Image.Image]] = None,
+        animate_pose_video: list[Image.Image] = None,
+        animate_face_video: list[Image.Image] = None,
+        animate_inpaint_video: list[Image.Image] = None,
+        animate_mask_video: list[Image.Image] = None,
         # VAP
-        vap_video: Optional[list[Image.Image]] = None,
-        vap_prompt: Optional[str] = " ",
-        negative_vap_prompt: Optional[str] = " ",
+        vap_video: list[Image.Image] = None,
+        vap_prompt: str = " ",
+        negative_vap_prompt: str = " ",
         # Randomness
-        seed: Optional[int] = None,
-        rand_device: Optional[str] = "cpu",
+        seed: int = None,
+        rand_device: str = "cpu",
         # Shape
-        height: Optional[int] = 480,
-        width: Optional[int] = 832,
-        num_frames=81,
+        height: int = 480,
+        width: int = 832,
+        num_frames: int = 81,
         # Classifier-free guidance
-        cfg_scale: Optional[float] = 5.0,
-        cfg_merge: Optional[bool] = False,
+        cfg_scale: float = 5.0,
+        cfg_merge: bool = False,
         # Boundary
-        switch_DiT_boundary: Optional[float] = 0.875,
+        switch_DiT_boundary: float = 0.875,
         # Scheduler
-        num_inference_steps: Optional[int] = 50,
-        sigma_shift: Optional[float] = 5.0,
+        num_inference_steps: int = 50,
+        sigma_shift: float = 5.0,
         # Speed control
-        motion_bucket_id: Optional[int] = None,
+        motion_bucket_id: int = None,
         # LongCat-Video
-        longcat_video: Optional[list[Image.Image]] = None,
+        longcat_video: list[Image.Image] = None,
         # VAE tiling
-        tiled: Optional[bool] = True,
-        tile_size: Optional[tuple[int, int]] = (30, 52),
-        tile_stride: Optional[tuple[int, int]] = (15, 26),
+        tiled: bool = True,
+        tile_size: tuple[int, int] = (30, 52),
+        tile_stride: tuple[int, int] = (15, 26),
         # Sliding window
-        sliding_window_size: Optional[int] = None,
-        sliding_window_stride: Optional[int] = None,
+        sliding_window_size: int = None,
+        sliding_window_stride: int = None,
         # Teacache
-        tea_cache_l1_thresh: Optional[float] = None,
-        tea_cache_model_id: Optional[str] = "",
+        tea_cache_l1_thresh: float = None,
+        tea_cache_model_id: str = "",
         # WanToDance
-        wantodance_music_path: Optional[str] = None,
-        wantodance_reference_image: Optional[Image.Image] = None,
-        wantodance_fps: Optional[float] = 30,
-        wantodance_keyframes: Optional[list[Image.Image]] = None,
-        wantodance_keyframes_mask: Optional[list[int]] = None,
+        wantodance_music_path: str = None,
+        wantodance_reference_image: Image.Image = None,
+        wantodance_fps: float = 30,
+        wantodance_keyframes: list[Image.Image] = None,
+        wantodance_keyframes_mask: list[int] = None,
         framewise_decoding: bool = False,
         # progress_bar
         progress_bar_cmd=tqdm,
-        output_type: Optional[Literal["quantized", "floatpoint"]] = "quantized",
+        output_type: Literal["quantized", "floatpoint"] = "quantized",
     ):
         # Scheduler
         self.scheduler.set_timesteps(num_inference_steps, denoising_strength=denoising_strength, shift=sigma_shift)
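As above, the Wan hunk only removes `Optional[...]` and annotates `num_frames`, so behavior is unchanged. A hedged image-to-video sketch over the parameters listed in the signature, assuming a `WanVideoPipeline` instance `pipe`; the image path and prompt are invented:

```python
from PIL import Image

first_frame = Image.open("first_frame.png").convert("RGB")  # illustrative path

result = pipe(
    prompt="the camera slowly pans right across a rainy neon street",
    input_image=first_frame,   # image-to-video branch
    seed=42,
    height=480,
    width=832,
    num_frames=81,
    cfg_scale=5.0,
    num_inference_steps=50,
    sigma_shift=5.0,
    tiled=True,
    output_type="quantized",   # Literal["quantized", "floatpoint"] per the signature
)
```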
@@ -1,6 +1,7 @@
 import importlib, inspect, pkgutil, traceback, torch, os, re
 from typing import Union, List, Optional, Tuple, Iterable, Dict
+from contextlib import contextmanager

 import streamlit as st
 from diffsynth import ModelConfig
 from diffsynth.diffusion.base_pipeline import ControlNetInput
@@ -280,4 +281,3 @@ def launch_webui():
         print(f"unsupported result format: {result}")
-
 launch_webui()
 # streamlit run examples/dev_tools/webui.py --server.fileWatcherType none