Compare commits

...

1 Commit

Author SHA1 Message Date
霜洁
9654ec3c65 fix pipeline args 2026-04-14 10:11:09 +08:00
6 changed files with 102 additions and 102 deletions

View File

@@ -83,7 +83,7 @@ class Flux2ImagePipeline(BasePipeline):
input_image: Image.Image = None, input_image: Image.Image = None,
denoising_strength: float = 1.0, denoising_strength: float = 1.0,
# Edit # Edit
edit_image: Union[Image.Image, List[Image.Image]] = None, edit_image: List[Image.Image] = None,
edit_image_auto_resize: bool = True, edit_image_auto_resize: bool = True,
# Shape # Shape
height: int = 1024, height: int = 1024,

View File

@@ -200,9 +200,9 @@ class FluxImagePipeline(BasePipeline):
# Steps # Steps
num_inference_steps: int = 30, num_inference_steps: int = 30,
# local prompts # local prompts
multidiffusion_prompts=(), multidiffusion_prompts:tuple[str] =(),
multidiffusion_masks=(), multidiffusion_masks:tuple[str]=(),
multidiffusion_scales=(), multidiffusion_scales:tuple[str]=(),
# Kontext # Kontext
kontext_images: Union[list[Image.Image], Image.Image] = None, kontext_images: Union[list[Image.Image], Image.Image] = None,
# ControlNet # ControlNet

View File

@@ -170,45 +170,45 @@ class LTX2AudioVideoPipeline(BasePipeline):
self, self,
# Prompt # Prompt
prompt: str, prompt: str,
negative_prompt: Optional[str] = "", negative_prompt: str = "",
denoising_strength: float = 1.0, denoising_strength: float = 1.0,
# Image-to-video # Image-to-video
input_images: Optional[list[Image.Image]] = None, input_images: list[Image.Image] = None,
input_images_indexes: Optional[list[int]] = [0], input_images_indexes: list[int] = [0],
input_images_strength: Optional[float] = 1.0, input_images_strength: float = 1.0,
# In-Context Video Control # In-Context Video Control
in_context_videos: Optional[list[list[Image.Image]]] = None, in_context_videos: list[list[Image.Image]] = None,
in_context_downsample_factor: Optional[int] = 2, in_context_downsample_factor: int = 2,
# Video-to-video # Video-to-video
retake_video: Optional[list[Image.Image]] = None, retake_video: list[Image.Image] = None,
retake_video_regions: Optional[list[tuple[float, float]]] = None, retake_video_regions: list[tuple[float, float]] = None,
# Audio-to-video # Audio-to-video
retake_audio: Optional[torch.Tensor] = None, retake_audio: torch.Tensor = None,
audio_sample_rate: Optional[int] = 48000, audio_sample_rate: int = 48000,
retake_audio_regions: Optional[list[tuple[float, float]]] = None, retake_audio_regions: list[tuple[float, float]] = None,
# Randomness # Randomness
seed: Optional[int] = None, seed: int = None,
rand_device: Optional[str] = "cpu", rand_device: str = "cpu",
# Shape # Shape
height: Optional[int] = 512, height: int = 512,
width: Optional[int] = 768, width: int = 768,
num_frames: Optional[int] = 121, num_frames: int = 121,
frame_rate: Optional[int] = 24, frame_rate: int = 24,
# Classifier-free guidance # Classifier-free guidance
cfg_scale: Optional[float] = 3.0, cfg_scale: float = 3.0,
# Scheduler # Scheduler
num_inference_steps: Optional[int] = 30, num_inference_steps: int = 30,
# VAE tiling # VAE tiling
tiled: Optional[bool] = True, tiled: bool = True,
tile_size_in_pixels: Optional[int] = 512, tile_size_in_pixels: int = 512,
tile_overlap_in_pixels: Optional[int] = 128, tile_overlap_in_pixels: int = 128,
tile_size_in_frames: Optional[int] = 128, tile_size_in_frames: int = 128,
tile_overlap_in_frames: Optional[int] = 24, tile_overlap_in_frames: int = 24,
# Special Pipelines # Special Pipelines
use_two_stage_pipeline: Optional[bool] = False, use_two_stage_pipeline: bool = False,
stage2_spatial_upsample_factor: Optional[int] = 2, stage2_spatial_upsample_factor: int = 2,
clear_lora_before_state_two: Optional[bool] = False, clear_lora_before_state_two: bool = False,
use_distilled_pipeline: Optional[bool] = False, use_distilled_pipeline: bool = False,
# progress_bar # progress_bar
progress_bar_cmd=tqdm, progress_bar_cmd=tqdm,
): ):

View File

@@ -116,32 +116,32 @@ class MovaAudioVideoPipeline(BasePipeline):
self, self,
# Prompt # Prompt
prompt: str, prompt: str,
negative_prompt: Optional[str] = "", negative_prompt: str = "",
# Image-to-video # Image-to-video
input_image: Optional[Image.Image] = None, input_image: Image.Image = None,
# First-last-frame-to-video # First-last-frame-to-video
end_image: Optional[Image.Image] = None, end_image: Image.Image = None,
# Video-to-video # Video-to-video
denoising_strength: Optional[float] = 1.0, denoising_strength: float = 1.0,
# Randomness # Randomness
seed: Optional[int] = None, seed: int = None,
rand_device: Optional[str] = "cpu", rand_device: str = "cpu",
# Shape # Shape
height: Optional[int] = 352, height: int = 352,
width: Optional[int] = 640, width: int = 640,
num_frames: Optional[int] = 81, num_frames: int = 81,
frame_rate: Optional[int] = 24, frame_rate: int = 24,
# Classifier-free guidance # Classifier-free guidance
cfg_scale: Optional[float] = 5.0, cfg_scale: float = 5.0,
# Boundary # Boundary
switch_DiT_boundary: Optional[float] = 0.9, switch_DiT_boundary: float = 0.9,
# Scheduler # Scheduler
num_inference_steps: Optional[int] = 50, num_inference_steps: int = 50,
sigma_shift: Optional[float] = 5.0, sigma_shift: float = 5.0,
# VAE tiling # VAE tiling
tiled: Optional[bool] = True, tiled: bool = True,
tile_size: Optional[tuple[int, int]] = (30, 52), tile_size: tuple[int, int] = (30, 52),
tile_stride: Optional[tuple[int, int]] = (15, 26), tile_stride: tuple[int, int] = (15, 26),
# progress_bar # progress_bar
progress_bar_cmd=tqdm, progress_bar_cmd=tqdm,
): ):

View File

@@ -191,81 +191,81 @@ class WanVideoPipeline(BasePipeline):
self, self,
# Prompt # Prompt
prompt: str, prompt: str,
negative_prompt: Optional[str] = "", negative_prompt: str = "",
# Image-to-video # Image-to-video
input_image: Optional[Image.Image] = None, input_image: Image.Image = None,
# First-last-frame-to-video # First-last-frame-to-video
end_image: Optional[Image.Image] = None, end_image: Image.Image = None,
# Video-to-video # Video-to-video
input_video: Optional[list[Image.Image]] = None, input_video: list[Image.Image] = None,
denoising_strength: Optional[float] = 1.0, denoising_strength: float = 1.0,
# Speech-to-video # Speech-to-video
input_audio: Optional[np.array] = None, input_audio: np.array = None,
audio_embeds: Optional[torch.Tensor] = None, audio_embeds: torch.Tensor = None,
audio_sample_rate: Optional[int] = 16000, audio_sample_rate: int = 16000,
s2v_pose_video: Optional[list[Image.Image]] = None, s2v_pose_video: list[Image.Image] = None,
s2v_pose_latents: Optional[torch.Tensor] = None, s2v_pose_latents: torch.Tensor = None,
motion_video: Optional[list[Image.Image]] = None, motion_video: list[Image.Image] = None,
# ControlNet # ControlNet
control_video: Optional[list[Image.Image]] = None, control_video: list[Image.Image] = None,
reference_image: Optional[Image.Image] = None, reference_image: Image.Image = None,
# Camera control # Camera control
camera_control_direction: Optional[Literal["Left", "Right", "Up", "Down", "LeftUp", "LeftDown", "RightUp", "RightDown"]] = None, camera_control_direction: Literal["Left", "Right", "Up", "Down", "LeftUp", "LeftDown", "RightUp", "RightDown"] = None,
camera_control_speed: Optional[float] = 1/54, camera_control_speed: float = 1/54,
camera_control_origin: Optional[tuple] = (0, 0.532139961, 0.946026558, 0.5, 0.5, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0), camera_control_origin: tuple = (0, 0.532139961, 0.946026558, 0.5, 0.5, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0),
# VACE # VACE
vace_video: Optional[list[Image.Image]] = None, vace_video: list[Image.Image] = None,
vace_video_mask: Optional[Image.Image] = None, vace_video_mask: Image.Image = None,
vace_reference_image: Optional[Image.Image] = None, vace_reference_image: Image.Image = None,
vace_scale: Optional[float] = 1.0, vace_scale: float = 1.0,
# Animate # Animate
animate_pose_video: Optional[list[Image.Image]] = None, animate_pose_video: list[Image.Image] = None,
animate_face_video: Optional[list[Image.Image]] = None, animate_face_video: list[Image.Image] = None,
animate_inpaint_video: Optional[list[Image.Image]] = None, animate_inpaint_video: list[Image.Image] = None,
animate_mask_video: Optional[list[Image.Image]] = None, animate_mask_video: list[Image.Image] = None,
# VAP # VAP
vap_video: Optional[list[Image.Image]] = None, vap_video: list[Image.Image] = None,
vap_prompt: Optional[str] = " ", vap_prompt: str = " ",
negative_vap_prompt: Optional[str] = " ", negative_vap_prompt: str = " ",
# Randomness # Randomness
seed: Optional[int] = None, seed: int = None,
rand_device: Optional[str] = "cpu", rand_device: str = "cpu",
# Shape # Shape
height: Optional[int] = 480, height: int = 480,
width: Optional[int] = 832, width: int = 832,
num_frames=81, num_frames: int = 81,
# Classifier-free guidance # Classifier-free guidance
cfg_scale: Optional[float] = 5.0, cfg_scale: float = 5.0,
cfg_merge: Optional[bool] = False, cfg_merge: bool = False,
# Boundary # Boundary
switch_DiT_boundary: Optional[float] = 0.875, switch_DiT_boundary: float = 0.875,
# Scheduler # Scheduler
num_inference_steps: Optional[int] = 50, num_inference_steps: int = 50,
sigma_shift: Optional[float] = 5.0, sigma_shift: float = 5.0,
# Speed control # Speed control
motion_bucket_id: Optional[int] = None, motion_bucket_id: int = None,
# LongCat-Video # LongCat-Video
longcat_video: Optional[list[Image.Image]] = None, longcat_video: list[Image.Image] = None,
# VAE tiling # VAE tiling
tiled: Optional[bool] = True, tiled: bool = True,
tile_size: Optional[tuple[int, int]] = (30, 52), tile_size: tuple[int, int] = (30, 52),
tile_stride: Optional[tuple[int, int]] = (15, 26), tile_stride: tuple[int, int] = (15, 26),
# Sliding window # Sliding window
sliding_window_size: Optional[int] = None, sliding_window_size: int = None,
sliding_window_stride: Optional[int] = None, sliding_window_stride: int = None,
# Teacache # Teacache
tea_cache_l1_thresh: Optional[float] = None, tea_cache_l1_thresh: float = None,
tea_cache_model_id: Optional[str] = "", tea_cache_model_id: str = "",
# WanToDance # WanToDance
wantodance_music_path: Optional[str] = None, wantodance_music_path: str = None,
wantodance_reference_image: Optional[Image.Image] = None, wantodance_reference_image: Image.Image = None,
wantodance_fps: Optional[float] = 30, wantodance_fps: float = 30,
wantodance_keyframes: Optional[list[Image.Image]] = None, wantodance_keyframes: list[Image.Image] = None,
wantodance_keyframes_mask: Optional[list[int]] = None, wantodance_keyframes_mask: list[int] = None,
framewise_decoding: bool = False, framewise_decoding: bool = False,
# progress_bar # progress_bar
progress_bar_cmd=tqdm, progress_bar_cmd=tqdm,
output_type: Optional[Literal["quantized", "floatpoint"]] = "quantized", output_type: Literal["quantized", "floatpoint"] = "quantized",
): ):
# Scheduler # Scheduler
self.scheduler.set_timesteps(num_inference_steps, denoising_strength=denoising_strength, shift=sigma_shift) self.scheduler.set_timesteps(num_inference_steps, denoising_strength=denoising_strength, shift=sigma_shift)

View File

@@ -1,6 +1,7 @@
import importlib, inspect, pkgutil, traceback, torch, os, re import importlib, inspect, pkgutil, traceback, torch, os, re
from typing import Union, List, Optional, Tuple, Iterable, Dict from typing import Union, List, Optional, Tuple, Iterable, Dict
from contextlib import contextmanager from contextlib import contextmanager
import streamlit as st import streamlit as st
from diffsynth import ModelConfig from diffsynth import ModelConfig
from diffsynth.diffusion.base_pipeline import ControlNetInput from diffsynth.diffusion.base_pipeline import ControlNetInput
@@ -280,4 +281,3 @@ def launch_webui():
print(f"unsupported result format: {result}") print(f"unsupported result format: {result}")
launch_webui() launch_webui()
# streamlit run examples/dev_tools/webui.py --server.fileWatcherType none