mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-22 08:40:47 +00:00
update examples and downloaders
This commit is contained in:
@@ -48,23 +48,129 @@ preset_models_on_huggingface = {
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
preset_models_on_modelscope = {
|
preset_models_on_modelscope = {
|
||||||
|
# Hunyuan DiT
|
||||||
"HunyuanDiT": [
|
"HunyuanDiT": [
|
||||||
("modelscope/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"),
|
("modelscope/HunyuanDiT", "t2i/clip_text_encoder/pytorch_model.bin", "models/HunyuanDiT/t2i/clip_text_encoder"),
|
||||||
("modelscope/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"),
|
("modelscope/HunyuanDiT", "t2i/mt5/pytorch_model.bin", "models/HunyuanDiT/t2i/mt5"),
|
||||||
("modelscope/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"),
|
("modelscope/HunyuanDiT", "t2i/model/pytorch_model_ema.pt", "models/HunyuanDiT/t2i/model"),
|
||||||
("modelscope/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"),
|
("modelscope/HunyuanDiT", "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix"),
|
||||||
],
|
],
|
||||||
|
# Stable Video Diffusion
|
||||||
"stable-video-diffusion-img2vid-xt": [
|
"stable-video-diffusion-img2vid-xt": [
|
||||||
("AI-ModelScope/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"),
|
("AI-ModelScope/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"),
|
||||||
],
|
],
|
||||||
|
# ExVideo
|
||||||
"ExVideo-SVD-128f-v1": [
|
"ExVideo-SVD-128f-v1": [
|
||||||
("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"),
|
("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"),
|
||||||
],
|
],
|
||||||
|
# Stable Diffusion
|
||||||
|
"StableDiffusion_v15": [
|
||||||
|
("AI-ModelScope/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "models/stable_diffusion"),
|
||||||
|
],
|
||||||
|
"DreamShaper_8": [
|
||||||
|
("sd_lora/dreamshaper_8", "dreamshaper_8.safetensors", "models/stable_diffusion"),
|
||||||
|
],
|
||||||
|
"AingDiffusion_v12": [
|
||||||
|
("sd_lora/aingdiffusion_v12", "aingdiffusion_v12.safetensors", "models/stable_diffusion"),
|
||||||
|
],
|
||||||
|
"Flat2DAnimerge_v45Sharp": [
|
||||||
|
("sd_lora/Flat-2D-Animerge", "flat2DAnimerge_v45Sharp.safetensors", "models/stable_diffusion"),
|
||||||
|
],
|
||||||
|
# Textual Inversion
|
||||||
|
"TextualInversion_VeryBadImageNegative_v1.3": [
|
||||||
|
("sd_lora/verybadimagenegative_v1.3", "verybadimagenegative_v1.3.pt", "models/textual_inversion"),
|
||||||
|
],
|
||||||
|
# Stable Diffusion XL
|
||||||
|
"StableDiffusionXL_v1": [
|
||||||
|
("AI-ModelScope/stable-diffusion-xl-base-1.0", "sd_xl_base_1.0.safetensors", "models/stable_diffusion_xl"),
|
||||||
|
],
|
||||||
|
"BluePencilXL_v200": [
|
||||||
|
("sd_lora/bluePencilXL_v200", "bluePencilXL_v200.safetensors", "models/stable_diffusion_xl"),
|
||||||
|
],
|
||||||
|
"StableDiffusionXL_Turbo": [
|
||||||
|
("AI-ModelScope/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"),
|
||||||
|
],
|
||||||
|
# ControlNet
|
||||||
|
"ControlNet_v11f1p_sd15_depth": [
|
||||||
|
("AI-ModelScope/ControlNet-v1-1", "control_v11f1p_sd15_depth.pth", "models/ControlNet"),
|
||||||
|
("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators")
|
||||||
|
],
|
||||||
|
"ControlNet_v11p_sd15_softedge": [
|
||||||
|
("AI-ModelScope/ControlNet-v1-1", "control_v11p_sd15_softedge.pth", "models/ControlNet"),
|
||||||
|
("sd_lora/Annotators", "ControlNetHED.pth", "models/Annotators")
|
||||||
|
],
|
||||||
|
"ControlNet_v11f1e_sd15_tile": [
|
||||||
|
("AI-ModelScope/ControlNet-v1-1", "control_v11f1e_sd15_tile.pth", "models/ControlNet")
|
||||||
|
],
|
||||||
|
"ControlNet_v11p_sd15_lineart": [
|
||||||
|
("AI-ModelScope/ControlNet-v1-1", "control_v11p_sd15_lineart.pth", "models/ControlNet"),
|
||||||
|
("sd_lora/Annotators", "sk_model.pth", "models/Annotators"),
|
||||||
|
("sd_lora/Annotators", "sk_model2.pth", "models/Annotators")
|
||||||
|
],
|
||||||
|
# AnimateDiff
|
||||||
|
"AnimateDiff_v2": [
|
||||||
|
("Shanghai_AI_Laboratory/animatediff", "mm_sd_v15_v2.ckpt", "models/AnimateDiff"),
|
||||||
|
],
|
||||||
|
"AnimateDiff_xl_beta": [
|
||||||
|
("Shanghai_AI_Laboratory/animatediff", "mm_sdxl_v10_beta.ckpt", "models/AnimateDiff"),
|
||||||
|
],
|
||||||
|
# RIFE
|
||||||
|
"RIFE": [
|
||||||
|
("Damo_XR_Lab/cv_rife_video-frame-interpolation", "flownet.pkl", "models/RIFE"),
|
||||||
|
],
|
||||||
|
# Beautiful Prompt
|
||||||
|
"BeautifulPrompt": [
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "generation_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "model.safetensors", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "special_tokens_map.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
("AI-ModelScope/pai-bloom-1b1-text2prompt-sd", "tokenizer_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"),
|
||||||
|
],
|
||||||
|
# Translator
|
||||||
|
"opus-mt-zh-en": [
|
||||||
|
("moxying/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "generation_config.json", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "metadata.json", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "pytorch_model.bin", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "source.spm", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "target.spm", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "tokenizer_config.json", "models/translator/opus-mt-zh-en"),
|
||||||
|
("moxying/opus-mt-zh-en", "vocab.json", "models/translator/opus-mt-zh-en"),
|
||||||
|
],
|
||||||
|
# IP-Adapter
|
||||||
|
"IP-Adapter-SD": [
|
||||||
|
("AI-ModelScope/IP-Adapter", "models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion/image_encoder"),
|
||||||
|
("AI-ModelScope/IP-Adapter", "models/ip-adapter_sd15.bin", "models/IpAdapter/stable_diffusion"),
|
||||||
|
],
|
||||||
|
"IP-Adapter-SDXL": [
|
||||||
|
("AI-ModelScope/IP-Adapter", "sdxl_models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion_xl/image_encoder"),
|
||||||
|
("AI-ModelScope/IP-Adapter", "sdxl_models/ip-adapter_sdxl.bin", "models/IpAdapter/stable_diffusion_xl"),
|
||||||
|
],
|
||||||
}
|
}
|
||||||
Preset_model_id: TypeAlias = Literal[
|
Preset_model_id: TypeAlias = Literal[
|
||||||
"HunyuanDiT",
|
"HunyuanDiT",
|
||||||
"stable-video-diffusion-img2vid-xt",
|
"stable-video-diffusion-img2vid-xt",
|
||||||
"ExVideo-SVD-128f-v1"
|
"ExVideo-SVD-128f-v1",
|
||||||
|
"StableDiffusion_v15",
|
||||||
|
"DreamShaper_8",
|
||||||
|
"AingDiffusion_v12",
|
||||||
|
"Flat2DAnimerge_v45Sharp",
|
||||||
|
"TextualInversion_VeryBadImageNegative_v1.3",
|
||||||
|
"StableDiffusionXL_v1",
|
||||||
|
"BluePencilXL_v200",
|
||||||
|
"StableDiffusionXL_Turbo",
|
||||||
|
"ControlNet_v11f1p_sd15_depth",
|
||||||
|
"ControlNet_v11p_sd15_softedge",
|
||||||
|
"ControlNet_v11f1e_sd15_tile",
|
||||||
|
"ControlNet_v11p_sd15_lineart",
|
||||||
|
"AnimateDiff_v2",
|
||||||
|
"AnimateDiff_xl_beta",
|
||||||
|
"RIFE",
|
||||||
|
"BeautifulPrompt",
|
||||||
|
"opus-mt-zh-en",
|
||||||
|
"IP-Adapter-SD",
|
||||||
|
"IP-Adapter-SDXL",
|
||||||
]
|
]
|
||||||
Preset_model_website: TypeAlias = Literal[
|
Preset_model_website: TypeAlias = Literal[
|
||||||
"HuggingFace",
|
"HuggingFace",
|
||||||
@@ -80,6 +186,26 @@ website_to_download_fn = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download_models(
    model_id_list: List[Preset_model_id] = [],
    downloading_priority: List[Preset_model_website] = ["ModelScope", "HuggingFace"],
):
    """Download the files of the requested preset models and return their local paths.

    For every preset model id, the websites in ``downloading_priority`` are
    tried in order. Each preset entry is a ``(repo_id, origin_file_path,
    local_dir)`` tuple; the file is stored as
    ``local_dir/basename(origin_file_path)``.

    Args:
        model_id_list: Preset model ids to download. The default list is only
            read, never mutated.
        downloading_priority: Website names, tried in the given order. A file
            that was already obtained from an earlier website is not
            requested again from a later one.

    Returns:
        The local paths of all files that exist on disk after their download
        call, in the order they were obtained.
    """
    downloaded_files = []
    for model_id in model_id_list:
        for website in downloading_priority:
            if model_id in website_to_preset_models[website]:
                # Use a dedicated name for the repository id: reusing `model_id`
                # here would overwrite the preset key and break the lookup for
                # the next website, so the fallback would never be attempted.
                for repo_id, origin_file_path, local_dir in website_to_preset_models[website][model_id]:
                    # Check if the file is downloaded.
                    file_to_download = os.path.join(local_dir, os.path.basename(origin_file_path))
                    if file_to_download in downloaded_files:
                        continue
                    # Download
                    website_to_download_fn[website](repo_id, origin_file_path, local_dir)
                    # `isfile` also copes with a failed download that never
                    # created `local_dir` (os.listdir would raise there).
                    if os.path.isfile(file_to_download):
                        downloaded_files.append(file_to_download)
    return downloaded_files
|
||||||
|
|
||||||
|
|
||||||
class ModelManager:
|
class ModelManager:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -94,28 +220,19 @@ class ModelManager:
|
|||||||
self.model = {}
|
self.model = {}
|
||||||
self.model_path = {}
|
self.model_path = {}
|
||||||
self.textual_inversion_dict = {}
|
self.textual_inversion_dict = {}
|
||||||
downloaded_files = self.download_models(model_id_list, downloading_priority)
|
downloaded_files = download_models(model_id_list, downloading_priority)
|
||||||
self.load_models(downloaded_files + file_path_list)
|
self.load_models(downloaded_files + file_path_list)
|
||||||
|
|
||||||
def download_models(
|
def load_model_from_origin(
|
||||||
self,
|
self,
|
||||||
model_id_list: List[Preset_model_id] = [],
|
download_from: Preset_model_website = "ModelScope",
|
||||||
downloading_priority: List[Preset_model_website] = ["ModelScope", "HuggingFace"],
|
model_id = "",
|
||||||
|
origin_file_path = "",
|
||||||
|
local_dir = ""
|
||||||
):
|
):
|
||||||
downloaded_files = []
|
website_to_download_fn[download_from](model_id, origin_file_path, local_dir)
|
||||||
for model_id in model_id_list:
|
file_to_download = os.path.join(local_dir, os.path.basename(origin_file_path))
|
||||||
for website in downloading_priority:
|
self.load_model(file_to_download)
|
||||||
if model_id in website_to_preset_models[website]:
|
|
||||||
for model_id, origin_file_path, local_dir in website_to_preset_models[website][model_id]:
|
|
||||||
# Check if the file is downloaded.
|
|
||||||
file_to_download = os.path.join(local_dir, os.path.basename(origin_file_path))
|
|
||||||
if file_to_download in downloaded_files:
|
|
||||||
continue
|
|
||||||
# Download
|
|
||||||
website_to_download_fn[website](model_id, origin_file_path, local_dir)
|
|
||||||
if os.path.basename(origin_file_path) in os.listdir(local_dir):
|
|
||||||
downloaded_files.append(file_to_download)
|
|
||||||
return downloaded_files
|
|
||||||
|
|
||||||
def is_stable_video_diffusion(self, state_dict):
|
def is_stable_video_diffusion(self, state_dict):
|
||||||
param_name = "model.diffusion_model.output_blocks.9.1.time_stack.0.norm_in.weight"
|
param_name = "model.diffusion_model.output_blocks.9.1.time_stack.0.norm_in.weight"
|
||||||
@@ -158,7 +275,7 @@ class ModelManager:
|
|||||||
|
|
||||||
def is_translator(self, state_dict):
|
def is_translator(self, state_dict):
|
||||||
param_name = "model.encoder.layers.5.self_attn_layer_norm.weight"
|
param_name = "model.encoder.layers.5.self_attn_layer_norm.weight"
|
||||||
return param_name in state_dict and len(state_dict) == 254
|
return param_name in state_dict and len(state_dict) == 258
|
||||||
|
|
||||||
def is_ipadapter(self, state_dict):
|
def is_ipadapter(self, state_dict):
|
||||||
return "image_proj" in state_dict and "ip_adapter" in state_dict and state_dict["image_proj"]["proj.weight"].shape == torch.Size([3072, 1024])
|
return "image_proj" in state_dict and "ip_adapter" in state_dict and state_dict["image_proj"]["proj.weight"].shape == torch.Size([3072, 1024])
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class SDIpAdapter(torch.nn.Module):
|
|||||||
|
|
||||||
def set_less_adapter(self):
|
def set_less_adapter(self):
|
||||||
# IP-Adapter for SD v1.5 doesn't support this feature.
|
# IP-Adapter for SD v1.5 doesn't support this feature.
|
||||||
self.set_full_adapter(self)
|
self.set_full_adapter()
|
||||||
|
|
||||||
def forward(self, hidden_states, scale=1.0):
|
def forward(self, hidden_states, scale=1.0):
|
||||||
hidden_states = self.image_proj(hidden_states)
|
hidden_states = self.image_proj(hidden_states)
|
||||||
|
|||||||
@@ -87,6 +87,7 @@ class SDXLImagePipeline(torch.nn.Module):
|
|||||||
input_image=None,
|
input_image=None,
|
||||||
ipadapter_images=None,
|
ipadapter_images=None,
|
||||||
ipadapter_scale=1.0,
|
ipadapter_scale=1.0,
|
||||||
|
ipadapter_use_instant_style=False,
|
||||||
controlnet_image=None,
|
controlnet_image=None,
|
||||||
denoising_strength=1.0,
|
denoising_strength=1.0,
|
||||||
height=1024,
|
height=1024,
|
||||||
@@ -134,6 +135,10 @@ class SDXLImagePipeline(torch.nn.Module):
|
|||||||
|
|
||||||
# IP-Adapter
|
# IP-Adapter
|
||||||
if ipadapter_images is not None:
|
if ipadapter_images is not None:
|
||||||
|
if ipadapter_use_instant_style:
|
||||||
|
self.ipadapter.set_less_adapter()
|
||||||
|
else:
|
||||||
|
self.ipadapter.set_full_adapter()
|
||||||
ipadapter_image_encoding = self.ipadapter_image_encoder(ipadapter_images)
|
ipadapter_image_encoding = self.ipadapter_image_encoder(ipadapter_images)
|
||||||
ipadapter_kwargs_list_posi = self.ipadapter(ipadapter_image_encoding, scale=ipadapter_scale)
|
ipadapter_kwargs_list_posi = self.ipadapter(ipadapter_image_encoding, scale=ipadapter_scale)
|
||||||
ipadapter_kwargs_list_nega = self.ipadapter(torch.zeros_like(ipadapter_image_encoding))
|
ipadapter_kwargs_list_nega = self.ipadapter(torch.zeros_like(ipadapter_image_encoding))
|
||||||
|
|||||||
@@ -41,6 +41,10 @@ class SDXLPrompter(Prompter):
|
|||||||
add_text_embeds, prompt_emb_2 = text_encoder_2(input_ids_2, clip_skip=clip_skip_2)
|
add_text_embeds, prompt_emb_2 = text_encoder_2(input_ids_2, clip_skip=clip_skip_2)
|
||||||
|
|
||||||
# Merge
|
# Merge
|
||||||
|
if prompt_emb_1.shape[0] != prompt_emb_2.shape[0]:
|
||||||
|
max_batch_size = min(prompt_emb_1.shape[0], prompt_emb_2.shape[0])
|
||||||
|
prompt_emb_1 = prompt_emb_1[: max_batch_size]
|
||||||
|
prompt_emb_2 = prompt_emb_2[: max_batch_size]
|
||||||
prompt_emb = torch.concatenate([prompt_emb_1, prompt_emb_2], dim=-1)
|
prompt_emb = torch.concatenate([prompt_emb_1, prompt_emb_2], dim=-1)
|
||||||
|
|
||||||
# For very long prompt, we only use the first 77 tokens to compute `add_text_embeds`.
|
# For very long prompt, we only use the first 77 tokens to compute `add_text_embeds`.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from diffsynth import SDVideoPipelineRunner
|
from diffsynth import SDVideoPipelineRunner, download_models
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
||||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||||
@@ -9,7 +9,13 @@ from diffsynth import SDVideoPipelineRunner
|
|||||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||||
|
download_models([
|
||||||
|
"AingDiffusion_v12",
|
||||||
|
"AnimateDiff_v2",
|
||||||
|
"ControlNet_v11p_sd15_lineart",
|
||||||
|
"ControlNet_v11f1e_sd15_tile",
|
||||||
|
"TextualInversion_VeryBadImageNegative_v1.3"
|
||||||
|
])
|
||||||
# The original video in the example is https://www.bilibili.com/video/BV1iG411a7sQ/.
|
# The original video in the example is https://www.bilibili.com/video/BV1iG411a7sQ/.
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
@@ -63,7 +69,7 @@ config = {
|
|||||||
"end_frame_id": 30
|
"end_frame_id": 30
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"output_folder": "data/examples/diffutoon/output",
|
"output_folder": "output",
|
||||||
"fps": 30
|
"fps": 30
|
||||||
},
|
},
|
||||||
"pipeline": {
|
"pipeline": {
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
from diffsynth import SDVideoPipelineRunner
|
from diffsynth import SDVideoPipelineRunner, download_models
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
||||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||||
@@ -14,7 +14,15 @@ import os
|
|||||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||||
|
download_models([
|
||||||
|
"AingDiffusion_v12",
|
||||||
|
"AnimateDiff_v2",
|
||||||
|
"ControlNet_v11p_sd15_lineart",
|
||||||
|
"ControlNet_v11f1e_sd15_tile",
|
||||||
|
"ControlNet_v11f1p_sd15_depth",
|
||||||
|
"ControlNet_v11p_sd15_softedge",
|
||||||
|
"TextualInversion_VeryBadImageNegative_v1.3"
|
||||||
|
])
|
||||||
# The original video in the example is https://www.bilibili.com/video/BV1zu4y1s7Ec/.
|
# The original video in the example is https://www.bilibili.com/video/BV1zu4y1s7Ec/.
|
||||||
|
|
||||||
config_stage_1 = {
|
config_stage_1 = {
|
||||||
@@ -67,7 +75,7 @@ config_stage_1 = {
|
|||||||
"end_frame_id": 30
|
"end_frame_id": 30
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"output_folder": "data/examples/diffutoon_edit/color_video",
|
"output_folder": "output/color_video",
|
||||||
"fps": 25
|
"fps": 25
|
||||||
},
|
},
|
||||||
"smoother_configs": [
|
"smoother_configs": [
|
||||||
@@ -153,7 +161,7 @@ config_stage_2 = {
|
|||||||
"end_frame_id": 30
|
"end_frame_id": 30
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"output_folder": "data/examples/diffutoon_edit/output",
|
"output_folder": "output/edited_video",
|
||||||
"fps": 30
|
"fps": 30
|
||||||
},
|
},
|
||||||
"pipeline": {
|
"pipeline": {
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, save_frames
|
from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, download_models
|
||||||
from diffsynth.extensions.RIFE import RIFESmoother
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors`: [link](https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
# `models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors`: [link](https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||||
@@ -11,8 +10,13 @@ import torch
|
|||||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||||
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)
|
download_models([
|
||||||
|
"Flat2DAnimerge_v45Sharp",
|
||||||
|
"AnimateDiff_v2",
|
||||||
|
"ControlNet_v11p_sd15_lineart",
|
||||||
|
"ControlNet_v11f1e_sd15_tile",
|
||||||
|
"TextualInversion_VeryBadImageNegative_v1.3"
|
||||||
|
])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
@@ -22,7 +26,6 @@ model_manager.load_models([
|
|||||||
"models/AnimateDiff/mm_sd_v15_v2.ckpt",
|
"models/AnimateDiff/mm_sd_v15_v2.ckpt",
|
||||||
"models/ControlNet/control_v11p_sd15_lineart.pth",
|
"models/ControlNet/control_v11p_sd15_lineart.pth",
|
||||||
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||||
"models/RIFE/flownet.pkl"
|
|
||||||
])
|
])
|
||||||
pipe = SDVideoPipeline.from_model_manager(
|
pipe = SDVideoPipeline.from_model_manager(
|
||||||
model_manager,
|
model_manager,
|
||||||
@@ -39,12 +42,11 @@ pipe = SDVideoPipeline.from_model_manager(
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
smoother = RIFESmoother.from_model_manager(model_manager)
|
|
||||||
|
|
||||||
# Load video (we only use 60 frames for quick testing)
|
# Load video (we only use 60 frames for quick testing)
|
||||||
# The original video is here: https://www.bilibili.com/video/BV19w411A7YJ/
|
# The original video is here: https://www.bilibili.com/video/BV19w411A7YJ/
|
||||||
video = VideoData(
|
video = VideoData(
|
||||||
video_file="data/bilibili_videos/៸៸᳐_⩊_៸៸᳐ 66 微笑调查队🌻/៸៸᳐_⩊_៸៸᳐ 66 微笑调查队🌻 - 1.66 微笑调查队🌻(Av278681824,P1).mp4",
|
video_file="data/examples/bilibili/BV19w411A7YJ.mp4",
|
||||||
height=1024, width=1024)
|
height=1024, width=1024)
|
||||||
input_video = [video[i] for i in range(40*60, 41*60)]
|
input_video = [video[i] for i in range(40*60, 41*60)]
|
||||||
|
|
||||||
@@ -59,7 +61,6 @@ output_video = pipe(
|
|||||||
animatediff_batch_size=32, animatediff_stride=16,
|
animatediff_batch_size=32, animatediff_stride=16,
|
||||||
vram_limit_level=0,
|
vram_limit_level=0,
|
||||||
)
|
)
|
||||||
output_video = smoother(output_video)
|
|
||||||
|
|
||||||
# Save video
|
# Save video
|
||||||
save_video(output_video, "output_video.mp4", fps=60)
|
save_video(output_video, "output_video.mp4", fps=60)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from diffsynth import save_video, ModelManager, SVDVideoPipeline, HunyuanDiTImagePipeline
|
from diffsynth import save_video, ModelManager, SVDVideoPipeline, HunyuanDiTImagePipeline, download_models
|
||||||
from diffsynth import ModelManager
|
from diffsynth import ModelManager
|
||||||
import torch, os
|
import torch, os
|
||||||
|
|
||||||
@@ -31,7 +31,14 @@ import torch, os
|
|||||||
def generate_image():
|
def generate_image():
|
||||||
# Load models
|
# Load models
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["HunyuanDiT"])
|
download_models(["HunyuanDiT"])
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
|
||||||
|
file_path_list=[
|
||||||
|
"models/HunyuanDiT/t2i/clip_text_encoder/pytorch_model.bin",
|
||||||
|
"models/HunyuanDiT/t2i/mt5/pytorch_model.bin",
|
||||||
|
"models/HunyuanDiT/t2i/model/pytorch_model_ema.pt",
|
||||||
|
"models/HunyuanDiT/t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin",
|
||||||
|
])
|
||||||
pipe = HunyuanDiTImagePipeline.from_model_manager(model_manager)
|
pipe = HunyuanDiTImagePipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
# Generate an image
|
# Generate an image
|
||||||
@@ -47,7 +54,12 @@ def generate_image():
|
|||||||
|
|
||||||
def generate_video(image):
|
def generate_video(image):
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"])
|
download_models(["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"])
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
|
||||||
|
file_path_list=[
|
||||||
|
"models/stable_video_diffusion/svd_xt.safetensors",
|
||||||
|
"models/stable_video_diffusion/model.fp16.safetensors",
|
||||||
|
])
|
||||||
pipe = SVDVideoPipeline.from_model_manager(model_manager)
|
pipe = SVDVideoPipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
# Generate a video
|
# Generate a video
|
||||||
@@ -65,7 +77,12 @@ def generate_video(image):
|
|||||||
|
|
||||||
def upscale_video(image, video):
|
def upscale_video(image, video):
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"])
|
download_models(["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"])
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
|
||||||
|
file_path_list=[
|
||||||
|
"models/stable_video_diffusion/svd_xt.safetensors",
|
||||||
|
"models/stable_video_diffusion/model.fp16.safetensors",
|
||||||
|
])
|
||||||
pipe = SVDVideoPipeline.from_model_manager(model_manager)
|
pipe = SVDVideoPipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
# Generate a video
|
# Generate a video
|
||||||
|
|||||||
@@ -1,3 +1,44 @@
|
|||||||
# IP-Adapter
|
# IP-Adapter
|
||||||
|
|
||||||
The features of IP-Adapter in DiffSynth Studio is not completed. Please wait for us.
|
IP-Adapter is an interesting model, which can adopt the content or style of another image to generate a new image.
|
||||||
|
|
||||||
|
## Example: Content Controlling in Stable Diffusion
|
||||||
|
|
||||||
|
Based on Stable Diffusion, we can transfer the object to another scene. See [`sd_ipadapter.py`](./sd_ipadapter.py).
|
||||||
|
|
||||||
|
|First, we generate a car. The prompt is "masterpiece, best quality, a car".|Next, utilizing IP-Adapter, we move the car to the road. The prompt is "masterpiece, best quality, a car running on the road".|
|
||||||
|
|-|-|
|
||||||
|
|||
|
||||||
|
|
||||||
|
## Example: Content and Style Controlling in Stable Diffusion XL
|
||||||
|
|
||||||
|
The IP-Adapter model based on Stable Diffusion XL is more powerful. You have the option to use the content or style. See [`sdxl_ipadapter.py`](./sdxl_ipadapter.py).
|
||||||
|
|
||||||
|
* Content controlling (original usage of IP-Adapter)
|
||||||
|
|
||||||
|
|First, we generate a rabbit.|Next, enable IP-Adapter and let the rabbit jump.|For comparison, disable IP-Adapter to see the generated image.|
|
||||||
|
|-|-|-|
|
||||||
|
||||
|
||||||
|
|
||||||
|
|
||||||
|
* Style controlling (InstantStyle)
|
||||||
|
|
||||||
|
|First, we generate a rabbit.|Next, enable InstantStyle and convert the rabbit to a cat.|For comparison, disable IP-Adapter to see the generated image.|
|
||||||
|
|-|-|-|
|
||||||
|
||||
|
||||||
|
|
||||||
|
## Example: Image Fusing (Experimental)
|
||||||
|
|
||||||
|
Since IP-Adapter can control the content based on more than one image, we can do something interesting. See [`sdxl_ipadapter_multi_reference.py`](sdxl_ipadapter_multi_reference.py).
|
||||||
|
|
||||||
|
We have two pokemons here:
|
||||||
|
|
||||||
|
|Charizard|Pikachu|
|
||||||
|
|-|-|
|
||||||
|
|||
|
||||||
|
|
||||||
|
Fuse!
|
||||||
|
|
||||||
|
|Pikazard ???|
|
||||||
|
|-|
|
||||||
|
||
|
||||||
|
|||||||
38
examples/Ip-Adapter/sd_ipadapter.py
Normal file
38
examples/Ip-Adapter/sd_ipadapter.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
from diffsynth import ModelManager, SDImagePipeline, download_models
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
# Download models (automatically)
|
||||||
|
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
|
# `models/IpAdapter/stable_diffusion/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors)
|
||||||
|
# `models/IpAdapter/stable_diffusion/ip-adapter_sd15.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter_sd15.bin)
|
||||||
|
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||||
|
download_models(["DreamShaper_8", "IP-Adapter-SD", "TextualInversion_VeryBadImageNegative_v1.3"])
|
||||||
|
|
||||||
|
# Load models
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
|
model_manager.load_textual_inversions("models/textual_inversion")
|
||||||
|
model_manager.load_models([
|
||||||
|
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
||||||
|
"models/IpAdapter/stable_diffusion/image_encoder/model.safetensors",
|
||||||
|
"models/IpAdapter/stable_diffusion/ip-adapter_sd15.bin"
|
||||||
|
])
|
||||||
|
pipe = SDImagePipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
|
torch.manual_seed(1)
|
||||||
|
style_image = pipe(
|
||||||
|
prompt="masterpiece, best quality, a car",
|
||||||
|
negative_prompt="verybadimagenegative_v1.3",
|
||||||
|
cfg_scale=7, clip_skip=2,
|
||||||
|
height=512, width=512, num_inference_steps=50,
|
||||||
|
)
|
||||||
|
style_image.save("car.jpg")
|
||||||
|
|
||||||
|
image = pipe(
|
||||||
|
prompt="masterpiece, best quality, a car running on the road",
|
||||||
|
negative_prompt="verybadimagenegative_v1.3",
|
||||||
|
cfg_scale=7, clip_skip=2,
|
||||||
|
height=512, width=512, num_inference_steps=50,
|
||||||
|
ipadapter_images=[style_image], ipadapter_scale=1.0
|
||||||
|
)
|
||||||
|
image.save("car_on_the_road.jpg")
|
||||||
@@ -1,36 +1,61 @@
|
|||||||
from diffsynth import ModelManager, SDXLImagePipeline
|
from diffsynth import ModelManager, SDXLImagePipeline, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
||||||
# `models/IpAdapter/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
|
# `models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
|
||||||
# `models/IpAdapter/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.safetensors)
|
# `models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.bin)
|
||||||
|
download_models(["StableDiffusionXL_v1", "IP-Adapter-SDXL"])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
model_manager.load_models([
|
model_manager.load_models([
|
||||||
"models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
|
"models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
|
||||||
"models/IpAdapter/image_encoder/model.safetensors",
|
"models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors",
|
||||||
"models/IpAdapter/ip-adapter_sdxl.bin"
|
"models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin"
|
||||||
])
|
])
|
||||||
pipe = SDXLImagePipeline.from_model_manager(model_manager)
|
pipe = SDXLImagePipeline.from_model_manager(model_manager)
|
||||||
pipe.ipadapter.set_less_adapter()
|
|
||||||
|
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(123456)
|
||||||
style_image = pipe(
|
style_image = pipe(
|
||||||
prompt="Starry Night, blue sky, by van Gogh",
|
prompt="a rabbit in a garden, colorful flowers",
|
||||||
negative_prompt="dark, gray",
|
negative_prompt="anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
|
||||||
cfg_scale=5,
|
cfg_scale=5,
|
||||||
height=1024, width=1024, num_inference_steps=30,
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
)
|
)
|
||||||
style_image.save("style_image.jpg")
|
style_image.save("rabbit.jpg")
|
||||||
|
|
||||||
image = pipe(
|
image = pipe(
|
||||||
prompt="a cat",
|
prompt="a cat",
|
||||||
negative_prompt="",
|
negative_prompt="",
|
||||||
cfg_scale=5,
|
cfg_scale=5,
|
||||||
height=1024, width=1024, num_inference_steps=30,
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
ipadapter_images=[style_image]
|
ipadapter_images=[style_image], ipadapter_use_instant_style=True
|
||||||
)
|
)
|
||||||
image.save("transferred_image.jpg")
|
image.save("rabbit_to_cat.jpg")
|
||||||
|
|
||||||
|
image = pipe(
|
||||||
|
prompt="a rabbit is jumping",
|
||||||
|
negative_prompt="",
|
||||||
|
cfg_scale=5,
|
||||||
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
|
ipadapter_images=[style_image], ipadapter_use_instant_style=False, ipadapter_scale=0.5
|
||||||
|
)
|
||||||
|
image.save("rabbit_to_jumping_rabbit.jpg")
|
||||||
|
|
||||||
|
image = pipe(
|
||||||
|
prompt="a cat",
|
||||||
|
negative_prompt="",
|
||||||
|
cfg_scale=5,
|
||||||
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
|
)
|
||||||
|
image.save("rabbit_to_cat_without_ipa.jpg")
|
||||||
|
|
||||||
|
image = pipe(
|
||||||
|
prompt="a rabbit is jumping",
|
||||||
|
negative_prompt="",
|
||||||
|
cfg_scale=5,
|
||||||
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
|
)
|
||||||
|
image.save("rabbit_to_jumping_rabbit_without_ipa.jpg")
|
||||||
34
examples/Ip-Adapter/sdxl_ipadapter_multi_reference.py
Normal file
34
examples/Ip-Adapter/sdxl_ipadapter_multi_reference.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from diffsynth import ModelManager, SDXLImagePipeline, download_models
|
||||||
|
import torch, requests
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
# Download models (automatically)
|
||||||
|
# `models/stable_diffusion_xl/bluePencilXL_v200.safetensors`: [link](https://civitai.com/api/download/models/245614?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
|
# `models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
|
||||||
|
# `models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.bin)
|
||||||
|
download_models(["BluePencilXL_v200", "IP-Adapter-SDXL"])
|
||||||
|
|
||||||
|
# Load models
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
|
model_manager.load_models([
|
||||||
|
"models/stable_diffusion_xl/bluePencilXL_v200.safetensors",
|
||||||
|
"models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors",
|
||||||
|
"models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin"
|
||||||
|
])
|
||||||
|
pipe = SDXLImagePipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
|
image_1 = Image.open(requests.get("https://media.52poke.com/wiki/7/7e/006Charizard.png", stream=True).raw).convert("RGB").resize((1024, 1024))
|
||||||
|
image_1.save("Charizard.jpg")
|
||||||
|
image_2 = Image.open(requests.get("https://media.52poke.com/wiki/0/0d/025Pikachu.png", stream=True).raw).convert("RGB").resize((1024, 1024))
|
||||||
|
image_2.save("Pikachu.jpg")
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
image = pipe(
|
||||||
|
prompt="a pokemon, maybe Charizard, maybe Pikachu",
|
||||||
|
negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
|
||||||
|
cfg_scale=5,
|
||||||
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
|
ipadapter_images=[image_1, image_2], ipadapter_use_instant_style=False, ipadapter_scale=0.7
|
||||||
|
)
|
||||||
|
image.save(f"Pikazard.jpg")
|
||||||
@@ -1,24 +1,31 @@
|
|||||||
from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video
|
from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, download_models
|
||||||
from diffsynth.processors.FastBlend import FastBlendSmoother
|
from diffsynth.processors.FastBlend import FastBlendSmoother
|
||||||
from diffsynth.processors.PILEditor import ContrastEditor, SharpnessEditor
|
from diffsynth.processors.PILEditor import ContrastEditor, SharpnessEditor
|
||||||
from diffsynth.processors.sequencial_processor import SequencialProcessor
|
from diffsynth.processors.sequencial_processor import SequencialProcessor
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
# `models/ControlNet/control_v11f1p_sd15_depth.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth)
|
# `models/ControlNet/control_v11f1p_sd15_depth.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth)
|
||||||
# `models/ControlNet/control_v11p_sd15_softedge.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth)
|
# `models/ControlNet/control_v11p_sd15_softedge.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth)
|
||||||
# `models/Annotators/dpt_hybrid-midas-501f0c75.pt`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt)
|
# `models/Annotators/dpt_hybrid-midas-501f0c75.pt`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt)
|
||||||
# `models/Annotators/ControlNetHED.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth)
|
# `models/Annotators/ControlNetHED.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth)
|
||||||
|
download_models([
|
||||||
|
"ControlNet_v11f1p_sd15_depth",
|
||||||
|
"ControlNet_v11p_sd15_softedge",
|
||||||
|
"DreamShaper_8"
|
||||||
|
])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(
|
||||||
model_manager.load_models([
|
torch_dtype=torch.float16, device="cuda",
|
||||||
"models/stable_diffusion/dreamshaper_8.safetensors",
|
file_path_list=[
|
||||||
"models/ControlNet/control_v11f1p_sd15_depth.pth",
|
"models/stable_diffusion/dreamshaper_8.safetensors",
|
||||||
"models/ControlNet/control_v11p_sd15_softedge.pth"
|
"models/ControlNet/control_v11f1p_sd15_depth.pth",
|
||||||
])
|
"models/ControlNet/control_v11p_sd15_softedge.pth",
|
||||||
|
]
|
||||||
|
)
|
||||||
pipe = SDVideoPipeline.from_model_manager(
|
pipe = SDVideoPipeline.from_model_manager(
|
||||||
model_manager,
|
model_manager,
|
||||||
[
|
[
|
||||||
@@ -38,7 +45,7 @@ smoother = SequencialProcessor([FastBlendSmoother(), ContrastEditor(rate=1.1), S
|
|||||||
|
|
||||||
# Load video
|
# Load video
|
||||||
# Original video: https://pixabay.com/videos/flow-rocks-water-fluent-stones-159627/
|
# Original video: https://pixabay.com/videos/flow-rocks-water-fluent-stones-159627/
|
||||||
video = VideoData(video_file="data/pixabay100/159627 (1080p).mp4", height=512, width=768)
|
video = VideoData(video_file="data/examples/pixabay100/159627 (1080p).mp4", height=512, width=768)
|
||||||
input_video = [video[i] for i in range(128)]
|
input_video = [video[i] for i in range(128)]
|
||||||
|
|
||||||
# Rerender
|
# Rerender
|
||||||
|
|||||||
@@ -20,6 +20,14 @@ models/HunyuanDiT/
|
|||||||
└── diffusion_pytorch_model.bin
|
└── diffusion_pytorch_model.bin
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You can use the following code to download these files:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from diffsynth import download_models
|
||||||
|
|
||||||
|
download_models(["HunyuanDiT"])
|
||||||
|
```
|
||||||
|
|
||||||
## Inference
|
## Inference
|
||||||
|
|
||||||
### Text-to-image with highres-fix
|
### Text-to-image with highres-fix
|
||||||
|
|||||||
@@ -1,16 +1,15 @@
|
|||||||
from diffsynth import ModelManager, SDXLImagePipeline
|
from diffsynth import ModelManager, SDXLImagePipeline, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
||||||
# `models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/`: [link](https://huggingface.co/alibaba-pai/pai-bloom-1b1-text2prompt-sd)
|
# `models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/`: [link](https://huggingface.co/alibaba-pai/pai-bloom-1b1-text2prompt-sd)
|
||||||
# `models/translator/opus-mt-zh-en/`: [link](https://huggingface.co/Helsinki-NLP/opus-mt-zh-en)
|
# `models/translator/opus-mt-zh-en/`: [link](https://huggingface.co/Helsinki-NLP/opus-mt-zh-en)
|
||||||
|
download_models(["StableDiffusionXL_v1", "BeautifulPrompt", "opus-mt-zh-en"])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
model_manager.load_textual_inversions("models/textual_inversion")
|
|
||||||
model_manager.load_models([
|
model_manager.load_models([
|
||||||
"models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
|
"models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
|
||||||
"models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/model.safetensors",
|
"models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/model.safetensors",
|
||||||
|
|||||||
@@ -1,23 +1,23 @@
|
|||||||
from diffsynth import ModelManager, SDImagePipeline, ControlNetConfigUnit
|
from diffsynth import ModelManager, SDImagePipeline, ControlNetConfigUnit, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575?type=Model&format=SafeTensor&size=full&fp=fp16)
|
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575?type=Model&format=SafeTensor&size=full&fp=fp16)
|
||||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||||
# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
|
# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
|
||||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||||
|
download_models(["AingDiffusion_v12", "ControlNet_v11p_sd15_lineart", "ControlNet_v11f1e_sd15_tile"])
|
||||||
|
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
|
||||||
model_manager.load_textual_inversions("models/textual_inversion")
|
file_path_list=[
|
||||||
model_manager.load_models([
|
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
||||||
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||||
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
"models/ControlNet/control_v11p_sd15_lineart.pth"
|
||||||
"models/ControlNet/control_v11p_sd15_lineart.pth"
|
])
|
||||||
])
|
|
||||||
pipe = SDImagePipeline.from_model_manager(
|
pipe = SDImagePipeline.from_model_manager(
|
||||||
model_manager,
|
model_manager,
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
from diffsynth import ModelManager, SDXLImagePipeline
|
from diffsynth import ModelManager, SDXLImagePipeline, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl/bluePencilXL_v200.safetensors`: [link](https://civitai.com/api/download/models/245614?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
# `models/stable_diffusion_xl/bluePencilXL_v200.safetensors`: [link](https://civitai.com/api/download/models/245614?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
|
download_models(["BluePencilXL_v200"])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
from diffsynth import ModelManager, SDXLImagePipeline
|
from diffsynth import ModelManager, SDXLImagePipeline, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl_turbo/sd_xl_turbo_1.0_fp16.safetensors`: [link](https://huggingface.co/stabilityai/sdxl-turbo/resolve/main/sd_xl_turbo_1.0_fp16.safetensors)
|
# `models/stable_diffusion_xl_turbo/sd_xl_turbo_1.0_fp16.safetensors`: [link](https://huggingface.co/stabilityai/sdxl-turbo/resolve/main/sd_xl_turbo_1.0_fp16.safetensors)
|
||||||
|
download_models(["StableDiffusionXL_Turbo"])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
from diffsynth import ModelManager, SDImagePipeline, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, save_frames
|
from diffsynth import ModelManager, SDImagePipeline, SDVideoPipeline, save_video, download_models
|
||||||
from diffsynth.extensions.RIFE import RIFEInterpolater
|
from diffsynth.extensions.RIFE import RIFEInterpolater
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||||
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)
|
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)
|
||||||
|
download_models(["DreamShaper_8", "AnimateDiff_v2", "RIFE"])
|
||||||
|
|
||||||
# Load models
|
# Load models
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
from diffsynth import ModelManager, SDXLVideoPipeline, save_video
|
from diffsynth import ModelManager, SDXLVideoPipeline, save_video, download_models
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
||||||
# `models/AnimateDiff/mm_sdxl_v10_beta.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sdxl_v10_beta.ckpt)
|
# `models/AnimateDiff/mm_sdxl_v10_beta.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sdxl_v10_beta.ckpt)
|
||||||
|
download_models(["StableDiffusionXL_v1", "AnimateDiff_xl_beta"])
|
||||||
|
|
||||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||||
model_manager.load_models([
|
model_manager.load_models([
|
||||||
@@ -25,4 +25,4 @@ video = pipe(
|
|||||||
height=1024, width=1024, num_frames=16,
|
height=1024, width=1024, num_frames=16,
|
||||||
num_inference_steps=100,
|
num_inference_steps=100,
|
||||||
)
|
)
|
||||||
save_video(video, "video.mp4", fps=16)
|
save_video(video, "output_video.mp4", fps=16)
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
from diffsynth import save_video, SDXLImagePipeline, ModelManager, SVDVideoPipeline
|
from diffsynth import save_video, SDXLImagePipeline, ModelManager, SVDVideoPipeline, download_models
|
||||||
from diffsynth import ModelManager
|
from diffsynth import ModelManager
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models (automatically)
|
||||||
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
|
||||||
# `models/stable_video_diffusion/svd_xt.safetensors`: [link](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/resolve/main/svd_xt.safetensors)
|
# `models/stable_video_diffusion/svd_xt.safetensors`: [link](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/resolve/main/svd_xt.safetensors)
|
||||||
|
download_models(["StableDiffusionXL_v1", "stable-video-diffusion-img2vid-xt"])
|
||||||
|
|
||||||
prompt = "cloud, wind"
|
prompt = "cloud, wind"
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
@@ -21,8 +21,7 @@ image = pipe(
|
|||||||
cfg_scale=6,
|
cfg_scale=6,
|
||||||
height=1024, width=1024, num_inference_steps=50,
|
height=1024, width=1024, num_inference_steps=50,
|
||||||
)
|
)
|
||||||
pipe.to("cpu")
|
model_manager.to("cpu")
|
||||||
torch.cuda.empty_cache()
|
|
||||||
|
|
||||||
# 2. Image-to-video using SVD
|
# 2. Image-to-video using SVD
|
||||||
model_manager = ModelManager()
|
model_manager = ModelManager()
|
||||||
@@ -34,4 +33,4 @@ video = pipe(
|
|||||||
motion_bucket_id=127,
|
motion_bucket_id=127,
|
||||||
num_inference_steps=50
|
num_inference_steps=50
|
||||||
)
|
)
|
||||||
save_video(video, "video.mp4", fps=15)
|
save_video(video, "output_video.mp4", fps=15)
|
||||||
|
|||||||
Reference in New Issue
Block a user