From 447e75cd068c73707a2cf31847a57127ddde3c29 Mon Sep 17 00:00:00 2001 From: Qing112 Date: Sat, 14 Sep 2024 11:35:01 +0800 Subject: [PATCH] update model_config and downloader --- diffsynth/configs/model_config.py | 129 +++++++++++++++++++++++++++++- diffsynth/models/downloader.py | 16 ++-- 2 files changed, 139 insertions(+), 6 deletions(-) diff --git a/diffsynth/configs/model_config.py b/diffsynth/configs/model_config.py index ad9c24b..c0630a5 100644 --- a/diffsynth/configs/model_config.py +++ b/diffsynth/configs/model_config.py @@ -76,7 +76,6 @@ model_loader_configs = [ (None, "1aafa3cc91716fb6b300cc1cd51b85a3", ["flux_vae_encoder", "flux_vae_decoder"], [FluxVAEEncoder, FluxVAEDecoder], "diffusers"), (None, "21ea55f476dfc4fd135587abb59dfe5d", ["flux_vae_encoder", "flux_vae_decoder"], [FluxVAEEncoder, FluxVAEDecoder], "civitai"), (None, "a29710fea6dddb0314663ee823598e50", ["flux_dit"], [FluxDiT], "civitai"), - (None, "57b02550baab820169365b3ee3afa2c9", ["flux_dit"], [FluxDiT], "civitai"), (None, "280189ee084bca10f70907bf6ce1649d", ["cog_vae_encoder", "cog_vae_decoder"], [CogVAEEncoder, CogVAEDecoder], "diffusers"), (None, "9b9313d104ac4df27991352fec013fd4", ["rife"], [IFNet], "civitai"), (None, "6b7116078c4170bfbeaedc8fe71f6649", ["esrgan"], [RRDBNet], "civitai"), @@ -111,6 +110,118 @@ preset_models_on_huggingface = { "ExVideo-SVD-128f-v1": [ ("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"), ], + # Stable Diffusion + "StableDiffusion_v15": [ + ("benjamin-paine/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "models/stable_diffusion"), + ], + "DreamShaper_8": [ + ("Yntec/Dreamshaper8", "dreamshaper_8.safetensors", "models/stable_diffusion"), + ], + # Textual Inversion + "TextualInversion_VeryBadImageNegative_v1.3": [ + ("gemasai/verybadimagenegative_v1.3", "verybadimagenegative_v1.3.pt", "models/textual_inversion"), + ], + # Stable Diffusion XL + "StableDiffusionXL_v1": [ + ("stabilityai/stable-diffusion-xl-base-1.0", "sd_xl_base_1.0.safetensors", "models/stable_diffusion_xl"), + ], + "BluePencilXL_v200": [ + ("frankjoshua/bluePencilXL_v200", "bluePencilXL_v200.safetensors", "models/stable_diffusion_xl"), + ], + "StableDiffusionXL_Turbo": [ + ("stabilityai/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"), + ], + # Stable Diffusion 3 + "StableDiffusion3": [ + ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips_t5xxlfp16.safetensors", "models/stable_diffusion_3"), + ], + "StableDiffusion3_without_T5": [ + ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips.safetensors", "models/stable_diffusion_3"), + ], + # ControlNet + "ControlNet_v11f1p_sd15_depth": [ + ("lllyasviel/ControlNet-v1-1", "control_v11f1p_sd15_depth.pth", "models/ControlNet"), + ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators") + ], + "ControlNet_v11p_sd15_softedge": [ + ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_softedge.pth", "models/ControlNet"), + ("lllyasviel/Annotators", "ControlNetHED.pth", "models/Annotators") + ], + "ControlNet_v11f1e_sd15_tile": [ + ("lllyasviel/ControlNet-v1-1", "control_v11f1e_sd15_tile.pth", "models/ControlNet") + ], + "ControlNet_v11p_sd15_lineart": [ + ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_lineart.pth", "models/ControlNet"), + ("lllyasviel/Annotators", "sk_model.pth", "models/Annotators"), + ("lllyasviel/Annotators", "sk_model2.pth", "models/Annotators") + ], + "ControlNet_union_sdxl_promax": [ + ("xinsir/controlnet-union-sdxl-1.0", "diffusion_pytorch_model_promax.safetensors", "models/ControlNet/controlnet_union"), + ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators") + ], + # AnimateDiff + "AnimateDiff_v2": [ + ("guoyww/animatediff", "mm_sd_v15_v2.ckpt", "models/AnimateDiff"), + ], + "AnimateDiff_xl_beta": [ + ("guoyww/animatediff", "mm_sdxl_v10_beta.ckpt", "models/AnimateDiff"), + ], + + # Qwen Prompt + "QwenPrompt": [ + ("Qwen/Qwen2-1.5B-Instruct", "config.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "generation_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "model.safetensors", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "special_tokens_map.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "tokenizer.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "tokenizer_config.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "merges.txt", "models/QwenPrompt/qwen2-1.5b-instruct"), + ("Qwen/Qwen2-1.5B-Instruct", "vocab.json", "models/QwenPrompt/qwen2-1.5b-instruct"), + ], + # Beautiful Prompt + "BeautifulPrompt": [ + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "generation_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "model.safetensors", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "special_tokens_map.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "tokenizer.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", "tokenizer_config.json", "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd"), + ], + # Omost prompt + "OmostPrompt":[ + ("lllyasviel/omost-llama-3-8b-4bits", "model-00001-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "model-00002-of-00002.safetensors", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "tokenizer.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "tokenizer_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "generation_config.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "model.safetensors.index.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ("lllyasviel/omost-llama-3-8b-4bits", "special_tokens_map.json", "models/OmostPrompt/omost-llama-3-8b-4bits"), + ], + + # Translator + "opus-mt-zh-en": [ + ("Helsinki-NLP/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "generation_config.json", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "metadata.json", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "pytorch_model.bin", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "source.spm", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "target.spm", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "tokenizer_config.json", "models/translator/opus-mt-zh-en"), + ("Helsinki-NLP/opus-mt-zh-en", "vocab.json", "models/translator/opus-mt-zh-en"), + ], + # IP-Adapter + "IP-Adapter-SD": [ + ("h94/IP-Adapter", "models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion/image_encoder"), + ("h94/IP-Adapter", "models/ip-adapter_sd15.bin", "models/IpAdapter/stable_diffusion"), + ], + "IP-Adapter-SDXL": [ + ("h94/IP-Adapter", "sdxl_models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion_xl/image_encoder"), + ("h94/IP-Adapter", "sdxl_models/ip-adapter_sdxl.bin", "models/IpAdapter/stable_diffusion_xl"), + ], + "SDXL-vae-fp16-fix": [ + ("madebyollin/sdxl-vae-fp16-fix", "diffusion_pytorch_model.safetensors", "models/sdxl-vae-fp16-fix") + ], # Kolors "Kolors": [ ("Kwai-Kolors/Kolors", "text_encoder/config.json", "models/kolors/Kolors/text_encoder"), @@ -135,6 +246,22 @@ preset_models_on_huggingface = { ("black-forest-labs/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"), ("black-forest-labs/FLUX.1-dev", "flux1-dev.safetensors", "models/FLUX/FLUX.1-dev"), ], + # RIFE + "RIFE": [ + ("AlexWortega/RIFE", "flownet.pkl", "models/RIFE"), + ], + # CogVideo + "CogVideoX-5B": [ + ("THUDM/CogVideoX-5b", "text_encoder/config.json", "models/CogVideo/CogVideoX-5b/text_encoder"), + ("THUDM/CogVideoX-5b", "text_encoder/model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/text_encoder"), + ("THUDM/CogVideoX-5b", "text_encoder/model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"), + ("THUDM/CogVideoX-5b", "text_encoder/model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/text_encoder"), + ("THUDM/CogVideoX-5b", "transformer/config.json", "models/CogVideo/CogVideoX-5b/transformer"), + ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model.safetensors.index.json", "models/CogVideo/CogVideoX-5b/transformer"), + ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model-00001-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"), + ("THUDM/CogVideoX-5b", "transformer/diffusion_pytorch_model-00002-of-00002.safetensors", "models/CogVideo/CogVideoX-5b/transformer"), + ("THUDM/CogVideoX-5b", "vae/diffusion_pytorch_model.safetensors", "models/CogVideo/CogVideoX-5b/vae"), + ], } preset_models_on_modelscope = { # Hunyuan DiT diff --git a/diffsynth/models/downloader.py b/diffsynth/models/downloader.py index 861a7f1..6801d71 100644 --- a/diffsynth/models/downloader.py +++ b/diffsynth/models/downloader.py @@ -23,12 +23,18 @@ def download_from_modelscope(model_id, origin_file_path, local_dir): def download_from_huggingface(model_id, origin_file_path, local_dir): os.makedirs(local_dir, exist_ok=True) - if os.path.basename(origin_file_path) in os.listdir(local_dir): - print(f" {os.path.basename(origin_file_path)} has been already in {local_dir}.") - return + file_name = os.path.basename(origin_file_path) + if file_name in os.listdir(local_dir): + return f"{file_name} has already been downloaded to {local_dir}." else: - print(f" Start downloading {os.path.join(local_dir, os.path.basename(origin_file_path))}") - hf_hub_download(model_id, origin_file_path, local_dir=local_dir) + print(f"Start downloading {os.path.join(local_dir, file_name)}") + hf_hub_download(model_id, origin_file_path, local_dir=local_dir) + + downloaded_file_path = os.path.join(local_dir, origin_file_path) + target_file_path = os.path.join(local_dir, file_name) + if downloaded_file_path != target_file_path: + shutil.move(downloaded_file_path, target_file_path) + shutil.rmtree(os.path.join(local_dir, origin_file_path.split("/")[0])) Preset_model_website: TypeAlias = Literal[