diff --git a/diffsynth/configs/model_config.py b/diffsynth/configs/model_config.py index 651d8df..79a6969 100644 --- a/diffsynth/configs/model_config.py +++ b/diffsynth/configs/model_config.py @@ -143,6 +143,9 @@ preset_models_on_modelscope = { "StableDiffusionXL_Turbo": [ ("AI-ModelScope/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"), ], + "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0": [ + ("sd_lora/zyd232_ChineseInkStyle_SDXL_v1_0", "zyd232_ChineseInkStyle_SDXL_v1_0.safetensors", "models/lora"), + ], # Stable Diffusion 3 "StableDiffusion3": [ ("AI-ModelScope/stable-diffusion-3-medium", "sd3_medium_incl_clips_t5xxlfp16.safetensors", "models/stable_diffusion_3"), @@ -228,6 +231,16 @@ preset_models_on_modelscope = { "SDXL-vae-fp16-fix": [ ("AI-ModelScope/sdxl-vae-fp16-fix", "diffusion_pytorch_model.safetensors", "models/sdxl-vae-fp16-fix") ], + # FLUX + "FLUX.1-dev": [ + ("AI-ModelScope/FLUX.1-dev", "text_encoder/model.safetensors", "models/FLUX/FLUX.1-dev/text_encoder"), + ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/config.json", "models/FLUX/FLUX.1-dev/text_encoder_2"), + ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00001-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"), + ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model-00002-of-00002.safetensors", "models/FLUX/FLUX.1-dev/text_encoder_2"), + ("AI-ModelScope/FLUX.1-dev", "text_encoder_2/model.safetensors.index.json", "models/FLUX/FLUX.1-dev/text_encoder_2"), + ("AI-ModelScope/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"), + ("AI-ModelScope/FLUX.1-dev", "flux1-dev.safetensors", "models/FLUX/FLUX.1-dev"), + ] } Preset_model_id: TypeAlias = Literal[ "HunyuanDiT", @@ -257,4 +270,6 @@ Preset_model_id: TypeAlias = Literal[ "Kolors", "SDXL-vae-fp16-fix", "ControlNet_union_sdxl_promax", + "FLUX.1-dev", + "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0", ] \ No newline at end of file diff --git a/diffsynth/pipelines/dancer.py 
b/diffsynth/pipelines/dancer.py index 60400f8..b7746d3 100644 --- a/diffsynth/pipelines/dancer.py +++ b/diffsynth/pipelines/dancer.py @@ -136,6 +136,10 @@ def lets_dance_xl( device = "cuda", vram_limit_level = 0, ): + # 0. Text embedding alignment (only for video processing) + if encoder_hidden_states.shape[0] != sample.shape[0]: + encoder_hidden_states = encoder_hidden_states.repeat(sample.shape[0], 1, 1, 1) + # 1. ControlNet controlnet_insert_block_id = 22 if controlnet is not None and controlnet_frames is not None: diff --git a/examples/image_synthesis/README.md b/examples/image_synthesis/README.md index 63febf8..5fcfa9d 100644 --- a/examples/image_synthesis/README.md +++ b/examples/image_synthesis/README.md @@ -38,6 +38,20 @@ LoRA Training: [`../train/kolors/`](../train/kolors/) |-|-| |![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/53ef6f41-da11-4701-8665-9f64392607bf)|![image_2048](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/66bb7a75-fe31-44e5-90eb-d3140ee4686d)| +Kolors also supports models trained for SD-XL, for example ControlNets and LoRAs. 
See [`kolors_with_sdxl_models.py`](./kolors_with_sdxl_models.py) + +LoRA: https://civitai.com/models/73305/zyd232s-ink-style + +|Base model|with LoRA (alpha=0.5)|with LoRA (alpha=1.0)|with LoRA (alpha=1.5)| +|-|-|-|-| +|![image_0 0](https://github.com/user-attachments/assets/a222eae3-6e0a-4ea6-b301-99e74e2bc11a)|![image_0 5](https://github.com/user-attachments/assets/e429c501-530c-43f6-a30b-9f97996c91a2)|![image_1 0](https://github.com/user-attachments/assets/0ddeed4b-250d-4b5c-a4fa-2db50f63bf1c)|![image_1 5](https://github.com/user-attachments/assets/db35a89d-6325-4422-921e-14fb6ad66c92)| + +ControlNet: https://huggingface.co/xinsir/controlnet-union-sdxl-1.0 + +|Reference image|Depth image|with ControlNet|with ControlNet| +|-|-|-|-| +|![image_0 0](https://github.com/user-attachments/assets/a222eae3-6e0a-4ea6-b301-99e74e2bc11a)|![controlnet_input](https://github.com/user-attachments/assets/d16b2785-bc1f-4184-b170-ae90f1d704c1)|![image_depth_1](https://github.com/user-attachments/assets/90a94780-7b56-4786-8a25-aae118eda171)|![image_depth_2](https://github.com/user-attachments/assets/05eb1309-9c98-49e7-a8ee-f376ceedf18e)| + ### Example: Hunyuan-DiT Example script: [`hunyuan_dit_text_to_image.py`](./hunyuan_dit_text_to_image.py) diff --git a/examples/image_synthesis/flux_text_to_image.py b/examples/image_synthesis/flux_text_to_image.py index 721225a..7cbfdc9 100644 --- a/examples/image_synthesis/flux_text_to_image.py +++ b/examples/image_synthesis/flux_text_to_image.py @@ -1,7 +1,8 @@ import torch -from diffsynth import ModelManager, FluxImagePipeline +from diffsynth import ModelManager, FluxImagePipeline, download_models +download_models(["FLUX.1-dev"]) model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda") model_manager.load_models([ "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", @@ -9,10 +10,11 @@ model_manager.load_models([ "models/FLUX/FLUX.1-dev/ae.safetensors", "models/FLUX/FLUX.1-dev/flux1-dev.safetensors" ]) - pipe = 
FluxImagePipeline.from_model_manager(model_manager) + torch.manual_seed(6) image = pipe( - "Anime style. A girl with long silver hair is under water, wearing a blue dress. Her eyes are blue. Her hair is waving in the water." + "A captivating fantasy magic woman portrait set in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin.", + num_inference_steps=30 ) -image.save("image.jpg") +image.save("image_1024.jpg") diff --git a/examples/image_synthesis/kolors_with_sdxl_models.py b/examples/image_synthesis/kolors_with_sdxl_models.py index 7163909..946d65e 100644 --- a/examples/image_synthesis/kolors_with_sdxl_models.py +++ b/examples/image_synthesis/kolors_with_sdxl_models.py @@ -43,7 +43,7 @@ def run_kolors_with_controlnet(): def run_kolors_with_lora(): - download_models(["Kolors"]) + download_models(["Kolors", "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0"]) model_manager = ModelManager(torch_dtype=torch.float16, device="cuda", file_path_list=[ "models/kolors/Kolors/text_encoder",