rearrange examples

2026-03-23 09:28:12 +00:00 · 2024-06-06 18:50:07 +08:00
parent f6de5eef4d
commit 4d4a095420
20 changed files with 140 additions and 45 deletions
--- a/examples/image_synthesis/README.md
+++ b/examples/image_synthesis/README.md
@@ -0,0 +1,43 @@
+# Image Synthesis
+
+Image synthesis is the base feature of DiffSynth Studio.
+
+### Example: Stable Diffusion
+
+We can generate images with very high resolution. Please see `examples/image_synthesis/sd_text_to_image.py` for more details.
+
+|512*512|1024*1024|2048*2048|4096*4096|
+|-|-|-|-|
+|![512](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/55f679e9-7445-4605-9315-302e93d11370)|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/6fc84611-8da6-4a1f-8fee-9a34eba3b4a5)|![2048](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/9087a73c-9164-4c58-b2a0-effc694143fb)|![4096](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/edee9e71-fc39-4d1c-9ca9-fa52002c67ac)|
+
+### Example: Stable Diffusion XL
+
+Generate images with Stable Diffusion XL. Please see `examples/image_synthesis/sdxl_text_to_image.py` for more details.
+
+|1024*1024|2048*2048|
+|-|-|
+|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/67687748-e738-438c-aee5-96096f09ac90)|![2048](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/584186bc-9855-4140-878e-99541f9a757f)|
+
+### Example: Stable Diffusion XL Turbo
+
+Generate images with Stable Diffusion XL Turbo. See `examples/image_synthesis/sdxl_turbo.py` for more details, but we highly recommend using it in the WebUI.
+
+|"black car"|"red car"|
+|-|-|
+|![black_car](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/7fbfd803-68d4-44f3-8713-8c925fec47d0)|![black_car_to_red_car](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/aaf886e4-c33c-4fd8-98e2-29eef117ba00)|
+
+### Example: Prompt Processing
+
+If you are not a native English speaker, we provide a translation service for you. Our prompter can translate prompts from other languages into English and refine them using "BeautifulPrompt" models. Please see `examples/image_synthesis/sd_prompt_refining.py` for more details.
+
+Prompt: "一个漂亮的女孩". The [translation model](https://huggingface.co/Helsinki-NLP/opus-mt-zh-en) will translate it to English.
+
+|seed=0|seed=1|seed=2|seed=3|
+|-|-|-|-|
+|![0_](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/ebb25ca8-7ce1-4d9e-8081-59a867c70c4d)|![1_](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/a7e79853-3c1a-471a-9c58-c209ec4b76dd)|![2_](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/a292b959-a121-481f-b79c-61cc3346f810)|![3_](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/1c19b54e-5a6f-4d48-960b-a7b2b149bb4c)|
+
+Prompt: "一个漂亮的女孩". The [translation model](https://huggingface.co/Helsinki-NLP/opus-mt-zh-en) will translate it to English. Then the [refining model](https://huggingface.co/alibaba-pai/pai-bloom-1b1-text2prompt-sd) will refine the translated prompt for better visual quality.
+
+|seed=0|seed=1|seed=2|seed=3|
+|-|-|-|-|
+|![0](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/778b1bd9-44e0-46ac-a99c-712b3fc9aaa4)|![1](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/c03479b8-2082-4c6e-8e1c-3582b98686f6)|![2](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/edb33d21-3288-4a55-96ca-a4bfe1b50b00)|![3](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/7848cfc1-cad5-4848-8373-41d24e98e584)|
--- a/examples/image_synthesis/sd_prompt_refining.py
+++ b/examples/image_synthesis/sd_prompt_refining.py
@@ -0,0 +1,31 @@
+from diffsynth import ModelManager, SDXLImagePipeline
+import torch
+
+
+# Download models
+# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
+# `models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/`: [link](https://huggingface.co/alibaba-pai/pai-bloom-1b1-text2prompt-sd)
+# `models/translator/opus-mt-zh-en/`: [link](https://huggingface.co/Helsinki-NLP/opus-mt-zh-en)
+
+
+# Load models
+model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
+model_manager.load_textual_inversions("models/textual_inversion")
+model_manager.load_models([
+    "models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
+    "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd/model.safetensors",
+    "models/translator/opus-mt-zh-en/pytorch_model.bin"
+])
+pipe = SDXLImagePipeline.from_model_manager(model_manager)
+
+prompt = "一个漂亮的女孩"
+negative_prompt = ""
+
+for seed in range(4):  # one image per seed: 0, 1, 2, 3
+    torch.manual_seed(seed)  # seed torch's global RNG before each generation
+    image = pipe(
+        prompt=prompt, negative_prompt=negative_prompt,
+        height=1024, width=1024,
+        num_inference_steps=30
+    )
+    image.save(f"{seed}.jpg")  # output file is named after the seed
--- a/examples/image_synthesis/sd_text_to_image.py
+++ b/examples/image_synthesis/sd_text_to_image.py
@@ -0,0 +1,75 @@
+from diffsynth import ModelManager, SDImagePipeline, ControlNetConfigUnit
+import torch
+
+
+# Download models
+# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575?type=Model&format=SafeTensor&size=full&fp=fp16)
+# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
+# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
+# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
+# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
+
+
+# Load models
+model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
+model_manager.load_textual_inversions("models/textual_inversion")
+model_manager.load_models([
+    "models/stable_diffusion/aingdiffusion_v12.safetensors",
+    "models/ControlNet/control_v11f1e_sd15_tile.pth",
+    "models/ControlNet/control_v11p_sd15_lineart.pth"
+])
+pipe = SDImagePipeline.from_model_manager(
+    model_manager,
+    [  # ControlNet units: one "tile" and one "lineart" processor
+        ControlNetConfigUnit(
+            processor_id="tile",
+            model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
+            scale=0.5
+        ),
+        ControlNetConfigUnit(
+            processor_id="lineart",
+            model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
+            scale=0.7
+        ),
+    ]
+)
+
+prompt = "masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait,"
+negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw,"
+
+torch.manual_seed(0)
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=7.5, clip_skip=1,
+    height=512, width=512, num_inference_steps=80,
+)
+image.save("512.jpg")
+
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=7.5, clip_skip=1,
+    input_image=image.resize((1024, 1024)), controlnet_image=image.resize((1024, 1024)),
+    height=1024, width=1024, num_inference_steps=40, denoising_strength=0.7,
+)
+image.save("1024.jpg")
+
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=7.5, clip_skip=1,
+    input_image=image.resize((2048, 2048)), controlnet_image=image.resize((2048, 2048)),
+    height=2048, width=2048, num_inference_steps=20, denoising_strength=0.7,
+)
+image.save("2048.jpg")
+
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=7.5, clip_skip=1,
+    input_image=image.resize((4096, 4096)), controlnet_image=image.resize((4096, 4096)),
+    height=4096, width=4096, num_inference_steps=10, denoising_strength=0.5,
+    tiled=True, tile_size=128, tile_stride=64
+)
+image.save("4096.jpg")
--- a/examples/image_synthesis/sdxl_text_to_image.py
+++ b/examples/image_synthesis/sdxl_text_to_image.py
@@ -0,0 +1,34 @@
+from diffsynth import ModelManager, SDXLImagePipeline
+import torch
+
+
+# Download models
+# `models/stable_diffusion_xl/bluePencilXL_v200.safetensors`: [link](https://civitai.com/api/download/models/245614?type=Model&format=SafeTensor&size=pruned&fp=fp16)
+
+
+# Load models
+model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
+model_manager.load_models(["models/stable_diffusion_xl/bluePencilXL_v200.safetensors"])
+pipe = SDXLImagePipeline.from_model_manager(model_manager)
+
+prompt = "masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait,"
+negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw,"
+
+torch.manual_seed(0)
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=6,
+    height=1024, width=1024, num_inference_steps=60,
+)
+image.save("1024.jpg")
+
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=6,
+    input_image=image.resize((2048, 2048)),
+    height=2048, width=2048, num_inference_steps=60, denoising_strength=0.5
+)
+image.save("2048.jpg")
+
--- a/examples/image_synthesis/sdxl_turbo.py
+++ b/examples/image_synthesis/sdxl_turbo.py
@@ -0,0 +1,31 @@
+from diffsynth import ModelManager, SDXLImagePipeline
+import torch
+
+
+# Download models
+# `models/stable_diffusion_xl_turbo/sd_xl_turbo_1.0_fp16.safetensors`: [link](https://huggingface.co/stabilityai/sdxl-turbo/resolve/main/sd_xl_turbo_1.0_fp16.safetensors)
+
+
+# Load models
+model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
+model_manager.load_models(["models/stable_diffusion_xl_turbo/sd_xl_turbo_1.0_fp16.safetensors"])
+pipe = SDXLImagePipeline.from_model_manager(model_manager)
+
+# Text to image
+torch.manual_seed(0)
+image = pipe(
+    prompt="black car",
+    # Do not modify the following parameters!
+    cfg_scale=1, height=512, width=512, num_inference_steps=1, progress_bar_cmd=lambda x: x
+)
+image.save("black_car.jpg")
+
+# Image to image
+torch.manual_seed(0)
+image = pipe(
+    prompt="red car",
+    input_image=image, denoising_strength=0.7,
+    # Do not modify the following parameters!
+    cfg_scale=1, height=512, width=512, num_inference_steps=1, progress_bar_cmd=lambda x: x
+)
+image.save("black_car_to_red_car.jpg")