support flux highresfix

2026-03-22 08:40:47 +00:00 · 2024-08-19 13:35:27 +08:00
parent 80aa4d8e19
commit 778a2d8f84
7 changed files with 111 additions and 25 deletions
--- a/examples/image_synthesis/README.md
+++ b/examples/image_synthesis/README.md
@@ -2,10 +2,20 @@

 Image synthesis is the base feature of DiffSynth Studio. We can generate images with very high resolution.

+### Example: FLUX
+
+Example script: [`flux_text_to_image.py`](./flux_text_to_image.py)
+
+|1024*1024 (original)|1024*1024 (classifier-free guidance)|2048*2048 (highres-fix)|
+|-|-|-|
+|![image_1024](https://github.com/user-attachments/assets/d8e66872-8739-43e4-8c2b-eda9daba0450)|![image_1024_cfg](https://github.com/user-attachments/assets/1073c70d-018f-47e4-9342-bc580b4c7c59)|![image_2048_highres](https://github.com/user-attachments/assets/8719c1a8-b341-48c1-a085-364c3a7d25f0)|
+
 ### Example: Stable Diffusion

 Example script: [`sd_text_to_image.py`](./sd_text_to_image.py)

+LoRA Training: [`../train/stable_diffusion/`](../train/stable_diffusion/)
+
 |512*512|1024*1024|2048*2048|4096*4096|
 |-|-|-|-|
 |![512](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/55f679e9-7445-4605-9315-302e93d11370)|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/6fc84611-8da6-4a1f-8fee-9a34eba3b4a5)|![2048](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/9087a73c-9164-4c58-b2a0-effc694143fb)|![4096](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/edee9e71-fc39-4d1c-9ca9-fa52002c67ac)|
@@ -14,6 +24,8 @@ Example script: [`sd_text_to_image.py`](./sd_text_to_image.py)

 Example script: [`sdxl_text_to_image.py`](./sdxl_text_to_image.py)

+LoRA Training: [`../train/stable_diffusion_xl/`](../train/stable_diffusion_xl/)
+
 |1024*1024|2048*2048|
 |-|-|
 |![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/67687748-e738-438c-aee5-96096f09ac90)|![2048](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/584186bc-9855-4140-878e-99541f9a757f)|
--- a/examples/image_synthesis/flux_text_to_image.py
+++ b/examples/image_synthesis/flux_text_to_image.py
@@ -12,9 +12,30 @@ model_manager.load_models([
 ])
 pipe = FluxImagePipeline.from_model_manager(model_manager)

+prompt = "A captivating fantasy magic woman portrait set in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin."
+negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, dim, fuzzy, depth of Field, nsfw,"
+
+# Disable classifier-free guidance (consistent with the original implementation of FLUX.1)
 torch.manual_seed(6)
 image = pipe(
-    "A captivating fantasy magic woman portrait set in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin.",
-    num_inference_steps=30
+    prompt=prompt,
+    num_inference_steps=30,
 )
 image.save("image_1024.jpg")
+
+# Enable classifier-free guidance (pass a negative prompt together with cfg_scale=2.0)
+torch.manual_seed(6)
+image = pipe(
+    prompt=prompt, negative_prompt=negative_prompt,
+    num_inference_steps=30, cfg_scale=2.0
+)
+image.save("image_1024_cfg.jpg")
+
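+# Highres-fix: resize the previous image to 2048x2048 and feed it back as input_image (denoising_strength=0.6, tiled=True)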
+torch.manual_seed(7)
+image = pipe(
+    prompt=prompt,
+    num_inference_steps=30,
+    input_image=image.resize((2048, 2048)), height=2048, width=2048, denoising_strength=0.6, tiled=True
+)
+image.save("image_2048_highres.jpg")