update examples

2026-03-18 22:08:13 +00:00 · 2024-08-22 10:35:58 +08:00
parent d6d14859e3
commit 66f1ff43e9
4 changed files with 51 additions and 23 deletions
--- a/examples/image_synthesis/README.md
+++ b/examples/image_synthesis/README.md
@@ -6,9 +6,11 @@ Image synthesis is the base feature of DiffSynth Studio. We can generate images

 Example script: [`flux_text_to_image.py`](./flux_text_to_image.py)

+The original version of FLUX doesn't support classifier-free guidance; however, we believe that this guidance mechanism is an important feature for synthesizing beautiful images. You can enable it with the `cfg_scale` parameter. The extra guidance scale introduced by FLUX is set separately with the `embedded_guidance` parameter.
+
 |1024*1024 (original)|1024*1024 (classifier-free guidance)|2048*2048 (highres-fix)|
 |-|-|-|
-|![image_1024](https://github.com/user-attachments/assets/d8e66872-8739-43e4-8c2b-eda9daba0450)|![image_1024_cfg](https://github.com/user-attachments/assets/1073c70d-018f-47e4-9342-bc580b4c7c59)|![image_2048_highres](https://github.com/user-attachments/assets/8719c1a8-b341-48c1-a085-364c3a7d25f0)|
+|![image_1024](https://github.com/user-attachments/assets/ce01327f-068f-45f5-aba9-0fa45eb26199)|![image_1024_cfg](https://github.com/user-attachments/assets/6af5b106-0673-4e58-9213-cd9157eef4c0)|![image_2048_highres](https://github.com/user-attachments/assets/a4bb776f-d9f0-4450-968c-c5d090a3ab4c)|

 ### Example: Stable Diffusion

--- a/examples/image_synthesis/flux_text_to_image.py
+++ b/examples/image_synthesis/flux_text_to_image.py
@@ -12,14 +12,14 @@ model_manager.load_models([
 ])
 pipe = FluxImagePipeline.from_model_manager(model_manager)

-prompt = "A captivating fantasy magic woman portrait set in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin."
-negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, dim, fuzzy, depth of Field, nsfw,"
+prompt = "CG. Full body. A captivating fantasy magic woman portrait in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin."
+negative_prompt = "dark, worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, dim, fuzzy, depth of Field, nsfw,"

 # Disable classifier-free guidance (consistent with the original implementation of FLUX.1)
 torch.manual_seed(6)
 image = pipe(
    prompt=prompt,
-    num_inference_steps=30,
+    num_inference_steps=30, embedded_guidance=3.5
 )
 image.save("image_1024.jpg")

@@ -27,7 +27,7 @@ image.save("image_1024.jpg")
 torch.manual_seed(6)
 image = pipe(
    prompt=prompt, negative_prompt=negative_prompt,
-    num_inference_steps=30, cfg_scale=2.0
+    num_inference_steps=30, cfg_scale=2.0, embedded_guidance=3.5
 )
 image.save("image_1024_cfg.jpg")

@@ -35,7 +35,7 @@ image.save("image_1024_cfg.jpg")
 torch.manual_seed(7)
 image = pipe(
    prompt=prompt,
-    num_inference_steps=30,
+    num_inference_steps=30, embedded_guidance=3.5,
    input_image=image.resize((2048, 2048)), height=2048, width=2048, denoising_strength=0.6, tiled=True
 )
 image.save("image_2048_highres.jpg")