step1x, teacache, flex refactor

2026-03-18 22:08:13 +00:00 · 2025-06-23 17:06:00 +08:00
parent 1113d305d1
commit cc6cd26733
4 changed files with 195 additions and 1 deletions
--- a/examples/flux/flex_text_to_image.py
+++ b/examples/flux/flex_text_to_image.py
@@ -0,0 +1,50 @@
+import torch
+from diffsynth.pipelines.flux_image_new import FluxImagePipeline, ModelConfig
+from diffsynth.controlnets.processors import Annotator
+import numpy as np
+from PIL import Image
+
+
+pipe = FluxImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="ostris/Flex.2-preview", origin_file_pattern="Flex.2-preview.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder/model.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder_2/"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="ae.safetensors"),
+    ],
+)
+
+image = pipe(
+    prompt="portrait of a beautiful Asian girl, long hair, red t-shirt, sunshine, beach",
+    num_inference_steps=50, embedded_guidance=3.5,
+    seed=0
+)
+image.save(f"image_1.jpg")
+
+mask = np.zeros((1024, 1024, 3), dtype=np.uint8)
+mask[200:400, 400:700] = 255
+mask = Image.fromarray(mask)
+mask.save(f"image_mask.jpg")
+
+inpaint_image = image
+
+image = pipe(
+    prompt="portrait of a beautiful Asian girl with sunglasses, long hair, red t-shirt, sunshine, beach",
+    num_inference_steps=50, embedded_guidance=3.5,
+    flex_inpaint_image=inpaint_image, flex_inpaint_mask=mask,
+    seed=4
+)
+image.save(f"image_2_new.jpg")
+
+control_image = Annotator("canny")(image)
+control_image.save("image_control.jpg")
+
+image = pipe(
+    prompt="portrait of a beautiful Asian girl with sunglasses, long hair, yellow t-shirt, sunshine, beach",
+    num_inference_steps=50, embedded_guidance=3.5,
+    flex_control_image=control_image,
+    seed=4
+)
+image.save(f"image_3_new.jpg")
--- a/examples/flux/flux_teacache.py
+++ b/examples/flux/flux_teacache.py
@@ -0,0 +1,24 @@
+import torch
+from diffsynth.pipelines.flux_image_new import FluxImagePipeline, ModelConfig
+
+
+pipe = FluxImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="flux1-dev.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder/model.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder_2/"),
+        ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="ae.safetensors"),
+    ],
+)
+
+
+prompt = "CG, masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait. The girl's flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her."
+
+for tea_cache_l1_thresh in [None, 0.2, 0.4, 0.6, 0.8]:
+    image = pipe(
+        prompt=prompt, embedded_guidance=3.5, seed=0,
+        num_inference_steps=50, tea_cache_l1_thresh=tea_cache_l1_thresh
+    )
+    image.save(f"image_{tea_cache_l1_thresh}.png")
--- a/examples/flux/step1x.py
+++ b/examples/flux/step1x.py
@@ -0,0 +1,44 @@
+import torch
+from diffsynth.pipelines.flux_image_new import FluxImagePipeline, ModelConfig
+from modelscope import snapshot_download
+from PIL import Image
+import numpy as np
+
+
+snapshot_download("Qwen/Qwen2.5-VL-7B-Instruct", cache_dir="./models")
+snapshot_download("stepfun-ai/Step1X-Edit", cache_dir="./models")
+
+pipe = FluxImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(path="models/Qwen/Qwen2.5-VL-7B-Instruct"),
+        ModelConfig(model_id="stepfun-ai/Step1X-Edit", origin_file_pattern="step1x-edit-i1258.safetensors"),
+        ModelConfig(model_id="stepfun-ai/Step1X-Edit", origin_file_pattern="vae.safetensors"),
+    ],
+)
+
+
+pipe.enable_vram_management()
+
+image = Image.fromarray(np.zeros((1248, 832, 3), dtype=np.uint8) + 255)
+image = pipe(
+    prompt="draw red flowers in Chinese ink painting style",
+    step1x_reference_image=image,
+    width=832, height=1248, cfg_scale=6,
+    seed=1,
+    rand_device='cuda'
+)
+image.save("image_1.jpg")
+
+
+
+image = pipe(
+    prompt="add more flowers in Chinese ink painting style",
+    step1x_reference_image=image,
+    width=832, height=1248, cfg_scale=6,
+    seed=2,
+    rand_device='cuda'
+)
+image.save("image_2.jpg")
+