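refine code: EliGen entity-control examples now load mask images via ModelScope `dataset_snapshot_download` and use the renamed `eligen_*` pipeline arguments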

2026-03-18 22:08:13 +00:00 · 2025-01-02 19:54:09 +08:00
parent 2872fdaf48
commit 6f743fc4b6
6 changed files with 263 additions and 247 deletions
--- a/examples/EntityControl/entity_control.py
+++ b/examples/EntityControl/entity_control.py
@@ -1,57 +1,43 @@
-import torch
 from diffsynth import ModelManager, FluxImagePipeline, download_customized_models
+from modelscope import dataset_snapshot_download
 from examples.EntityControl.utils import visualize_masks
 from PIL import Image
-import requests
-from io import BytesIO
+import torch
+

 # download and load model
-lora_path = download_customized_models(
-    model_id="DiffSynth-Studio/Eligen",
-    origin_file_path="model_bf16.safetensors",
-    local_dir="models/lora/entity_control"
-)[0]
 model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"])
-model_manager.load_lora(lora_path, lora_alpha=1.)
+model_manager.load_lora(
+    download_customized_models(
+        model_id="DiffSynth-Studio/Eligen",
+        origin_file_path="model_bf16.safetensors",
+        local_dir="models/lora/entity_control"
+    ),
+    lora_alpha=1
+)
 pipe = FluxImagePipeline.from_model_manager(model_manager)

-# prepare inputs
-image_shape = 1024
-seed = 4
-# set True to apply regional attention in negative prompt prediction for better results with more time
-use_seperated_negtive_prompt = False
-mask_urls = [
-    'https://github.com/user-attachments/assets/02905f6e-40c2-4482-9abe-b1ce50ccabbf',
-    'https://github.com/user-attachments/assets/a4cf4361-abf7-4556-ba94-74683eda4cb7',
-    'https://github.com/user-attachments/assets/b6595ff4-7269-4d8f-acf0-5df40bd6c59f',
-    'https://github.com/user-attachments/assets/941d39a7-3aa1-437f-8b2a-4adb15d2fb3e',
-    'https://github.com/user-attachments/assets/400c4086-5398-4291-b1b5-22d8483c08d9',
-    'https://github.com/user-attachments/assets/ce324c77-fa1d-4aad-a5cb-698f0d5eca70',
-    'https://github.com/user-attachments/assets/4e62325f-a60c-44f7-b53b-6da0869bb9db'
-]
-# prepare entity masks, entity prompts, global prompt and negative prompt
-masks = []
-for url in mask_urls:
-    response = requests.get(url)
-    mask = Image.open(BytesIO(response.content)).resize((image_shape, image_shape), resample=Image.NEAREST)
-    masks.append(mask)
+# download and load mask images
+dataset_snapshot_download(dataset_id="DiffSynth-Studio/examples_in_diffsynth", local_dir="./", allow_file_pattern="data/examples/eligen/mask*")
+masks = [Image.open(f"./data/examples/eligen/mask{i}.png") for i in range(1, 8)]
+
 entity_prompts = ["A beautiful woman", "mirror", "necklace", "glasses", "earring", "white dress", "jewelry headpiece"]
 global_prompt = "A beautiful woman wearing white dress, holding a mirror, with a warm light background;"
 negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw"

 # generate image
-torch.manual_seed(seed)
 image = pipe(
    prompt=global_prompt,
    cfg_scale=3.0,
    negative_prompt=negative_prompt,
    num_inference_steps=50,
    embedded_guidance=3.5,
-    height=image_shape,
-    width=image_shape,
-    entity_prompts=entity_prompts,
-    entity_masks=masks,
-    use_seperated_negtive_prompt=use_seperated_negtive_prompt,
+    seed=4,
+    height=1024,
+    width=1024,
+    eligen_entity_prompts=entity_prompts,
+    eligen_entity_masks=masks,
+    enable_eligen_on_negative=False,
 )
 image.save(f"entity_control.png")
 visualize_masks(image, masks, entity_prompts, f"entity_control_with_mask.png")
--- a/examples/EntityControl/entity_control_ipadapter.py
+++ b/examples/EntityControl/entity_control_ipadapter.py
@@ -1,51 +1,46 @@
-import torch
 from diffsynth import ModelManager, FluxImagePipeline, download_customized_models
+from modelscope import dataset_snapshot_download
 from examples.EntityControl.utils import visualize_masks
 from PIL import Image
-import requests
-from io import BytesIO
+import torch

-lora_path = download_customized_models(
-    model_id="DiffSynth-Studio/Eligen",
-    origin_file_path="model_bf16.safetensors",
-    local_dir="models/lora/entity_control"
-)[0]
+
+# download and load model
 model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev", "InstantX/FLUX.1-dev-IP-Adapter"])
-model_manager.load_lora(lora_path, lora_alpha=1.)
+model_manager.load_lora(
+    download_customized_models(
+        model_id="DiffSynth-Studio/Eligen",
+        origin_file_path="model_bf16.safetensors",
+        local_dir="models/lora/entity_control"
+    ),
+    lora_alpha=1
+)
 pipe = FluxImagePipeline.from_model_manager(model_manager)

-# prepare inputs
-image_shape = 1024
-seed = 4
-# set True to apply regional attention in negative prompt prediction for better results with more time
-use_seperated_negtive_prompt = False
-mask_urls = [
-    'https://github.com/user-attachments/assets/e6745b3f-ab2b-4612-9bb5-b7235474a9a4',
-    'https://github.com/user-attachments/assets/5ddf9a89-32fa-4540-89ad-e956130942b3',
-    'https://github.com/user-attachments/assets/9d8a0bb0-6817-497e-af85-44f2512afe79'
-]
-# prepare entity masks, entity prompts, global prompt and negative prompt
-masks = []
-for url in mask_urls:
-    response = requests.get(url)
-    mask = Image.open(BytesIO(response.content)).resize((image_shape, image_shape), resample=Image.NEAREST)
-    masks.append(mask)
+# download and load mask images
+dataset_snapshot_download(dataset_id="DiffSynth-Studio/examples_in_diffsynth", local_dir="./", allow_file_pattern="data/examples/eligen/ipadapter*")
+masks = [Image.open(f"./data/examples/eligen/ipadapter_mask_{i}.png") for i in range(1, 4)]
+
 entity_prompts = ['A girl', 'hat', 'sunset']
 global_prompt = "A girl wearing a hat, looking at the sunset"
 negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw"
+reference_img = Image.open("./data/examples/eligen/ipadapter_image.png")

-response = requests.get('https://github.com/user-attachments/assets/019bbfaa-04b3-4de6-badb-32b67c29a1bc')
-reference_img = Image.open(BytesIO(response.content)).convert('RGB').resize((image_shape, image_shape))
-
-torch.manual_seed(seed)
+# generate image
 image = pipe(
    prompt=global_prompt,
    cfg_scale=3.0,
    negative_prompt=negative_prompt,
-    num_inference_steps=50, embedded_guidance=3.5, height=image_shape, width=image_shape,
-    entity_prompts=entity_prompts, entity_masks=masks,
-    use_seperated_negtive_prompt=use_seperated_negtive_prompt,
-    ipadapter_images=[reference_img], ipadapter_scale=0.7
+    num_inference_steps=50,
+    embedded_guidance=3.5,
+    seed=4,
+    height=1024,
+    width=1024,
+    eligen_entity_prompts=entity_prompts,
+    eligen_entity_masks=masks,
+    enable_eligen_on_negative=False,
+    ipadapter_images=[reference_img],
+    ipadapter_scale=0.7
 )
 image.save(f"styled_entity_control.png")
 visualize_masks(image, masks, entity_prompts, f"styled_entity_control_with_mask.png")
--- a/examples/EntityControl/entity_inpaint.py
+++ b/examples/EntityControl/entity_inpaint.py
@@ -1,58 +1,45 @@
-import torch
-from diffsynth import ModelManager, FluxImagePipeline, download_customized_models, FluxImageLoraPipeline
+from diffsynth import ModelManager, FluxImagePipeline, download_customized_models
+from modelscope import dataset_snapshot_download
 from examples.EntityControl.utils import visualize_masks
-import os
-import json
 from PIL import Image
-import requests
-from io import BytesIO
+import torch

 # download and load model
-lora_path = download_customized_models(
-    model_id="DiffSynth-Studio/Eligen",
-    origin_file_path="model_bf16.safetensors",
-    local_dir="models/lora/entity_control"
-)[0]
 model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"])
-model_manager.load_lora(lora_path, lora_alpha=1.)
+model_manager.load_lora(
+    download_customized_models(
+        model_id="DiffSynth-Studio/Eligen",
+        origin_file_path="model_bf16.safetensors",
+        local_dir="models/lora/entity_control"
+    ),
+    lora_alpha=1
+)
 pipe = FluxImagePipeline.from_model_manager(model_manager)

-# prepare inputs
-image_shape = 1024
-seed = 0
-# set True to apply regional attention in negative prompt prediction for better results with more time
-use_seperated_negtive_prompt = False
-mask_urls = [
-    'https://github.com/user-attachments/assets/0cf78663-5314-4280-a065-31ded7a24a46',
-    'https://github.com/user-attachments/assets/bd3938b8-72a8-4d56-814f-f6445971b91d'
-]
-# prepare entity masks, entity prompts, global prompt and negative prompt
-masks = []
-for url in mask_urls:
-    response = requests.get(url)
-    mask = Image.open(BytesIO(response.content)).resize((image_shape, image_shape), resample=Image.NEAREST)
-    masks.append(mask)
+# download and load mask images
+dataset_snapshot_download(dataset_id="DiffSynth-Studio/examples_in_diffsynth", local_dir="./", allow_file_pattern="data/examples/eligen/inpaint*")
+masks = [Image.open(f"./data/examples/eligen/inpaint_mask_{i}.png") for i in range(1, 3)]
+input_image = Image.open("./data/examples/eligen/inpaint_image.jpg")
+
 entity_prompts = ["A person wear red shirt", "Airplane"]
 global_prompt = "A person walking on the path in front of a house; An airplane in the sky"
 negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw, blur"

-response = requests.get('https://github.com/user-attachments/assets/fa4d6ba5-08fd-4fc7-adbb-19898d839364')
-inpaint_input = Image.open(BytesIO(response.content)).convert('RGB').resize((image_shape, image_shape))
-
 # generate image
-torch.manual_seed(seed)
 image = pipe(
    prompt=global_prompt,
+    input_image=input_image,
    cfg_scale=3.0,
    negative_prompt=negative_prompt,
    num_inference_steps=50,
    embedded_guidance=3.5,
-    height=image_shape,
-    width=image_shape,
-    entity_prompts=entity_prompts,
-    entity_masks=masks,
-    inpaint_input=inpaint_input,
-    use_seperated_negtive_prompt=use_seperated_negtive_prompt,
+    seed=0,
+    height=1024,
+    width=1024,
+    eligen_entity_prompts=entity_prompts,
+    eligen_entity_masks=masks,
+    enable_eligen_on_negative=False,
+    enable_eligen_inpaint=True,
 )
 image.save(f"entity_inpaint.png")
-visualize_masks(image, masks, entity_prompts, f"entity_inpaint_with_mask.png")
+visualize_masks(image, masks, entity_prompts, f"entity_inpaint_with_mask.png")