diff --git a/examples/EntityControl/README.md b/examples/EntityControl/README.md index 0b540f0..92c220d 100644 --- a/examples/EntityControl/README.md +++ b/examples/EntityControl/README.md @@ -80,4 +80,4 @@ Demonstration of the entity transfer results with EliGen and In-Context LoRA, se |Entity to Transfer|Transfer Target Image|Transfer Example 1|Transfer Example 2| |-|-|-|-| -|![image_1_base](https://github.com/user-attachments/assets/bb3d4a46-8d82-4d3c-bce8-8c01a9973b8d)|![image_1_enhance](https://github.com/user-attachments/assets/44c0f422-525e-42ca-991b-f407f8faafc3)|![image_1_enhance](https://github.com/user-attachments/assets/a042ff5b-2748-4d91-8321-cec8f9eb73e4)|![image_1_enhance](https://github.com/user-attachments/assets/98f2d1b1-16e1-4c8f-b521-5cd68b567293)| \ No newline at end of file +|![image_1_base](https://github.com/user-attachments/assets/0d40ef22-0a09-420d-bd5a-bfb93120b60d)|![image_1_enhance](https://github.com/user-attachments/assets/f6c58ef2-54c1-4d86-8429-dad2eb0e0685)|![image_1_enhance](https://github.com/user-attachments/assets/05eed2e3-097d-40af-8aae-1e0c75051f32)|![image_1_enhance](https://github.com/user-attachments/assets/54314d16-244b-411e-8a91-96c500efa5f5)| \ No newline at end of file diff --git a/examples/EntityControl/entity_transfer.py b/examples/EntityControl/entity_transfer.py index 1d40303..bb92c24 100644 --- a/examples/EntityControl/entity_transfer.py +++ b/examples/EntityControl/entity_transfer.py @@ -1,13 +1,10 @@ from diffsynth import ModelManager, FluxImagePipeline, download_customized_models -from diffsynth.data.video import crop_and_resize from modelscope import dataset_snapshot_download from examples.EntityControl.utils import visualize_masks from PIL import Image -import numpy as np import torch - def build_pipeline(): model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"]) model_manager.load_lora( @@ -30,16 +27,13 @@ def build_pipeline(): return pipe -def generate(pipe: FluxImagePipeline, logo_image, target_image, mask, height, width, prompt, logo_prompt, image_save_path, mask_save_path): - mask = Image.fromarray(np.concatenate([ - np.ones((height, width, 3), dtype=np.uint8) * 0, - np.array(crop_and_resize(mask, height, width)), - ], axis=1)) +def generate(pipe: FluxImagePipeline, source_image, target_image, mask, height, width, prompt, entity_prompt, image_save_path, mask_save_path, seed=0): + input_mask = Image.new('RGB', (width * 2, height)) + input_mask.paste(mask.resize((width, height), resample=Image.NEAREST).convert('RGB'), (width, 0)) - input_image = Image.fromarray(np.concatenate([ - np.array(crop_and_resize(logo_image, height, width)), - np.array(crop_and_resize(target_image, height, width)), - ], axis=1)) + input_image = Image.new('RGB', (width * 2, height)) + input_image.paste(source_image.resize((width, height)).convert('RGB'), (0, 0)) + input_image.paste(target_image.resize((width, height)).convert('RGB'), (width, 0)) image = pipe( prompt=prompt, @@ -48,41 +42,43 @@ def generate(pipe: FluxImagePipeline, logo_image, target_image, mask, height, wi negative_prompt="", num_inference_steps=50, embedded_guidance=3.5, - seed=0, + seed=seed, height=height, width=width * 2, - eligen_entity_prompts=[logo_prompt], - eligen_entity_masks=[mask], + eligen_entity_prompts=[entity_prompt], + eligen_entity_masks=[input_mask], enable_eligen_on_negative=False, enable_eligen_inpaint=True, ) - image.save(image_save_path) - visualize_masks(image, [mask], [logo_prompt], mask_save_path) + target_image = image.crop((width, 0, 2 * width, height)) + target_image.save(image_save_path) + visualize_masks(target_image, [mask], [entity_prompt], mask_save_path) + return target_image pipe = build_pipeline() dataset_snapshot_download(dataset_id="DiffSynth-Studio/examples_in_diffsynth", local_dir="./", allow_file_pattern="data/examples/eligen/logo_transfer/*") -logo_image = Image.open("data/examples/eligen/logo_transfer/logo_transfer_logo.png") -target_image = Image.open("data/examples/eligen/logo_transfer/logo_transfer_target_image.png") prompt="The two-panel image showcases the joyful identity, with the left panel showing a rabbit graphic; [LEFT] while the right panel translates the design onto a shopping tote with the rabbit logo in black, held by a person in a market setting, emphasizing the brand's approachable and eco-friendly vibe." logo_prompt="a rabbit logo" -mask = Image.open("data/examples/eligen/logo_transfer/logo_transfer_mask_1.png") +logo_image = Image.open("data/examples/eligen/logo_transfer/source_image.png") +target_image = Image.open("data/examples/eligen/logo_transfer/target_image.png") +mask = Image.open("data/examples/eligen/logo_transfer/mask_1.png") generate( pipe, logo_image, target_image, mask, - height=1024, width=736, - prompt=prompt, logo_prompt=logo_prompt, + height=1024, width=1024, + prompt=prompt, entity_prompt=logo_prompt, image_save_path="entity_transfer_1.png", mask_save_path="entity_transfer_with_mask_1.png" ) -mask = Image.open("data/examples/eligen/logo_transfer/logo_transfer_mask_2.png") +mask = Image.open("data/examples/eligen/logo_transfer/mask_2.png") generate( pipe, logo_image, target_image, mask, - height=1024, width=736, - prompt=prompt, logo_prompt=logo_prompt, + height=1024, width=1024, + prompt=prompt, entity_prompt=logo_prompt, image_save_path="entity_transfer_2.png", mask_save_path="entity_transfer_with_mask_2.png" )