diff --git a/examples/EntityControl/entity_transfer.py b/examples/EntityControl/entity_transfer.py new file mode 100644 index 0000000..372e68e --- /dev/null +++ b/examples/EntityControl/entity_transfer.py @@ -0,0 +1,88 @@ +from diffsynth import ModelManager, FluxImagePipeline, download_customized_models +from diffsynth.data.video import crop_and_resize +from modelscope import dataset_snapshot_download +from examples.EntityControl.utils import visualize_masks +from PIL import Image +import numpy as np +import torch + + + +def build_pipeline(): + model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"]) + model_manager.load_lora( + download_customized_models( + model_id="DiffSynth-Studio/Eligen", + origin_file_path="model_bf16.safetensors", + local_dir="models/lora/entity_control" + ), + lora_alpha=1 + ) + model_manager.load_lora( + download_customized_models( + model_id="iic/In-Context-LoRA", + origin_file_path="visual-identity-design.safetensors", + local_dir="models/lora/In-Context-LoRA" + ), + lora_alpha=1 + ) + pipe = FluxImagePipeline.from_model_manager(model_manager) + return pipe + + +def generate(pipe: FluxImagePipeline, logo_image, target_image, mask, height, width, prompt, logo_prompt, image_save_path, mask_save_path): + mask = Image.fromarray(np.concatenate([ + np.ones((height, width, 3), dtype=np.uint8) * 0, + np.array(crop_and_resize(mask, height, width)), + ], axis=1)) + + input_image = Image.fromarray(np.concatenate([ + np.array(crop_and_resize(logo_image, height, width)), + np.array(crop_and_resize(target_image, height, width)), + ], axis=1)) + + image = pipe( + prompt=prompt, + input_image=input_image, + cfg_scale=3.0, + negative_prompt="", + num_inference_steps=50, + embedded_guidance=3.5, + seed=0, + height=height, + width=width * 2, + eligen_entity_prompts=[logo_prompt], + eligen_entity_masks=[mask], + enable_eligen_on_negative=False, + enable_eligen_inpaint=True, + ) + image.save(image_save_path) + visualize_masks(image, [mask], [logo_prompt], mask_save_path) + + +pipe = build_pipeline() + +dataset_snapshot_download(dataset_id="DiffSynth-Studio/examples_in_diffsynth", local_dir="./", allow_file_pattern="data/examples/eligen/logo_transfer*") +logo_image = Image.open("data/examples/eligen/logo_transfer_logo.png") +target_image = Image.open("data/examples/eligen/logo_transfer_target_image.png") + +prompt="The two-panel image showcases the joyful identity, with the left panel showing a rabbit graphic; [LEFT] while the right panel translates the design onto a shopping tote with the rabbit logo in black, held by a person in a market setting, emphasizing the brand's approachable and eco-friendly vibe." +logo_prompt="a rabbit logo" + +mask = Image.open("data/examples/eligen/logo_transfer_mask_1.png") +generate( + pipe, logo_image, target_image, mask, + height=1024, width=736, + prompt=prompt, logo_prompt=logo_prompt, + image_save_path="entity_transfer_1.png", + mask_save_path="entity_transfer_with_mask_1.png" +) + +mask = Image.open("data/examples/eligen/logo_transfer_mask_2.png") +generate( + pipe, logo_image, target_image, mask, + height=1024, width=736, + prompt=prompt, logo_prompt=logo_prompt, + image_save_path="entity_transfer_2.png", + mask_save_path="entity_transfer_with_mask_2.png" +)