From 1b3c204d209f7db1d0f181c992e41f0efa8f3302 Mon Sep 17 00:00:00 2001 From: mi804 <1576993271@qq.com> Date: Fri, 20 Jun 2025 14:49:09 +0800 Subject: [PATCH] flux_ipadapter_refactor --- diffsynth/pipelines/flux_image_new.py | 28 +++++++++++++++++++++++++- examples/flux/flux_ipadapter.py | 29 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 examples/flux/flux_ipadapter.py diff --git a/diffsynth/pipelines/flux_image_new.py b/diffsynth/pipelines/flux_image_new.py index bab84b8..3a1d382 100644 --- a/diffsynth/pipelines/flux_image_new.py +++ b/diffsynth/pipelines/flux_image_new.py @@ -43,6 +43,7 @@ class FluxImagePipeline(BasePipeline): FluxImageUnit_InputImageEmbedder(), FluxImageUnit_ImageIDs(), FluxImageUnit_EmbeddedGuidanceEmbedder(), + FluxImageUnit_IPAdapter(), ] self.model_fn = model_fn_flux_image @@ -98,7 +99,9 @@ class FluxImagePipeline(BasePipeline): pipe.vae_decoder = model_manager.fetch_model("flux_vae_decoder") pipe.vae_encoder = model_manager.fetch_model("flux_vae_encoder") pipe.prompter.fetch_models(pipe.text_encoder_1, pipe.text_encoder_2) - + pipe.ipadapter = model_manager.fetch_model("flux_ipadapter") + pipe.ipadapter_image_encoder = model_manager.fetch_model("siglip_vision_model") + return pipe @@ -294,6 +297,29 @@ class FluxImageUnit_EmbeddedGuidanceEmbedder(PipelineUnit): return {"guidance": guidance} +class FluxImageUnit_IPAdapter(PipelineUnit): + def __init__(self): + super().__init__( + take_over=True, + onload_model_names=("ipadapter_image_encoder", "ipadapter") + ) + + def process(self, pipe: FluxImagePipeline, inputs_shared, inputs_posi, inputs_nega): + ipadapter_images, ipadapter_scale = inputs_shared.get("ipadapter_images", None), inputs_shared.get("ipadapter_scale", 1.0) + if ipadapter_images is None: + return inputs_shared, inputs_posi, inputs_nega + + pipe.load_models_to_device(self.onload_model_names) + images = [image.convert("RGB").resize((384, 384), resample=3) for image in 
ipadapter_images] + images = [pipe.preprocess_image(image).to(device=pipe.device, dtype=pipe.torch_dtype) for image in images] + ipadapter_images = torch.cat(images, dim=0) + ipadapter_image_encoding = pipe.ipadapter_image_encoder(ipadapter_images).pooler_output + + inputs_posi.update({"ipadapter_kwargs_list": pipe.ipadapter(ipadapter_image_encoding, scale=ipadapter_scale)}) + if inputs_shared.get("cfg_scale", 1.0) != 1.0: + inputs_nega.update({"ipadapter_kwargs_list": pipe.ipadapter(torch.zeros_like(ipadapter_image_encoding))}) + return inputs_shared, inputs_posi, inputs_nega + class TeaCache: def __init__(self, num_inference_steps, rel_l1_thresh): diff --git a/examples/flux/flux_ipadapter.py b/examples/flux/flux_ipadapter.py new file mode 100644 index 0000000..6214e4d --- /dev/null +++ b/examples/flux/flux_ipadapter.py @@ -0,0 +1,29 @@ +import torch +from PIL import Image +from diffsynth import save_video, VideoData, download_models +from diffsynth.pipelines.flux_image_new import FluxImagePipeline, ModelConfig +from modelscope import dataset_snapshot_download + +# TODO: replace the local path with model_id +pipe = FluxImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="flux1-dev.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder/model.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="text_encoder_2/"), + ModelConfig(model_id="black-forest-labs/FLUX.1-dev", origin_file_pattern="ae.safetensors"), + ModelConfig(model_id="InstantX/FLUX.1-dev-IP-Adapter", origin_file_pattern="ip-adapter.bin"), + ModelConfig(path="models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder") + ], +) + +seed = 42 +origin_prompt = "a rabbit in a garden, colorful flowers" +image = pipe(prompt=origin_prompt, height=1280, width=960, seed=seed) +image.save("style image.jpg") 
+torch.manual_seed(seed) +image = pipe(prompt="A piggy", height=1280, width=960, seed=seed, + ipadapter_images=[image], ipadapter_scale=0.7) +image.save("A piggy.jpg")