Merge pull request #756 from mi804/flux-eligen

fix flux-eligen bug
This commit is contained in:
Zhongjie Duan
2025-08-06 20:09:00 +08:00
committed by GitHub
3 changed files with 3 additions and 4 deletions

View File

@@ -375,8 +375,7 @@ class FluxDiT(torch.nn.Module):
         return attention_mask

-    def process_entity_masks(self, hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids):
-        repeat_dim = hidden_states.shape[1]
+    def process_entity_masks(self, hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids, repeat_dim):
         max_masks = 0
         attention_mask = None
         prompt_embs = [prompt_emb]

View File

@@ -762,7 +762,7 @@ def lets_dance_flux(
     hidden_states = dit.x_embedder(hidden_states)
     if entity_prompt_emb is not None and entity_masks is not None:
-        prompt_emb, image_rotary_emb, attention_mask = dit.process_entity_masks(hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids)
+        prompt_emb, image_rotary_emb, attention_mask = dit.process_entity_masks(hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids, 16)
     else:
         prompt_emb = dit.context_embedder(prompt_emb)
         image_rotary_emb = dit.pos_embedder(torch.cat((text_ids, image_ids), dim=1))

View File

@@ -1233,7 +1233,7 @@ def model_fn_flux_image(
     # EliGen
     if entity_prompt_emb is not None and entity_masks is not None:
-        prompt_emb, image_rotary_emb, attention_mask = dit.process_entity_masks(hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids)
+        prompt_emb, image_rotary_emb, attention_mask = dit.process_entity_masks(hidden_states, prompt_emb, entity_prompt_emb, entity_masks, text_ids, image_ids, latents.shape[1])
     else:
         prompt_emb = dit.context_embedder(prompt_emb)
         image_rotary_emb = dit.pos_embedder(torch.cat((text_ids, image_ids), dim=1))