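bugfix: gate addition_t_cond on the embedder's use_additional_t_cond flag instead of layer_num; save every generated layer in the Qwen-Image-Layered validation examples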

2026-03-18 22:08:13 +00:00 · 2025-12-20 14:00:22 +08:00
parent c6722b3f56
commit 20e1aaf908
4 changed files with 10 additions and 5 deletions
--- a/diffsynth/models/general_modules.py
+++ b/diffsynth/models/general_modules.py
@@ -87,6 +87,7 @@ class TimestepEmbeddings(torch.nn.Module):
            self.timestep_embedder = torch.nn.Sequential(
                torch.nn.Linear(dim_in, dim_out), torch.nn.SiLU(), torch.nn.Linear(dim_out, dim_out)
            )
+        self.use_additional_t_cond = use_additional_t_cond
        if use_additional_t_cond:
            self.addition_t_embedding = torch.nn.Embedding(2, dim_out)
--- a/diffsynth/pipelines/qwen_image.py
+++ b/diffsynth/pipelines/qwen_image.py
@@ -762,7 +762,7 @@ def model_fn_qwen_image(
    conditioning = dit.time_text_embed(
        timestep,
        image.dtype,
-        addition_t_cond=None if layer_num is None else torch.tensor([0]).to(device=image.device, dtype=torch.long)
+        addition_t_cond=None if not dit.time_text_embed.use_additional_t_cond else torch.tensor([0]).to(device=image.device, dtype=torch.long)
    )
    if entity_prompt_emb is not None:
--- a/examples/qwen_image/model_training/validate_full/Qwen-Image-Layered.py
+++ b/examples/qwen_image/model_training/validate_full/Qwen-Image-Layered.py
@@ -18,9 +18,11 @@ state_dict = load_state_dict("models/train/Qwen-Image-Layered_full/epoch-1.safet
 pipe.dit.load_state_dict(state_dict)
 prompt = "a poster"
 input_image = Image.open("data/example_image_dataset/layer/image.png").convert("RGBA").resize((864, 480))
-image = pipe(
+images = pipe(
    prompt, seed=0,
    height=480, width=864,
    layer_input_image=input_image, layer_num=3,
 )
-image.save("image.jpg")
+for i, image in enumerate(images):
+    if i == 0: continue # The first image is the input image.
+    image.save(f"image_{i}.png")
--- a/examples/qwen_image/model_training/validate_lora/Qwen-Image-Layered.py
+++ b/examples/qwen_image/model_training/validate_lora/Qwen-Image-Layered.py
@@ -17,9 +17,11 @@ pipe = QwenImagePipeline.from_pretrained(
 pipe.load_lora(pipe.dit, "models/train/Qwen-Image-Layered_lora/epoch-4.safetensors")
 prompt = "a poster"
 input_image = Image.open("data/example_image_dataset/layer/image.png").convert("RGBA").resize((864, 480))
-image = pipe(
+images = pipe(
    prompt, seed=0,
    height=480, width=864,
    layer_input_image=input_image, layer_num=3,
 )
-image.save("image.jpg")
+for i, image in enumerate(images):
+    if i == 0: continue # The first image is the input image.
+    image.save(f"image_{i}.png")