ltx iclora train

2026-03-18 22:08:13 +00:00 · 2026-02-27 18:43:53 +08:00
parent 5996c2b068
commit 8b9a094c1b
7 changed files with 271 additions and 14 deletions
--- a/diffsynth/diffusion/base_pipeline.py
+++ b/diffsynth/diffusion/base_pipeline.py
@@ -94,20 +94,23 @@ class BasePipeline(torch.nn.Module):
        return self


-    def check_resize_height_width(self, height, width, num_frames=None):
+    def check_resize_height_width(self, height, width, num_frames=None, verbose=1):
        # Shape check
        if height % self.height_division_factor != 0:
            height = (height + self.height_division_factor - 1) // self.height_division_factor * self.height_division_factor
-            print(f"height % {self.height_division_factor} != 0. We round it up to {height}.")
+            if verbose > 0:
+                print(f"height % {self.height_division_factor} != 0. We round it up to {height}.")
        if width % self.width_division_factor != 0:
            width = (width + self.width_division_factor - 1) // self.width_division_factor * self.width_division_factor
-            print(f"width % {self.width_division_factor} != 0. We round it up to {width}.")
+            if verbose > 0:
+                print(f"width % {self.width_division_factor} != 0. We round it up to {width}.")
        if num_frames is None:
            return height, width
        else:
            if num_frames % self.time_division_factor != self.time_division_remainder:
                num_frames = (num_frames + self.time_division_factor - 1) // self.time_division_factor * self.time_division_factor + self.time_division_remainder
-                print(f"num_frames % {self.time_division_factor} != {self.time_division_remainder}. We round it up to {num_frames}.")
+                if verbose > 0:
+                    print(f"num_frames % {self.time_division_factor} != {self.time_division_remainder}. We round it up to {num_frames}.")
            return height, width, num_frames


--- a/diffsynth/pipelines/ltx2_audio_video.py
+++ b/diffsynth/pipelines/ltx2_audio_video.py
@@ -565,7 +565,7 @@ class LTX2AudioVideoUnit_InContextVideoEmbedder(PipelineUnit):
        expected_height = height // in_context_downsample_factor // 2 if use_two_stage_pipeline else height // in_context_downsample_factor
        expected_width = width // in_context_downsample_factor // 2 if use_two_stage_pipeline else width // in_context_downsample_factor
        current_h, current_w, current_f = in_context_video[0].size[1], in_context_video[0].size[0], len(in_context_video)
-        h, w, f = pipe.check_resize_height_width(expected_height, expected_width, current_f)
+        h, w, f = pipe.check_resize_height_width(expected_height, expected_width, current_f, verbose=0)
        if current_h != h or current_w != w:
            in_context_video = [img.resize((w, h)) for img in in_context_video]
        if current_f != f: