Skip audio loading if there is no audio in the video (#1397)

2026-04-16 15:28:21 +00:00 · 2026-04-15 13:52:10 +08:00
parent 45d973e87d
commit 8f18e24597
2 changed files with 18 additions and 14 deletions
--- a/diffsynth/core/data/operators.py
+++ b/diffsynth/core/data/operators.py
@@ -1,4 +1,4 @@
-import math
+import math, warnings
 import torch, torchvision, imageio, os
 import imageio.v3 as iio
 from PIL import Image
@@ -260,15 +260,19 @@ class LoadAudioWithTorchaudio(DataProcessingOperator, FrameSamplerByRateMixin):
        FrameSamplerByRateMixin.__init__(self, num_frames, time_division_factor, time_division_remainder, frame_rate, fix_frame_rate)
    def __call__(self, data: str):
-        reader = self.get_reader(data)
+        try:
-        num_frames = self.get_num_frames(reader)
+            reader = self.get_reader(data)
-        duration = num_frames / self.frame_rate
+            num_frames = self.get_num_frames(reader)
-        waveform, sample_rate = torchaudio.load(data)
+            duration = num_frames / self.frame_rate
-        target_samples = int(duration * sample_rate)
+            waveform, sample_rate = torchaudio.load(data)
-        current_samples = waveform.shape[-1]
+            target_samples = int(duration * sample_rate)
-        if current_samples > target_samples:
+            current_samples = waveform.shape[-1]
-            waveform = waveform[..., :target_samples]
+            if current_samples > target_samples:
-        elif current_samples < target_samples:
+                waveform = waveform[..., :target_samples]
-            padding = target_samples - current_samples
+            elif current_samples < target_samples:
-            waveform = torch.nn.functional.pad(waveform, (0, padding))
+                padding = target_samples - current_samples
-        return waveform, sample_rate
+                waveform = torch.nn.functional.pad(waveform, (0, padding))
            return waveform, sample_rate
        except:
            warnings.warn(f"Cannot load audio in {data}. The audio will be `None`.")
            return None
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "diffsynth"
-version = "2.0.8"
+version = "2.0.9"
 description = "Enjoy the magic of Diffusion models!"
 authors = [{name = "ModelScope Team"}]
 license = {text = "Apache-2.0"}