skip audio loading if no audio in video (#1397)

This commit is contained in:
Zhongjie Duan
2026-04-15 13:52:10 +08:00
committed by GitHub
parent 45d973e87d
commit 8f18e24597
2 changed files with 18 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
import math
import math, warnings
import torch, torchvision, imageio, os
import imageio.v3 as iio
from PIL import Image
@@ -260,15 +260,19 @@ class LoadAudioWithTorchaudio(DataProcessingOperator, FrameSamplerByRateMixin):
FrameSamplerByRateMixin.__init__(self, num_frames, time_division_factor, time_division_remainder, frame_rate, fix_frame_rate)
def __call__(self, data: str):
reader = self.get_reader(data)
num_frames = self.get_num_frames(reader)
duration = num_frames / self.frame_rate
waveform, sample_rate = torchaudio.load(data)
target_samples = int(duration * sample_rate)
current_samples = waveform.shape[-1]
if current_samples > target_samples:
waveform = waveform[..., :target_samples]
elif current_samples < target_samples:
padding = target_samples - current_samples
waveform = torch.nn.functional.pad(waveform, (0, padding))
return waveform, sample_rate
try:
reader = self.get_reader(data)
num_frames = self.get_num_frames(reader)
duration = num_frames / self.frame_rate
waveform, sample_rate = torchaudio.load(data)
target_samples = int(duration * sample_rate)
current_samples = waveform.shape[-1]
if current_samples > target_samples:
waveform = waveform[..., :target_samples]
elif current_samples < target_samples:
padding = target_samples - current_samples
waveform = torch.nn.functional.pad(waveform, (0, padding))
return waveform, sample_rate
except:
warnings.warn(f"Cannot load audio in {data}. The audio will be `None`.")
return None

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "diffsynth"
version = "2.0.8"
version = "2.0.9"
description = "Enjoy the magic of Diffusion models!"
authors = [{name = "ModelScope Team"}]
license = {text = "Apache-2.0"}