skip audio loading if no audio in video (#1397)

This commit is contained in:
Zhongjie Duan
2026-04-15 13:52:10 +08:00
committed by GitHub
parent 45d973e87d
commit 8f18e24597
2 changed files with 18 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
import math import math, warnings
import torch, torchvision, imageio, os import torch, torchvision, imageio, os
import imageio.v3 as iio import imageio.v3 as iio
from PIL import Image from PIL import Image
@@ -260,15 +260,19 @@ class LoadAudioWithTorchaudio(DataProcessingOperator, FrameSamplerByRateMixin):
FrameSamplerByRateMixin.__init__(self, num_frames, time_division_factor, time_division_remainder, frame_rate, fix_frame_rate) FrameSamplerByRateMixin.__init__(self, num_frames, time_division_factor, time_division_remainder, frame_rate, fix_frame_rate)
def __call__(self, data: str): def __call__(self, data: str):
reader = self.get_reader(data) try:
num_frames = self.get_num_frames(reader) reader = self.get_reader(data)
duration = num_frames / self.frame_rate num_frames = self.get_num_frames(reader)
waveform, sample_rate = torchaudio.load(data) duration = num_frames / self.frame_rate
target_samples = int(duration * sample_rate) waveform, sample_rate = torchaudio.load(data)
current_samples = waveform.shape[-1] target_samples = int(duration * sample_rate)
if current_samples > target_samples: current_samples = waveform.shape[-1]
waveform = waveform[..., :target_samples] if current_samples > target_samples:
elif current_samples < target_samples: waveform = waveform[..., :target_samples]
padding = target_samples - current_samples elif current_samples < target_samples:
waveform = torch.nn.functional.pad(waveform, (0, padding)) padding = target_samples - current_samples
return waveform, sample_rate waveform = torch.nn.functional.pad(waveform, (0, padding))
return waveform, sample_rate
except:
warnings.warn(f"Cannot load audio in {data}. The audio will be `None`.")
return None

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "diffsynth" name = "diffsynth"
version = "2.0.8" version = "2.0.9"
description = "Enjoy the magic of Diffusion models!" description = "Enjoy the magic of Diffusion models!"
authors = [{name = "ModelScope Team"}] authors = [{name = "ModelScope Team"}]
license = {text = "Apache-2.0"} license = {text = "Apache-2.0"}