mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
* support mova inference * mova media_io * add unified audio_video api & fix bug of mono audio input for ltx * support mova train * mova docs * fix bug
44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
from io import BytesIO
from typing import BinaryIO, Union

import av
import numpy as np

from .audio_video import write_video_audio as write_video_audio_ltx2
|
|
|
|
|
|
def encode_single_frame(output_file: Union[str, BinaryIO], image_array: np.ndarray, crf: float) -> None:
    """Encode one RGB image as a single-frame H.264 video in an MP4 container.

    Args:
        output_file: Destination handed to ``av.open``. Either a filesystem
            path or a writable binary file-like object (``ltx2_preprocess``
            in this module passes an ``io.BytesIO``).
        image_array: Image indexed as ``[row, column, channel]`` and
            interpreted as ``rgb24`` by PyAV.
        crf: Value forwarded (as a string) to libx264's ``crf`` option.

    Note:
        Odd heights/widths are floored to the next lower even number, so the
        encoded frame can be one pixel smaller than the input along each axis.
    """
    # The container's context manager closes it on exit, including on error.
    with av.open(output_file, "w", format="mp4") as container:
        stream = container.add_stream(
            "libx264",
            rate=1,
            options={"crf": str(crf), "preset": "veryfast"},
        )

        # Floor (not round-to-nearest) each dimension to an even value for
        # compatibility with video codecs; a trailing odd row/column is cropped.
        height = image_array.shape[0] // 2 * 2
        width = image_array.shape[1] // 2 * 2
        cropped = image_array[:height, :width]
        stream.height = height
        stream.width = width

        frame = av.VideoFrame.from_ndarray(cropped, format="rgb24").reformat(format="yuv420p")
        container.mux(stream.encode(frame))
        # Encoding with no frame flushes packets still buffered in the encoder.
        container.mux(stream.encode())
|
|
|
|
|
|
def decode_single_frame(video_file: Union[str, BinaryIO]) -> np.ndarray:
    """Decode the first frame of the first video stream into an RGB array.

    Args:
        video_file: Source handed to ``av.open``. Either a filesystem path or
            a readable binary file-like object (``ltx2_preprocess`` in this
            module passes an ``io.BytesIO``).

    Returns:
        The frame converted by PyAV to an ``rgb24`` ndarray.

    Raises:
        ValueError: If the input has no video stream, or that stream yields
            no frame.
    """
    # The container's context manager closes it on exit, including on error.
    with av.open(video_file) as container:
        # Use explicit defaults so a missing stream/frame surfaces as a clear
        # ValueError instead of a bare StopIteration escaping this function.
        stream = next((s for s in container.streams if s.type == "video"), None)
        if stream is None:
            raise ValueError("no video stream found in input")

        frame = next(container.decode(stream), None)
        if frame is None:
            raise ValueError("video stream contains no decodable frame")

        return frame.to_ndarray(format="rgb24")
|
|
|
|
|
|
def ltx2_preprocess(image: np.ndarray, crf: float = 33) -> np.ndarray:
    """Round-trip an image through an in-memory single-frame H.264 encode/decode.

    The image is encoded with ``encode_single_frame`` into a memory buffer and
    immediately decoded again with ``decode_single_frame`` (presumably so the
    result carries the codec's compression artifacts -- confirm with callers).

    Args:
        image: RGB image array accepted by ``encode_single_frame``.
        crf: libx264 ``crf`` value used for the encode. ``0`` disables the
            round trip entirely.

    Returns:
        ``image`` itself (same object, untouched) when ``crf == 0``; otherwise
        the decoded frame. Because the encoder crops odd heights/widths down
        to even values, the result can be one pixel smaller along each axis.
    """
    if crf == 0:
        return image

    # Encode into memory; copy the bytes out before the buffer is closed.
    with BytesIO() as encoded:
        encode_single_frame(encoded, image, crf)
        video_bytes = encoded.getvalue()

    # Decode from a fresh buffer positioned at the start of the data.
    with BytesIO(video_bytes) as video_file:
        return decode_single_frame(video_file)
|