Files
DiffSynth-Studio/examples/ace_step/model_inference/acestep-v15-turbo-shift1.py
2026-04-17 17:06:26 +08:00

53 lines
1.6 KiB
Python

"""
Ace-Step 1.5 Turbo (shift=1) — Text-to-Music inference example.
Uses shift=1.0 (no timestep transformation) for smoother, slower denoising.
"""
from diffsynth.pipelines.ace_step import AceStepPipeline, ModelConfig
import torch
import soundfile as sf
# Every component below is fetched from the same model repository; only the
# file pattern inside that repository differs.
ACE_STEP_REPO = "ACE-Step/Ace-Step1.5"

# File patterns for the entries of `model_configs`, in loading order.
# NOTE(review): the turbo checkpoint is listed twice on purpose to mirror the
# original script — presumably the same file provides two sub-models and the
# loader consumes one config per model; confirm before de-duplicating.
checkpoint_patterns = [
    "acestep-v15-turbo/model.safetensors",
    "acestep-v15-turbo/model.safetensors",
    "Qwen3-Embedding-0.6B/model.safetensors",
]

# Build the pipeline in bfloat16 on the CUDA device. A fresh ModelConfig is
# created for every entry so no config object is shared between components.
pipe = AceStepPipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id=ACE_STEP_REPO, origin_file_pattern=pattern)
        for pattern in checkpoint_patterns
    ],
    tokenizer_config=ModelConfig(
        model_id=ACE_STEP_REPO,
        origin_file_pattern="Qwen3-Embedding-0.6B/",
    ),
    vae_config=ModelConfig(
        model_id=ACE_STEP_REPO,
        origin_file_pattern="vae/",
    ),
)
# Free-text description of the music passed to the pipeline as `prompt`.
prompt = (
    "A gentle acoustic guitar melody with soft piano accompaniment, "
    "peaceful and warm atmosphere"
)

# Lyrics with bracketed section tags; one source line per sung line, with a
# blank line separating the verse from the chorus.
lyrics = (
    "[Verse 1]\n"
    "Sunlight filtering through the trees\n"
    "A quiet moment, just the breeze\n"
    "\n"
    "[Chorus]\n"
    "Peaceful heart, open mind\n"
    "Leaving all the noise behind"
)
# Run text-to-music generation with the prompt and lyrics defined above.
# NOTE(review): `duration` is presumably in seconds — confirm against the
# pipeline's documentation.
audio = pipe(
    prompt=prompt,
    lyrics=lyrics,
    duration=30.0,
    seed=42,
    num_inference_steps=8,
    cfg_scale=1.0,
    shift=1.0,  # shift=1: no timestep transformation
)

# The pipeline is loaded with torch_dtype=torch.bfloat16, and NumPy has no
# bfloat16 dtype, so calling .numpy() directly on a bfloat16 tensor raises.
# Detach, move to CPU and cast to float32 first; the cast is a no-op when the
# output is already float32 and yields a dtype soundfile accepts.
# NOTE(review): soundfile expects data shaped (frames,) or (frames, channels);
# confirm the pipeline output uses that layout, otherwise transpose here.
waveform = audio.detach().cpu().float().numpy()
sf.write("acestep-v15-turbo-shift1.wav", waveform, pipe.sample_rate)
print(f"Saved, shape: {audio.shape}")