mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-04-24 06:46:13 +00:00
path
This commit is contained in:
@@ -20,10 +20,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
||||
dataset_snapshot_download(
|
||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||
local_dir="data/diffsynth_example_dataset",
|
||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
||||
allow_file_pattern="ace_step/acestep-v15-base-CoverTask/audio.wav",
|
||||
)
|
||||
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-CoverTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
||||
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
||||
audio = pipe(
|
||||
|
||||
@@ -20,10 +20,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
||||
dataset_snapshot_download(
|
||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||
local_dir="data/diffsynth_example_dataset",
|
||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
||||
allow_file_pattern="ace_step/acestep-v15-base-RepaintTask/audio.wav",
|
||||
)
|
||||
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-RepaintTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
||||
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
||||
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
||||
|
||||
@@ -32,10 +32,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
||||
dataset_snapshot_download(
|
||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||
local_dir="data/diffsynth_example_dataset",
|
||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
||||
allow_file_pattern="ace_step/acestep-v15-base-CoverTask/audio.wav",
|
||||
)
|
||||
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-CoverTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
||||
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
||||
audio = pipe(
|
||||
|
||||
@@ -32,10 +32,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
||||
dataset_snapshot_download(
|
||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||
local_dir="data/diffsynth_example_dataset",
|
||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
||||
allow_file_pattern="ace_step/acestep-v15-base-RepaintTask/audio.wav",
|
||||
)
|
||||
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-RepaintTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
||||
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
||||
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
||||
|
||||
Reference in New Issue
Block a user