mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-04-24 15:06:17 +00:00
path
This commit is contained in:
@@ -20,10 +20,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
|||||||
dataset_snapshot_download(
|
dataset_snapshot_download(
|
||||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||||
local_dir="data/diffsynth_example_dataset",
|
local_dir="data/diffsynth_example_dataset",
|
||||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
allow_file_pattern="ace_step/acestep-v15-base-CoverTask/audio.wav",
|
||||||
)
|
)
|
||||||
|
|
||||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-CoverTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||||
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
||||||
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
||||||
audio = pipe(
|
audio = pipe(
|
||||||
|
|||||||
@@ -20,10 +20,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
|||||||
dataset_snapshot_download(
|
dataset_snapshot_download(
|
||||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||||
local_dir="data/diffsynth_example_dataset",
|
local_dir="data/diffsynth_example_dataset",
|
||||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
allow_file_pattern="ace_step/acestep-v15-base-RepaintTask/audio.wav",
|
||||||
)
|
)
|
||||||
|
|
||||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-RepaintTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||||
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
||||||
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
||||||
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
||||||
|
|||||||
@@ -32,10 +32,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
|||||||
dataset_snapshot_download(
|
dataset_snapshot_download(
|
||||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||||
local_dir="data/diffsynth_example_dataset",
|
local_dir="data/diffsynth_example_dataset",
|
||||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
allow_file_pattern="ace_step/acestep-v15-base-CoverTask/audio.wav",
|
||||||
)
|
)
|
||||||
|
|
||||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-CoverTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||||
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
# audio_cover_strength controls the steps of doing cover tasks. [0, num_inference_steps * audio_cover_strength] steps will be cover steps, and the rest will be regular text-to-music generation steps.
|
||||||
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
# denoising_strength controls how the output audio is influenced by the source audio in cover tasks.
|
||||||
audio = pipe(
|
audio = pipe(
|
||||||
|
|||||||
@@ -32,10 +32,10 @@ lyrics = '[Intro - Synth Brass Fanfare]\n\n[Verse 1]\n黑夜里的风吹过耳
|
|||||||
dataset_snapshot_download(
|
dataset_snapshot_download(
|
||||||
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
|
||||||
local_dir="data/diffsynth_example_dataset",
|
local_dir="data/diffsynth_example_dataset",
|
||||||
allow_file_pattern="ace_step/acestep-v15-base/audio.wav",
|
allow_file_pattern="ace_step/acestep-v15-base-RepaintTask/audio.wav",
|
||||||
)
|
)
|
||||||
|
|
||||||
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
src_audio, sr = read_audio("data/diffsynth_example_dataset/ace_step/acestep-v15-base-RepaintTask/audio.wav", resample=True, resample_rate=pipe.vae.sampling_rate)
|
||||||
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
# repainting_ranges are in seconds, and will be converted to frames internally in the pipeline. The negative value in repainting_ranges means the padding from the start of the audio.
|
||||||
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
# For example, repainting_ranges=[(-10, 30), (160, 200)] means we want to repaint the audio from -10s to 30s (with 10s padding before the start) and from 160s to 200s. The non-existent parts will be padded with silence.
|
||||||
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
# Repainting strength denotes the intensity of repainting area, where 0 means no repainting (keep the original audio) and 1 means full repainting.
|
||||||
|
|||||||
Reference in New Issue
Block a user