ace-step train

This commit is contained in:
mi804
2026-04-22 17:58:10 +08:00
parent b0680ef711
commit c53c813c12
42 changed files with 1235 additions and 30 deletions

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/Ace-Step1.5/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/Ace-Step1.5/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/Ace-Step1.5" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/Ace-Step1.5/metadata.json" \
--model_id_with_origin_paths "ACE-Step/Ace-Step1.5:acestep-v15-turbo/model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/Ace-Step1.5_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-base/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-base/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-base" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-base/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-base:model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-base_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-sft/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-sft/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-sft" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-sft/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-sft:model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-sft_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-continuous/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-turbo-continuous/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-continuous" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-continuous/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-turbo-continuous:model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-turbo-continuous_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift1/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-turbo-shift1/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift1" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift1/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-turbo-shift1:model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-turbo-shift1_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift3/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-turbo-shift3/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift3" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-turbo-shift3/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-turbo-shift3:model.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-turbo-shift3_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-xl-base/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-xl-base/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-base" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-base/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-xl-base:model-*.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-xl-base_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-xl-sft/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-xl-sft/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-sft" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-sft/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-xl-sft:model-*.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-xl-sft_full" \
--data_file_keys "audio"

View File

@@ -0,0 +1,18 @@
# Dataset: data/diffsynth_example_dataset/ace_step/acestep-v15-xl-turbo/
# Download: modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "ace_step/acestep-v15-xl-turbo/*" --local_dir ./data/diffsynth_example_dataset
accelerate launch examples/ace_step/model_training/train.py \
--learning_rate 1e-5 \
--num_epochs 2 \
--trainable_models "dit" \
--use_gradient_checkpointing \
--find_unused_parameters \
--dataset_base_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-turbo" \
--dataset_metadata_path "./data/diffsynth_example_dataset/ace_step/acestep-v15-xl-turbo/metadata.json" \
--model_id_with_origin_paths "ACE-Step/acestep-v15-xl-turbo:model-*.safetensors,ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/model.safetensors,ACE-Step/Ace-Step1.5:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "ACE-Step/Ace-Step1.5:Qwen3-Embedding-0.6B/" \
--silence_latent_path "ACE-Step/Ace-Step1.5:acestep-v15-turbo/silence_latent.pt" \
--remove_prefix_in_ckpt "pipe.dit." \
--dataset_repeat 50 \
--output_path "./models/train/acestep-v15-xl-turbo_full" \
--data_file_keys "audio"