mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-23 09:28:12 +00:00
support sd35-lora
This commit is contained in:
@@ -256,6 +256,72 @@ image = pipe(
|
|||||||
image.save("image_with_lora.jpg")
|
image.save("image_with_lora.jpg")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Stable Diffusion 3.5 Series
|
||||||
|
|
||||||
|
|
||||||
|
You need to download the text encoders and DiT model files. Please use the following code to download these files:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from diffsynth import download_models
|
||||||
|
|
||||||
|
download_models(["StableDiffusion3.5-large"])
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
models/stable_diffusion_3
|
||||||
|
├── Put Stable Diffusion 3 checkpoints here.txt
|
||||||
|
├── sd3.5_large.safetensors
|
||||||
|
└── text_encoders
|
||||||
|
    ├── clip_g.safetensors
|
||||||
|
    ├── clip_l.safetensors
|
||||||
|
    └── t5xxl_fp16.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
Launch the training task using the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
CUDA_VISIBLE_DEVICES="0" python examples/train/stable_diffusion_3/train_sd3_lora.py \
|
||||||
|
--pretrained_path models/stable_diffusion_3/text_encoders/clip_g.safetensors,models/stable_diffusion_3/text_encoders/clip_l.safetensors,models/stable_diffusion_3/text_encoders/t5xxl_fp16.safetensors,models/stable_diffusion_3/sd3.5_large.safetensors \
|
||||||
|
--dataset_path data/dog \
|
||||||
|
--output_path ./models \
|
||||||
|
--max_epochs 1 \
|
||||||
|
--steps_per_epoch 500 \
|
||||||
|
--height 1024 \
|
||||||
|
--width 1024 \
|
||||||
|
--center_crop \
|
||||||
|
--precision "16" \
|
||||||
|
--learning_rate 1e-4 \
|
||||||
|
--lora_rank 4 \
|
||||||
|
--lora_alpha 4 \
|
||||||
|
--use_gradient_checkpointing
|
||||||
|
```
|
||||||
|
|
||||||
|
For details on all available parameters, run `python examples/train/stable_diffusion_3/train_sd3_lora.py -h`.
|
||||||
|
|
||||||
|
After training, use `model_manager.load_lora` to load the LoRA for inference.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from diffsynth import ModelManager, SD3ImagePipeline
|
||||||
|
import torch
|
||||||
|
|
||||||
|
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
|
||||||
|
file_path_list=[
|
||||||
|
"models/stable_diffusion_3/text_encoders/clip_g.safetensors",
|
||||||
|
"models/stable_diffusion_3/text_encoders/clip_l.safetensors",
|
||||||
|
"models/stable_diffusion_3/text_encoders/t5xxl_fp16.safetensors",
|
||||||
|
"models/stable_diffusion_3/sd3.5_large.safetensors"
|
||||||
|
])
|
||||||
|
model_manager.load_lora("models/lightning_logs/version_0/checkpoints/epoch=0-step=500.ckpt", lora_alpha=1.0)
|
||||||
|
pipe = SD3ImagePipeline.from_model_manager(model_manager)
|
||||||
|
|
||||||
|
torch.manual_seed(0)
|
||||||
|
image = pipe(
|
||||||
|
prompt="a dog is jumping, flowers around the dog, the background is mountains and clouds",
|
||||||
|
num_inference_steps=30, cfg_scale=7
|
||||||
|
)
|
||||||
|
image.save("image_with_lora.jpg")
|
||||||
|
```
|
||||||
|
|
||||||
### Stable Diffusion 3
|
### Stable Diffusion 3
|
||||||
|
|
||||||
Only one file is required in the training script. You can use [`sd3_medium_incl_clips.safetensors`](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips.safetensors) (without T5 encoder) or [`sd3_medium_incl_clips_t5xxlfp16.safetensors`](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips_t5xxlfp16.safetensors) (with T5 encoder). Please use the following code to download these files:
|
Only one file is required in the training script. You can use [`sd3_medium_incl_clips.safetensors`](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips.safetensors) (without T5 encoder) or [`sd3_medium_incl_clips_t5xxlfp16.safetensors`](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips_t5xxlfp16.safetensors) (with T5 encoder). Please use the following code to download these files:
|
||||||
@@ -285,7 +351,7 @@ CUDA_VISIBLE_DEVICES="0" python examples/train/stable_diffusion_3/train_sd3_lora
|
|||||||
--height 1024 \
|
--height 1024 \
|
||||||
--width 1024 \
|
--width 1024 \
|
||||||
--center_crop \
|
--center_crop \
|
||||||
--precision "16-mixed" \
|
--precision "16" \
|
||||||
--learning_rate 1e-4 \
|
--learning_rate 1e-4 \
|
||||||
--lora_rank 4 \
|
--lora_rank 4 \
|
||||||
--lora_alpha 4 \
|
--lora_alpha 4 \
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
|||||||
class LightningModel(LightningModelForT2ILoRA):
|
class LightningModel(LightningModelForT2ILoRA):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
torch_dtype=torch.float16, pretrained_weights=[],
|
torch_dtype=torch.float16, pretrained_weights=[], preset_lora_path=None,
|
||||||
learning_rate=1e-4, use_gradient_checkpointing=True,
|
learning_rate=1e-4, use_gradient_checkpointing=True,
|
||||||
lora_rank=4, lora_alpha=4, lora_target_modules="to_q,to_k,to_v,to_out", init_lora_weights="gaussian",
|
lora_rank=4, lora_alpha=4, lora_target_modules="to_q,to_k,to_v,to_out", init_lora_weights="gaussian",
|
||||||
):
|
):
|
||||||
@@ -16,7 +16,12 @@ class LightningModel(LightningModelForT2ILoRA):
|
|||||||
model_manager = ModelManager(torch_dtype=torch_dtype, device=self.device)
|
model_manager = ModelManager(torch_dtype=torch_dtype, device=self.device)
|
||||||
model_manager.load_models(pretrained_weights)
|
model_manager.load_models(pretrained_weights)
|
||||||
self.pipe = SD3ImagePipeline.from_model_manager(model_manager)
|
self.pipe = SD3ImagePipeline.from_model_manager(model_manager)
|
||||||
self.pipe.scheduler.set_timesteps(1000)
|
self.pipe.scheduler.set_timesteps(1000, training=True)
|
||||||
|
|
||||||
|
if preset_lora_path is not None:
|
||||||
|
preset_lora_path = preset_lora_path.split(",")
|
||||||
|
for path in preset_lora_path:
|
||||||
|
model_manager.load_lora(path)
|
||||||
|
|
||||||
self.freeze_parameters()
|
self.freeze_parameters()
|
||||||
self.add_lora_to_model(self.pipe.denoising_model(), lora_rank=lora_rank, lora_alpha=lora_alpha, lora_target_modules=lora_target_modules, init_lora_weights=init_lora_weights)
|
self.add_lora_to_model(self.pipe.denoising_model(), lora_rank=lora_rank, lora_alpha=lora_alpha, lora_target_modules=lora_target_modules, init_lora_weights=init_lora_weights)
|
||||||
@@ -29,14 +34,26 @@ def parse_args():
|
|||||||
type=str,
|
type=str,
|
||||||
default=None,
|
default=None,
|
||||||
required=True,
|
required=True,
|
||||||
help="Path to pretrained model. For example, `models/stable_diffusion_3/sd3_medium_incl_clips.safetensors` or `models/stable_diffusion_3/sd3_medium_incl_clips_t5xxlfp16.safetensors`.",
|
help="Path to pretrained models, seperated by comma. For example, SD3: `models/stable_diffusion_3/sd3_medium_incl_clips_t5xxlfp16.safetensors`, SD3.5-large: `models/stable_diffusion_3/text_encoders/clip_g.safetensors,models/stable_diffusion_3/text_encoders/clip_l.safetensors,models/stable_diffusion_3/text_encoders/t5xxl_fp16.safetensors,models/stable_diffusion_3/sd3.5_large.safetensors`",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--lora_target_modules",
|
"--lora_target_modules",
|
||||||
type=str,
|
type=str,
|
||||||
default="a_to_qkv,b_to_qkv",
|
default="a_to_qkv,b_to_qkv,norm_1_a.linear,norm_1_b.linear,a_to_out,b_to_out,ff_a.0,ff_a.2,ff_b.0,ff_b.2",
|
||||||
help="Layers with LoRA modules.",
|
help="Layers with LoRA modules.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--preset_lora_path",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="Preset LoRA path.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--num_timesteps",
|
||||||
|
type=int,
|
||||||
|
default=1000,
|
||||||
|
help="Number of total timesteps. For turbo models, please set this parameter to the number of expected number of inference steps.",
|
||||||
|
)
|
||||||
parser = add_general_parsers(parser)
|
parser = add_general_parsers(parser)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
return args
|
return args
|
||||||
@@ -46,7 +63,8 @@ if __name__ == '__main__':
|
|||||||
args = parse_args()
|
args = parse_args()
|
||||||
model = LightningModel(
|
model = LightningModel(
|
||||||
torch_dtype=torch.float32 if args.precision == "32" else torch.float16,
|
torch_dtype=torch.float32 if args.precision == "32" else torch.float16,
|
||||||
pretrained_weights=[args.pretrained_path],
|
pretrained_weights=args.pretrained_path.split(","),
|
||||||
|
preset_lora_path=args.preset_lora_path,
|
||||||
learning_rate=args.learning_rate,
|
learning_rate=args.learning_rate,
|
||||||
use_gradient_checkpointing=args.use_gradient_checkpointing,
|
use_gradient_checkpointing=args.use_gradient_checkpointing,
|
||||||
lora_rank=args.lora_rank,
|
lora_rank=args.lora_rank,
|
||||||
|
|||||||
Reference in New Issue
Block a user