support SD3 LoRA

This commit is contained in:
Artiprocher
2024-07-10 10:07:02 +08:00
parent 8113f95278
commit 979a8814f1
13 changed files with 1030 additions and 32 deletions

View File

@@ -1,10 +1,10 @@
# Image Synthesis
Image synthesis is the base feature of DiffSynth Studio.
Image synthesis is the base feature of DiffSynth Studio. We can generate images with very high resolution.
### Example: Stable Diffusion
We can generate images with very high resolution. Please see [`sd_text_to_image.py`](./sd_text_to_image.py) for more details.
Example script: [`sd_text_to_image.py`](./sd_text_to_image.py)
|512*512|1024*1024|2048*2048|4096*4096|
|-|-|-|-|
@@ -12,7 +12,7 @@ We can generate images with very high resolution. Please see [`sd_text_to_image.
### Example: Stable Diffusion XL
Generate images with Stable Diffusion XL. Please see [`sdxl_text_to_image.py`](./sdxl_text_to_image.py) for more details.
Example script: [`sdxl_text_to_image.py`](./sdxl_text_to_image.py)
|1024*1024|2048*2048|
|-|-|
@@ -20,15 +20,29 @@ Generate images with Stable Diffusion XL. Please see [`sdxl_text_to_image.py`](.
### Example: Stable Diffusion 3
Generate images with Stable Diffusion 3. High resolution is also supported in this model. See [`sd3_text_to_image.py`](./sd3_text_to_image.py).
Example script: [`sd3_text_to_image.py`](./sd3_text_to_image.py)
LoRA Training: [`../train/stable_diffusion_3/`](../train/stable_diffusion_3/)
|1024*1024|2048*2048|
|-|-|
|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4df346db-6f91-420a-b4c1-26e205376098)|![image_2048](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/1386c802-e580-4101-939d-f1596802df9d)|
### Example: Hunyuan-DiT
Example script: [`hunyuan_dit_text_to_image.py`](./hunyuan_dit_text_to_image.py)
LoRA Training: [`../train/hunyuan_dit/`](../train/hunyuan_dit/)
|1024*1024|2048*2048|
|-|-|
|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/60b022c8-df3f-4541-95ab-bf39f2fa8bb5)|![image_2048](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/87919ea8-d428-4963-8257-da05f3901bbb)|
### Example: Stable Diffusion XL Turbo
Generate images with Stable Diffusion XL Turbo. You can see [`sdxl_turbo.py`](./sdxl_turbo.py) for more details, but we highly recommend using it in the WebUI.
Example script: [`sdxl_turbo.py`](./sdxl_turbo.py)
We highly recommend using this model in the WebUI.
|"black car"|"red car"|
|-|-|

View File

@@ -0,0 +1,42 @@
from diffsynth import ModelManager, HunyuanDiTImagePipeline, download_models
import torch
# Example: text-to-image with Hunyuan-DiT, followed by a 2048x2048 high-res pass.

# Checkpoint files loaded by this script, with their upstream locations:
# `models/HunyuanDiT/t2i/clip_text_encoder/pytorch_model.bin`: [link](https://huggingface.co/Tencent-Hunyuan/HunyuanDiT/resolve/main/t2i/clip_text_encoder/pytorch_model.bin)
# `models/HunyuanDiT/t2i/mt5/pytorch_model.bin`: [link](https://huggingface.co/Tencent-Hunyuan/HunyuanDiT/resolve/main/t2i/mt5/pytorch_model.bin)
# `models/HunyuanDiT/t2i/model/pytorch_model_ema.pt`: [link](https://huggingface.co/Tencent-Hunyuan/HunyuanDiT/resolve/main/t2i/model/pytorch_model_ema.pt)
# `models/HunyuanDiT/t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin`: [link](https://huggingface.co/Tencent-Hunyuan/HunyuanDiT/resolve/main/t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin)
hunyuan_dit_checkpoints = [
    "models/HunyuanDiT/t2i/clip_text_encoder/pytorch_model.bin",
    "models/HunyuanDiT/t2i/mt5/pytorch_model.bin",
    "models/HunyuanDiT/t2i/model/pytorch_model_ema.pt",
    "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin",
]

# Prompts are passed to the pipeline verbatim (Chinese text).
# Rough English gloss of `prompt`: a poetic full-body portrait of a
# silver-haired, blue-eyed girl in a blue dress floating underwater, surrounded
# by shimmering bubbles, with warm sunlight refracting through the surface.
prompt = "一幅充满诗意美感的全身肖像画,画中一位银发、蓝色眼睛、身穿蓝色连衣裙的少女漂浮在水下,周围是光彩的气泡,和煦的阳光透过水面折射进水下"
# Rough English gloss of `negative_prompt`: wrong eyes, bad face, disfigured,
# bad art, deformed, extra limbs, blurry colors, blurry, duplicated, morbid,
# mutilated.
negative_prompt = "错误的眼睛,糟糕的人脸,毁容,糟糕的艺术,变形,多余的肢体,模糊的颜色,模糊,重复,病态,残缺,"

# Arguments that are identical for both generation passes below.
shared_sampling_args = dict(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=50,
)

# Download models (automatically)
download_models(["HunyuanDiT"])

# Load models in float16 on the CUDA device and build the pipeline from them.
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_models(hunyuan_dit_checkpoints)
pipe = HunyuanDiTImagePipeline.from_model_manager(model_manager)

# Pass 1: 1024x1024 generation. The seed is fixed once, before this call only,
# so the second pass continues from whatever RNG state this one leaves behind.
torch.manual_seed(0)
base_image = pipe(**shared_sampling_args, height=1024, width=1024)
base_image.save("image_1024.png")

# Pass 2 (high-res fix): feed the first result, resized to 2048x2048, back in
# as `input_image` with denoising_strength=0.4.
# NOTE(review): `tiled=True` presumably enables tiled processing at this
# resolution -- confirm against the pipeline implementation.
highres_image = pipe(
    **shared_sampling_args,
    input_image=base_image.resize((2048, 2048)),
    height=2048,
    width=2048,
    denoising_strength=0.4,
    tiled=True,
)
highres_image.save("image_2048.png")

View File

@@ -6,7 +6,7 @@ import torch
# `models/stable_diffusion_3/sd3_medium_incl_clips.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips.safetensors)
download_models(["StableDiffusion3"])
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
file_path_list=["models/stable_diffusion_3/sd3_medium_incl_clips_t5xxlfp16.safetensors"])
file_path_list=["models/stable_diffusion_3/sd3_medium_incl_clips.safetensors"])
pipe = SD3ImagePipeline.from_model_manager(model_manager)