support kolors! (#106)

2026-03-24 01:48:13 +00:00 · 2024-07-11 21:43:45 +08:00
parent 2a4709e572
commit 9c6607f78d
20 changed files with 2510 additions and 281 deletions
--- a/examples/image_synthesis/README.md
+++ b/examples/image_synthesis/README.md
@@ -28,6 +28,16 @@ LoRA Training: [`../train/stable_diffusion_3/`](../train/stable_diffusion_3/)
 |-|-|
 |![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4df346db-6f91-420a-b4c1-26e205376098)|![image_2048](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/1386c802-e580-4101-939d-f1596802df9d)|

+### Example: Kolors
+
+Example script: [`kolors_text_to_image.py`](./kolors_text_to_image.py)
+
+LoRA Training: [`../train/kolors/`](../train/kolors/)
+
+|1024*1024|2048*2048|
+|-|-|
+|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/53ef6f41-da11-4701-8665-9f64392607bf)|![image_2048](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/66bb7a75-fe31-44e5-90eb-d3140ee4686d)|
+
 ### Example: Hunyuan-DiT

 Example script: [`hunyuan_dit_text_to_image.py`](./hunyuan_dit_text_to_image.py)
--- a/examples/image_synthesis/kolors_text_to_image.py
+++ b/examples/image_synthesis/kolors_text_to_image.py
@@ -0,0 +1,34 @@
+from diffsynth import ModelManager, KolorsImagePipeline, download_models
+import torch
+
+# Fetch the Kolors weights; the paths passed to ModelManager below are where this example expects them.
+# Model card: https://huggingface.co/Kwai-Kolors/Kolors
+download_models(["Kolors"])
+model_manager = ModelManager(torch_dtype=torch.float16, device="cuda",
+                             file_path_list=[
+                                 "models/kolors/Kolors/text_encoder",  # a directory, unlike the two weight files below
+                                 "models/kolors/Kolors/unet/diffusion_pytorch_model.safetensors",
+                                 "models/kolors/Kolors/vae/diffusion_pytorch_model.safetensors"
+                             ])
+pipe = KolorsImagePipeline.from_model_manager(model_manager)
+
+prompt = "一幅充满诗意美感的全身画，泛红的肤色，画中一位银色长发、蓝色眼睛、肤色红润、身穿蓝色吊带连衣裙的少女漂浮在水下，面向镜头，周围是光彩的气泡，和煦的阳光透过水面折射进水下"
+negative_prompt = "半身，苍白的肤色，蜡黄的肤色，尸体，错误的眼睛，糟糕的人脸，毁容，糟糕的艺术，变形，多余的肢体，模糊的颜色，模糊，重复，病态，残缺，错误的手指，口红，腮红"
+
+torch.manual_seed(7)  # fixed seed so the example output is repeatable
+image = pipe(  # first pass: plain text-to-image with the pipeline's default resolution
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    num_inference_steps=50,
+    cfg_scale=4,
+)
+image.save("image_1024.jpg")
+
+image = pipe(  # second pass: the first result, resized to 2048x2048, is passed back in as input_image
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    input_image=image.resize((2048, 2048)), denoising_strength=0.4, height=2048, width=2048,
+    num_inference_steps=50,
+    cfg_scale=4,
+)
+image.save("image_2048.jpg")