From f58ba5a784d6830f087c0f3099fb219a4abbe75d Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Thu, 16 Apr 2026 20:24:22 +0800 Subject: [PATCH] update docs --- README.md | 19 +- README_zh.md | 19 +- diffsynth/diffusion/template.py | 3 + .../Template_Model_Inference.md | 330 ++++++++++++++++++ .../Template_Model_Training.md | 297 ++++++++++++++++ .../Understanding_Diffusion_Templates.md | 62 ++++ docs/en/Model_Details/FLUX2.md | 9 + docs/en/README.md | 17 +- docs/en/index.rst | 8 + .../Template_Model_Inference.md | 330 ++++++++++++++++++ .../Template_Model_Training.md | 317 +++++++++++++++++ .../Understanding_Diffusion_Templates.md | 61 ++++ docs/zh/Model_Details/FLUX2.md | 9 + docs/zh/README.md | 17 +- docs/zh/index.rst | 8 + .../Template-KleinBase4B-Aesthetic.py | 52 +++ .../Template-KleinBase4B-Brightness.py | 43 +++ .../Template-KleinBase4B-ControlNet.py | 54 +++ .../Template-KleinBase4B-Edit.py | 54 +++ .../Template-KleinBase4B-Inpaint.py | 56 +++ .../Template-KleinBase4B-PandaMeme.py | 43 +++ .../Template-KleinBase4B-Sharpness.py | 35 ++ .../Template-KleinBase4B-SoftRGB.py | 52 +++ .../Template-KleinBase4B-Upscaler.py | 54 +++ .../model_inference/Template-KleinBase4B.py | 256 -------------- .../Template-KleinBase4B-Aesthetic.py | 63 ++++ .../Template-KleinBase4B-Brightness.py | 55 +++ .../Template-KleinBase4B-ControlNet.py | 66 ++++ .../Template-KleinBase4B-Edit.py | 66 ++++ .../Template-KleinBase4B-Inpaint.py | 68 ++++ .../Template-KleinBase4B-PandaMeme.py | 55 +++ .../Template-KleinBase4B-Sharpness.py | 47 +++ .../Template-KleinBase4B-SoftRGB.py | 64 ++++ .../Template-KleinBase4B-Upscaler.py | 66 ++++ .../full/Template-KleinBase4B-Aesthetic.sh | 19 + .../full/Template-KleinBase4B-Brightness.sh | 18 + .../full/Template-KleinBase4B-ControlNet.sh | 18 + ...Base4B.sh => Template-KleinBase4B-Edit.sh} | 15 +- .../full/Template-KleinBase4B-Inpaint.sh | 18 + .../full/Template-KleinBase4B-PandaMeme.sh | 18 + .../full/Template-KleinBase4B-Sharpness.sh | 18 + 
.../full/Template-KleinBase4B-SoftRGB.sh | 18 + .../full/Template-KleinBase4B-Upscaler.sh | 18 + .../scripts/brightness/model.py | 62 ++++ ...> convert_base_model_to_template_model.py} | 0 .../FLUX.2-klein-base-4B_lora.sh | 34 ++ .../Template-KleinBase4B-Brightness.sh | 36 ++ .../Template-KleinBase4B-Aesthetic.py | 55 +++ .../Template-KleinBase4B-Brightness.py | 46 +++ .../Template-KleinBase4B-ControlNet.py | 57 +++ .../Template-KleinBase4B-Edit.py | 57 +++ .../Template-KleinBase4B-Inpaint.py | 59 ++++ .../Template-KleinBase4B-PandaMeme.py | 46 +++ .../Template-KleinBase4B-Sharpness.py | 38 ++ .../Template-KleinBase4B-SoftRGB.py | 55 +++ .../Template-KleinBase4B-Upscaler.py | 57 +++ 56 files changed, 3237 insertions(+), 280 deletions(-) create mode 100644 docs/en/Diffusion_Templates/Template_Model_Inference.md create mode 100644 docs/en/Diffusion_Templates/Template_Model_Training.md create mode 100644 docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md create mode 100644 docs/zh/Diffusion_Templates/Template_Model_Inference.md create mode 100644 docs/zh/Diffusion_Templates/Template_Model_Training.md create mode 100644 docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py delete mode 100644 
examples/flux2/model_inference/Template-KleinBase4B.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh rename examples/flux2/model_training/full/{Template-KleinBase4B.sh => Template-KleinBase4B-Edit.sh} (52%) create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh create mode 100644 examples/flux2/model_training/scripts/brightness/model.py rename examples/flux2/model_training/scripts/{convert_base_model_to_skill_model.py => convert_base_model_to_template_model.py} (100%) create mode 100644 
examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh create mode 100644 examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py diff --git a/README.md b/README.md index b9f8ab0..fb905d2 100644 --- a/README.md +++ b/README.md @@ -343,11 +343,20 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) | Model ID | Inference | Low-VRAM Inference | Full Training | Full Training Validation | LoRA Training | LoRA Training Validation | |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
-|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| -|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| -|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| +|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| diff --git a/README_zh.md b/README_zh.md index 76b29d8..5d5a4f7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -343,11 +343,20 @@ FLUX.2 的示例代码位于:[/examples/flux2/](/examples/flux2/) |模型 ID|推理|低显存推理|全量训练|全量训练后验证|LoRA 训练|LoRA 训练后验证| |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
-|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| -|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| -|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| +|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| diff --git a/diffsynth/diffusion/template.py b/diffsynth/diffusion/template.py index 9277dd7..c685ad6 100644 --- a/diffsynth/diffusion/template.py +++ b/diffsynth/diffusion/template.py @@ -88,6 +88,9 @@ class TemplatePipeline(torch.nn.Module): self.model_configs = model_configs self.lazy_loading = lazy_loading if lazy_loading: + for model_config in model_configs: + TemplatePipeline.check_vram_config(model_config) + model_config.download_if_necessary() self.models = None else: models = [] diff --git a/docs/en/Diffusion_Templates/Template_Model_Inference.md b/docs/en/Diffusion_Templates/Template_Model_Inference.md new file mode 100644 index 
0000000..8e1a0b0 --- /dev/null +++ b/docs/en/Diffusion_Templates/Template_Model_Inference.md @@ -0,0 +1,330 @@ +# Template Model Inference + +## Enabling Template Models on Base Model Pipelines + +Using the base model [black-forest-labs/FLUX.2-klein-base-4B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) as an example, when generating images using only the base model: + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Generate an image +image = pipe( + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, +) +image.save("image.png") +``` + +The Template model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) can control image brightness during generation. Through the `TemplatePipeline` model, it can be loaded from ModelScope (via `ModelConfig(model_id="xxx/xxx")`) or from a local path (via `ModelConfig(path="xxx")`). Inputting `scale=0.8` increases image brightness. Note that in the code, input parameters for `pipe` must be transferred to `template_pipeline`, and `template_inputs` should be added. 
+ +```python +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], +) +# Generate an image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], +) +image.save("image_0.8.png") +``` + +## CFG Enhancement for Template Models + +Template models can enable CFG (Classifier-Free Guidance) to make control effects more pronounced. For example, with the model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness), adding `negative_template_inputs` to the TemplatePipeline input parameters and setting its scale to 0.5 will generate images with more noticeable brightness variations by contrasting both sides. + +```python +# Generate an image with CFG +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], + negative_template_inputs=[{"scale": 0.5}], +) +image.save("image_0.8_cfg.png") +``` + +## Low VRAM Support + +Template models currently do not support the main framework's VRAM management, but lazy loading can be used - loading Template models only when needed for inference. This significantly reduces VRAM requirements when enabling multiple Template models, with peak VRAM usage being that of a single Template model. Add parameter `lazy_loading=True` to enable. + +```python +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], + lazy_loading=True, +) +``` + +The base model's Pipeline and Template Pipeline are completely independent and can enable VRAM management on demand. 
+ +When Template model outputs contain LoRA in Template Cache, you need to enable VRAM management for the base model's Pipeline or enable LoRA hot loading (using the code below), otherwise LoRA weights will accumulate on top of each other. + +```python +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) +``` + +## Enabling Multiple Template Models + +`TemplatePipeline` can load multiple Template models. During inference, use `model_id` in `template_inputs` to distinguish inputs for each Template model. + +After enabling VRAM management for the base model's Pipeline and lazy loading for Template Pipeline, you can load any number of Template models. + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.bfloat16, + "onload_device": "cuda", + "preparing_dtype": torch.bfloat16, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness"), + 
ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), + ], +) +``` + +### Super-Resolution + Sharpness Enhancement + +Combining [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) and [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) can upscale blurry images while improving detail clarity. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }, + { + "model_id": 5, + "scale": 1, + }, + ], + negative_template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Upscaler_Sharpness.png") +``` + +| Low Resolution Input | High Resolution Output | +|----------------------|------------------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png) | + +### Structure Control + Aesthetic Alignment + Sharpness Enhancement + 
+[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) controls composition, [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) fills in details, and [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) ensures clarity. Combining these three Template models produces exquisite images. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0.8, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Controlnet_Aesthetic_Sharpness.png") +``` + +| Structure Control Image | Output Image | +|-------------------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png) | + +### Structure Control + Image Editing + Color Adjustment + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) controls composition, 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) preserves original image details like fur texture, and [DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) controls color tones, creating an artistic masterpiece. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Colored ink painting.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone. Colored ink painting.", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to colored ink painting.", + }, + { + "model_id": 4, + "R": 0.9, + "G": 0.5, + "B": 0.3, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + ], +) +image.save("image_Controlnet_Edit_SoftRGB.png") +``` + +| Structure Control Image | Editing Input Image | Output Image | +|-------------------------|---------------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png) | + +### Brightness Control + Image Editing + Local Redrawing + +[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) generates bright scenes, 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) references original image layout, and [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) keeps background unchanged, generating cross-dimensional content. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Flat anime style.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 0, + "scale": 0.6, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to flat anime style.", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + "force_inpaint": True, + }, + ], + negative_template_inputs = [ + { + "model_id": 0, + "scale": 0.5, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + }, + ], +) +image.save("image_Brightness_Edit_Inpaint.png") +``` + +| Reference Image | Redrawing Area | Output Image | +|------------------|----------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png) | \ No newline at end of file diff --git a/docs/en/Diffusion_Templates/Template_Model_Training.md b/docs/en/Diffusion_Templates/Template_Model_Training.md new file mode 100644 index 0000000..d9f1d23 --- /dev/null +++ 
b/docs/en/Diffusion_Templates/Template_Model_Training.md @@ -0,0 +1,297 @@ +# Template Model Training + +DiffSynth-Studio currently provides comprehensive Template training support for [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B), with more model adaptations coming soon. + +## Continuing Training from Pretrained Models + +To continue training from our pretrained models, refer to the table in [FLUX.2](../Model_Details/FLUX2.md#model-overview) to find the corresponding training script. + +## Building New Template Models + +### Template Model Component Format + +A Template model binds to a model repository (or local folder) containing a code file `model.py` as the entry point. Here's the template for `model.py`: + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, xxx, **kwargs): + yyy = xxx + return {"yyy": yyy} + + def forward(self, yyy, **kwargs): + zzz = yyy + return {"zzz": zzz} + +class DataProcessor: + def __call__(self, www, **kwargs): + xxx = www + return {"xxx": xxx} + +TEMPLATE_MODEL = CustomizedTemplateModel +TEMPLATE_MODEL_PATH = "model.safetensors" +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +During Template model inference, Template Input passes through `TEMPLATE_MODEL`'s `process_inputs` and `forward` to generate Template Cache. + +```mermaid +flowchart LR; + i@{shape: text, label: "Template Input"}-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +During Template model training, Template Input comes from the dataset through `TEMPLATE_DATA_PROCESSOR`. 
+ +```mermaid +flowchart LR; + d@{shape: text, label: "Dataset"}-->dp[TEMPLATE_DATA_PROCESSOR]-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +#### `TEMPLATE_MODEL` + +`TEMPLATE_MODEL` implements the Template model logic, inheriting from `torch.nn.Module` with required `process_inputs` and `forward` methods. These two methods form the complete Template model inference process, split into two stages to better support [two-stage split training](https://diffsynth-studio-doc.readthedocs.io/en/latest/Training/Split_Training.html). + +* `process_inputs` must use `@torch.no_grad()` for gradient-free computation +* `forward` must contain all gradient computations required for training + +Both methods should accept `**kwargs` for compatibility. Reserved parameters include: + +* To interact with the base model Pipeline (e.g., call text encoder), add `pipe` parameter to method inputs +* To enable Gradient Checkpointing, add `use_gradient_checkpointing` and `use_gradient_checkpointing_offload` to `forward` inputs +* Multiple Template models use `model_id` to distinguish Template Inputs - do not use this field in method parameters + +#### `TEMPLATE_MODEL_PATH` (Optional) + +`TEMPLATE_MODEL_PATH` specifies the relative path to pretrained weights. For example: + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +For multi-file models: + +```python +TEMPLATE_MODEL_PATH = [ + "model-00001-of-00003.safetensors", + "model-00002-of-00003.safetensors", + "model-00003-of-00003.safetensors", +] +``` + +Set to `None` for random initialization: + +```python +TEMPLATE_MODEL_PATH = None +``` + +#### `TEMPLATE_DATA_PROCESSOR` (Optional) + +To train Template models with DiffSynth-Studio, datasets should contain `template_inputs` fields in `metadata.json`. These fields pass through `TEMPLATE_DATA_PROCESSOR` to generate inputs for Template model methods. 
+ +For example, the brightness control model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) takes `scale` as input: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"scale": 0.2} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"scale": 0.6} + } +] +``` + +```python +class DataProcessor: + def __call__(self, scale, **kwargs): + return {"scale": scale} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +Or calculate scale from image paths: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"image": "/path/to/your/dataset/images/image_1.jpg"} + } +] +``` + +```python +class DataProcessor: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +### Training Template Models + +A Template model is "trainable" if its Template Cache variables are fully decoupled from the base model Pipeline - these variables should reach `model_fn` without participating in any Pipeline Unit calculations. + +For training with [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B), use these training script parameters: + +* `--extra_inputs`: Additional inputs. 
Use `template_inputs` for text-to-image models, `edit_image,template_inputs` for image editing models +* `--template_model_id_or_path`: Template model ID or local path (use `:` suffix for ModelScope IDs, e.g., `"DiffSynth-Studio/Template-KleinBase4B-Brightness:"`) +* `--remove_prefix_in_ckpt`: State dict prefix to remove when saving models (use `"pipe.template_model."`) +* `--trainable_models`: Trainable components (use `"template_model"` for full model, or `"template_model.xxx,template_model.yyy"` for specific components) + +Example training script: + +```shell +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "examples/flux2/model_training/scripts/brightness" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_example" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters +``` + +### Interacting with Base Model Pipeline Components + +Template models can interact with base model Pipelines. 
For example, using the text encoder: + +```python +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.xxx = xxx() + + @torch.no_grad() + def process_inputs(self, text, pipe, **kwargs): + input_ids = pipe.tokenizer(text) + text_emb = pipe.text_encoder(input_ids) + return {"text_emb": text_emb} + + def forward(self, text_emb, pipe, **kwargs): + kv_cache = self.xxx(text_emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +### Using Non-Trainable Components + +For models with pretrained components: + +```python +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.image_encoder = XXXEncoder.from_pretrained(xxx) + self.mlp = MLP() + + @torch.no_grad() + def process_inputs(self, image, **kwargs): + emb = self.image_encoder(image) + return {"emb": emb} + + def forward(self, emb, **kwargs): + kv_cache = self.mlp(emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +Set `--trainable_models template_model.mlp` to train only the MLP component. + +### Uploading Template Models + +After training, follow these steps to upload to ModelScope: + +1. Set model path in `model.py`: +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +2. Upload using ModelScope CLI: +```shell +modelscope upload user_name/your_model_id /path/to/your/model.py model.py --token ms-xxx +``` + +3. Package model files: +```python +from diffsynth.diffusion.template import load_template_model, load_state_dict +from safetensors.torch import save_file +import torch + +model = load_template_model("path/to/your/template/model", torch_dtype=torch.bfloat16, device="cpu") +state_dict = load_state_dict("path/to/your/ckpt/epoch-1.safetensors", torch_dtype=torch.bfloat16, device="cpu") +state_dict.update(model.state_dict()) +save_file(state_dict, "model.safetensors") +``` + +4. 
Upload model file: +```shell +modelscope upload user_name/your_model_id /path/to/your/model.safetensors model.safetensors --token ms-xxx +``` + +5. Verify inference: +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) + +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="user_name/your_model_id") + ], +) + +# Generate image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{xxx}], +) +image.save("image.png") \ No newline at end of file diff --git a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md new file mode 100644 index 0000000..1da52a8 --- /dev/null +++ b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -0,0 +1,62 @@ +# Understanding Diffusion Templates + +The Diffusion Templates framework is a controllable generation plugin framework in DiffSynth-Studio that provides additional controllable generation capabilities for Diffusion models. 
+ +## Framework Structure + +The Diffusion Templates framework structure is shown below: + +```mermaid +flowchart TD; + subgraph Template Pipeline + si@{shape: text, label: "Template Input"}-->i1@{shape: text, label: "Template Input 1"}; + si@{shape: text, label: "Template Input"}-->i2@{shape: text, label: "Template Input 2"}; + si@{shape: text, label: "Template Input"}-->i3@{shape: text, label: "Template Input 3"}; + i1@{shape: text, label: "Template Input 1"}-->m1[Template Model 1]-->c1@{shape: text, label: "Template Cache 1"}; + i2@{shape: text, label: "Template Input 2"}-->m2[Template Model 2]-->c2@{shape: text, label: "Template Cache 2"}; + i3@{shape: text, label: "Template Input 3"}-->m3[Template Model 3]-->c3@{shape: text, label: "Template Cache 3"}; + c1-->c@{shape: text, label: "Template Cache"}; + c2-->c; + c3-->c; + end + i@{shape: text, label: "Model Input"}-->m[Diffusion Pipeline]-->o@{shape: text, label: "Model Output"}; + c-->m; +``` + +The framework contains these module designs: + +* **Template Input**: Template model input. Format: Python dictionary with fields determined by each Template model (e.g., `{"scale": 0.8}`) +* **Template Model**: Template model, loadable from ModelScope (`ModelConfig(model_id="xxx/xxx")`) or local path (`ModelConfig(path="xxx")`) +* **Template Cache**: Template model output. Format: Python dictionary with fields matching base model Pipeline input parameters +* **Template Pipeline**: Module for managing multiple Template models. Handles model loading and cache integration + +When the Diffusion Templates framework is disabled, base model components (Text Encoder, DiT, VAE) are loaded into the Diffusion Pipeline. Model Input (prompt, height, width) produces Model Output (e.g., images). + +When enabled, Template models are loaded into the Template Pipeline. The Template Pipeline outputs Template Cache (a subset of Diffusion Pipeline input parameters) for subsequent processing in the Diffusion Pipeline. 
This enables controllable generation by intercepting part of the Diffusion Pipeline's input parameters. + +## Model Capability Medium + +Template Cache is defined as a subset of Diffusion Pipeline input parameters, ensuring framework generality. We restrict Template model outputs to only be Diffusion Pipeline parameters. The KV-Cache is particularly suitable as a Diffusion medium: + +* Proven effective in LLM Skills (prompts are converted to KV-Cache) +* Has "high permission" in Diffusion models - can directly control image generation +* Supports sequence-level concatenation for multiple Template models +* Requires minimal development (add pipeline parameter and integrate to model) + +Other potential Template mediums: +* **Residual**: Used in ControlNet for point-to-point control, but has resolution limitations and potential conflicts when merging +* **LoRA**: Treated as input parameters rather than model components + +**Currently, we only support KV-Cache and LoRA as Template Cache mediums in FLUX.2 Pipeline, with plans to support more models and mediums in the future.** + +## Template Model Format + +A Template model has this structure: + +``` +Template_Model +├── model.py +└── model.safetensors +``` + +Where `model.py` is the entry point and `model.safetensors` contains model weights. For implementation details, see [Template Model Training](Template_Model_Training.md) or [existing Template models](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness). 
\ No newline at end of file diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index f3bb020..6012879 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -66,6 +66,15 @@ image.save("image.jpg") |[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| |[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| Special Training Scripts: diff --git a/docs/en/README.md b/docs/en/README.md index d7e2893..e36d21a 100644 --- a/docs/en/README.md +++ b/docs/en/README.md @@ -18,6 +18,9 @@ graph LR; I_want_to_explore_new_technologies_based_on_this_project-->sec5[Section 5: API Reference]; I_want_to_explore_new_technologies_based_on_this_project-->sec6[Section 6: Academic Guide]; I_encountered_a_problem-->sec7[Section 7: Frequently Asked Questions]; + I_want_to_explore_new_technologies_based_on_this_project-->sec6[Section 6: Diffusion Templates] + I_want_to_explore_new_technologies_based_on_this_project-->sec8[Section 8: Academic Guide]; + 
I_encountered_a_problem-->sec9[Section 8: Frequently Asked Questions]; ``` @@ -75,7 +78,15 @@ This section introduces the independent core module `diffsynth.core` in `DiffSyn * [`diffsynth.core.loader`](./API_Reference/core/loader.md): Model download and loading * [`diffsynth.core.vram`](./API_Reference/core/vram.md): VRAM management -## Section 6: Academic Guide +## Section 6: Diffusion Templates + +This section introduces the controllable generation plugin framework for Diffusion models, explaining the framework's operation mechanism and how to use Template models for inference and training. + +* [Understanding Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Template Model Inference](./Diffusion_Templates/Template_Model_Inference.md) +* [Template Model Training](./Diffusion_Templates/Template_Model_Training.md) + +## Section 7: Academic Guide This section introduces how to use `DiffSynth-Studio` to train new models, helping researchers explore new model technologies. @@ -84,8 +95,8 @@ This section introduces how to use `DiffSynth-Studio` to train new models, helpi * Designing controllable generation models 【coming soon】 * Creating new training paradigms 【coming soon】 -## Section 7: Frequently Asked Questions +## Section 8: Frequently Asked Questions This section summarizes common developer questions. If you encounter issues during usage or development, please refer to this section. If you still cannot resolve the problem, please submit an issue on GitHub. -* [Frequently Asked Questions](./QA.md) \ No newline at end of file +* [Frequently Asked Questions](./QA.md) diff --git a/docs/en/index.rst b/docs/en/index.rst index 4b933ca..34c00b6 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -60,6 +60,14 @@ Welcome to DiffSynth-Studio's Documentation API_Reference/core/loader API_Reference/core/vram +..
toctree:: + :maxdepth: 2 + :caption: Diffusion Templates + + Diffusion_Templates/Understanding_Diffusion_Templates.md + Diffusion_Templates/Template_Model_Inference.md + Diffusion_Templates/Template_Model_Training.md + .. toctree:: :maxdepth: 2 :caption: Research Guide diff --git a/docs/zh/Diffusion_Templates/Template_Model_Inference.md b/docs/zh/Diffusion_Templates/Template_Model_Inference.md new file mode 100644 index 0000000..8fdd8e6 --- /dev/null +++ b/docs/zh/Diffusion_Templates/Template_Model_Inference.md @@ -0,0 +1,330 @@ +# Template 模型推理 + +## 在基础模型 Pipeline 上启用 Template 模型 + +我们以基础模型 [black-forest-labs/FLUX.2-klein-base-4B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 为例,当仅使用基础模型生成图像时 + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Generate an image +image = pipe( + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, +) +image.save("image.png") +``` + +Template 模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 可以控制模型生成图像的亮度。通过 `TemplatePipeline` 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`)。输入 scale=0.8 提高图像的亮度。注意在代码中,需将 `pipe` 的输入参数转移到 `template_pipeline` 中,并添加 `template_inputs`。 + +```python +# Load 
Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], +) +# Generate an image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], +) +image.save("image_0.8.png") +``` + +## Template 模型的 CFG 增强 + +Template 模型可以开启 CFG(Classifier-Free Guidance),使其控制效果更明显。例如模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness),在 `TemplatePipeline` 的输入参数中添加 `negative_template_inputs` 并将其 scale 设置为 0.5,模型就会对比两侧的差异,生成亮度变化更明显的图像。 + +```python +# Generate an image with CFG +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], + negative_template_inputs=[{"scale": 0.5}], +) +image.save("image_0.8_cfg.png") +``` + +## 低显存支持 + +Template 模型暂不支持主框架的显存管理,但可以使用惰性加载,仅在需要推理时加载对应的 Template 模型,这在启用多个 Template 模型时可以显著降低显存需求,显存占用峰值为单个 Template 模型的显存占用量。添加参数 `lazy_loading=True` 即可。 + +```python +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], + lazy_loading=True, +) +``` + +基础模型的 Pipeline 与 Template Pipeline 完全独立,可按需开启显存管理。 + +当 Template 模型输出的 Template Cache 包含 LoRA 时,需对基础模型的 Pipeline 开启显存管理或开启 LoRA 热加载(使用以下代码),否则会导致 LoRA 权重叠加。 + +```python +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) +``` + +## 启用多个 Template 模型 + +`TemplatePipeline` 可以加载多个 Template 模型,推理时在 `template_inputs` 中使用 `model_id` 区分每个 Template 模型的输入。 + +对基础模型 Pipeline 开启显存管理,对 Template Pipeline 开启惰性加载后,你可以加载任意多个 Template 模型。 + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from PIL import Image
+ +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.bfloat16, + "onload_device": "cuda", + "preparing_dtype": torch.bfloat16, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), + ], +) +``` + +### 超分辨率 + 锐利激发 + +组合 [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) 和 
[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness),可以将模糊图片高清化,同时提高细节部分的清晰度。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }, + { + "model_id": 5, + "scale": 1, + }, + ], + negative_template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Upscaler_Sharpness.png") +``` + +|低清晰度输入|高清晰度输出| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +### 结构控制 + 美学对齐 + 锐利激发 + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) 负责控制构图,[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) 负责填充细节,[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) 负责保证清晰度,融合三个 Template 模型可以获得精美的画面。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0.8, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + 
"image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Controlnet_Aesthetic_Sharpness.png") +``` + +|结构控制图|输出图| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +### 结构控制 + 图像编辑 + 色彩调节 + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) 负责控制构图,[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) 负责保留原图的毛发纹理等细节,[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) 负责控制画面色调,一副极具艺术感的画作被渲染出来。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Colored ink painting.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone. 
Colored ink painting.", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to colored ink painting.", + }, + { + "model_id": 4, + "R": 0.9, + "G": 0.5, + "B": 0.3, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + ], +) +image.save("image_Controlnet_Edit_SoftRGB.png") +``` + +|结构控制图|编辑输入图|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +### 亮度控制 + 图像编辑 + 局部重绘 + +[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) 负责生成明亮的画面,[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) 负责参考原图布局,[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) 负责控制背景不变,生成跨越二次元的画面内容。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. 
Flat anime style.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 0, + "scale": 0.6, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to flat anime style.", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + "force_inpaint": True, + }, + ], + negative_template_inputs = [ + { + "model_id": 0, + "scale": 0.5, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + }, + ], +) +image.save("image_Brightness_Edit_Inpaint.png") +``` + +|参考图|重绘区域|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| diff --git a/docs/zh/Diffusion_Templates/Template_Model_Training.md b/docs/zh/Diffusion_Templates/Template_Model_Training.md new file mode 100644 index 0000000..a45180d --- /dev/null +++ b/docs/zh/Diffusion_Templates/Template_Model_Training.md @@ -0,0 +1,317 @@ +# Template 模型训练 + +DiffSynth-Studio 目前已为 [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 提供了全面的 Templates 训练支持,更多模型的适配敬请期待。 + +## 基于预训练 Template 模型继续训练 + +如需基于我们预训练好的模型进行继续训练,请参考[FLUX.2](../Model_Details/FLUX2.md#模型总览) 中的表格,找到对应的训练脚本。 + +## 构建新的 Template 模型 + +### Template 模型组件格式 + +一个 Template 模型与一个模型库(或一个本地文件夹)绑定,模型库中有代码文件 `model.py` 作为唯一入口。`model.py` 的模板如下: + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def 
__init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, xxx, **kwargs): + yyy = xxx + return {"yyy": yyy} + + def forward(self, yyy, **kwargs): + zzz = yyy + return {"zzz": zzz} + +class DataProcessor: + def __call__(self, www, **kwargs): + xxx = www + return {"xxx": xxx} + +TEMPLATE_MODEL = CustomizedTemplateModel +TEMPLATE_MODEL_PATH = "model.safetensors" +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +在 Template 模型推理时,Template Input 先后经过 `TEMPLATE_MODEL` 的 `process_inputs` 和 `forward` 得到 Template Cache。 + +```mermaid +flowchart LR; + i@{shape: text, label: "Template Input"}-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +在 Template 模型训练时,Template Input 不再是用户的输入,而是从数据集中获取,由 `TEMPLATE_DATA_PROCESSOR` 进行计算得到。 + +```mermaid +flowchart LR; + d@{shape: text, label: "Dataset"}-->dp[TEMPLATE_DATA_PROCESSOR]-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +#### `TEMPLATE_MODEL` + +`TEMPLATE_MODEL` 是 Template 模型的代码实现,需继承 `torch.nn.Module`,并编写 `process_inputs` 与 `forward` 两个函数。`process_inputs` 与 `forward` 构成完整的 Template 模型推理过程,我们将其拆分为两部分,是为了在训练中更容易适配[两阶段拆分训练](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Training/Split_Training.html)。 + +* `process_inputs` 需带有装饰器 `@torch.no_grad()`,进行不包含梯度的计算 +* `forward` 需包含训练模型所需的全部梯度计算过程,其输入与 `process_inputs` 的输出相同 + +`process_inputs` 与 `forward` 需包含 `**kwargs`,保证兼容性,此外,我们提供了以下预留的参数 + +* 如需在 `process_inputs` 与 `forward` 中和基础模型 Pipeline 进行交互,例如调用基础模型 Pipeline 中的文本编码器进行计算,可在 `process_inputs` 与 `forward` 的输入参数中增加字段 `pipe` +* 如需在训练中启用 Gradient Checkpointing,可在 `forward` 的输入参数中增加字段 `use_gradient_checkpointing` 与 `use_gradient_checkpointing_offload` +* 多个 Template 模型需通过 `model_id` 区分 Template Inputs,请不要在 `process_inputs` 与 `forward` 的输入参数中使用这个字段 + +#### `TEMPLATE_MODEL_PATH`(可选项) + 
+`TEMPLATE_MODEL_PATH` 是模型预训练权重文件的相对路径,例如 + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +如需从多个模型文件中加载,可使用列表 + +```python +TEMPLATE_MODEL_PATH = [ + "model-00001-of-00003.safetensors", + "model-00002-of-00003.safetensors", + "model-00003-of-00003.safetensors", +] +``` + +如果需要随机初始化模型参数(模型还未训练),或不需要初始化模型参数,可将其设置为 `None`,或不设置 + +```python +TEMPLATE_MODEL_PATH = None +``` + +#### `TEMPLATE_DATA_PROCESSOR`(可选项) + +如需使用 DiffSynth-Studio 训练 Template 模型,则需构建训练数据集,数据集中的 `metadata.json` 包含 `template_inputs` 字段。`metadata.json` 中的 `template_inputs` 并不是直接输入给 Template 模型 `process_inputs` 的参数,而是提供给 `TEMPLATE_DATA_PROCESSOR` 的输入参数,由 `TEMPLATE_DATA_PROCESSOR` 计算出输入给 Template 模型 `process_inputs` 的参数。 + +例如,[DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 这一亮度控制模型的输入参数是 `scale`,即图像的亮度数值。`scale` 可以直接写在 `metadata.json` 中,此时 `TEMPLATE_DATA_PROCESSOR` 只需要传递参数: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"scale": 0.2} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"scale": 0.6} + } +] +``` + +```python +class DataProcessor: + def __call__(self, scale, **kwargs): + return {"scale": scale} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +也可在 `metadata.json` 中填写图像路径,直接在训练过程中计算 `scale`。 + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"image": "/path/to/your/dataset/images/image_1.jpg"} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"image": "/path/to/your/dataset/images/image_2.jpg"} + } +] +``` + +```python +class DataProcessor: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +### 训练 Template 模型 + +Template 模型“可训练”的充分条件是:Template Cache 中的变量计算与基础模型 Pipeline 完全解耦,这些变量在推理过程中输入给基础模型
Pipeline 后,不会参与任何 Pipeline Unit 的计算,直达 `model_fn`。 + +如果 Template 模型是“可训练”的,那么可以使用 DiffSynth-Studio 进行训练,以基础模型 [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 为例,在训练脚本中,填写字段: + +* `--extra_inputs`:额外输入,训练文生图模型的 Template 模型时只需填 `template_inputs`,训练图像编辑模型的 Template 模型时需填 `edit_image,template_inputs` +* `--template_model_id_or_path`:Template 模型的魔搭模型 ID 或本地路径,框架会优先匹配本地路径,若本地路径不存在则从魔搭模型库中下载该模型,填写模型 ID 时,以“:”结尾,例如 `"DiffSynth-Studio/Template-KleinBase4B-Brightness:"` +* `--remove_prefix_in_ckpt`:保存模型文件时,移除的 state dict 变量名前缀,填 `"pipe.template_model."` 即可 +* `--trainable_models`:可训练模型,填写 `template_model` 即可,若只需训练其中的某个组件,则需填写 `template_model.xxx,template_model.yyy`,以逗号分隔 + +以下是一个样例训练脚本,它会自动下载一个样例数据集,随机初始化模型权重后开始训练亮度控制模型: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "examples/flux2/model_training/scripts/brightness" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_example" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters +``` + +### 与基础模型 Pipeline 组件交互 + +Diffusion Template 框架允许 Template 模型与基础模型 Pipeline 进行交互。例如,你可能需要使用基础模型 Pipeline 中的 text encoder 对文本进行编码,此时在 `process_inputs` 和 `forward` 中使用预留字段 `pipe` 即可。 + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.xxx = xxx() + + @torch.no_grad() + def process_inputs(self, text, pipe, **kwargs): + input_ids = pipe.tokenizer(text) + text_emb = pipe.text_encoder(input_ids) + return {"text_emb": text_emb} + + def forward(self, text_emb, pipe, **kwargs): + kv_cache = self.xxx(text_emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +### 使用非训练的模型组件 + +在设计 Template 模型时,如果需要使用预训练的模型且不希望在训练过程中更新这部分参数,例如 + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.image_encoder = XXXEncoder.from_pretrained(xxx) + self.mlp = MLP() + + @torch.no_grad() + def process_inputs(self, image, **kwargs): + emb = self.image_encoder(image) + return {"emb": emb} + + def forward(self, emb, **kwargs): + kv_cache = self.mlp(emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +此时需在训练命令中通过参数 `--trainable_models template_model.mlp` 设置为仅训练 `mlp` 部分。 + +### 上传 Template 模型 + +完成训练后,按照以下步骤可上传 Template 模型到魔搭社区 + +Step 1:在 `model.py` 中填入训练好的模型文件名,例如 + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +Step 2:使用以下命令上传 `model.py`,其中 `--token ms-xxx` 在 https://modelscope.cn/my/access/token 获取 + +```shell +modelscope upload user_name/your_model_id /path/to/your/model.py model.py --token ms-xxx +``` + +Step 3:确认模型文件 + +确认要上传的模型文件,例如 `epoch-1.safetensors`、`step-2000.safetensors`。 + +注意,DiffSynth-Studio 保存的模型文件中只包含可训练的参数,如果模型中包括非训练参数,则需要重新将非训练的模型参数打包才能进行推理,你可以通过以下代码进行打包: + +```python
+from diffsynth.diffusion.template import load_template_model, load_state_dict +from safetensors.torch import save_file +import torch + +model = load_template_model("path/to/your/template/model", torch_dtype=torch.bfloat16, device="cpu") +state_dict = load_state_dict("path/to/your/ckpt/epoch-1.safetensors", torch_dtype=torch.bfloat16, device="cpu") +state_dict.update(model.state_dict()) +save_file(state_dict, "model.safetensors") +``` + +Step 4:上传模型文件 + +```shell +modelscope upload user_name/your_model_id /path/to/your/model/epoch-1.safetensors model.safetensors --token ms-xxx +``` + +Step 5:验证模型推理效果 + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="user_name/your_model_id") + ], +) +# Generate an image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{xxx}], +) +image.save("image.png") +``` + diff --git a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md new file mode 100644 index 0000000..622e6a9 --- /dev/null +++ 
b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -0,0 +1,61 @@ +# 理解 Diffusion Templates + +## 框架结构 + +Diffusion Templates 框架的结构如下图所示: + +```mermaid +flowchart TD; + subgraph Template Pipeline + si@{shape: text, label: "Template Input"}-->i1@{shape: text, label: "Template Input 1"}; + si@{shape: text, label: "Template Input"}-->i2@{shape: text, label: "Template Input 2"}; + si@{shape: text, label: "Template Input"}-->i3@{shape: text, label: "Template Input 3"}; + i1@{shape: text, label: "Template Input 1"}-->m1[Template Model 1]-->c1@{shape: text, label: "Template Cache 1"}; + i2@{shape: text, label: "Template Input 2"}-->m2[Template Model 2]-->c2@{shape: text, label: "Template Cache 2"}; + i3@{shape: text, label: "Template Input 3"}-->m3[Template Model 3]-->c3@{shape: text, label: "Template Cache 3"}; + c1-->c@{shape: text, label: "Template Cache"}; + c2-->c; + c3-->c; + end + i@{shape: text, label: "Model Input"}-->m[Diffusion Pipeline]-->o@{shape: text, label: "Model Output"}; + c-->m; +``` + +框架包含以下模块设计: + +* Template Input: Template 模型的输入。其格式为 Python 字典,其中的字段由每个 Template 模型自身决定,例如 `{"scale": 0.8}` +* Template Model: Template 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`) +* Template Cache: Template 模型的输出。其格式为 Python 字典,其中的字段仅支持对应基础模型 Pipeline 中的输入参数字段。 +* Template Pipeline: 用于调度多个 Template 模型的模块。该模块负责加载 Template 模型、整合多个 Template 模型的输出 + +当 Diffusion Templates 框架未启用时,基础模型组件(包括 Text Encoder、DiT、VAE 等)被加载到 Diffusion Pipeline 中,输入 Model Input(包括 prompt、height、width 等),输出 Model Output(例如图像)。 + +当 Diffusion Templates 框架启用后,若干个 Template 模型被加载到 Template Pipeline 中,Template Pipeline 输出 Template Cache(Diffusion Pipeline 输入参数的子集),并交由 Diffusion Pipeline 进行后续的进一步处理。Template Pipeline 通过接管一部分 Diffusion Pipeline 的输入参数来实现可控生成。 + +## 模型能力媒介 + +注意到,Template Cache 的格式被定义为 Diffusion Pipeline 输入参数的子集,这是框架通用性设计的基本保证,我们限制 Template 模型的输入只能是 Diffusion Pipeline 的输入参数。因此,我们需要为 Diffusion Pipeline 设计额外的输入参数作为模型能力媒介。其中,KV-Cache 是非常适合 
Diffusion 的模型能力媒介 + +* 技术路线已经在 LLM Skills 上得到了验证,LLM 中输入的提示词也会被潜在地转化为 KV-Cache +* KV-Cache 具有 Diffusion 模型的“高权限”,在生图模型上能够直接影响甚至完全控制生图结果,这保证 Diffusion Template 模型具备足够高的能力上限 +* KV-Cache 可以直接在序列层面拼接,让多个 Template 模型同时生效 +* KV-Cache 在框架层面的开发量少,增加一个 Pipeline 的输入参数并穿透到模型内部即可,可以快速适配新的 Diffusion 基础模型 + +另外,还有以下媒介也可以用于 Template: + +* Residual:残差,在 ControlNet 中使用较多,适合做点对点的控制,和 KVCache 相比缺点是不能支持任意分辨率以及多个 Residual 融合时可能冲突 +* LoRA:不要把它当成模型的一部分,而是把它当成模型的输入参数,LoRA 本质上是一系列张量,也可以作为模型能力的媒介 + +**目前,我们仅在 FLUX.2 的 Pipeline 上提供了 KV-Cache 和 LoRA 作为 Template Cache 的支持,后续会考虑支持更多模型和更多模型能力媒介。** + +## Template 模型格式 + +一个 Template 模型的格式为: + +``` +Template_Model +├── model.py +└── model.safetensors +``` + +其中,`model.py` 是模型的入口,`model.safetensors` 是 Template 模型的权重文件。关于如何构建 Template 模型,请参考文档 [Template 模型训练](Template_Model_Training.md),或参考[现有的 Template 模型](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)。 diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index 66725e6..16f2872 100644 --- a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -66,6 +66,15 @@ image.save("image.jpg") |[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| 
|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| |[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| 
+|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| 特殊训练脚本: diff --git a/docs/zh/README.md b/docs/zh/README.md index 8cec5d6..e1c463b 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -16,8 +16,9 @@ graph LR; 我想要基于此框架进行二次开发-->sec5[Section 5: API 参考]; 我想要基于本项目探索新的技术-->sec4[Section 4: 模型接入]; 我想要基于本项目探索新的技术-->sec5[Section 5: API 参考]; - 我想要基于本项目探索新的技术-->sec6[Section 6: 学术导引]; - 我遇到了问题-->sec7[Section 7: 常见问题]; + 我想要基于本项目探索新的技术-->sec6[Section 6: Diffusion Templates] + 我想要基于本项目探索新的技术-->sec7[Section 7: 学术导引]; + 我遇到了问题-->sec8[Section 8: 常见问题]; ``` @@ -75,7 +76,15 @@ graph LR; * [`diffsynth.core.loader`](./API_Reference/core/loader.md): 模型下载与加载 * [`diffsynth.core.vram`](./API_Reference/core/vram.md): 显存管理 -## Section 6: 学术导引 +## Section 6: Diffusion Templates + +本节介绍 Diffusion 模型可控生成插件框架 Diffusion Templates,讲解 Diffusion Templates 框架的运行机制,展示如何使用 Template 模型进行推理和训练。 + +* [理解 Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Template 模型推理](./Diffusion_Templates/Template_Model_Inference.md) +* [Template 模型训练](./Diffusion_Templates/Template_Model_Training.md) + +## Section 7: 学术导引 本节介绍如何利用 `DiffSynth-Studio` 训练新的模型,帮助科研工作者探索新的模型技术。 @@ -84,7 +93,7 @@ graph LR; * 设计可控生成模型【coming soon】 * 创建新的训练范式【coming soon】 -## Section 7: 常见问题 +## Section 8: 常见问题 本节总结了开发者常见的问题,如果你在使用和开发中遇到了问题,请参考本节内容,如果仍无法解决,请到 GitHub 上给我们提 issue。 diff 
--git a/docs/zh/index.rst b/docs/zh/index.rst index 42256b3..8042013 100644 --- a/docs/zh/index.rst +++ b/docs/zh/index.rst @@ -60,6 +60,14 @@ API_Reference/core/loader API_Reference/core/vram +.. toctree:: + :maxdepth: 2 + :caption: Diffusion Templates + + Diffusion_Templates/Understanding_Diffusion_Templates.md + Diffusion_Templates/Template_Model_Inference.md + Diffusion_Templates/Template_Model_Training.md + .. toctree:: :maxdepth: 2 :caption: 学术导引 diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 0000000..455a238 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py b/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py new file mode 100644 index 0000000..9a25f50 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py @@ -0,0 +1,43 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py new file mode 100644 index 0000000..d0c33a9 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,54 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Edit.py b/examples/flux2/model_inference/Template-KleinBase4B-Edit.py new file mode 100644 index 0000000..e229f1c --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Edit.py @@ -0,0 +1,54 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py new file mode 100644 index 0000000..c582637 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,56 @@ +from diffsynth.diffusion.template import 
TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": 
Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py new file mode 100644 index 0000000..058816c --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,43 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], +) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + 
negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py new file mode 100644 index 0000000..e621bd7 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,35 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 0000000..943ed6c --- /dev/null +++ 
b/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 128/255, + "G": 128/255, + "B": 128/255 + }], +) +image.save("image_rgb_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 208/255, + "G": 185/255, + "B": 138/255 + }], +) +image.save("image_rgb_warm.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 94/255, + "G": 163/255, + "B": 174/255 + }], +) +image.save("image_rgb_cold.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py b/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py new file mode 100644 index 0000000..d527ffb --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py @@ -0,0 +1,54 @@ +from 
diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_1.png") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_2.png") diff --git 
a/examples/flux2/model_inference/Template-KleinBase4B.py b/examples/flux2/model_inference/Template-KleinBase4B.py deleted file mode 100644 index 5b2dd93..0000000 --- a/examples/flux2/model_inference/Template-KleinBase4B.py +++ /dev/null @@ -1,256 +0,0 @@ -from diffsynth.diffusion.template import TemplatePipeline -from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig -import torch -from PIL import Image -import numpy as np - -def load_template_pipeline(model_ids): - template = TemplatePipeline.from_pretrained( - torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ModelConfig(model_id=model_id) for model_id in model_ids], - ) - return template - -# Base Model -pipe = Flux2ImagePipeline.from_pretrained( - torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), - ], - tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), -) -# image = pipe( -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# ) -# image.save("image_base.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.7}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# image.save("image_Brightness_light.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.5}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# 
image.save("image_Brightness_normal.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.3}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# image.save("image_Brightness_dark.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-ControlNet"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone, bathed in bright sunshine.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_ControlNet_sunshine.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_ControlNet_magic.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Edit"]) -# image = template( -# pipe, -# prompt="Put a hat on this cat.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "Put a hat on this cat.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Edit_hat.jpg") -# image = template( -# pipe, -# prompt="Make the cat turn its head to look to the right.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs 
= [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "Make the cat turn its head to look to the right.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Edit_head.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Upscaler"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_512.jpg"), -# "prompt": "A cat is sitting on a stone.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_512.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Upscaler_1.png") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_100.jpg"), -# "prompt": "A cat is sitting on a stone.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_100.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Upscaler_2.png") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-SoftRGB"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 128/255, -# "G": 128/255, -# "B": 128/255 -# }], -# ) -# image.save("image_rgb_normal.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 208/255, -# "G": 185/255, -# "B": 138/255 -# }], -# ) -# image.save("image_rgb_warm.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 94/255, -# "G": 163/255, -# "B": 174/255 -# }], -# ) 
-# image.save("image_rgb_cold.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-PandaMeme"]) -# image = template( -# pipe, -# prompt="A meme with a sleepy expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_sleepy.jpg") -# image = template( -# pipe, -# prompt="A meme with a happy expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_happy.jpg") -# image = template( -# pipe, -# prompt="A meme with a surprised expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_surprised.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Sharpness"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.1}], -# negative_template_inputs = [{"scale": 0.5}], -# ) -# image.save("image_Sharpness_0.1.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.8}], -# negative_template_inputs = [{"scale": 0.5}], -# ) -# image.save("image_Sharpness_0.8.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Inpaint"]) -# image = template( -# pipe, -# prompt="An orange cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_1.jpg"), -# "force_inpaint": True, -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_1.jpg"), -# }], -# ) -# 
image.save("image_Inpaint_1.jpg") -# image = template( -# pipe, -# prompt="A cat wearing sunglasses is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_2.jpg"), -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_2.jpg"), -# }], -# ) -# image.save("image_Inpaint_2.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 0000000..2c6f60a --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,63 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + 
torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py new file mode 100644 index 0000000..8210e66 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py new file mode 100644 index 0000000..3f469de --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": 
"cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", + }], + 
negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py new file mode 100644 index 0000000..c63fb9b --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) 
+image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py new file mode 100644 index 0000000..3106cba --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,68 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py new file mode 100644 index 0000000..3caa8e8 --- /dev/null +++ 
b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + 
template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py new file mode 100644 index 0000000..042f1a5 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,47 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.1.jpg") +image = 
template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 0000000..80c7ac8 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,64 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = 
[{ + "R": 128/255, + "G": 128/255, + "B": 128/255 + }], +) +image.save("image_rgb_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 208/255, + "G": 185/255, + "B": 138/255 + }], +) +image.save("image_rgb_warm.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 94/255, + "G": 163/255, + "B": 174/255 + }], +) +image.save("image_rgb_cold.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py new file mode 100644 index 0000000..d303cb2 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + 
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_1.png") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_2.png") diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh new file mode 100644 index 0000000..bee9771 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh @@ -0,0 +1,19 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Aesthetic/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic \ + --dataset_metadata_path 
data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Aesthetic:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Aesthetic_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --enable_lora_hot_loading diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh new file mode 100644 index 0000000..2506a62 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" 
\ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh b/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh new file mode 100644 index 0000000..ee0d23b --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-ControlNet/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-ControlNet:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-ControlNet_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh similarity index 52% rename from examples/flux2/model_training/full/Template-KleinBase4B.sh rename to examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh index 093f1ef..536f963 100644 --- a/examples/flux2/model_training/full/Template-KleinBase4B.sh +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh @@ -1,17 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Edit/*" --local_dir ./data/diffsynth_example_dataset + accelerate launch examples/flux2/model_training/train.py \ - --dataset_base_path xxx \ - --dataset_metadata_path xxx/metadata.jsonl \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit/metadata.jsonl \ --extra_inputs "template_inputs" \ --max_pixels 1048576 \ - --dataset_repeat 1 \ + --dataset_repeat 50 \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ - --template_model_id_or_path "xxx" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Edit:" \ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ --learning_rate 1e-4 \ - --num_epochs 999 \ + --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.template_model." 
\ - --output_path "./models/train/Template-KleinBase4B_full" \ + --output_path "./models/train/Template-KleinBase4B-Edit_full" \ --trainable_models "template_model" \ - --save_steps 1000 \ --use_gradient_checkpointing \ --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh new file mode 100644 index 0000000..19ddb67 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Inpaint/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Inpaint:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Inpaint_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh b/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh new file mode 100644 index 0000000..861eb57 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh new file mode 100644 index 0000000..3afa3a2 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Sharpness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Sharpness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Sharpness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh b/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh new file mode 100644 index 0000000..add0ea8 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-SoftRGB/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-SoftRGB:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-SoftRGB_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh new file mode 100644 index 0000000..aab063b --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Upscaler/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Upscaler:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Upscaler_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/scripts/brightness/model.py b/examples/flux2/model_training/scripts/brightness/model.py new file mode 100644 index 0000000..9be2cb0 --- /dev/null +++ b/examples/flux2/model_training/scripts/brightness/model.py @@ -0,0 +1,62 @@ +import torch, math +from PIL import Image +import numpy as np + + +class SingleValueEncoder(torch.nn.Module): + def __init__(self, dim_in=256, dim_out=4096, length=32): + super().__init__() + self.length = length + self.prefer_value_embedder = torch.nn.Sequential(torch.nn.Linear(dim_in, dim_out), torch.nn.SiLU(), torch.nn.Linear(dim_out, dim_out)) + self.positional_embedding = torch.nn.Parameter(torch.randn(self.length, dim_out)) + + def get_timestep_embedding(self, timesteps, embedding_dim, max_period=10000): + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * torch.arange(0, half_dim, dtype=torch.float32, device=timesteps.device) / half_dim + emb = timesteps[:, None].float() * torch.exp(exponent)[None, :] + emb = torch.cat([torch.cos(emb), torch.sin(emb)], dim=-1) + return emb + + def forward(self, value, dtype): + emb = self.get_timestep_embedding(value * 1000, 256).to(dtype) + emb = self.prefer_value_embedder(emb).squeeze(0) + base_embeddings = emb.expand(self.length, -1) + positional_embedding = self.positional_embedding.to(dtype=base_embeddings.dtype, device=base_embeddings.device) + learned_embeddings = base_embeddings + positional_embedding + return learned_embeddings + + +class ValueFormatModel(torch.nn.Module): + def __init__(self, num_double_blocks=5, num_single_blocks=20, dim=3072, num_heads=24, length=512): + super().__init__() + self.block_names = [f"double_{i}" for i in range(num_double_blocks)] + [f"single_{i}" for i in range(num_single_blocks)] + self.proj_k = torch.nn.ModuleDict({block_name: 
SingleValueEncoder(dim_out=dim, length=length) for block_name in self.block_names}) + self.proj_v = torch.nn.ModuleDict({block_name: SingleValueEncoder(dim_out=dim, length=length) for block_name in self.block_names}) + self.num_heads = num_heads + self.length = length + + @torch.no_grad() + def process_inputs(self, pipe, scale, **kwargs): + return {"value": torch.Tensor([scale]).to(dtype=pipe.torch_dtype, device=pipe.device)} + + def forward(self, value, **kwargs): + kv_cache = {} + for block_name in self.block_names: + k = self.proj_k[block_name](value, value.dtype) + k = k.view(1, self.length, self.num_heads, -1) + v = self.proj_v[block_name](value, value.dtype) + v = v.view(1, self.length, self.num_heads, -1) + kv_cache[block_name] = (k, v) + return {"kv_cache": kv_cache} + + +class DataAnnotator: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + + +TEMPLATE_MODEL = ValueFormatModel +TEMPLATE_MODEL_PATH = None # You should modify this parameter after training +TEMPLATE_DATA_PROCESSOR = DataAnnotator \ No newline at end of file diff --git a/examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py b/examples/flux2/model_training/scripts/convert_base_model_to_template_model.py similarity index 100% rename from examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py rename to examples/flux2/model_training/scripts/convert_base_model_to_template_model.py diff --git a/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh b/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh new file mode 100644 index 0000000..25751d1 --- /dev/null +++ b/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh @@ -0,0 +1,34 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir 
./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/FLUX.2-klein-base-4B/metadata.csv \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 5 \ + --remove_prefix_in_ckpt "pipe.dit." \ + --output_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \ + --lora_base_model "dit" \ + --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \ + --lora_rank 32 \ + --use_gradient_checkpointing \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + 
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 5 \ + --remove_prefix_in_ckpt "pipe.dit." \ + --output_path "./models/train/FLUX.2-klein-base-4B_lora" \ + --lora_base_model "dit" \ + --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \ + --lora_rank 32 \ + --use_gradient_checkpointing \ + --task "sft:train" diff --git a/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh b/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh new file mode 100644 index 0000000..b214595 --- /dev/null +++ b/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh @@ -0,0 +1,36 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path 
data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 0000000..bdd66d5 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Aesthetic_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="a bird with fire", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": [1], + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": [1], + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="a bird with fire", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": [1], + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": [1], + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py new file mode 100644 index 0000000..7701faf --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py @@ -0,0 +1,46 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Brightness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py new file mode 100644 index 0000000..c12b977 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,57 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-ControlNet_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git 
a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py new file mode 100644 index 0000000..5e6d2b5 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py @@ -0,0 +1,57 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Edit_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), 
+ "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py new file mode 100644 index 0000000..5b29df7 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,59 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Inpaint_full/epoch-1.safetensors", 
torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py new file mode 100644 index 0000000..ad457b3 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,46 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", 
origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-PandaMeme_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py new file mode 100644 index 0000000..2a9f584 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,38 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = 
Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Sharpness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 0000000..4886530 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + 
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

# Validation script for the SoftRGB diffusion template trained on top of
# FLUX.2-klein-base-4B. Renders the same prompt under three color-tint
# presets (neutral, warm, cold) so the trained RGB control can be inspected.

# Base FLUX.2 pipeline: transformer from the base checkpoint; text encoder,
# VAE and tokenizer come from the klein-4B release.
pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")],
)
# Replace the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-SoftRGB_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)

# Color-tint presets as 8-bit RGB; the template expects channels normalized
# to [0, 1], so each component is divided by 255 below.
presets = {
    "normal": (128, 128, 128),
    "warm": (208, 185, 138),
    "cold": (94, 163, 174),
}
for name, (r, g, b) in presets.items():
    image = template(
        pipe,
        prompt="A cat is sitting on a stone.",
        seed=0, cfg_scale=4, num_inference_steps=50,
        template_inputs=[{"R": r / 255, "G": g / 255, "B": b / 255}],
    )
    image.save(f"image_rgb_{name}.jpg")
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch
from modelscope import dataset_snapshot_download
from PIL import Image

# Validation script for the Upscaler diffusion template trained on top of
# FLUX.2-klein-base-4B. Downloads the example low-resolution inputs, loads
# the locally trained template weights, and upscales two images of
# different source resolutions (512px and 100px) with the same prompt.

# Base FLUX.2 pipeline: transformer from the base checkpoint; text encoder,
# VAE and tokenizer come from the klein-4B release.
pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")],
)
# Replace the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Upscaler_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)

# Fetch the example low-resolution inputs used for validation.
dataset_snapshot_download(
    "DiffSynth-Studio/examples_in_diffsynth",
    allow_file_pattern=["templates/*"],
    local_dir="data/examples",
)

prompt = "A cat is sitting on a stone."
for index, filename in enumerate(("image_lowres_512.jpg", "image_lowres_100.jpg"), start=1):
    # Open each source once and reuse it for both the positive and the
    # negative branch; the negative branch drops the prompt so guidance is
    # driven by the text conditioning only.
    low_res = Image.open(f"data/examples/templates/{filename}")
    image = template(
        pipe,
        prompt=prompt,
        seed=0, cfg_scale=4, num_inference_steps=50,
        template_inputs=[{"image": low_res, "prompt": prompt}],
        negative_template_inputs=[{"image": low_res, "prompt": ""}],
    )
    image.save(f"image_Upscaler_{index}.png")