From 13f2618da2b678a3c7bdde61c9ddb976b5e4fd9d Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Mon, 20 Apr 2026 10:56:29 +0800 Subject: [PATCH] add a new model --- README.md | 1 + README_zh.md | 1 + .../Introducing_Diffusion_Templates.md | 67 ++++++++++++++++++ .../Template_Model_Training.md | 49 ++++++++++++- .../Understanding_Diffusion_Templates.md | 2 +- docs/en/Model_Details/FLUX2.md | 1 + docs/en/README.md | 3 +- docs/en/index.rst | 1 + .../Introducing_Diffusion_Templates.md | 68 +++++++++++++++++++ .../Template_Model_Training.md | 49 ++++++++++++- .../Understanding_Diffusion_Templates.md | 2 +- docs/zh/Model_Details/FLUX2.md | 1 + docs/zh/README.md | 3 +- docs/zh/index.rst | 1 + .../Template-KleinBase4B-ContentRef.py | 52 ++++++++++++++ .../Template-KleinBase4B-ContentRef.py | 63 +++++++++++++++++ .../full/Template-KleinBase4B-ContentRef.sh | 19 ++++++ examples/flux2/model_training/train.py | 2 +- .../Template-KleinBase4B-ContentRef.py | 55 +++++++++++++++ 19 files changed, 433 insertions(+), 7 deletions(-) create mode 100644 docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md create mode 100644 docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py diff --git a/README.md b/README.md index fb905d2..253ef87 100644 --- a/README.md +++ b/README.md @@ -357,6 +357,7 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) 
|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/README_zh.md b/README_zh.md index 5d5a4f7..bc66027 100644 --- a/README_zh.md +++ b/README_zh.md @@ -357,6 +357,7 @@ FLUX.2 的示例代码位于:[/examples/flux2/](/examples/flux2/) 
|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md new file mode 100644 index 0000000..f31775f --- /dev/null +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -0,0 +1,67 @@ +# Diffusion Templates + +Diffusion Templates is a controllable generation plugin framework for Diffusion models in DiffSynth-Studio, providing additional controllable generation capabilities for base models. 
+ +* Open Source Code: [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) +* Technical Report: coming soon +* Documentation Reference + * Introducing Diffusion Templates: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) + * Diffusion Templates Architecture Details: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) + * Template Model Inference: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) + * Template Model Training: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) +* Online Demo: [ModelScope Creative Space](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) +* Models: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) + * Structure Control: [DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) + * Brightness Adjustment: [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) + * Color Adjustment: [DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) + * Image Editing: 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) + * Super Resolution: [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) + * Sharpness Enhancement: [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) + * Aesthetic Alignment: [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) + * Inpainting: [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) + * Content Reference: [DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * Panda Meme (Easter Egg Model): [DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) +* Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) + * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) + * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) + * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) + * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) + * [DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) + * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) + * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) + * 
[DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) + * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) + * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) + * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) + * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) + * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) + * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) + * [DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) + * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) + * [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) + +## Model Gallery + +* Super Resolution + Sharpness Enhancement: Generate ultra-high-clarity images + +|Low Resolution Input|High Resolution Output| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +* Structure Control + Aesthetic Alignment + Sharpness Enhancement: Fully-armed ControlNet + +|Structure Control Image|Output Image| +|-|-| 
+|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +* Structure Control + Image Editing + Color Adjustment: Artistic style creation at will + +|Structure Control Image|Editing Input Image|Output Image| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +* Brightness Adjustment + Image Editing + Inpainting: Transport elements across dimensions + +|Reference Image|Inpaint Region|Output Image| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| diff --git a/docs/en/Diffusion_Templates/Template_Model_Training.md b/docs/en/Diffusion_Templates/Template_Model_Training.md index d9f1d23..b32b69c 100644 --- a/docs/en/Diffusion_Templates/Template_Model_Training.md +++ b/docs/en/Diffusion_Templates/Template_Model_Training.md @@ -228,9 +228,56 @@ TEMPLATE_MODEL = CustomizedTemplateModel Set `--trainable_models template_model.mlp` to train only the MLP component. +### Training on Low VRAM Devices + +The framework supports splitting Template model training into two stages: the first stage performs gradient-free computation, and the second stage performs gradient updates. 
For more information, refer to the documentation: [Two-stage Split Training](https://diffsynth-studio-doc.readthedocs.io/en/latest/Training/Split_Training.html). Here's a sample script: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" +``` + +Two-stage split training can reduce VRAM requirements and improve training speed. The training process is lossless in precision, but requires significant disk space for storing cache files. + +To further reduce VRAM requirements, you can enable fp8 precision by adding `--fp8_models "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors"` to the first-stage command and `--fp8_models "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors"` to the second-stage command. Note that fp8 precision can only be enabled on non-trainable model components and introduces minor errors. + ### Uploading Template Models -After training, follow these steps to upload to ModelScope: +After training, follow these steps to upload Template models to ModelScope for wider distribution. 1. Set model path in `model.py`: ```python diff --git a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md index 1da52a8..900d111 100644 --- a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -1,4 +1,4 @@ -# Understanding Diffusion Templates +# Diffusion Templates Architecture Details The Diffusion Templates framework is a controllable generation plugin framework in DiffSynth-Studio that provides additional controllable generation capabilities for Diffusion models. 
diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index 6012879..ba1a754 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -75,6 +75,7 @@ image.save("image.jpg") |[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| Special Training Scripts: diff --git a/docs/en/README.md b/docs/en/README.md index e36d21a..1bd1b5a 100644 --- a/docs/en/README.md +++ b/docs/en/README.md @@ -82,7 +82,8 @@ This section introduces the independent core module `diffsynth.core` in `DiffSyn This section introduces the controllable generation plugin framework for Diffusion models, explaining the framework's operation mechanism and how to use Template models for inference and training. 
-* [Understanding Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Introducing Diffusion Templates](./Diffusion_Templates/Introducing_Diffusion_Templates.md) +* [Diffusion Templates Architecture Details](./Diffusion_Templates/Understanding_Diffusion_Templates.md) * [Template Model Inference](./Diffusion_Templates/Template_Model_Inference.md) * [Template Model Training](./Diffusion_Templates/Template_Model_Training.md) diff --git a/docs/en/index.rst b/docs/en/index.rst index 34c00b6..0e1eecb 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -64,6 +64,7 @@ Welcome to DiffSynth-Studio's Documentation :maxdepth: 2 :caption: Diffusion Templates + Diffusion_Templates/Introducing_Diffusion_Templates.md Diffusion_Templates/Understanding_Diffusion_Templates.md Diffusion_Templates/Template_Model_Inference.md Diffusion_Templates/Template_Model_Training.md diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md new file mode 100644 index 0000000..21795e1 --- /dev/null +++ b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -0,0 +1,68 @@ +# Diffusion Templates + +Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插件框架,可以为基础模型提供额外的可控生成能力。 + +* 开源代码:[DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) +* 技术报告:coming soon +* 文档参考 + * Diffusion Templates 简介:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) + * Diffusion Templates 架构详解:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) + * Template 模型推理:[English 
Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) + * Template 模型训练:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) +* 在线体验:[魔搭社区创空间](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) +* 模型:[合集](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) + * 结构控制:[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) + * 亮度调节:[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) + * 色彩调节:[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) + * 图像编辑:[DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) + * 超分辨率:[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) + * 锐利激发:[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) + * 美学对齐:[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) + * 局部重绘:[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) + * 内容参考:[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * 
魔性熊猫(彩蛋模型):[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) +* 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) + * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) + * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) + * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) + * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) + * [DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) + * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) + * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) + * [DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) + * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) + * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) + * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) + * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) + * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) + * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) + * 
[DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) + * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) + * [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) + +## 模型效果一览 + +* 超分辨率 + 锐利激发:生成清晰度极高的图像 + +|低清晰度输入|高清晰度输出| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +* 结构控制 + 美学对齐 + 锐利激发:全副武装的 ControlNet + +|结构控制图|输出图| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +* 结构控制 + 图像编辑 + 色彩调节:随心所欲的艺术风格创作 + +|结构控制图|编辑输入图|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +* 亮度调节 + 图像编辑 + 局部重绘:让图中的部分元素跨越次元 + +|参考图|重绘区域|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| + diff --git a/docs/zh/Diffusion_Templates/Template_Model_Training.md 
b/docs/zh/Diffusion_Templates/Template_Model_Training.md index a45180d..b3726ab 100644 --- a/docs/zh/Diffusion_Templates/Template_Model_Training.md +++ b/docs/zh/Diffusion_Templates/Template_Model_Training.md @@ -239,9 +239,56 @@ TEMPLATE_MODEL = CustomizedTemplateModel 此时需在训练命令中通过参数 `--trainable_models template_model.mlp` 设置为仅训练 `mlp` 部分。 +### 在低显存的设备上训练 + +框架支持将 Template 模型的训练拆分为两阶段,第一阶段进行无梯度计算,第二阶段进行梯度更新,更多信息请参考文档:[两阶段拆分训练](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Training/Split_Training.html),以下是样例脚本: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" +``` + +两阶段拆分训练可以降低显存需求,提高训练速度,训练过程是无损精度的,但需要较大硬盘空间用于存储 Cache 文件。 + +如需进一步减少显存需求,可开启 fp8 精度,在第一阶段和第二阶段的训练命令中分别添加参数 `--fp8_models "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors"` 和 `--fp8_models "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors"` 即可,fp8 精度只能在非训练模型组件上启用,且存在少量误差。 + ### 上传 Template 模型 -完成训练后,按照以下步骤可上传 Template 模型到魔搭社区 +完成训练后,按照以下步骤可上传 Template 模型到魔搭社区,供更多人下载使用。 Step 1:在 `model.py` 中填入训练好的模型文件名,例如 diff --git a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md index 622e6a9..183a338 100644 --- a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md +++ b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -1,4 +1,4 @@ -# 理解 Diffusion Templates +# Diffusion Templates 架构详解 ## 框架结构 diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index 16f2872..13381a5 100644 --- 
a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -75,6 +75,7 @@ image.save("image.jpg") |[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| 特殊训练脚本: diff --git a/docs/zh/README.md b/docs/zh/README.md index e1c463b..b1d1ace 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -80,7 +80,8 @@ graph LR; 本节介绍 Diffusion 模型可控生成插件框架 Diffusion Templates,讲解 Diffusion Templates 框架的运行机制,展示如何使用 Template 模型进行推理和训练。 -* [理解 Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Diffusion Templates 简介](./Diffusion_Templates/Introducing_Diffusion_Templates.md) +* [Diffusion Templates 架构详解](./Diffusion_Templates/Understanding_Diffusion_Templates.md) * [Template 模型推理](./Diffusion_Templates/Template_Model_Inference.md) * [Template 
模型训练](./Diffusion_Templates/Template_Model_Training.md) diff --git a/docs/zh/index.rst b/docs/zh/index.rst index 8042013..b3caee7 100644 --- a/docs/zh/index.rst +++ b/docs/zh/index.rst @@ -64,6 +64,7 @@ :maxdepth: 2 :caption: Diffusion Templates + Diffusion_Templates/Introducing_Diffusion_Templates.md Diffusion_Templates/Understanding_Diffusion_Templates.md Diffusion_Templates/Template_Model_Inference.md Diffusion_Templates/Template_Model_Training.md diff --git a/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py new file mode 100644 index 0000000..839e16c --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py new file mode 100644 index 0000000..6c0dc13 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,63 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + 
device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg") diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh b/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh new file mode 100644 index 0000000..52d3c2d --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh @@ -0,0 +1,19 @@ 
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-ContentRef/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ContentRef \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ContentRef/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-ContentRef:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-ContentRef_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --enable_lora_hot_loading diff --git a/examples/flux2/model_training/train.py b/examples/flux2/model_training/train.py index 144f41e..a1af889 100644 --- a/examples/flux2/model_training/train.py +++ b/examples/flux2/model_training/train.py @@ -29,7 +29,7 @@ class Flux2ImageTrainingModule(DiffusionTrainingModule): tokenizer_config = self.parse_path_or_model_id(tokenizer_path, default_value=ModelConfig(model_id="black-forest-labs/FLUX.2-dev", origin_file_pattern="tokenizer/")) self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config) self.pipe = self.load_training_template_model(self.pipe, template_model_id_or_path, args.use_gradient_checkpointing, args.use_gradient_checkpointing_offload) - self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model) + self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model, remove_unnecessary_params=True) if enable_lora_hot_loading: self.pipe.dit = self.pipe.enable_lora_hot_loading(self.pipe.dit) # Training mode diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py new file mode 100644 index 0000000..cdb9405 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + 
device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-ContentRef_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg")