From b84f906964dd7cc5b6babd24885117784ff8c9d4 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Tue, 3 Dec 2024 15:30:01 +0800 Subject: [PATCH 1/4] support artaug --- examples/ArtAug/README.md | 12 ++++++++++++ examples/ArtAug/artaug_flux.py | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 examples/ArtAug/README.md create mode 100644 examples/ArtAug/artaug_flux.py diff --git a/examples/ArtAug/README.md b/examples/ArtAug/README.md new file mode 100644 index 0000000..8c9c5a8 --- /dev/null +++ b/examples/ArtAug/README.md @@ -0,0 +1,12 @@ +# ArtAug + +ArtAug is an approach designed to improve text-to-image synthesis models through synthesis-understanding interactions. We have trained an ArtAug enhancement module for FLUX.1-dev in the format of LoRA. See [`./artaug_flux.py`](./artaug_flux.py). + +|FLUX.1-dev|FLUX.1-dev + ArtAug LoRA| +|-|-| +|![image_1_base](https://github.com/user-attachments/assets/e1d5c505-b423-45fe-be01-25c2758f5417)|![image_1_enhance](https://github.com/user-attachments/assets/335908e3-d0bd-41c2-9d99-d10528a2d719)| +|![image_2_base](https://github.com/user-attachments/assets/7f38e8d4-3c62-492e-bd96-be60f0855037)|![image_2_enhance](https://github.com/user-attachments/assets/ae3a1daf-7a7c-44fd-bdbc-1d2a83bc3de3)| +|![image_3_base](https://github.com/user-attachments/assets/e2ae4879-9202-45d6-9df7-fbcbd2093d19)|![image_3_enhance](https://github.com/user-attachments/assets/4df6e5b9-65de-408b-88c6-51db39aad801)| +|![image_4_base](https://github.com/user-attachments/assets/dbc65387-60df-4a18-b1bb-45eaa5be5c1d)|![image_4_enhance](https://github.com/user-attachments/assets/fc19860d-3e28-468b-b013-8745255ac6db)| +|![image_5_base](https://github.com/user-attachments/assets/bb65c1ba-c0c6-4d3b-b3ef-bdbbb5f03a48)|![image_5_enhance](https://github.com/user-attachments/assets/03570c62-9a0b-428f-8c86-6e01c1421202)| 
+|![image_6_base](https://github.com/user-attachments/assets/18e9a4e7-2afd-4ca9-bc49-7736042c25dc)|![image_6_enhance](https://github.com/user-attachments/assets/aa73571f-098a-4e65-9eda-b9729ba379cd)| diff --git a/examples/ArtAug/artaug_flux.py b/examples/ArtAug/artaug_flux.py new file mode 100644 index 0000000..0a2e5eb --- /dev/null +++ b/examples/ArtAug/artaug_flux.py @@ -0,0 +1,24 @@ +import torch +from diffsynth import ModelManager, FluxImagePipeline, download_customized_models + +prompt = "a beautiful Asian girl." + +# Generate an image using FLUX.1-dev +model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"]) +pipe = FluxImagePipeline.from_model_manager(model_manager) + +image = pipe(prompt=prompt, seed=0) +image.save("image.jpg") + +# Download ArtAug LoRA +lora_path = download_customized_models( + model_id="DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1", + origin_file_path="merged_lora.safetensors", + local_dir="models/lora", + downloading_priority=["ModelScope", "HuggingFace"] +)[0] +model_manager.load_lora(lora_path, lora_alpha=1.0) + +# Generate an image using FLUX.1-dev + ArtAug +image = pipe(prompt=prompt, seed=0) +image.save("image_artaug.jpg") From 469a0405a1fe5113f71bbab1110ef34ebe3b6e68 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Wed, 18 Dec 2024 20:32:23 +0800 Subject: [PATCH 2/4] ArtAug --- README.md | 6 +++++ examples/ArtAug/README.md | 47 ++++++++++++++++++++++++++++------ examples/ArtAug/artaug_flux.py | 2 +- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ba691bc..891d85f 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,12 @@ Until now, DiffSynth Studio has supported the following models: ## News +- **December 18, 2024** We propose ArtAug, an approach designed to improve text-to-image synthesis models through synthesis-understanding interactions. We have trained an ArtAug enhancement module for FLUX.1-dev in the format of LoRA. 
This model integrates the aesthetic understanding of Qwen2-VL-72B into FLUX.1-dev, leading to an improvement in the quality of generated images. + - Paper: https://arxiv.org/abs/2412.12888 + - Examples: https://github.com/modelscope/DiffSynth-Studio/tree/main/examples/ArtAug + - Model: [ModelScope](https://www.modelscope.cn/models/DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1), [HuggingFace](https://huggingface.co/ECNU-CILab/ArtAug-lora-FLUX.1dev-v1) + - Demo: [ModelScope](https://modelscope.cn/aigc/imageGeneration?tab=advanced&versionId=7228&modelType=LoRA&sdVersion=FLUX_1&modelUrl=modelscope%3A%2F%2FDiffSynth-Studio%2FArtAug-lora-FLUX.1dev-v1%3Frevision%3Dv1.0), HuggingFace (Coming soon) + - **October 25, 2024** We provide extensive FLUX ControlNet support. This project supports many different ControlNet models that can be freely combined, even if their structures differ. Additionally, ControlNet models are compatible with high-resolution refinement and partition control techniques, enabling very powerful controllable image generation. See [`./examples/ControlNet/`](./examples/ControlNet/). - **October 8, 2024.** We release the extended LoRA based on CogVideoX-5B and ExVideo. You can download this model from [ModelScope](https://modelscope.cn/models/ECNU-CILab/ExVideo-CogVideoX-LoRA-129f-v1) or [HuggingFace](https://huggingface.co/ECNU-CILab/ExVideo-CogVideoX-LoRA-129f-v1). diff --git a/examples/ArtAug/README.md b/examples/ArtAug/README.md index 8c9c5a8..854ddc1 100644 --- a/examples/ArtAug/README.md +++ b/examples/ArtAug/README.md @@ -1,12 +1,43 @@ -# ArtAug +# FLUX Aesthetics Enhancement LoRA -ArtAug is an approach designed to improve text-to-image synthesis models through synthesis-understanding interactions. We have trained an ArtAug enhancement module for FLUX.1-dev in the format of LoRA. See [`./artaug_flux.py`](./artaug_flux.py). 
+## Introduction + +This is a LoRA model trained for FLUX.1-dev, which enhances the aesthetic quality of images generated by the model. The improvements include, but are not limited to: rich details, beautiful lighting and shadows, aesthetic composition, and clear visuals. This model does not require any trigger words. + +* Paper: https://arxiv.org/abs/2412.12888 +* GitHub: https://github.com/modelscope/DiffSynth-Studio +* Model: [ModelScope](https://www.modelscope.cn/models/DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1), [HuggingFace](https://huggingface.co/ECNU-CILab/ArtAug-lora-FLUX.1dev-v1) +* Demo: [ModelScope](https://modelscope.cn/aigc/imageGeneration?tab=advanced&versionId=7228&modelType=LoRA&sdVersion=FLUX_1&modelUrl=modelscope%3A%2F%2FDiffSynth-Studio%2FArtAug-lora-FLUX.1dev-v1%3Frevision%3Dv1.0), HuggingFace (Coming soon) + +## Methodology + +![](workflow.jpg) + +The ArtAug project is inspired by reasoning approaches like GPT-o1, which rely on model interaction and self-correction. We developed a framework aimed at enhancing the capabilities of image generation models through interaction with image understanding models. The training process of ArtAug consists of the following steps: + +1. **Synthesis-Understanding Interaction**: After generating an image using the image generation model, we employ a multimodal large language model (Qwen2-VL-72B) to analyze the image content and provide suggestions for modifications, which then lead to the regeneration of a higher quality image. + +2. **Data Generation and Filtering**: Interactive generation involves long inference times and sometimes produces poor image content. Therefore, we generate a large batch of image pairs offline, filter them, and use them for subsequent training. + +3. 
**Differential Training**: We apply differential training techniques to train a LoRA model, enabling it to learn the differences between images before and after enhancement, rather than directly training on the dataset of enhanced images. + +4. **Iterative Enhancement**: The trained LoRA model is fused into the base model, and the entire process is repeated multiple times with the fused model until the interaction algorithm no longer provides significant enhancements. The LoRA models produced in each iteration are combined to produce this final model. + +This model integrates the aesthetic understanding of Qwen2-VL-72B into FLUX.1[dev], leading to an improvement in the quality of generated images. + +## Usage + +Please see [./artaug_flux.py](./artaug_flux.py) for more details. + +Since this model is encapsulated in the universal FLUX LoRA format, it can be loaded by most LoRA loaders, allowing you to integrate this LoRA model into your own workflow. + +## Examples |FLUX.1-dev|FLUX.1-dev + ArtAug LoRA| |-|-| -|![image_1_base](https://github.com/user-attachments/assets/e1d5c505-b423-45fe-be01-25c2758f5417)|![image_1_enhance](https://github.com/user-attachments/assets/335908e3-d0bd-41c2-9d99-d10528a2d719)| -|![image_2_base](https://github.com/user-attachments/assets/7f38e8d4-3c62-492e-bd96-be60f0855037)|![image_2_enhance](https://github.com/user-attachments/assets/ae3a1daf-7a7c-44fd-bdbc-1d2a83bc3de3)| -|![image_3_base](https://github.com/user-attachments/assets/e2ae4879-9202-45d6-9df7-fbcbd2093d19)|![image_3_enhance](https://github.com/user-attachments/assets/4df6e5b9-65de-408b-88c6-51db39aad801)| -|![image_4_base](https://github.com/user-attachments/assets/dbc65387-60df-4a18-b1bb-45eaa5be5c1d)|![image_4_enhance](https://github.com/user-attachments/assets/fc19860d-3e28-468b-b013-8745255ac6db)| 
-|![image_5_base](https://github.com/user-attachments/assets/bb65c1ba-c0c6-4d3b-b3ef-bdbbb5f03a48)|![image_5_enhance](https://github.com/user-attachments/assets/03570c62-9a0b-428f-8c86-6e01c1421202)| -|![image_6_base](https://github.com/user-attachments/assets/18e9a4e7-2afd-4ca9-bc49-7736042c25dc)|![image_6_enhance](https://github.com/user-attachments/assets/aa73571f-098a-4e65-9eda-b9729ba379cd)| +|![](gallary/image_1_base.jpg)|![](gallary/image_1_enhance.jpg)| +|![](gallary/image_2_base.jpg)|![](gallary/image_2_enhance.jpg)| +|![](gallary/image_3_base.jpg)|![](gallary/image_3_enhance.jpg)| +|![](gallary/image_4_base.jpg)|![](gallary/image_4_enhance.jpg)| +|![](gallary/image_5_base.jpg)|![](gallary/image_5_enhance.jpg)| +|![](gallary/image_6_base.jpg)|![](gallary/image_6_enhance.jpg)| diff --git a/examples/ArtAug/artaug_flux.py b/examples/ArtAug/artaug_flux.py index 0a2e5eb..1789ae2 100644 --- a/examples/ArtAug/artaug_flux.py +++ b/examples/ArtAug/artaug_flux.py @@ -10,7 +10,7 @@ pipe = FluxImagePipeline.from_model_manager(model_manager) image = pipe(prompt=prompt, seed=0) image.save("image.jpg") -# Download ArtAug LoRA +# Download and load ArtAug LoRA lora_path = download_customized_models( model_id="DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1", origin_file_path="merged_lora.safetensors", From 95a0f0bedcfb58b7ce348d4e9e59c1dff3e367a6 Mon Sep 17 00:00:00 2001 From: Zhongjie Duan <35051019+Artiprocher@users.noreply.github.com> Date: Wed, 18 Dec 2024 20:42:50 +0800 Subject: [PATCH 3/4] Update README.md --- examples/ArtAug/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/ArtAug/README.md b/examples/ArtAug/README.md index 854ddc1..21939f9 100644 --- a/examples/ArtAug/README.md +++ b/examples/ArtAug/README.md @@ -11,7 +11,7 @@ This is a LoRA model trained for FLUX.1-dev, which enhances the aesthetic qualit ## Methodology -![](workflow.jpg) 
+![workflow](https://github.com/user-attachments/assets/cee969af-d49f-4480-911c-bedc1c095f9b) The ArtAug project is inspired by reasoning approaches like GPT-o1, which rely on model interaction and self-correction. We developed a framework aimed at enhancing the capabilities of image generation models through interaction with image understanding models. The training process of ArtAug consists of the following steps: @@ -35,9 +35,9 @@ Since this model is encapsulated in the universal FLUX LoRA format, it can be lo |FLUX.1-dev|FLUX.1-dev + ArtAug LoRA| |-|-| -|![](gallary/image_1_base.jpg)|![](gallary/image_1_enhance.jpg)| -|![](gallary/image_2_base.jpg)|![](gallary/image_2_enhance.jpg)| -|![](gallary/image_3_base.jpg)|![](gallary/image_3_enhance.jpg)| -|![](gallary/image_4_base.jpg)|![](gallary/image_4_enhance.jpg)| -|![](gallary/image_5_base.jpg)|![](gallary/image_5_enhance.jpg)| -|![](gallary/image_6_base.jpg)|![](gallary/image_6_enhance.jpg)| +|![image_1_base](https://github.com/user-attachments/assets/e1d5c505-b423-45fe-be01-25c2758f5417)|![image_1_enhance](https://github.com/user-attachments/assets/335908e3-d0bd-41c2-9d99-d10528a2d719)| +|![image_2_base](https://github.com/user-attachments/assets/7f38e8d4-3c62-492e-bd96-be60f0855037)|![image_2_enhance](https://github.com/user-attachments/assets/ae3a1daf-7a7c-44fd-bdbc-1d2a83bc3de3)| +|![image_3_base](https://github.com/user-attachments/assets/e2ae4879-9202-45d6-9df7-fbcbd2093d19)|![image_3_enhance](https://github.com/user-attachments/assets/4df6e5b9-65de-408b-88c6-51db39aad801)| +|![image_4_base](https://github.com/user-attachments/assets/dbc65387-60df-4a18-b1bb-45eaa5be5c1d)|![image_4_enhance](https://github.com/user-attachments/assets/fc19860d-3e28-468b-b013-8745255ac6db)| +|![image_5_base](https://github.com/user-attachments/assets/bb65c1ba-c0c6-4d3b-b3ef-bdbbb5f03a48)|![image_5_enhance](https://github.com/user-attachments/assets/03570c62-9a0b-428f-8c86-6e01c1421202)| 
+|![image_6_base](https://github.com/user-attachments/assets/18e9a4e7-2afd-4ca9-bc49-7736042c25dc)|![image_6_enhance](https://github.com/user-attachments/assets/aa73571f-098a-4e65-9eda-b9729ba379cd)| From 29cebf0becc0285520b8f8c0933164207d1b0a9f Mon Sep 17 00:00:00 2001 From: Zhongjie Duan <35051019+Artiprocher@users.noreply.github.com> Date: Wed, 18 Dec 2024 20:43:53 +0800 Subject: [PATCH 4/4] Update artaug_flux.py --- examples/ArtAug/artaug_flux.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/examples/ArtAug/artaug_flux.py b/examples/ArtAug/artaug_flux.py index 1789ae2..07043f0 100644 --- a/examples/ArtAug/artaug_flux.py +++ b/examples/ArtAug/artaug_flux.py @@ -1,24 +1,14 @@ import torch from diffsynth import ModelManager, FluxImagePipeline, download_customized_models -prompt = "a beautiful Asian girl." - -# Generate an image using FLUX.1-dev -model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"]) -pipe = FluxImagePipeline.from_model_manager(model_manager) - -image = pipe(prompt=prompt, seed=0) -image.save("image.jpg") - -# Download and load ArtAug LoRA lora_path = download_customized_models( model_id="DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1", origin_file_path="merged_lora.safetensors", - local_dir="models/lora", - downloading_priority=["ModelScope", "HuggingFace"] + local_dir="models/lora" )[0] +model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["FLUX.1-dev"]) model_manager.load_lora(lora_path, lora_alpha=1.0) +pipe = FluxImagePipeline.from_model_manager(model_manager) -# Generate an image using FLUX.1-dev + ArtAug -image = pipe(prompt=prompt, seed=0) +image = pipe(prompt="a house", seed=0) image.save("image_artaug.jpg")