Diffusion Templates framework
@@ -1,56 +0,0 @@
from diffsynth.diffusion.skills import SkillsPipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
import torch
from PIL import Image


pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
skills = SkillsPipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-ControlNet"),
        ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-Brightness"),
    ],
)
skill_cache = skills(
    positive_inputs=[
        {
            "model_id": 0,
            "image": Image.open("xxx.jpg"),
            "prompt": "A girl with long hair, surrounded by magical particles",
        },
        {
            "model_id": 1,
            "scale": 0.6,
        },
    ],
    negative_inputs=[
        {
            "model_id": 0,
            "image": Image.open("xxx.jpg"),
            "prompt": "A girl with long hair, surrounded by magical particles",
        },
        {
            "model_id": 1,
            "scale": 0.5,
        },
    ],
    pipe=pipe,
)
image = pipe(
    prompt="A girl with long hair, surrounded by magical particles",
    seed=0, rand_device="cuda", num_inference_steps=50, cfg_scale=4,
    height=1024, width=1024,
    **skill_cache,
)
image.save("image.jpg")
examples/flux2/model_inference/Template-KleinBase4B.py (new file, 256 lines)
@@ -0,0 +1,256 @@
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
import torch
from PIL import Image
import numpy as np

def load_template_pipeline(model_ids):
    template = TemplatePipeline.from_pretrained(
        torch_dtype=torch.bfloat16,
        device="cuda",
        model_configs=[ModelConfig(model_id=model_id) for model_id in model_ids],
    )
    return template

# Base Model
pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
# image = pipe(
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
# )
# image.save("image_base.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"])
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{"scale": 0.7}],
#     negative_template_inputs=[{"scale": 0.5}],
# )
# image.save("image_Brightness_light.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{"scale": 0.5}],
#     negative_template_inputs=[{"scale": 0.5}],
# )
# image.save("image_Brightness_normal.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{"scale": 0.3}],
#     negative_template_inputs=[{"scale": 0.5}],
# )
# image.save("image_Brightness_dark.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-ControlNet"])
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone, bathed in bright sunshine.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_depth.jpg"),
#         "prompt": "A cat is sitting on a stone, bathed in bright sunshine.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_depth.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_ControlNet_sunshine.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone, surrounded by colorful magical particles.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_depth.jpg"),
#         "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_depth.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_ControlNet_magic.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Edit"])
# image = template(
#     pipe,
#     prompt="Put a hat on this cat.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "prompt": "Put a hat on this cat.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_Edit_hat.jpg")
# image = template(
#     pipe,
#     prompt="Make the cat turn its head to look to the right.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "prompt": "Make the cat turn its head to look to the right.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_Edit_head.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Upscaler"])
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_lowres_512.jpg"),
#         "prompt": "A cat is sitting on a stone.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_lowres_512.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_Upscaler_1.png")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_lowres_100.jpg"),
#         "prompt": "A cat is sitting on a stone.",
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_lowres_100.jpg"),
#         "prompt": "",
#     }],
# )
# image.save("image_Upscaler_2.png")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-SoftRGB"])
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "R": 128/255,
#         "G": 128/255,
#         "B": 128/255,
#     }],
# )
# image.save("image_rgb_normal.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "R": 208/255,
#         "G": 185/255,
#         "B": 138/255,
#     }],
# )
# image.save("image_rgb_warm.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "R": 94/255,
#         "G": 163/255,
#         "B": 174/255,
#     }],
# )
# image.save("image_rgb_cold.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-PandaMeme"])
# image = template(
#     pipe,
#     prompt="A meme with a sleepy expression.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{}],
#     negative_template_inputs=[{}],
# )
# image.save("image_PandaMeme_sleepy.jpg")
# image = template(
#     pipe,
#     prompt="A meme with a happy expression.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{}],
#     negative_template_inputs=[{}],
# )
# image.save("image_PandaMeme_happy.jpg")
# image = template(
#     pipe,
#     prompt="A meme with a surprised expression.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{}],
#     negative_template_inputs=[{}],
# )
# image.save("image_PandaMeme_surprised.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Sharpness"])
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{"scale": 0.1}],
#     negative_template_inputs=[{"scale": 0.5}],
# )
# image.save("image_Sharpness_0.1.jpg")
# image = template(
#     pipe,
#     prompt="A cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{"scale": 0.8}],
#     negative_template_inputs=[{"scale": 0.5}],
# )
# image.save("image_Sharpness_0.8.jpg")

# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Inpaint"])
# image = template(
#     pipe,
#     prompt="An orange cat is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "mask": Image.open("data/assets/image_mask_1.jpg"),
#         "force_inpaint": True,
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "mask": Image.open("data/assets/image_mask_1.jpg"),
#     }],
# )
# image.save("image_Inpaint_1.jpg")
# image = template(
#     pipe,
#     prompt="A cat wearing sunglasses is sitting on a stone.",
#     seed=0, cfg_scale=4, num_inference_steps=50,
#     template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "mask": Image.open("data/assets/image_mask_2.jpg"),
#     }],
#     negative_template_inputs=[{
#         "image": Image.open("data/assets/image_reference.jpg"),
#         "mask": Image.open("data/assets/image_mask_2.jpg"),
#     }],
# )
# image.save("image_Inpaint_2.jpg")
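Every template invocation in the new example file above is committed in commented-out form, so the script as written only builds the base pipeline. For reference, a minimal sketch of one variant with the comments removed; it only rearranges calls already shown in the file, and assumes `pipe` and `load_template_pipeline` from this script are in scope and that the DiffSynth-Studio/Template-KleinBase4B-Brightness checkpoint is available:

# Minimal sketch: load one template checkpoint and run a single guided generation.
template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"])
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs=[{"scale": 0.7}],           # scale for the conditioned pass; 0.7 is the "light" setting above
    negative_template_inputs=[{"scale": 0.5}],  # scale for the negative pass; 0.5 is the "normal" setting above
)
image.save("image_Brightness_light.jpg")

The other checkpoints in the file follow the same pattern; only the model ID and the keys inside template_inputs (scale, image/prompt, mask, R/G/B) change.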
@@ -1,16 +1,17 @@
 accelerate launch examples/flux2/model_training/train.py \
-  --dataset_base_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2 \
-  --dataset_metadata_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2/metadata_example_ti2ti.jsonl \
-  --extra_inputs "skill_inputs" \
+  --dataset_base_path xxx \
+  --dataset_metadata_path xxx/metadata.jsonl \
+  --extra_inputs "template_inputs" \
   --max_pixels 1048576 \
   --dataset_repeat 1 \
   --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
-  --skill_model_id_or_path "models/base" \
+  --template_model_id_or_path "xxx" \
   --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
   --learning_rate 1e-4 \
   --num_epochs 999 \
-  --remove_prefix_in_ckpt "pipe.skill_model." \
-  --output_path "./models/train/FLUX.2-klein-base-4B-skills_full" \
-  --trainable_models "skill_model" \
-  --save_steps 1000 \
+  --remove_prefix_in_ckpt "pipe.template_model." \
+  --output_path "./models/train/Template-KleinBase4B_full" \
+  --trainable_models "template_model" \
   --use_gradient_checkpointing \
+  --save_steps 200 \
+  --find_unused_parameters
@@ -18,7 +18,7 @@ class Flux2ImageTrainingModule(DiffusionTrainingModule):
         extra_inputs=None,
         fp8_models=None,
         offload_models=None,
-        skill_model_id_or_path=None,
+        template_model_id_or_path=None,
         device="cpu",
         task="sft",
     ):
@@ -27,7 +27,7 @@ class Flux2ImageTrainingModule(DiffusionTrainingModule):
         model_configs = self.parse_model_configs(model_paths, model_id_with_origin_paths, fp8_models=fp8_models, offload_models=offload_models, device=device)
         tokenizer_config = self.parse_path_or_model_id(tokenizer_path, default_value=ModelConfig(model_id="black-forest-labs/FLUX.2-dev", origin_file_pattern="tokenizer/"))
         self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config)
-        self.pipe = self.load_training_skill_model(self.pipe, skill_model_id_or_path)
+        self.pipe = self.load_training_template_model(self.pipe, template_model_id_or_path, args.use_gradient_checkpointing, args.use_gradient_checkpointing_offload)
         self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model)
 
         # Training mode
@@ -128,7 +128,7 @@ if __name__ == "__main__":
         extra_inputs=args.extra_inputs,
         fp8_models=args.fp8_models,
         offload_models=args.offload_models,
-        skill_model_id_or_path=args.skill_model_id_or_path,
+        template_model_id_or_path=args.template_model_id_or_path,
         task=args.task,
         device="cpu" if args.initialize_model_on_cpu else accelerator.device,
     )