mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-04-16 15:28:21 +00:00
update docs
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important!
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 1.0,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 1.0,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
)
|
||||
image.save("image_Aesthetic_1.0.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 2.5,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 2.5,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
)
|
||||
image.save("image_Aesthetic_2.5.jpg")
|
||||
@@ -0,0 +1,43 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")],
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.7}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_light.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.5}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_normal.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.3}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_dark.jpg")
|
||||
@@ -0,0 +1,54 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")],
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_ControlNet_sunshine.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_ControlNet_magic.jpg")
|
||||
54
examples/flux2/model_inference/Template-KleinBase4B-Edit.py
Normal file
54
examples/flux2/model_inference/Template-KleinBase4B-Edit.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")],
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="Put a hat on this cat.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "Put a hat on this cat.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Edit_hat.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="Make the cat turn its head to look to the right.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "Make the cat turn its head to look to the right.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Edit_head.jpg")
|
||||
@@ -0,0 +1,56 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")],
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="An orange cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_1.jpg"),
|
||||
"force_inpaint": True,
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_1.jpg"),
|
||||
}],
|
||||
)
|
||||
image.save("image_Inpaint_1.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat wearing sunglasses is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_2.jpg"),
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_2.jpg"),
|
||||
}],
|
||||
)
|
||||
image.save("image_Inpaint_2.jpg")
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a sleepy expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_sleepy.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a happy expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_happy.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a surprised expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_surprised.jpg")
|
||||
@@ -0,0 +1,35 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")],
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.1}],
|
||||
negative_template_inputs = [{"scale": 0.5}],
|
||||
)
|
||||
image.save("image_Sharpness_0.1.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.8}],
|
||||
negative_template_inputs = [{"scale": 0.5}],
|
||||
)
|
||||
image.save("image_Sharpness_0.8.jpg")
|
||||
@@ -0,0 +1,52 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")],
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 128/255,
|
||||
"G": 128/255,
|
||||
"B": 128/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_normal.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 208/255,
|
||||
"G": 185/255,
|
||||
"B": 138/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_warm.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 94/255,
|
||||
"G": 163/255,
|
||||
"B": 174/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_cold.jpg")
|
||||
@@ -0,0 +1,54 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")],
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_512.jpg"),
|
||||
"prompt": "A cat is sitting on a stone.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_512.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Upscaler_1.png")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_100.jpg"),
|
||||
"prompt": "A cat is sitting on a stone.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_100.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Upscaler_2.png")
|
||||
@@ -1,256 +0,0 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
def load_template_pipeline(model_ids):
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id=model_id) for model_id in model_ids],
|
||||
)
|
||||
return template
|
||||
|
||||
# Base Model
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
)
|
||||
# image = pipe(
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# )
|
||||
# image.save("image_base.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{"scale": 0.7}],
|
||||
# negative_template_inputs = [{"scale": 0.5}]
|
||||
# )
|
||||
# image.save("image_Brightness_light.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{"scale": 0.5}],
|
||||
# negative_template_inputs = [{"scale": 0.5}]
|
||||
# )
|
||||
# image.save("image_Brightness_normal.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{"scale": 0.3}],
|
||||
# negative_template_inputs = [{"scale": 0.5}]
|
||||
# )
|
||||
# image.save("image_Brightness_dark.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-ControlNet"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_depth.jpg"),
|
||||
# "prompt": "A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_depth.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_ControlNet_sunshine.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_depth.jpg"),
|
||||
# "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_depth.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_ControlNet_magic.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Edit"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="Put a hat on this cat.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "prompt": "Put a hat on this cat.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Edit_hat.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="Make the cat turn its head to look to the right.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "prompt": "Make the cat turn its head to look to the right.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Edit_head.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Upscaler"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_lowres_512.jpg"),
|
||||
# "prompt": "A cat is sitting on a stone.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_lowres_512.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Upscaler_1.png")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_lowres_100.jpg"),
|
||||
# "prompt": "A cat is sitting on a stone.",
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_lowres_100.jpg"),
|
||||
# "prompt": "",
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Upscaler_2.png")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-SoftRGB"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "R": 128/255,
|
||||
# "G": 128/255,
|
||||
# "B": 128/255
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_rgb_normal.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "R": 208/255,
|
||||
# "G": 185/255,
|
||||
# "B": 138/255
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_rgb_warm.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "R": 94/255,
|
||||
# "G": 163/255,
|
||||
# "B": 174/255
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_rgb_cold.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-PandaMeme"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A meme with a sleepy expression.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{}],
|
||||
# negative_template_inputs = [{}],
|
||||
# )
|
||||
# image.save("image_PandaMeme_sleepy.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A meme with a happy expression.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{}],
|
||||
# negative_template_inputs = [{}],
|
||||
# )
|
||||
# image.save("image_PandaMeme_happy.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A meme with a surprised expression.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{}],
|
||||
# negative_template_inputs = [{}],
|
||||
# )
|
||||
# image.save("image_PandaMeme_surprised.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Sharpness"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{"scale": 0.1}],
|
||||
# negative_template_inputs = [{"scale": 0.5}],
|
||||
# )
|
||||
# image.save("image_Sharpness_0.1.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{"scale": 0.8}],
|
||||
# negative_template_inputs = [{"scale": 0.5}],
|
||||
# )
|
||||
# image.save("image_Sharpness_0.8.jpg")
|
||||
|
||||
# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Inpaint"])
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="An orange cat is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "mask": Image.open("data/assets/image_mask_1.jpg"),
|
||||
# "force_inpaint": True,
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "mask": Image.open("data/assets/image_mask_1.jpg"),
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Inpaint_1.jpg")
|
||||
# image = template(
|
||||
# pipe,
|
||||
# prompt="A cat wearing sunglasses is sitting on a stone.",
|
||||
# seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
# template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "mask": Image.open("data/assets/image_mask_2.jpg"),
|
||||
# }],
|
||||
# negative_template_inputs = [{
|
||||
# "image": Image.open("data/assets/image_reference.jpg"),
|
||||
# "mask": Image.open("data/assets/image_mask_2.jpg"),
|
||||
# }],
|
||||
# )
|
||||
# image.save("image_Inpaint_2.jpg")
|
||||
@@ -0,0 +1,63 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 1.0,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 1.0,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
)
|
||||
image.save("image_Aesthetic_1.0.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 2.5,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"lora_ids": list(range(1, 180, 2)),
|
||||
"lora_scales": 2.5,
|
||||
"merge_type": "mean",
|
||||
}],
|
||||
)
|
||||
image.save("image_Aesthetic_2.5.jpg")
|
||||
@@ -0,0 +1,55 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.7}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_light.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.5}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_normal.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.3}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_dark.jpg")
|
||||
@@ -0,0 +1,66 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "A cat is sitting on a stone, bathed in bright sunshine.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_ControlNet_sunshine.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_depth.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_ControlNet_magic.jpg")
|
||||
@@ -0,0 +1,66 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="Put a hat on this cat.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "Put a hat on this cat.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Edit_hat.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="Make the cat turn its head to look to the right.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "Make the cat turn its head to look to the right.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Edit_head.jpg")
|
||||
@@ -0,0 +1,68 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="An orange cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_1.jpg"),
|
||||
"force_inpaint": True,
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_1.jpg"),
|
||||
}],
|
||||
)
|
||||
image.save("image_Inpaint_1.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat wearing sunglasses is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_2.jpg"),
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_reference.jpg"),
|
||||
"mask": Image.open("data/examples/templates/image_mask_2.jpg"),
|
||||
}],
|
||||
)
|
||||
image.save("image_Inpaint_2.jpg")
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a sleepy expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_sleepy.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a happy expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_happy.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A meme with a surprised expression.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{}],
|
||||
negative_template_inputs = [{}],
|
||||
)
|
||||
image.save("image_PandaMeme_surprised.jpg")
|
||||
@@ -0,0 +1,47 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.1}],
|
||||
negative_template_inputs = [{"scale": 0.5}],
|
||||
)
|
||||
image.save("image_Sharpness_0.1.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.8}],
|
||||
negative_template_inputs = [{"scale": 0.5}],
|
||||
)
|
||||
image.save("image_Sharpness_0.8.jpg")
|
||||
@@ -0,0 +1,64 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 128/255,
|
||||
"G": 128/255,
|
||||
"B": 128/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_normal.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 208/255,
|
||||
"G": 185/255,
|
||||
"B": 138/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_warm.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"R": 94/255,
|
||||
"G": 163/255,
|
||||
"B": 174/255
|
||||
}],
|
||||
)
|
||||
image.save("image_rgb_cold.jpg")
|
||||
@@ -0,0 +1,66 @@
|
||||
from diffsynth.diffusion.template import TemplatePipeline
|
||||
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
|
||||
import torch
|
||||
from modelscope import dataset_snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
vram_config = {
|
||||
"offload_dtype": "disk",
|
||||
"offload_device": "disk",
|
||||
"onload_dtype": torch.float8_e4m3fn,
|
||||
"onload_device": "cpu",
|
||||
"preparing_dtype": torch.float8_e4m3fn,
|
||||
"preparing_device": "cuda",
|
||||
"computation_dtype": torch.bfloat16,
|
||||
"computation_device": "cuda",
|
||||
}
|
||||
pipe = Flux2ImagePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
|
||||
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
||||
],
|
||||
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
|
||||
vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
|
||||
)
|
||||
template = TemplatePipeline.from_pretrained(
|
||||
torch_dtype=torch.bfloat16,
|
||||
device="cuda",
|
||||
model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")],
|
||||
lazy_loading=True,
|
||||
)
|
||||
dataset_snapshot_download(
|
||||
"DiffSynth-Studio/examples_in_diffsynth",
|
||||
allow_file_pattern=["templates/*"],
|
||||
local_dir="data/examples",
|
||||
)
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_512.jpg"),
|
||||
"prompt": "A cat is sitting on a stone.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_512.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Upscaler_1.png")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_100.jpg"),
|
||||
"prompt": "A cat is sitting on a stone.",
|
||||
}],
|
||||
negative_template_inputs = [{
|
||||
"image": Image.open("data/examples/templates/image_lowres_100.jpg"),
|
||||
"prompt": "",
|
||||
}],
|
||||
)
|
||||
image.save("image_Upscaler_2.png")
|
||||
@@ -0,0 +1,19 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Aesthetic/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Aesthetic:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-Aesthetic_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters \
|
||||
--enable_lora_hot_loading
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-Brightness_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-ControlNet/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-ControlNet:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-ControlNet_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -1,17 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Edit/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path xxx \
|
||||
--dataset_metadata_path xxx/metadata.jsonl \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 1 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "xxx" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Edit:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 999 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B_full" \
|
||||
--output_path "./models/train/Template-KleinBase4B-Edit_full" \
|
||||
--trainable_models "template_model" \
|
||||
--save_steps 1000 \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Inpaint/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Inpaint:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-Inpaint_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Sharpness/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Sharpness:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-Sharpness_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-SoftRGB/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-SoftRGB:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-SoftRGB_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
@@ -0,0 +1,18 @@
|
||||
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Upscaler/*" --local_dir ./data/diffsynth_example_dataset
|
||||
|
||||
accelerate launch examples/flux2/model_training/train.py \
|
||||
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler \
|
||||
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler/metadata.jsonl \
|
||||
--extra_inputs "template_inputs" \
|
||||
--max_pixels 1048576 \
|
||||
--dataset_repeat 50 \
|
||||
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
|
||||
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Upscaler:" \
|
||||
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
|
||||
--learning_rate 1e-4 \
|
||||
--num_epochs 2 \
|
||||
--remove_prefix_in_ckpt "pipe.template_model." \
|
||||
--output_path "./models/train/Template-KleinBase4B-Upscaler_full" \
|
||||
--trainable_models "template_model" \
|
||||
--use_gradient_checkpointing \
|
||||
--find_unused_parameters
|
||||
62
examples/flux2/model_training/scripts/brightness/model.py
Normal file
62
examples/flux2/model_training/scripts/brightness/model.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import torch, math
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
|
||||
class SingleValueEncoder(torch.nn.Module):
    """Encode one scalar condition value into a sequence of `length` embeddings.

    The scalar is mapped to a sinusoidal (timestep-style) embedding of width
    `dim_in`, projected to `dim_out` by a two-layer MLP, broadcast over the
    sequence dimension, and offset by a learned positional embedding.
    """

    def __init__(self, dim_in=256, dim_out=4096, length=32):
        super().__init__()
        # Fix: remember dim_in so forward() no longer hard-codes 256 — with the
        # original code any non-default dim_in crashed in the first Linear layer.
        self.dim_in = dim_in
        self.length = length
        self.prefer_value_embedder = torch.nn.Sequential(
            torch.nn.Linear(dim_in, dim_out),
            torch.nn.SiLU(),
            torch.nn.Linear(dim_out, dim_out),
        )
        self.positional_embedding = torch.nn.Parameter(torch.randn(self.length, dim_out))

    def get_timestep_embedding(self, timesteps, embedding_dim, max_period=10000):
        """Return a sinusoidal embedding of shape (batch, embedding_dim) for 1-D `timesteps`.

        Layout is [cos | sin], matching the original implementation. Assumes
        `embedding_dim` is even (half cos, half sin).
        """
        half_dim = embedding_dim // 2
        exponent = -math.log(max_period) * torch.arange(
            0, half_dim, dtype=torch.float32, device=timesteps.device
        ) / half_dim
        emb = timesteps[:, None].float() * torch.exp(exponent)[None, :]
        emb = torch.cat([torch.cos(emb), torch.sin(emb)], dim=-1)
        return emb

    def forward(self, value, dtype):
        """Return (length, dim_out) embeddings for the scalar tensor `value` (shape (1,)).

        `value` is scaled by 1000 before embedding so values in [0, 1] span the
        usual timestep range of the sinusoidal basis.
        """
        emb = self.get_timestep_embedding(value * 1000, self.dim_in).to(dtype)
        emb = self.prefer_value_embedder(emb).squeeze(0)
        base_embeddings = emb.expand(self.length, -1)
        positional_embedding = self.positional_embedding.to(dtype=base_embeddings.dtype, device=base_embeddings.device)
        learned_embeddings = base_embeddings + positional_embedding
        return learned_embeddings
|
||||
|
||||
|
||||
class ValueFormatModel(torch.nn.Module):
    """Template model that turns a single scalar condition into per-block KV pairs.

    One `SingleValueEncoder` pair (K and V) is kept per transformer block; the
    resulting cache is consumed by the diffusion transformer's attention layers.
    """

    def __init__(self, num_double_blocks=5, num_single_blocks=20, dim=3072, num_heads=24, length=512):
        super().__init__()
        double_names = [f"double_{i}" for i in range(num_double_blocks)]
        single_names = [f"single_{i}" for i in range(num_single_blocks)]
        self.block_names = double_names + single_names
        self.proj_k = torch.nn.ModuleDict(
            {name: SingleValueEncoder(dim_out=dim, length=length) for name in self.block_names}
        )
        self.proj_v = torch.nn.ModuleDict(
            {name: SingleValueEncoder(dim_out=dim, length=length) for name in self.block_names}
        )
        self.num_heads = num_heads
        self.length = length

    @torch.no_grad()
    def process_inputs(self, pipe, scale, **kwargs):
        """Convert the user-facing `scale` into the tensor `forward` expects."""
        value = torch.Tensor([scale]).to(dtype=pipe.torch_dtype, device=pipe.device)
        return {"value": value}

    def forward(self, value, **kwargs):
        """Build a KV cache: one (1, length, num_heads, head_dim) K/V pair per block."""
        cache = {}
        for name in self.block_names:
            key = self.proj_k[name](value, value.dtype).view(1, self.length, self.num_heads, -1)
            val = self.proj_v[name](value, value.dtype).view(1, self.length, self.num_heads, -1)
            cache[name] = (key, val)
        return {"kv_cache": cache}
|
||||
|
||||
|
||||
class DataAnnotator:
    """Derive the training label for the brightness template from an image file."""

    def __call__(self, image, **kwargs):
        """Return {"scale": mean pixel intensity in [0, 1]} for the image at path `image`.

        Fix: use a context manager — PIL's Image.open is lazy and otherwise keeps
        the file handle open after this call returns.
        """
        with Image.open(image) as img:
            pixels = np.array(img)  # forces decode, so the file can be closed
        return {"scale": pixels.astype(np.float32).mean() / 255}
|
||||
|
||||
|
||||
# Hooks consumed by the template training/inference framework:
TEMPLATE_MODEL = ValueFormatModel  # model class the trainer instantiates
TEMPLATE_MODEL_PATH = None # You should modify this parameter after training
TEMPLATE_DATA_PROCESSOR = DataAnnotator  # annotator that computes the "scale" label
|
||||
@@ -0,0 +1,34 @@
|
||||
# Download the example dataset.
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir ./data/diffsynth_example_dataset

# Stage 1: pre-encode the dataset (text encoder + VAE) into a latent cache.
accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/example_image_dataset \
--dataset_metadata_path data/example_image_dataset/metadata.csv \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \
--lora_rank 32 \
--use_gradient_checkpointing \
--task "sft:data_process"

# Stage 2: train the LoRA on the cached latents (only the transformer is loaded).
accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
--learning_rate 1e-4 \
--num_epochs 5 \
--remove_prefix_in_ckpt "pipe.dit." \
--output_path "./models/train/FLUX.2-klein-base-4B_lora" \
--lora_base_model "dit" \
--lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \
--lora_rank 32 \
--use_gradient_checkpointing \
--task "sft:train"
|
||||
@@ -0,0 +1,36 @@
|
||||
# Download the example dataset for the Brightness template.
modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset

# Stage 1: pre-encode the dataset (text encoder + VAE) into a latent cache.
accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \
--dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \
--extra_inputs "template_inputs" \
--max_pixels 1048576 \
--dataset_repeat 1 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
--learning_rate 1e-4 \
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.template_model." \
--output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \
--trainable_models "template_model" \
--use_gradient_checkpointing \
--find_unused_parameters \
--task "sft:data_process"

# Stage 2: train the template model on the cached latents (transformer loaded frozen).
accelerate launch examples/flux2/model_training/train.py \
--dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \
--extra_inputs "template_inputs" \
--max_pixels 1048576 \
--dataset_repeat 50 \
--model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \
--template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
--learning_rate 1e-4 \
--num_epochs 2 \
--remove_prefix_in_ckpt "pipe.template_model." \
--output_path "./models/train/Template-KleinBase4B-Brightness_full" \
--trainable_models "template_model" \
--use_gradient_checkpointing \
--find_unused_parameters \
--task "sft:train"
|
||||
@@ -0,0 +1,55 @@
|
||||
# Inference with the Aesthetic template after training: reload the trained
# template weights, then compare lora_scales 1.0 vs 2.5 on the same seed.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important!
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Aesthetic_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
image = template(
    pipe,
    prompt="a bird with fire",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "lora_ids": [1],
        "lora_scales": 1.0,
        "merge_type": "mean",
    }],
    negative_template_inputs = [{
        "lora_ids": [1],
        "lora_scales": 1.0,
        "merge_type": "mean",
    }],
)
image.save("image_Aesthetic_1.0.jpg")
image = template(
    pipe,
    prompt="a bird with fire",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "lora_ids": [1],
        "lora_scales": 2.5,
        "merge_type": "mean",
    }],
    negative_template_inputs = [{
        "lora_ids": [1],
        "lora_scales": 2.5,
        "merge_type": "mean",
    }],
)
image.save("image_Aesthetic_2.5.jpg")
|
||||
@@ -0,0 +1,46 @@
|
||||
# Inference with the Brightness template: the scalar "scale" in template_inputs
# steers image brightness (0.7 light, 0.5 neutral, 0.3 dark) against a fixed
# negative scale of 0.5.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Brightness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{"scale": 0.7}],
    negative_template_inputs = [{"scale": 0.5}]
)
image.save("image_Brightness_light.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{"scale": 0.5}],
    negative_template_inputs = [{"scale": 0.5}]
)
image.save("image_Brightness_normal.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{"scale": 0.3}],
    negative_template_inputs = [{"scale": 0.5}]
)
image.save("image_Brightness_dark.jpg")
|
||||
image = template(
|
||||
pipe,
|
||||
prompt="A cat is sitting on a stone.",
|
||||
seed=0, cfg_scale=4, num_inference_steps=50,
|
||||
template_inputs = [{"scale": 0.3}],
|
||||
negative_template_inputs = [{"scale": 0.5}]
|
||||
)
|
||||
image.save("image_Brightness_dark.jpg")
|
||||
@@ -0,0 +1,57 @@
|
||||
# Inference with the ControlNet template: a depth map conditions the layout
# while the prompt varies; the negative pass uses the same depth map with an
# empty prompt.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch
from modelscope import dataset_snapshot_download
from PIL import Image

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-ControlNet_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
# Fetch the example control images (depth maps etc.).
dataset_snapshot_download(
    "DiffSynth-Studio/examples_in_diffsynth",
    allow_file_pattern=["templates/*"],
    local_dir="data/examples",
)
image = template(
    pipe,
    prompt="A cat is sitting on a stone, bathed in bright sunshine.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_depth.jpg"),
        "prompt": "A cat is sitting on a stone, bathed in bright sunshine.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_depth.jpg"),
        "prompt": "",
    }],
)
image.save("image_ControlNet_sunshine.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone, surrounded by colorful magical particles.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_depth.jpg"),
        "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_depth.jpg"),
        "prompt": "",
    }],
)
image.save("image_ControlNet_magic.jpg")
|
||||
@@ -0,0 +1,57 @@
|
||||
# Inference with the Edit template: a reference image plus an editing
# instruction produces the edited image; the negative pass keeps the reference
# image with an empty prompt.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch
from modelscope import dataset_snapshot_download
from PIL import Image

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Edit_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
# Fetch the example reference images.
dataset_snapshot_download(
    "DiffSynth-Studio/examples_in_diffsynth",
    allow_file_pattern=["templates/*"],
    local_dir="data/examples",
)
image = template(
    pipe,
    prompt="Put a hat on this cat.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "prompt": "Put a hat on this cat.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "prompt": "",
    }],
)
image.save("image_Edit_hat.jpg")
image = template(
    pipe,
    prompt="Make the cat turn its head to look to the right.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "prompt": "Make the cat turn its head to look to the right.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "prompt": "",
    }],
)
image.save("image_Edit_head.jpg")
|
||||
@@ -0,0 +1,59 @@
|
||||
# Inference with the Inpaint template: repaint the masked region of a reference
# image according to the prompt. "force_inpaint" in the first call strengthens
# the repaint inside the mask — presumably it overrides preservation of masked
# content; confirm against the template implementation.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch
from modelscope import dataset_snapshot_download
from PIL import Image

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Inpaint_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
# Fetch the example reference image and masks.
dataset_snapshot_download(
    "DiffSynth-Studio/examples_in_diffsynth",
    allow_file_pattern=["templates/*"],
    local_dir="data/examples",
)
image = template(
    pipe,
    prompt="An orange cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "mask": Image.open("data/examples/templates/image_mask_1.jpg"),
        "force_inpaint": True,
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "mask": Image.open("data/examples/templates/image_mask_1.jpg"),
    }],
)
image.save("image_Inpaint_1.jpg")
image = template(
    pipe,
    prompt="A cat wearing sunglasses is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "mask": Image.open("data/examples/templates/image_mask_2.jpg"),
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_reference.jpg"),
        "mask": Image.open("data/examples/templates/image_mask_2.jpg"),
    }],
)
image.save("image_Inpaint_2.jpg")
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# Inference with the PandaMeme template: the template carries the style, so
# template_inputs are empty dicts and only the prompt changes per image.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-PandaMeme_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
image = template(
    pipe,
    prompt="A meme with a sleepy expression.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{}],
    negative_template_inputs = [{}],
)
image.save("image_PandaMeme_sleepy.jpg")
image = template(
    pipe,
    prompt="A meme with a happy expression.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{}],
    negative_template_inputs = [{}],
)
image.save("image_PandaMeme_happy.jpg")
image = template(
    pipe,
    prompt="A meme with a surprised expression.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{}],
    negative_template_inputs = [{}],
)
image.save("image_PandaMeme_surprised.jpg")
|
||||
@@ -0,0 +1,38 @@
|
||||
# Inference with the Sharpness template: "scale" steers sharpness (0.1 soft,
# 0.8 sharp) against a fixed neutral negative scale of 0.5.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Sharpness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{"scale": 0.1}],
    negative_template_inputs = [{"scale": 0.5}],
)
image.save("image_Sharpness_0.1.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{"scale": 0.8}],
    negative_template_inputs = [{"scale": 0.5}],
)
image.save("image_Sharpness_0.8.jpg")
|
||||
@@ -0,0 +1,55 @@
|
||||
# Inference with the SoftRGB template: R/G/B channel targets are given as
# values in [0, 1] (pixel value / 255) to bias the overall color cast —
# neutral gray, warm, and cold variants. Note: no negative_template_inputs
# are passed here, unlike the other template examples.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-SoftRGB_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "R": 128/255,
        "G": 128/255,
        "B": 128/255
    }],
)
image.save("image_rgb_normal.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "R": 208/255,
        "G": 185/255,
        "B": 138/255
    }],
)
image.save("image_rgb_warm.jpg")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "R": 94/255,
        "G": 163/255,
        "B": 174/255
    }],
)
image.save("image_rgb_cold.jpg")
|
||||
@@ -0,0 +1,57 @@
|
||||
# Inference with the Upscaler template: a low-resolution image plus its prompt
# conditions the generation of a high-resolution result; the negative pass
# keeps the low-res image with an empty prompt.
from diffsynth.diffusion.template import TemplatePipeline
from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
from diffsynth.core import load_state_dict
import torch
from modelscope import dataset_snapshot_download
from PIL import Image

pipe = Flux2ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
)
template = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")],
)
# Overwrite the published template weights with the locally trained checkpoint.
state_dict = load_state_dict("./models/train/Template-KleinBase4B-Upscaler_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
template.models[0].load_state_dict(state_dict)
# Fetch the example low-resolution inputs.
dataset_snapshot_download(
    "DiffSynth-Studio/examples_in_diffsynth",
    allow_file_pattern=["templates/*"],
    local_dir="data/examples",
)
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_lowres_512.jpg"),
        "prompt": "A cat is sitting on a stone.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_lowres_512.jpg"),
        "prompt": "",
    }],
)
image.save("image_Upscaler_1.png")
image = template(
    pipe,
    prompt="A cat is sitting on a stone.",
    seed=0, cfg_scale=4, num_inference_steps=50,
    template_inputs = [{
        "image": Image.open("data/examples/templates/image_lowres_100.jpg"),
        "prompt": "A cat is sitting on a stone.",
    }],
    negative_template_inputs = [{
        "image": Image.open("data/examples/templates/image_lowres_100.jpg"),
        "prompt": "",
    }],
)
image.save("image_Upscaler_2.png")
|
||||
Reference in New Issue
Block a user