template age

2026-04-21 19:56:56 +00:00 · 2026-04-20 11:41:20 +08:00
parent 13f2618da2
commit b51fac3e0e
8 changed files with 158 additions and 30 deletions
--- a/examples/flux2/model_inference/Template-KleinBase4B-Age.py
+++ b/examples/flux2/model_inference/Template-KleinBase4B-Age.py
@@ -0,0 +1,45 @@
+"""Generate an age sweep with the Template-KleinBase4B-Age template model.
+
+Loads the FLUX.2 klein 4B image pipeline and the age template on CUDA, then
+renders the same prompt with a fixed seed once per target age (10 to 90 in
+steps of 5), saving each result as ``image_age_<age>.jpg`` in the current
+working directory.
+"""
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+
+# Base image pipeline. The transformer is taken from the "klein-base-4B" repo;
+# the text encoder, VAE and tokenizer are taken from the "klein-4B" repo.
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+)
+# Age template model; it is called with `pipe` to produce each image below.
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")],
+)
+prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"
+# Age passed as the negative template input for every image (presumably the
+# reference the guidance contrasts against - confirm with the template docs).
+negative_age = 45
+# Prompt, seed and sampler settings stay fixed, so the age input is the only
+# thing that varies between the saved images.
+for age in range(10, 91, 5):
+    print(f"Generating age {age}...")
+    image = template(
+        pipe,
+        prompt=prompt,
+        seed=0, cfg_scale=4, num_inference_steps=50,
+        template_inputs=[{"age": age}],
+        negative_template_inputs=[{"age": negative_age}],
+    )
+    image.save(f"image_age_{age}.jpg")
--- a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py
+++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py
@@ -0,0 +1,62 @@
+"""Low-VRAM variant of the Template-KleinBase4B-Age age sweep.
+
+The transformer and text encoder are loaded with disk/CPU offload settings
+and FP8 onload/preparing dtypes, the pipeline gets a VRAM limit derived from
+the device's total memory, and the template model is loaded lazily. One image
+per target age (10 to 90 in steps of 5) is saved as ``image_age_<age>.jpg``.
+"""
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+
+# Offload/onload settings shared by the transformer and the text encoder.
+vram_config = dict(
+    offload_dtype="disk",
+    offload_device="disk",
+    onload_dtype=torch.float8_e4m3fn,
+    onload_device="cpu",
+    preparing_dtype=torch.float8_e4m3fn,
+    preparing_device="cuda",
+    computation_dtype=torch.bfloat16,
+    computation_device="cuda",
+)
+klein_repo = "black-forest-labs/FLUX.2-klein-4B"
+base_model_configs = [
+    ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
+    ModelConfig(model_id=klein_repo, origin_file_pattern="text_encoder/*.safetensors", **vram_config),
+    # The VAE is configured without the offload settings.
+    ModelConfig(model_id=klein_repo, origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+]
+tokenizer_model_config = ModelConfig(model_id=klein_repo, origin_file_pattern="tokenizer/")
+# mem_get_info returns (free, total) in bytes; take the total, convert it to
+# GiB and keep a 0.5 GiB margin below it.
+total_vram_bytes = torch.cuda.mem_get_info("cuda")[1]
+vram_limit_gib = total_vram_bytes / (1024 ** 3) - 0.5
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=base_model_configs,
+    tokenizer_config=tokenizer_model_config,
+    vram_limit=vram_limit_gib,
+)
+# Age template model, requested with lazy loading enabled.
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")],
+    lazy_loading=True,
+)
+prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"
+negative_age = 45
+# Settings that are identical for every image; only the age input changes
+# from one iteration to the next.
+sampling_kwargs = dict(prompt=prompt, seed=0, cfg_scale=4, num_inference_steps=50)
+for age in range(10, 91, 5):
+    print(f"Generating age {age}...")
+    image = template(
+        pipe,
+        **sampling_kwargs,
+        template_inputs=[{"age": age}],
+        negative_template_inputs=[{"age": negative_age}],
+    )
+    image.save(f"image_age_{age}.jpg")
--- a/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh
+++ b/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh
@@ -0,0 +1,27 @@
+# Full fine-tuning example for the Template-KleinBase4B-Age template model.
+# Downloads this example's subset of the shared example dataset, then launches
+# train.py through accelerate with only "template_model" marked trainable.
+DATASET_ROOT="data/diffsynth_example_dataset"
+DATASET_DIR="${DATASET_ROOT}/flux2/Template-KleinBase4B-Age"
+KLEIN_REPO="black-forest-labs/FLUX.2-klein-4B"
+
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset \
+  --include "flux2/Template-KleinBase4B-Age/*" \
+  --local_dir "./${DATASET_ROOT}"
+
+accelerate launch examples/flux2/model_training/train.py \
+  --dataset_base_path "${DATASET_DIR}" \
+  --dataset_metadata_path "${DATASET_DIR}/metadata.jsonl" \
+  --extra_inputs "template_inputs" \
+  --max_pixels 1048576 \
+  --dataset_repeat 50 \
+  --model_id_with_origin_paths "${KLEIN_REPO}:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,${KLEIN_REPO}:vae/diffusion_pytorch_model.safetensors" \
+  --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Age:" \
+  --tokenizer_path "${KLEIN_REPO}:tokenizer/" \
+  --learning_rate 1e-4 \
+  --num_epochs 2 \
+  --remove_prefix_in_ckpt "pipe.template_model." \
+  --output_path "./models/train/Template-KleinBase4B-Age_full" \
+  --trainable_models "template_model" \
+  --use_gradient_checkpointing \
+  --find_unused_parameters
--- a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py
+++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py
@@ -0,0 +1,45 @@
+"""Validate a fully fine-tuned Template-KleinBase4B-Age checkpoint.
+
+Loads the FLUX.2 klein 4B image pipeline and the age template on CUDA,
+overwrites the template weights with a locally trained checkpoint, and saves
+one image per sampled age (10, 35, 70) as ``image_age_<age>.jpg``.
+"""
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+from diffsynth.core import load_state_dict
+import torch
+
+# Base image pipeline. The transformer is taken from the "klein-base-4B" repo;
+# the text encoder, VAE and tokenizer are taken from the "klein-4B" repo.
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")],
+)
+# Load the locally trained checkpoint (file "epoch-1" under the training
+# output directory) into the first template model, replacing its weights.
+state_dict = load_state_dict("./models/train/Template-KleinBase4B-Age_full/epoch-1.safetensors", torch_dtype=torch.bfloat16)
+template.models[0].load_state_dict(state_dict)
+prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"
+# NOTE(review): an unused `negative_age = 45` was removed here. No
+# negative_template_inputs are passed during validation; if negative guidance
+# on age is intended, pass negative_template_inputs=[{"age": 45}] below.
+for age in [10, 35, 70]:
+    print(f"Generating age {age}...")
+    image = template(
+        pipe,
+        prompt=prompt,
+        seed=0, cfg_scale=4, num_inference_steps=50,
+        template_inputs=[{"age": age}],
+    )
+    image.save(f"image_age_{age}.jpg")