support z-image and z-image-i2L

This commit is contained in:
Artiprocher
2026-01-27 10:56:15 +08:00
parent ffb7a138f7
commit d12bf71bcc
12 changed files with 266 additions and 4 deletions

View File

@@ -0,0 +1,61 @@
from diffsynth.pipelines.z_image import (
ZImagePipeline, ModelConfig,
ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
)
from modelscope import snapshot_download
from safetensors.torch import save_file
import torch
from PIL import Image
# Use `vram_config` to enable LoRA hot-loading
vram_config = {
"offload_dtype": torch.bfloat16,
"offload_device": "cuda",
"onload_dtype": torch.bfloat16,
"onload_device": "cuda",
"preparing_dtype": torch.bfloat16,
"preparing_device": "cuda",
"computation_dtype": torch.bfloat16,
"computation_device": "cuda",
}
# Load models
pipe = ZImagePipeline.from_pretrained(
torch_dtype=torch.bfloat16,
device="cuda",
model_configs=[
ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/Z-Image-i2L", origin_file_pattern="model.safetensors"),
],
tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)
# Load images
snapshot_download(
model_id="DiffSynth-Studio/Z-Image-i2L",
allow_file_pattern="assets/style/*",
local_dir="data/style_input"
)
images = [Image.open(f"data/style_input/assets/style/1/{i}.jpg") for i in range(6)]
# Image to LoRA
with torch.no_grad():
embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
save_file(lora, "lora.safetensors")
# Generate images
prompt = "a cat"
negative_prompt = "泛黄发绿模糊低分辨率低质量图像扭曲的肢体诡异的外观丑陋AI感噪点网格感JPEG压缩条纹异常的肢体水印乱码意义不明的字符"
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
seed=0, cfg_scale=7, num_inference_steps=50,
positive_only_lora=lora,
sigma_shift=8
)
image.save("image.jpg")