support z-image-omni-base-i2L

This commit is contained in:
Artiprocher
2026-01-07 20:36:53 +08:00
parent bac39b1cd2
commit dd479e5bff
6 changed files with 413 additions and 6 deletions

View File

@@ -0,0 +1,62 @@
from diffsynth.pipelines.z_image import (
ZImagePipeline, ModelConfig,
ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
)
from modelscope import snapshot_download
from safetensors.torch import save_file
import torch
from PIL import Image
# Use `vram_config` to enable LoRA hot-loading
vram_config = {
"offload_dtype": torch.bfloat16,
"offload_device": "cuda",
"onload_dtype": torch.bfloat16,
"onload_device": "cuda",
"preparing_dtype": torch.bfloat16,
"preparing_device": "cuda",
"computation_dtype": torch.bfloat16,
"computation_device": "cuda",
}
# Load models
pipe = ZImagePipeline.from_pretrained(
torch_dtype=torch.bfloat16,
device="cuda",
model_configs=[
ModelConfig(model_id="Tongyi-MAI/Z-Image-Omni-Base", origin_file_pattern="transformer/*.safetensors", **vram_config),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Omni-Base", origin_file_pattern="siglip/model.safetensors"),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors"),
ModelConfig(model_id="DiffSynth-Studio/Z-Image-Omni-Base-i2L", origin_file_pattern="model.safetensors"),
],
tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)
# Load images
snapshot_download(
model_id="DiffSynth-Studio/Qwen-Image-i2L",
allow_file_pattern="assets/style/*",
local_dir="data/examples"
)
images = [Image.open(f"data/style/1/{i}.jpg") for i in range(5)]
# Image to LoRA
with torch.no_grad():
embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
save_file(lora, "lora.safetensors")
# Generate images
prompt = "a cat"
negative_prompt = "泛黄发绿模糊低分辨率低质量图像扭曲的肢体诡异的外观丑陋AI感噪点网格感JPEG压缩条纹异常的肢体水印乱码意义不明的字符"
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
seed=0, cfg_scale=7, num_inference_steps=50,
positive_only_lora=lora,
sigma_shift=8
)
image.save("image.jpg")

View File

@@ -0,0 +1,62 @@
from diffsynth.pipelines.z_image import (
ZImagePipeline, ModelConfig,
ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
)
from modelscope import snapshot_download
from safetensors.torch import save_file
import torch
from PIL import Image
# Use `vram_config` to enable LoRA hot-loading
vram_config = {
"offload_dtype": torch.bfloat16,
"offload_device": "cpu",
"onload_dtype": torch.bfloat16,
"onload_device": "cpu",
"preparing_dtype": torch.bfloat16,
"preparing_device": "cuda",
"computation_dtype": torch.bfloat16,
"computation_device": "cuda",
}
# Load models
pipe = ZImagePipeline.from_pretrained(
torch_dtype=torch.bfloat16,
device="cuda",
model_configs=[
ModelConfig(model_id="Tongyi-MAI/Z-Image-Omni-Base", origin_file_pattern="transformer/*.safetensors", **vram_config),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Omni-Base", origin_file_pattern="siglip/model.safetensors", **vram_config),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_config),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors", **vram_config),
ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors", **vram_config),
ModelConfig(model_id="DiffSynth-Studio/Z-Image-Omni-Base-i2L", origin_file_pattern="model.safetensors", **vram_config),
],
tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)
# Load images
snapshot_download(
model_id="DiffSynth-Studio/Qwen-Image-i2L",
allow_file_pattern="assets/style/*",
local_dir="data/examples"
)
images = [Image.open(f"data/style/1/{i}.jpg") for i in range(5)]
# Image to LoRA
with torch.no_grad():
embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
save_file(lora, "lora.safetensors")
# Generate images
prompt = "a cat"
negative_prompt = "泛黄发绿模糊低分辨率低质量图像扭曲的肢体诡异的外观丑陋AI感噪点网格感JPEG压缩条纹异常的肢体水印乱码意义不明的字符"
image = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
seed=0, cfg_scale=7, num_inference_steps=50,
positive_only_lora=lora,
sigma_shift=8
)
image.save("image.jpg")