mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
support z-image and z-image-i2L
This commit is contained in:
61
examples/z_image/model_inference_low_vram/Z-Image-i2L.py
Normal file
61
examples/z_image/model_inference_low_vram/Z-Image-i2L.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from diffsynth.pipelines.z_image import (
|
||||
ZImagePipeline, ModelConfig,
|
||||
ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
|
||||
)
|
||||
from modelscope import snapshot_download
|
||||
from safetensors.torch import save_file
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
# Use `vram_config` to enable LoRA hot-loading.
# The same keyword set is unpacked into every entry of `model_configs` below.
# All four stages use bfloat16; the offload/onload stages are placed on the
# CPU, while the preparing/computation stages are placed on CUDA.
vram_config = {
    "offload_dtype": torch.bfloat16,
    "offload_device": "cpu",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cpu",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}
|
||||
|
||||
# Load models.
# The pipeline is assembled from six weight sets, each configured with the
# shared `vram_config` settings:
#   - the transformer from Tongyi-MAI/Z-Image,
#   - the text encoder and VAE from Tongyi-MAI/Z-Image-Turbo,
#   - the SigLIP2-G384 and DINOv3-7B weights from General-Image-Encoders,
#   - the Z-Image-i2L weights used by the Image2LoRA units further down.
# The tokenizer is taken from Tongyi-MAI/Z-Image-Turbo and does not receive
# the VRAM settings.
pipe = ZImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_config),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors", **vram_config),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors", **vram_config),
        ModelConfig(model_id="DiffSynth-Studio/Z-Image-i2L", origin_file_pattern="model.safetensors", **vram_config),
    ],
    tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)
|
||||
|
||||
# Load images.
# Fetch only the files matching "assets/style/*" from the Z-Image-i2L
# repository into data/style_input, then open the six reference images
# 0.jpg .. 5.jpg of style set "1".
snapshot_download(
    model_id="DiffSynth-Studio/Z-Image-i2L",
    allow_file_pattern="assets/style/*",
    local_dir="data/style_input",
)
images = [Image.open(f"data/style_input/assets/style/1/{i}.jpg") for i in range(6)]
|
||||
|
||||
# Image to LoRA.
# Gradients are disabled because this is inference only. The encode unit
# turns the reference images into a dict of embeddings, which is unpacked
# into the decode unit; the decode result's "lora" entry is the state dict
# that is written to disk and reused for generation below.
with torch.no_grad():
    embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
    lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
save_file(lora, "lora.safetensors")
|
||||
|
||||
# Generate images.
prompt = "a cat"
# The negative prompt (in Chinese) lists artifacts to steer away from:
# yellow/green tint, blur, low resolution, low quality, distorted limbs,
# odd or ugly appearance, "AI look", noise, grid patterns, JPEG banding,
# watermarks, garbled or meaningless characters.
negative_prompt = "泛黄,发绿,模糊,低分辨率,低质量图像,扭曲的肢体,诡异的外观,丑陋,AI感,噪点,网格感,JPEG压缩条纹,异常的肢体,水印,乱码,意义不明的字符"
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    seed=0,
    cfg_scale=7,
    num_inference_steps=50,
    # NOTE(review): the parameter name suggests the LoRA is applied to the
    # positive-prompt branch only -- confirm against the pipeline docs.
    positive_only_lora=lora,
    sigma_shift=8,
)
image.save("image.jpg")
|
||||
26
examples/z_image/model_inference_low_vram/Z-Image.py
Normal file
26
examples/z_image/model_inference_low_vram/Z-Image.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from diffsynth.pipelines.z_image import ZImagePipeline, ModelConfig
|
||||
import torch
|
||||
|
||||
# VRAM settings unpacked into every entry of `model_configs` below.
# All four stages use bfloat16; the offload/onload stages are placed on the
# CPU, while the preparing/computation stages are placed on CUDA.
vram_config = {
    "offload_dtype": torch.bfloat16,
    "offload_device": "cpu",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cpu",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}
|
||||
# Build the pipeline: the transformer comes from Tongyi-MAI/Z-Image, while
# the text encoder, VAE and tokenizer come from Tongyi-MAI/Z-Image-Turbo.
# Only the three model weight sets receive the `vram_config` settings.
pipe = ZImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_config),
    ],
    tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)
|
||||
# Run one text-to-image generation with a fixed seed and save the result.
prompt = "Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp (⚡️), bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda (西安大雁塔), blurred colorful distant lights."
image = pipe(
    prompt=prompt,
    seed=42,
    rand_device="cuda",
    num_inference_steps=50,
    cfg_scale=4,
)
image.save("image_Z-Image.jpg")
|
||||
Reference in New Issue
Block a user