# DiffSynth-Studio/examples/qwen_image/accelerate/Qwen-Image-FP8.py

from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
from diffsynth.models.qwen_image_dit import RMSNorm
from diffsynth.vram_management.layers import enable_vram_management, AutoWrappedLinear, AutoWrappedModule
import torch


# All pipeline components are requested from the same model id.
QWEN_IMAGE_MODEL_ID = "Qwen/Qwen-Image"

# Transformer weights: the only component requested with an FP8 (e4m3)
# offload dtype. The remaining components use the ModelConfig defaults.
dit_model_config = ModelConfig(
    model_id=QWEN_IMAGE_MODEL_ID,
    origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors",
    offload_dtype=torch.float8_e4m3fn,
)
text_encoder_model_config = ModelConfig(
    model_id=QWEN_IMAGE_MODEL_ID,
    origin_file_pattern="text_encoder/model*.safetensors",
)
vae_model_config = ModelConfig(
    model_id=QWEN_IMAGE_MODEL_ID,
    origin_file_pattern="vae/diffusion_pytorch_model.safetensors",
)
tokenizer_model_config = ModelConfig(
    model_id=QWEN_IMAGE_MODEL_ID,
    origin_file_pattern="tokenizer/",
)

# Build the pipeline on "cuda" with bfloat16 as the pipeline-level torch dtype.
pipe = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        dit_model_config,
        text_encoder_model_config,
        vae_model_config,
    ],
    tokenizer_config=tokenizer_model_config,
)

# First pass over pipe.dit: map RMSNorm to AutoWrappedModule, with bfloat16
# on "cuda" for the offload, onload and computation settings alike.
rmsnorm_module_config = {
    "offload_dtype": torch.bfloat16,
    "offload_device": "cuda",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}
enable_vram_management(
    pipe.dit,
    module_map={RMSNorm: AutoWrappedModule},
    module_config=rmsnorm_module_config,
    vram_limit=None,  # no VRAM limit is passed
)
# Second pass over pipe.dit: map torch.nn.Linear to AutoWrappedLinear, with
# FP8 (e4m3) on "cuda" for the offload, onload and computation settings alike.
linear_module_config = {}
for stage in ("offload", "onload", "computation"):
    # Same insertion order as writing the six keys out by hand:
    # <stage>_dtype followed by <stage>_device.
    linear_module_config[f"{stage}_dtype"] = torch.float8_e4m3fn
    linear_module_config[f"{stage}_device"] = "cuda"

enable_vram_management(
    pipe.dit,
    module_map={torch.nn.Linear: AutoWrappedLinear},
    module_config=linear_module_config,
    vram_limit=None,  # no VRAM limit is passed
)

# Prompt (Chinese). Rough English rendering: "Exquisite portrait, underwater
# girl, flowing blue dress, hair gently drifting, clear light and shadow,
# surrounded by bubbles, serene face, fine details, dreamy and beautiful."
prompt = "精致肖像，水下少女，蓝裙飘逸，发丝轻扬，光影透澈，气泡环绕，面容恬静，细节精致，梦幻唯美。"

# Sampling options forwarded to the pipeline call as keyword arguments.
sampling_options = {
    "seed": 0,
    "num_inference_steps": 40,
    "enable_fp8_attention": True,
}
output_path = "image.jpg"

image = pipe(prompt, **sampling_options)
image.save(output_path)