support ernie-image-turbo (#1391)

* support ernie-image-turbo

* pr review fix

* fix modelname
This commit is contained in:
Hong Zhang
2026-04-14 11:35:43 +08:00
committed by GitHub
parent 960d8c62c0
commit b5d04ceb30
18 changed files with 142 additions and 82 deletions

View File

@@ -543,13 +543,13 @@ flux2_series = [
ernie_image_series = [
{
# Example: ModelConfig(model_id="baidu/ERNIE-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors")
# Example: ModelConfig(model_id="PaddlePaddle/ERNIE-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors")
"model_hash": "584c13713849f1af4e03d5f1858b8b7b",
"model_name": "ernie_image_dit",
"model_class": "diffsynth.models.ernie_image_dit.ErnieImageDiT",
},
{
# Example: ModelConfig(model_id="baidu/ERNIE-Image", origin_file_pattern="text_encoder/model.safetensors")
# Example: ModelConfig(model_id="PaddlePaddle/ERNIE-Image", origin_file_pattern="text_encoder/model.safetensors")
"model_hash": "404ed9f40796a38dd34c1620f1920207",
"model_name": "ernie_image_text_encoder",
"model_class": "diffsynth.models.ernie_image_text_encoder.ErnieImageTextEncoder",

View File

@@ -131,11 +131,14 @@ class FlowMatchScheduler():
return sigmas, timesteps
@staticmethod
def set_timesteps_ernie_image(num_inference_steps=50, denoising_strength=1.0):
"""ERNIE-Image scheduler: pure linear sigmas from 1.0 to 0.0, no shift."""
def set_timesteps_ernie_image(num_inference_steps=50, denoising_strength=1.0, shift=3.0):
sigma_min = 0.0
sigma_max = 1.0
num_train_timesteps = 1000
sigma_start = denoising_strength
sigmas = torch.linspace(sigma_start, 0.0, num_inference_steps + 1)[:-1]
sigma_start = sigma_min + (sigma_max - sigma_min) * denoising_strength
sigmas = torch.linspace(sigma_start, sigma_min, num_inference_steps + 1)[:-1]
if shift is not None and shift != 1.0:
sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
timesteps = sigmas * num_train_timesteps
return sigmas, timesteps
@@ -185,9 +188,6 @@ class FlowMatchScheduler():
return sigmas, timesteps
def set_training_weight(self):
if self.set_timesteps_fn == FlowMatchScheduler.set_timesteps_ernie_image:
self.set_uniform_training_weight()
return
steps = 1000
x = self.timesteps
y = torch.exp(-2 * ((x - steps / 2) / steps) ** 2)
@@ -199,13 +199,6 @@ class FlowMatchScheduler():
bsmntw_weighing = bsmntw_weighing + bsmntw_weighing[1]
self.linear_timesteps_weights = bsmntw_weighing
def set_uniform_training_weight(self):
"""Assign equal weight to every timestep, suitable for linear schedulers like ERNIE-Image."""
steps = 1000
num_steps = len(self.timesteps)
uniform_weight = torch.full((num_steps,), steps / num_steps, dtype=self.timesteps.dtype)
self.linear_timesteps_weights = uniform_weight
def set_timesteps(self, num_inference_steps=100, denoising_strength=1.0, training=False, **kwargs):
self.sigmas, self.timesteps = self.set_timesteps_fn(
num_inference_steps=num_inference_steps,

View File

@@ -2,7 +2,7 @@
Ernie-Image DiT for DiffSynth-Studio.
Refactored from diffusers ErnieImageTransformer2DModel to use DiffSynth core modules.
Default parameters from actual checkpoint config.json (baidu/ERNIE-Image transformer).
Default parameters from actual checkpoint config.json (PaddlePaddle/ERNIE-Image transformer).
"""
import torch

View File

@@ -46,7 +46,7 @@ class ErnieImagePipeline(BasePipeline):
torch_dtype: torch.dtype = torch.bfloat16,
device: Union[str, torch.device] = get_device_type(),
model_configs: list[ModelConfig] = [],
tokenizer_config: ModelConfig = ModelConfig(model_id="baidu/ERNIE-Image", origin_file_pattern="tokenizer/"),
tokenizer_config: ModelConfig = ModelConfig(model_id="PaddlePaddle/ERNIE-Image", origin_file_pattern="tokenizer/"),
vram_limit: float = None,
):
pipe = ErnieImagePipeline(device=device, torch_dtype=torch_dtype)
@@ -78,11 +78,12 @@ class ErnieImagePipeline(BasePipeline):
rand_device: str = "cuda",
# Steps
num_inference_steps: int = 50,
sigma_shift: float = 3.0,
# Progress bar
progress_bar_cmd=tqdm,
):
# Scheduler
self.scheduler.set_timesteps(num_inference_steps=num_inference_steps)
self.scheduler.set_timesteps(num_inference_steps=num_inference_steps, shift=sigma_shift)
# Parameters
inputs_posi = {"prompt": prompt}