From a671070a28cf5f1b082379b39b54512349afd844 Mon Sep 17 00:00:00 2001
From: Artiprocher
Date: Mon, 11 Nov 2024 21:01:38 +0800
Subject: [PATCH] bug fix

---
 diffsynth/prompters/omnigen_prompter.py   | 31 ++++++++++++++++++-
 .../image_synthesis/sd35_text_to_image.py | 23 +++++++++++--------
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/diffsynth/prompters/omnigen_prompter.py b/diffsynth/prompters/omnigen_prompter.py
index 8a6c38b..616efab 100644
--- a/diffsynth/prompters/omnigen_prompter.py
+++ b/diffsynth/prompters/omnigen_prompter.py
@@ -7,8 +7,37 @@ from PIL import Image
 from torchvision import transforms
 from transformers import AutoTokenizer
 from huggingface_hub import snapshot_download
+import numpy as np
 
-from OmniGen.utils import crop_arr
+
+
+def crop_arr(pil_image, max_image_size):
+    while min(*pil_image.size) >= 2 * max_image_size:
+        pil_image = pil_image.resize(
+            tuple(x // 2 for x in pil_image.size), resample=Image.BOX
+        )
+
+    if max(*pil_image.size) > max_image_size:
+        scale = max_image_size / max(*pil_image.size)
+        pil_image = pil_image.resize(
+            tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
+        )
+
+    if min(*pil_image.size) < 16:
+        scale = 16 / min(*pil_image.size)
+        pil_image = pil_image.resize(
+            tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
+        )
+
+    arr = np.array(pil_image)
+    crop_y1 = (arr.shape[0] % 16) // 2
+    crop_y2 = arr.shape[0] % 16 - crop_y1
+
+    crop_x1 = (arr.shape[1] % 16) // 2
+    crop_x2 = arr.shape[1] % 16 - crop_x1
+
+    arr = arr[crop_y1:arr.shape[0]-crop_y2, crop_x1:arr.shape[1]-crop_x2]
+    return Image.fromarray(arr)
 
 
 
diff --git a/examples/image_synthesis/sd35_text_to_image.py b/examples/image_synthesis/sd35_text_to_image.py
index cd8c467..37f2da8 100644
--- a/examples/image_synthesis/sd35_text_to_image.py
+++ b/examples/image_synthesis/sd35_text_to_image.py
@@ -2,18 +2,27 @@ from diffsynth import ModelManager, SD3ImagePipeline
 import torch
 
-
 model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["StableDiffusion3.5-large"])
 pipe = SD3ImagePipeline.from_model_manager(model_manager)
 
-prompt = "A capybara holding a sign that reads Hello World"
-negative_prompt = ""
+prompt = "a full body photo of a beautiful Asian girl. CG, masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait. The girl's flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her."
+negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw,"
+torch.manual_seed(1)
 image = pipe(
-    prompt=prompt,
+    prompt=prompt,
     negative_prompt=negative_prompt,
-    cfg_scale=3.5,
-    num_inference_steps=28, width=1024, height=1024,
-    seed=0
+    cfg_scale=5,
+    num_inference_steps=100, width=1024, height=1024,
 )
 image.save("image_1024.jpg")
 
 
+
+image = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    cfg_scale=5,
+    input_image=image.resize((2048, 2048)), denoising_strength=0.5,
+    num_inference_steps=50, width=2048, height=2048,
+    tiled=True
+)
+image.save("image_2048.jpg")