Mirror of https://github.com/modelscope/DiffSynth-Studio.git, synced 2026-03-19 14:58:12 +00:00.
bug fix
This commit is contained in:
@@ -7,8 +7,37 @@ from PIL import Image
|
||||
from torchvision import transforms
|
||||
from transformers import AutoTokenizer
|
||||
from huggingface_hub import snapshot_download
|
||||
import numpy as np
|
||||
|
||||
from OmniGen.utils import crop_arr
|
||||
|
||||
|
||||
def crop_arr(pil_image, max_image_size):
    """Resize a PIL image to fit within *max_image_size*, then center-crop
    both dimensions down to multiples of 16.

    Steps: repeatedly halve while the smaller side is >= 2*max_image_size
    (cheap BOX downsampling), then BICUBIC-scale so the larger side is at
    most max_image_size, then BICUBIC-upscale if the smaller side fell
    below 16 pixels, and finally trim each axis to a multiple of 16.
    """
    # Coarse downscaling: halving with a BOX filter until within ~2x of the limit.
    while min(*pil_image.size) >= 2 * max_image_size:
        halved = tuple(side // 2 for side in pil_image.size)
        pil_image = pil_image.resize(halved, resample=Image.BOX)

    # Fine downscaling so the larger side equals max_image_size.
    if max(*pil_image.size) > max_image_size:
        factor = max_image_size / max(*pil_image.size)
        scaled = tuple(round(side * factor) for side in pil_image.size)
        pil_image = pil_image.resize(scaled, resample=Image.BICUBIC)

    # Guard against degenerate sizes: the smaller side must be at least 16
    # so the multiple-of-16 crop below leaves a non-empty image.
    if min(*pil_image.size) < 16:
        factor = 16 / min(*pil_image.size)
        scaled = tuple(round(side * factor) for side in pil_image.size)
        pil_image = pil_image.resize(scaled, resample=Image.BICUBIC)

    # Center-crop each dimension to the nearest multiple of 16.
    arr = np.array(pil_image)
    excess_h = arr.shape[0] % 16
    excess_w = arr.shape[1] % 16
    top = excess_h // 2
    bottom = excess_h - top
    left = excess_w // 2
    right = excess_w - left
    arr = arr[top:arr.shape[0] - bottom, left:arr.shape[1] - right]
    return Image.fromarray(arr)
|
||||
|
||||
|
||||
|
||||
|
||||
from diffsynth import ModelManager, SD3ImagePipeline
import torch


# Load the StableDiffusion 3.5-large weights in bfloat16 on the GPU and
# build the text-to-image pipeline from the loaded models.
model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cuda", model_id_list=["StableDiffusion3.5-large"])
pipe = SD3ImagePipeline.from_model_manager(model_manager)

prompt = "a full body photo of a beautiful Asian girl. CG, masterpiece, best quality, solo, long hair, wavy hair, silver hair, blue eyes, blue dress, medium breasts, dress, underwater, air bubble, floating hair, refraction, portrait. The girl's flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her."
negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, cleavage, nsfw,"

# Fix the RNG so the run is reproducible.
torch.manual_seed(1)

# First pass: text-to-image at 1024x1024.
# NOTE: the broken version passed `prompt=prompt` twice and mixed two sets
# of conflicting keyword arguments (cfg_scale=3.5/5, steps=28/100, seed=0)
# in one call — a SyntaxError. Only the intended final arguments remain.
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    cfg_scale=5,
    num_inference_steps=100, width=1024, height=1024,
)
image.save("image_1024.jpg")

# Second pass: image-to-image upscale to 2048x2048, starting from a naive
# resize of the first result and refining it at denoising_strength=0.5.
# tiled=True processes the large latent in tiles to bound GPU memory.
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    cfg_scale=5,
    input_image=image.resize((2048, 2048)), denoising_strength=0.5,
    num_inference_steps=50, width=2048, height=2048,
    tiled=True
)
image.save("image_2048.jpg")
|
||||
|
||||
Reference in New Issue
Block a user