diff --git a/diffsynth/pipelines/base.py b/diffsynth/pipelines/base.py index c14c5d6..8b247d6 100644 --- a/diffsynth/pipelines/base.py +++ b/diffsynth/pipelines/base.py @@ -7,14 +7,26 @@ from torchvision.transforms import GaussianBlur class BasePipeline(torch.nn.Module): - def __init__(self, device="cuda", torch_dtype=torch.float16): + def __init__(self, device="cuda", torch_dtype=torch.float16, height_division_factor=64, width_division_factor=64): super().__init__() self.device = device self.torch_dtype = torch_dtype + self.height_division_factor = height_division_factor + self.width_division_factor = width_division_factor self.cpu_offload = False self.model_names = [] + def check_resize_height_width(self, height, width): + if height % self.height_division_factor != 0: + height = (height + self.height_division_factor - 1) // self.height_division_factor * self.height_division_factor + print(f"The height cannot be evenly divided by {self.height_division_factor}. We round it up to {height}.") + if width % self.width_division_factor != 0: + width = (width + self.width_division_factor - 1) // self.width_division_factor * self.width_division_factor + print(f"The width cannot be evenly divided by {self.width_division_factor}. We round it up to {width}.") + return height, width + + def preprocess_image(self, image): image = torch.Tensor(np.array(image, dtype=np.float32) * (2 / 255) - 1).permute(2, 0, 1).unsqueeze(0) return image diff --git a/diffsynth/pipelines/flux_image.py b/diffsynth/pipelines/flux_image.py index 95ccadc..40488dc 100644 --- a/diffsynth/pipelines/flux_image.py +++ b/diffsynth/pipelines/flux_image.py @@ -15,7 +15,7 @@ from ..models.tiler import FastTileWorker class FluxImagePipeline(BasePipeline): def __init__(self, device="cuda", torch_dtype=torch.float16): - super().__init__(device=device, torch_dtype=torch_dtype) + super().__init__(device=device, torch_dtype=torch_dtype, height_division_factor=16, width_division_factor=16) self.scheduler = FlowMatchScheduler() self.prompter = FluxPrompter() # models diff --git a/diffsynth/pipelines/hunyuan_image.py b/diffsynth/pipelines/hunyuan_image.py index 9cc8c5e..9c6736d 100644 --- a/diffsynth/pipelines/hunyuan_image.py +++ b/diffsynth/pipelines/hunyuan_image.py @@ -125,7 +125,7 @@ class ImageSizeManager: class HunyuanDiTImagePipeline(BasePipeline): def __init__(self, device="cuda", torch_dtype=torch.float16): - super().__init__(device=device, torch_dtype=torch_dtype) + super().__init__(device=device, torch_dtype=torch_dtype, height_division_factor=16, width_division_factor=16) self.scheduler = EnhancedDDIMScheduler(prediction_type="v_prediction", beta_start=0.00085, beta_end=0.03) self.prompter = HunyuanDiTPrompter() self.image_size_manager = ImageSizeManager() diff --git a/diffsynth/pipelines/sd3_image.py b/diffsynth/pipelines/sd3_image.py index 9a4109e..97d388d 100644 --- a/diffsynth/pipelines/sd3_image.py +++ b/diffsynth/pipelines/sd3_image.py @@ -10,7 +10,7 @@ from tqdm import tqdm class SD3ImagePipeline(BasePipeline): def __init__(self, device="cuda", torch_dtype=torch.float16): - super().__init__(device=device, torch_dtype=torch_dtype) + super().__init__(device=device, torch_dtype=torch_dtype, height_division_factor=16, width_division_factor=16) self.scheduler = FlowMatchScheduler() self.prompter = SD3Prompter() # models