Step1x vram (#556)

* support step1x vram management
This commit is contained in:
Zhongjie Duan
2025-04-28 10:13:20 +08:00
committed by GitHub
parent 32f630ff5f
commit ef2a7abad4
2 changed files with 104 additions and 97 deletions

View File

@@ -35,10 +35,11 @@ class FluxImagePipeline(BasePipeline):
         self.infinityou_processor: InfinitYou = None
         self.qwenvl = None
         self.step1x_connector: Qwen2Connector = None
-        self.model_names = ['text_encoder_1', 'text_encoder_2', 'dit', 'vae_decoder', 'vae_encoder', 'controlnet', 'ipadapter', 'ipadapter_image_encoder', 'step1x_connector']
+        self.model_names = ['text_encoder_1', 'text_encoder_2', 'dit', 'vae_decoder', 'vae_encoder', 'controlnet', 'ipadapter', 'ipadapter_image_encoder', 'qwenvl', 'step1x_connector']
     def enable_vram_management(self, num_persistent_param_in_dit=None):
+        if self.text_encoder_1 is not None:
             dtype = next(iter(self.text_encoder_1.parameters())).dtype
             enable_vram_management(
                 self.text_encoder_1,
@@ -56,6 +57,7 @@ class FluxImagePipeline(BasePipeline):
                 computation_device=self.device,
             ),
         )
+        if self.text_encoder_2 is not None:
             dtype = next(iter(self.text_encoder_2.parameters())).dtype
             enable_vram_management(
                 self.text_encoder_2,
@@ -75,6 +77,7 @@ class FluxImagePipeline(BasePipeline):
                 computation_device=self.device,
             ),
         )
+        if self.dit is not None:
             dtype = next(iter(self.dit.parameters())).dtype
             enable_vram_management(
                 self.dit,
@@ -100,6 +103,7 @@ class FluxImagePipeline(BasePipeline):
                 computation_device=self.device,
             ),
         )
+        if self.vae_decoder is not None:
             dtype = next(iter(self.vae_decoder.parameters())).dtype
             enable_vram_management(
                 self.vae_decoder,
@@ -117,6 +121,7 @@ class FluxImagePipeline(BasePipeline):
                 computation_device=self.device,
             ),
         )
+        if self.vae_encoder is not None:
             dtype = next(iter(self.vae_encoder.parameters())).dtype
             enable_vram_management(
                 self.vae_encoder,
@@ -403,6 +408,7 @@ class FluxImagePipeline(BasePipeline):
     def prepare_step1x_kwargs(self, prompt, negative_prompt, image):
         if image is None:
             return {}, {}
+        self.load_models_to_device(["qwenvl", "vae_encoder"])
         captions = [prompt, negative_prompt]
         ref_images = [image, image]
         embs, masks = self.qwenvl(captions, ref_images)
@@ -504,7 +510,7 @@ class FluxImagePipeline(BasePipeline):
         tea_cache_kwargs = {"tea_cache": TeaCache(num_inference_steps, rel_l1_thresh=tea_cache_l1_thresh) if tea_cache_l1_thresh is not None else None}
         # Denoise
-        self.load_models_to_device(['dit', 'controlnet'])
+        self.load_models_to_device(['dit', 'controlnet', 'step1x_connector'])
         for progress_id, timestep in enumerate(progress_bar_cmd(self.scheduler.timesteps)):
             timestep = timestep.unsqueeze(0).to(self.device)

View File

@@ -15,6 +15,7 @@ model_manager.load_models([
     "models/stepfun-ai/Step1X-Edit/vae.safetensors",
 ])
 pipe = FluxImagePipeline.from_model_manager(model_manager)
+pipe.enable_vram_management()
 image = Image.fromarray(np.zeros((1248, 832, 3), dtype=np.uint8) + 255)
 image = pipe(