From 729c512c6656ed608de85fc1b671d09855c21590 Mon Sep 17 00:00:00 2001
From: Artiprocher <wangye87v5@hotmail.com>
Date: Mon, 28 Jul 2025 15:18:47 +0800
Subject: [PATCH] bugfix: load VAE onto device before encoding in WanVideoUnit_FunCameraControl

---
 diffsynth/pipelines/wan_video_new.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/diffsynth/pipelines/wan_video_new.py b/diffsynth/pipelines/wan_video_new.py
index 8b47508..4f62a74 100644
--- a/diffsynth/pipelines/wan_video_new.py
+++ b/diffsynth/pipelines/wan_video_new.py
@@ -706,7 +706,8 @@ class WanVideoUnit_FunReference(PipelineUnit):
 class WanVideoUnit_FunCameraControl(PipelineUnit):
     def __init__(self):
         super().__init__(
-            input_params=("height", "width", "num_frames", "camera_control_direction", "camera_control_speed", "camera_control_origin", "latents", "input_image")
+            input_params=("height", "width", "num_frames", "camera_control_direction", "camera_control_speed", "camera_control_origin", "latents", "input_image"),
+            onload_model_names=("vae",)
         )
 
     def process(self, pipe: WanVideoPipeline, height, width, num_frames, camera_control_direction, camera_control_speed, camera_control_origin, latents, input_image):
@@ -729,6 +730,7 @@ class WanVideoUnit_FunCameraControl(PipelineUnit):
 
         input_image = input_image.resize((width, height))
         input_latents = pipe.preprocess_video([input_image])
+        pipe.load_models_to_device(self.onload_model_names)
         input_latents = pipe.vae.encode(input_latents, device=pipe.device)
         y = torch.zeros_like(latents).to(pipe.device)
         y[:, :, :1] = input_latents