mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-23 00:58:11 +00:00
add fun-v1.1-1.3B-control-camera
This commit is contained in:
@@ -133,6 +133,8 @@ model_loader_configs = [
|
|||||||
(None, "3ef3b1f8e1dab83d5b71fd7b617f859f", ["wan_video_dit"], [WanModel], "civitai"),
|
(None, "3ef3b1f8e1dab83d5b71fd7b617f859f", ["wan_video_dit"], [WanModel], "civitai"),
|
||||||
(None, "70ddad9d3a133785da5ea371aae09504", ["wan_video_dit"], [WanModel], "civitai"),
|
(None, "70ddad9d3a133785da5ea371aae09504", ["wan_video_dit"], [WanModel], "civitai"),
|
||||||
(None, "26bde73488a92e64cc20b0a7485b9e5b", ["wan_video_dit"], [WanModel], "civitai"),
|
(None, "26bde73488a92e64cc20b0a7485b9e5b", ["wan_video_dit"], [WanModel], "civitai"),
|
||||||
|
(None, "ac6a5aa74f4a0aab6f64eb9a72f19901", ["wan_video_dit"], [WanModel], "civitai"),
|
||||||
|
(None, "b61c605c2adbd23124d152ed28e049ae", ["wan_video_dit"], [WanModel], "civitai"),
|
||||||
(None, "a61453409b67cd3246cf0c3bebad47ba", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
|
(None, "a61453409b67cd3246cf0c3bebad47ba", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
|
||||||
(None, "7a513e1f257a861512b1afd387a8ecd9", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
|
(None, "7a513e1f257a861512b1afd387a8ecd9", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
|
||||||
(None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
|
(None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
|
||||||
|
|||||||
177
diffsynth/models/wan_video_camera_controller.py
Normal file
177
diffsynth/models/wan_video_camera_controller.py
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import numpy as np
|
||||||
|
from einops import rearrange
|
||||||
|
from packaging import version as pver
|
||||||
|
import os
|
||||||
|
class SimpleAdapter(nn.Module):
    """Per-frame convolutional adapter for a 5-D control tensor.

    Each frame is pixel-unshuffled by a factor of 8 (which multiplies the
    channel count by 64), projected to ``out_dim`` channels by a strided
    convolution, and refined by a stack of ``ResidualBlock`` modules.

    Args:
        in_dim: Channel count of the input before pixel-unshuffling.
        out_dim: Channel count of the produced features.
        kernel_size: Kernel size of the projection convolution.
        stride: Stride of the projection convolution.
        num_residual_blocks: Number of ``ResidualBlock`` modules applied
            after the projection.
    """

    def __init__(self, in_dim, out_dim, kernel_size, stride, num_residual_blocks=1):
        super().__init__()

        # Space-to-depth: (C, H, W) -> (C * 64, H / 8, W / 8).
        self.pixel_unshuffle = nn.PixelUnshuffle(downscale_factor=8)

        # Unpadded strided projection of the unshuffled channels to out_dim.
        self.conv = nn.Conv2d(in_dim * 64, out_dim, kernel_size=kernel_size, stride=stride, padding=0)

        # Feature refinement.
        blocks = [ResidualBlock(out_dim) for _ in range(num_residual_blocks)]
        self.residual_blocks = nn.Sequential(*blocks)

    def forward(self, x):
        """Map ``x`` of shape (B, C, F, H, W) to features of shape (B, out_dim, F, H', W')."""
        batch, channels, frames, height, width = x.size()

        # Fold the frame axis into the batch axis so the 2-D layers see one frame per sample.
        per_frame = x.permute(0, 2, 1, 3, 4).contiguous().view(batch * frames, channels, height, width)

        features = self.residual_blocks(self.conv(self.pixel_unshuffle(per_frame)))

        # Unfold the frame axis again and move channels back in front of it.
        _, out_channels, out_height, out_width = features.shape
        features = features.view(batch, frames, out_channels, out_height, out_width)
        return features.permute(0, 2, 1, 3, 4)
|
||||||
|
|
||||||
|
|
||||||
|
class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with a ReLU in between and an identity skip connection.

    Args:
        dim: Number of input and output channels (the block preserves shape).
    """

    def __init__(self, dim):
        super().__init__()
        self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, padding=1)

    def forward(self, x):
        """Return ``x + conv2(relu(conv1(x)))``."""
        hidden = self.relu(self.conv1(x))
        result = self.conv2(hidden)
        # In-place accumulation of the skip connection into the conv2 output.
        result.add_(x)
        return result
|
||||||
|
|
||||||
|
class Camera:
    """Intrinsics and extrinsics of one camera, parsed from a flat pose row.

    Copied from https://github.com/hehao13/CameraCtrl/blob/main/inference.py

    ``entry[1:5]`` holds ``fx, fy, cx, cy`` and ``entry[7:]`` holds the twelve
    values of the 3x4 world-to-camera matrix in row-major order.
    """

    def __init__(self, entry):
        self.fx, self.fy, self.cx, self.cy = entry[1:5]

        # Promote the 3x4 world-to-camera matrix to a homogeneous 4x4 matrix.
        extrinsic = np.eye(4)
        extrinsic[:3, :] = np.array(entry[7:]).reshape(3, 4)

        self.w2c_mat = extrinsic
        self.c2w_mat = np.linalg.inv(extrinsic)
|
||||||
|
|
||||||
|
def get_relative_pose(cam_params):
    """Express every camera pose relative to the first camera.

    Copied from https://github.com/hehao13/CameraCtrl/blob/main/inference.py

    Args:
        cam_params: Cameras exposing ``w2c_mat`` and ``c2w_mat`` (4x4 arrays).

    Returns:
        A float32 array of shape (len(cam_params), 4, 4). The first entry is
        the reference pose; entry ``k`` is
        ``reference @ w2c[0] @ c2w[k]``.
    """
    cameras = list(cam_params)
    world_to_cams = [cam.w2c_mat for cam in cameras]
    cams_to_world = [cam.c2w_mat for cam in cameras]

    # Pose assigned to the first camera; with a zero offset this is the identity.
    cam_to_origin = 0
    reference_pose = np.array([
        [1, 0, 0, 0],
        [0, 1, 0, -cam_to_origin],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])

    # Maps absolute world coordinates into the frame of the first camera.
    world_to_reference = reference_pose @ world_to_cams[0]

    relative_poses = [reference_pose]
    for cam_to_world in cams_to_world[1:]:
        relative_poses.append(world_to_reference @ cam_to_world)
    return np.array(relative_poses, dtype=np.float32)
|
||||||
|
|
||||||
|
def custom_meshgrid(*args):
    """Call ``torch.meshgrid`` with 'ij' indexing, whatever the installed torch version.

    Copied from https://github.com/hehao13/CameraCtrl/blob/main/inference.py

    The ``indexing`` keyword is only passed for torch >= 1.10; older versions
    are called without it.
    """
    # ref: https://pytorch.org/docs/stable/generated/torch.meshgrid.html?highlight=meshgrid#torch.meshgrid
    predates_indexing_kwarg = pver.parse(torch.__version__) < pver.parse('1.10')
    extra_kwargs = {} if predates_indexing_kwarg else {'indexing': 'ij'}
    return torch.meshgrid(*args, **extra_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def ray_condition(K, c2w, H, W, device):
    """Build per-pixel Plücker ray embeddings for a batch of camera trajectories.

    Adapted from https://github.com/hehao13/CameraCtrl/blob/main/inference.py

    Args:
        K: Intrinsics of shape (B, V, 4) holding ``fx, fy, cx, cy`` in the
            same pixel units as the H x W grid.
        c2w: Camera-to-world matrices of shape (B, V, 4, 4).
        H: Grid height in pixels.
        W: Grid width in pixels.
        device: Device on which the pixel grid is created. ``K`` and ``c2w``
            are used as given, so they must already live on this device.

    Returns:
        Tensor of shape (B, V, H, W, 6). The last axis is the concatenation
        of ``origin x direction`` (3 values) and the unit ray direction
        (3 values), both in world coordinates.
    """
    B = K.shape[0]
    V = c2w.shape[1]

    # Pixel-centre coordinates: j runs over rows (height), i over columns (width).
    j, i = custom_meshgrid(
        torch.linspace(0, H - 1, H, device=device, dtype=c2w.dtype),
        torch.linspace(0, W - 1, W, device=device, dtype=c2w.dtype),
    )
    i = i.reshape([1, 1, H * W]).expand([B, 1, H * W]) + 0.5  # [B, 1, HW]
    j = j.reshape([1, 1, H * W]).expand([B, 1, H * W]) + 0.5  # [B, 1, HW]

    fx, fy, cx, cy = K.chunk(4, dim=-1)  # each [B, V, 1]

    # Back-project every pixel onto the z = 1 plane of its camera.
    zs = torch.ones_like(i)  # [B, 1, HW]
    xs = (i - cx) / fx * zs  # [B, V, HW]
    ys = (j - cy) / fy * zs  # [B, V, HW]
    zs = zs.expand_as(ys)

    directions = torch.stack((xs, ys, zs), dim=-1)  # [B, V, HW, 3]
    directions = directions / directions.norm(dim=-1, keepdim=True)

    # Rotate the camera-space directions into world space (row-vector form of R @ d).
    rays_d = directions @ c2w[..., :3, :3].transpose(-1, -2)  # [B, V, HW, 3]
    rays_o = c2w[..., :3, 3]  # [B, V, 3]
    rays_o = rays_o[:, :, None].expand_as(rays_d)  # [B, V, HW, 3]

    # The xyz components are on the last axis. Without an explicit dim,
    # torch.cross uses the first axis of size 3, which silently produces wrong
    # values whenever B, V or H * W happens to equal 3.
    rays_dxo = torch.cross(rays_o, rays_d, dim=-1)  # [B, V, HW, 3]
    plucker = torch.cat([rays_dxo, rays_d], dim=-1)  # [B, V, HW, 6]
    plucker = plucker.reshape(B, V, H, W, 6)  # [B, V, H, W, 6]
    return plucker
|
||||||
|
|
||||||
|
def process_pose_file(pose_file_path, width=672, height=384, original_pose_width=1280, original_pose_height=720, device='cpu', return_poses=False):
    """Turn a camera pose description into per-frame Plücker embeddings.

    Modified from https://github.com/hehao13/CameraCtrl/blob/main/inference.py

    The first line of the input is treated as a header and discarded. Every
    following non-blank line is one frame: whitespace-separated numbers where
    fields 1-4 are ``fx, fy, cx, cy`` (normalized by image width/height) and
    fields 7 onward are the twelve entries of the 3x4 world-to-camera matrix.

    Args:
        pose_file_path: Path to a pose file, or the pose text itself. If the
            value is not an existing file it is parsed as text.
        width: Width of the generated sample in pixels.
        height: Height of the generated sample in pixels.
        original_pose_width: Width of the footage the poses were estimated on.
        original_pose_height: Height of the footage the poses were estimated on.
        device: Device on which the embeddings are computed.
        return_poses: If True, return the parsed rows as lists of floats and
            skip the embedding computation.

    Returns:
        With ``return_poses=True``, a list with one list of floats per frame.
        Otherwise a tensor of shape (n_frame, height, width, 6).
    """
    if os.path.isfile(pose_file_path):
        with open(pose_file_path, 'r') as f:
            lines = f.readlines()
    else:
        lines = pose_file_path.splitlines()

    # Drop the header line. Blank lines and repeated spaces are tolerated so that
    # stray whitespace does not reach float().
    rows = [line.split() for line in lines[1:] if line.strip()]
    cam_params = [[float(value) for value in row] for row in rows]
    if return_poses:
        return cam_params
    cam_params = [Camera(cam_param) for cam_param in cam_params]

    sample_wh_ratio = width / height
    pose_wh_ratio = original_pose_width / original_pose_height

    # Compensate the normalized focal length for the aspect-ratio mismatch between
    # the pose footage and the sample.
    # NOTE(review): presumably models a resize-then-center-crop of the footage — confirm.
    if pose_wh_ratio > sample_wh_ratio:
        resized_ori_w = height * pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fx = resized_ori_w * cam_param.fx / width
    else:
        resized_ori_h = width / pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fy = resized_ori_h * cam_param.fy / height

    # Convert the normalized intrinsics to pixel units of the sample.
    intrinsic = np.asarray(
        [
            [cam_param.fx * width, cam_param.fy * height, cam_param.cx * width, cam_param.cy * height]
            for cam_param in cam_params
        ],
        dtype=np.float32,
    )

    # Place K and the poses on `device` too: ray_condition builds its pixel grid
    # there, and mixing devices fails for anything other than 'cpu'.
    K = torch.as_tensor(intrinsic, device=device)[None]  # [1, n_frame, 4]
    c2ws = get_relative_pose(cam_params)
    c2ws = torch.as_tensor(c2ws, device=device)[None]  # [1, n_frame, 4, 4]

    # ray_condition already returns (1, n_frame, H, W, 6); dropping the batch axis
    # gives the channel-last layout directly.
    plucker_embedding = ray_condition(K, c2ws, height, width, device=device)[0]  # [n_frame, H, W, 6]
    return plucker_embedding
|
||||||
@@ -5,6 +5,7 @@ import math
|
|||||||
from typing import Tuple, Optional
|
from typing import Tuple, Optional
|
||||||
from einops import rearrange
|
from einops import rearrange
|
||||||
from .utils import hash_state_dict_keys
|
from .utils import hash_state_dict_keys
|
||||||
|
from .wan_video_camera_controller import SimpleAdapter
|
||||||
try:
|
try:
|
||||||
import flash_attn_interface
|
import flash_attn_interface
|
||||||
FLASH_ATTN_3_AVAILABLE = True
|
FLASH_ATTN_3_AVAILABLE = True
|
||||||
@@ -273,6 +274,8 @@ class WanModel(torch.nn.Module):
|
|||||||
has_image_input: bool,
|
has_image_input: bool,
|
||||||
has_image_pos_emb: bool = False,
|
has_image_pos_emb: bool = False,
|
||||||
has_ref_conv: bool = False,
|
has_ref_conv: bool = False,
|
||||||
|
add_control_adapter: bool = False,
|
||||||
|
in_dim_control_adapter: int = 24,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.dim = dim
|
self.dim = dim
|
||||||
@@ -308,6 +311,10 @@ class WanModel(torch.nn.Module):
|
|||||||
self.ref_conv = nn.Conv2d(16, dim, kernel_size=(2, 2), stride=(2, 2))
|
self.ref_conv = nn.Conv2d(16, dim, kernel_size=(2, 2), stride=(2, 2))
|
||||||
self.has_image_pos_emb = has_image_pos_emb
|
self.has_image_pos_emb = has_image_pos_emb
|
||||||
self.has_ref_conv = has_ref_conv
|
self.has_ref_conv = has_ref_conv
|
||||||
|
if add_control_adapter:
|
||||||
|
self.control_adapter = SimpleAdapter(in_dim_control_adapter, dim, kernel_size=patch_size[1:], stride=patch_size[1:])
|
||||||
|
else:
|
||||||
|
self.control_adapter = None
|
||||||
|
|
||||||
def patchify(self, x: torch.Tensor):
|
def patchify(self, x: torch.Tensor):
|
||||||
x = self.patch_embedding(x)
|
x = self.patch_embedding(x)
|
||||||
@@ -612,6 +619,42 @@ class WanModelStateDictConverter:
|
|||||||
"eps": 1e-6,
|
"eps": 1e-6,
|
||||||
"has_ref_conv": True
|
"has_ref_conv": True
|
||||||
}
|
}
|
||||||
|
elif hash_state_dict_keys(state_dict) == "ac6a5aa74f4a0aab6f64eb9a72f19901":
|
||||||
|
# 1.3B PAI control-camera v1.1
|
||||||
|
config = {
|
||||||
|
"has_image_input": True,
|
||||||
|
"patch_size": [1, 2, 2],
|
||||||
|
"in_dim": 32,
|
||||||
|
"dim": 1536,
|
||||||
|
"ffn_dim": 8960,
|
||||||
|
"freq_dim": 256,
|
||||||
|
"text_dim": 4096,
|
||||||
|
"out_dim": 16,
|
||||||
|
"num_heads": 12,
|
||||||
|
"num_layers": 30,
|
||||||
|
"eps": 1e-6,
|
||||||
|
"has_ref_conv": False,
|
||||||
|
"add_control_adapter": True,
|
||||||
|
"in_dim_control_adapter": 24,
|
||||||
|
}
|
||||||
|
elif hash_state_dict_keys(state_dict) == "b61c605c2adbd23124d152ed28e049ae":
|
||||||
|
# 14B PAI control-camera v1.1
|
||||||
|
config = {
|
||||||
|
"has_image_input": True,
|
||||||
|
"patch_size": [1, 2, 2],
|
||||||
|
"in_dim": 32,
|
||||||
|
"dim": 5120,
|
||||||
|
"ffn_dim": 13824,
|
||||||
|
"freq_dim": 256,
|
||||||
|
"text_dim": 4096,
|
||||||
|
"out_dim": 16,
|
||||||
|
"num_heads": 40,
|
||||||
|
"num_layers": 40,
|
||||||
|
"eps": 1e-6,
|
||||||
|
"has_ref_conv": False,
|
||||||
|
"add_control_adapter": True,
|
||||||
|
"in_dim_control_adapter": 24,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
config = {}
|
config = {}
|
||||||
return state_dict, config
|
return state_dict, config
|
||||||
|
|||||||
@@ -208,6 +208,7 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
WanVideoUnit_InputVideoEmbedder(),
|
WanVideoUnit_InputVideoEmbedder(),
|
||||||
WanVideoUnit_PromptEmbedder(),
|
WanVideoUnit_PromptEmbedder(),
|
||||||
WanVideoUnit_ImageEmbedder(),
|
WanVideoUnit_ImageEmbedder(),
|
||||||
|
WanVideoUnit_FunCamera(),
|
||||||
WanVideoUnit_FunControl(),
|
WanVideoUnit_FunControl(),
|
||||||
WanVideoUnit_FunReference(),
|
WanVideoUnit_FunReference(),
|
||||||
WanVideoUnit_SpeedControl(),
|
WanVideoUnit_SpeedControl(),
|
||||||
@@ -473,6 +474,8 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
tea_cache_model_id: Optional[str] = "",
|
tea_cache_model_id: Optional[str] = "",
|
||||||
# progress_bar
|
# progress_bar
|
||||||
progress_bar_cmd=tqdm,
|
progress_bar_cmd=tqdm,
|
||||||
|
# Camera control
|
||||||
|
control_camera_video: Optional[torch.Tensor] = None
|
||||||
):
|
):
|
||||||
# Scheduler
|
# Scheduler
|
||||||
self.scheduler.set_timesteps(num_inference_steps, denoising_strength=denoising_strength, shift=sigma_shift)
|
self.scheduler.set_timesteps(num_inference_steps, denoising_strength=denoising_strength, shift=sigma_shift)
|
||||||
@@ -491,6 +494,7 @@ class WanVideoPipeline(BasePipeline):
|
|||||||
"end_image": end_image,
|
"end_image": end_image,
|
||||||
"input_video": input_video, "denoising_strength": denoising_strength,
|
"input_video": input_video, "denoising_strength": denoising_strength,
|
||||||
"control_video": control_video, "reference_image": reference_image,
|
"control_video": control_video, "reference_image": reference_image,
|
||||||
|
"control_camera_video": control_camera_video,
|
||||||
"vace_video": vace_video, "vace_video_mask": vace_video_mask, "vace_reference_image": vace_reference_image, "vace_scale": vace_scale,
|
"vace_video": vace_video, "vace_video_mask": vace_video_mask, "vace_reference_image": vace_reference_image, "vace_scale": vace_scale,
|
||||||
"seed": seed, "rand_device": rand_device,
|
"seed": seed, "rand_device": rand_device,
|
||||||
"height": height, "width": width, "num_frames": num_frames,
|
"height": height, "width": width, "num_frames": num_frames,
|
||||||
@@ -686,8 +690,38 @@ class WanVideoUnit_ImageEmbedder(PipelineUnit):
|
|||||||
y = y.to(dtype=pipe.torch_dtype, device=pipe.device)
|
y = y.to(dtype=pipe.torch_dtype, device=pipe.device)
|
||||||
return {"clip_feature": clip_context, "y": y}
|
return {"clip_feature": clip_context, "y": y}
|
||||||
|
|
||||||
|
class WanVideoUnit_FunCamera(PipelineUnit):
    """Pipeline unit that prepares the camera-control conditioning.

    It packs the per-frame camera embedding into groups of four frames for the
    control adapter and builds the image-conditioning latent ``y`` from
    ``input_image``.
    """

    def __init__(self):
        super().__init__(
            input_params=("control_camera_video", "cfg_merge", "num_frames", "height", "width", "input_image", "latents"),
            # The trailing comma is required: ("vae") is the plain string "vae", not a tuple.
            onload_model_names=("vae",)
        )

    def process(self, pipe: WanVideoPipeline, control_camera_video, cfg_merge, num_frames, height, width, input_image, latents):
        """Compute the camera-control inputs for the denoising model.

        Args:
            pipe: Pipeline providing ``vae``, ``preprocess_video``, ``device`` and ``torch_dtype``.
            control_camera_video: Per-frame camera embedding indexed as
                (frame, height, width, channel), or None to disable this unit.
            cfg_merge: Unused here.
            num_frames: Number of leading frames of ``control_camera_video`` to use.
                The number of frames kept plus 3 must be divisible by 4.
            height: Height ``input_image`` is resized to.
            width: Width ``input_image`` is resized to.
            input_image: First-frame image (must provide ``resize``, e.g. a PIL image).
            latents: Video latents; ``y`` takes its shape from them.

        Returns:
            An empty dict when ``control_camera_video`` is None, otherwise a dict
            with ``control_camera_latents``, ``control_camera_latents_input``
            (the same tensor moved to the pipeline device/dtype) and ``y``.

        Raises:
            ValueError: If ``control_camera_video`` is given without ``input_image``.
        """
        if control_camera_video is None:
            return {}
        if input_image is None:
            raise ValueError("`input_image` is required when `control_camera_video` is provided.")

        # (F, H, W, C) -> (1, C, F, H, W)
        control_camera_video = control_camera_video[:num_frames].permute([3, 0, 1, 2]).unsqueeze(0)

        # Repeat the first frame four times so the frame count becomes F + 3, then
        # move frames in front of channels: (1, F + 3, C, H, W).
        control_camera_latents = torch.concat(
            [
                torch.repeat_interleave(control_camera_video[:, :, 0:1], repeats=4, dim=2),
                control_camera_video[:, :, 1:]
            ], dim=2
        ).transpose(1, 2)

        # Fold every group of four consecutive frames into the channel axis:
        # (1, F', C, H, W) -> (1, C * 4, F' / 4, H, W).
        b, f, c, h, w = control_camera_latents.shape
        control_camera_latents = control_camera_latents.contiguous().view(b, f // 4, 4, c, h, w).transpose(2, 3)
        control_camera_latents = control_camera_latents.contiguous().view(b, f // 4, c * 4, h, w).transpose(1, 2)
        control_camera_latents_input = control_camera_latents.to(device=pipe.device, dtype=pipe.torch_dtype)

        # Encode the first frame.
        # NOTE(review): the VAE is used without an explicit
        # pipe.load_models_to_device(self.onload_model_names) call here — confirm
        # whether one is needed when models are offloaded.
        input_image = input_image.resize((width, height))
        input_latents = pipe.preprocess_video([input_image])
        input_latents = pipe.vae.encode(input_latents, device=pipe.device)

        # y is zero everywhere except the first latent frame, which carries the
        # encoded input image (skipped when there is only a single latent frame).
        y = torch.zeros_like(latents).to(pipe.device)
        if latents.size()[2] != 1:
            y[:, :, :1] = input_latents
        y = y.to(dtype=pipe.torch_dtype, device=pipe.device)

        return {"control_camera_latents": control_camera_latents, "control_camera_latents_input": control_camera_latents_input, "y": y}
|
||||||
|
|
||||||
class WanVideoUnit_FunControl(PipelineUnit):
|
class WanVideoUnit_FunControl(PipelineUnit):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
@@ -954,6 +988,7 @@ def model_fn_wan_video(
|
|||||||
cfg_merge: bool = False,
|
cfg_merge: bool = False,
|
||||||
use_gradient_checkpointing: bool = False,
|
use_gradient_checkpointing: bool = False,
|
||||||
use_gradient_checkpointing_offload: bool = False,
|
use_gradient_checkpointing_offload: bool = False,
|
||||||
|
control_camera_latents_input = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
if sliding_window_size is not None and sliding_window_stride is not None:
|
if sliding_window_size is not None and sliding_window_stride is not None:
|
||||||
@@ -1006,7 +1041,9 @@ def model_fn_wan_video(
|
|||||||
clip_embdding = dit.img_emb(clip_feature)
|
clip_embdding = dit.img_emb(clip_feature)
|
||||||
context = torch.cat([clip_embdding, context], dim=1)
|
context = torch.cat([clip_embdding, context], dim=1)
|
||||||
|
|
||||||
x, (f, h, w) = dit.patchify(x)
|
# Add camera control
|
||||||
|
x, (f, h, w) = dit.patchify(x, control_camera_latents_input)
|
||||||
|
|
||||||
|
|
||||||
# Reference image
|
# Reference image
|
||||||
if reference_latents is not None:
|
if reference_latents is not None:
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
|[PAI/Wan2.1-Fun-V1.1-14B-Control](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-Control)|基础模型|`control_video`, `reference_image`|[code](./model_inference/Wan2.1-Fun-V1.1-14B-Control.py)|[code](./model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh)|[code](./model_training/validate_full/Wan2.1-Fun-V1.1-14B-Control.py)|[code](./model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh)|[code](./model_training/validate_lora/Wan2.1-Fun-V1.1-14B-Control.py)|
|
|[PAI/Wan2.1-Fun-V1.1-14B-Control](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-Control)|基础模型|`control_video`, `reference_image`|[code](./model_inference/Wan2.1-Fun-V1.1-14B-Control.py)|[code](./model_training/full/Wan2.1-Fun-V1.1-14B-Control.sh)|[code](./model_training/validate_full/Wan2.1-Fun-V1.1-14B-Control.py)|[code](./model_training/lora/Wan2.1-Fun-V1.1-14B-Control.sh)|[code](./model_training/validate_lora/Wan2.1-Fun-V1.1-14B-Control.py)|
|
||||||
|[PAI/Wan2.1-Fun-V1.1-1.3B-InP](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-1.3B-InP)|基础模型|`input_image`, `end_image`||||||
|
|[PAI/Wan2.1-Fun-V1.1-1.3B-InP](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-1.3B-InP)|基础模型|`input_image`, `end_image`||||||
|
||||||
|[PAI/Wan2.1-Fun-V1.1-14B-InP](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-InP)|基础模型|`input_image`, `end_image`||||||
|
|[PAI/Wan2.1-Fun-V1.1-14B-InP](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-InP)|基础模型|`input_image`, `end_image`||||||
|
||||||
|[PAI/Wan2.1-Fun-V1.1-1.3B-Control-Camera](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-1.3B-Control-Camera)|基础模型|||||||
|
|[PAI/Wan2.1-Fun-V1.1-1.3B-Control-Camera](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-1.3B-Control-Camera)|基础模型|`control_camera_video`, `input_image`|[code](./model_inference/Wan2.1-Fun-V1.1-1.3B-Control-Camera.py)|||||
|
||||||
|[PAI/Wan2.1-Fun-V1.1-14B-Control-Camera](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-Control-Camera)|基础模型|||||||
|
|[PAI/Wan2.1-Fun-V1.1-14B-Control-Camera](https://modelscope.cn/models/PAI/Wan2.1-Fun-V1.1-14B-Control-Camera)|基础模型|||||||
|
||||||
|[iic/VACE-Wan2.1-1.3B-Preview](https://modelscope.cn/models/iic/VACE-Wan2.1-1.3B-Preview)|适配器|`vace_control_video`, `vace_reference_image`|[code](./model_inference/Wan2.1-VACE-1.3B-Preview.py)|[code](./model_training/full/VACE-Wan2.1-1.3B-Preview.sh)|[code](./model_training/validate_full/VACE-Wan2.1-1.3B-Preview.py)|[code](./model_training/lora/VACE-Wan2.1-1.3B-Preview.sh)|[code](./model_training/validate_lora/VACE-Wan2.1-1.3B-Preview.py)|
|
|[iic/VACE-Wan2.1-1.3B-Preview](https://modelscope.cn/models/iic/VACE-Wan2.1-1.3B-Preview)|适配器|`vace_control_video`, `vace_reference_image`|[code](./model_inference/Wan2.1-VACE-1.3B-Preview.py)|[code](./model_training/full/VACE-Wan2.1-1.3B-Preview.sh)|[code](./model_training/validate_full/VACE-Wan2.1-1.3B-Preview.py)|[code](./model_training/lora/VACE-Wan2.1-1.3B-Preview.sh)|[code](./model_training/validate_lora/VACE-Wan2.1-1.3B-Preview.py)|
|
||||||
|[Wan-AI/Wan2.1-VACE-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-VACE-1.3B)|适配器|`vace_control_video`, `vace_reference_image`|[code](./model_inference/Wan2.1-VACE-1.3B.py)|||||
|
|[Wan-AI/Wan2.1-VACE-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-VACE-1.3B)|适配器|`vace_control_video`, `vace_reference_image`|[code](./model_inference/Wan2.1-VACE-1.3B.py)|||||
|
||||||
|
|||||||
@@ -0,0 +1,129 @@
|
|||||||
|
import torch
|
||||||
|
from PIL import Image
|
||||||
|
from diffsynth import save_video, VideoData
|
||||||
|
from diffsynth.pipelines.wan_video_new import WanVideoPipeline, ModelConfig
|
||||||
|
from modelscope import dataset_snapshot_download
|
||||||
|
from diffsynth.models.wan_video_camera_controller import process_pose_file
|
||||||
|
|
||||||
|
# All checkpoints come from the same model repository; only the file pattern differs.
model_id = "PAI/Wan2.1-Fun-V1.1-1.3B-Control-Camera"
checkpoint_patterns = (
    "diffusion_pytorch_model*.safetensors",
    "models_t5_umt5-xxl-enc-bf16.pth",
    "Wan2.1_VAE.pth",
    "models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth",
)

# Build the pipeline in bfloat16 on the GPU, with every model offloaded to the CPU.
pipe = WanVideoPipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id=model_id, origin_file_pattern=pattern, offload_device="cpu")
        for pattern in checkpoint_patterns
    ],
)
pipe.enable_vram_management()
|
||||||
|
|
||||||
|
|
||||||
|
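# Camera trajectory used as the control-camera input.
# `control_camera_text` can be either the pose text itself (as below) or the path to a pose .txt file,
# e.g. control_camera_text = "Pan_Left.txt". The first line is skipped as a header, which is why the
# literal below starts with an empty line.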
|
||||||
|
control_camera_text = '''
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.018518518518518517 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.037037037037037035 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.05555555555555555 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.07407407407407407 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.09259259259259259 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.1111111111111111 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.12962962962962962 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.14814814814814814 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.16666666666666666 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.18518518518518517 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.2037037037037037 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.2222222222222222 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.24074074074074073 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.25925925925925924 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.2777777777777778 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.2962962962962963 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.31481481481481477 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.3333333333333333 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.35185185185185186 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.37037037037037035 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.38888888888888884 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.4074074074074074 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.42592592592592593 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.4444444444444444 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.4629629629629629 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.48148148148148145 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.5 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.5185185185185185 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.537037037037037 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.5555555555555556 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.5740740740740741 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.5925925925925926 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.611111111111111 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.6296296296296295 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.6481481481481481 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.6666666666666666 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.6851851851851851 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7037037037037037 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7222222222222222 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7407407407407407 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7592592592592593 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7777777777777777 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.7962962962962963 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.8148148148148148 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.8333333333333334 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.8518518518518519 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.8703703703703705 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.8888888888888888 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.9074074074074074 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.9259259259259258 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.9444444444444444 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.9629629629629629 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 0.9814814814814815 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.0185185185185186 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.037037037037037 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.0555555555555556 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.074074074074074 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.0925925925925926 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.1111111111111112 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.1296296296296298 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.1481481481481481 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.1666666666666667 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.1851851851851851 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.2037037037037037 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.222222222222222 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.2407407407407407 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.259259259259259 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.2777777777777777 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.2962962962962963 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.3148148148148149 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.3333333333333333 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.3518518518518519 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.3703703703703702 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.3888888888888888 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.4074074074074074 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.425925925925926 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.4444444444444444 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.462962962962963 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
0 0.532139961 0.946026558 0.5 0.5 0 0 1.0 0.0 0.0 1.4814814814814814 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Fetch the example first-frame image and load it.
example_image_path = "data/examples/wan/input_image.jpg"
dataset_snapshot_download(
    dataset_id="DiffSynth-Studio/examples_in_diffsynth",
    local_dir="./",
    allow_file_pattern=example_image_path,
)
input_image = Image.open(example_image_path)

height, width = 480, 832

# Convert the pose text into the per-frame camera embedding expected by the pipeline.
control_camera_video = process_pose_file(control_camera_text, width, height)

prompt = "一艘小船正勇敢地乘风破浪前行。蔚蓝的大海波涛汹涌,白色的浪花拍打着船身,但小船毫不畏惧,坚定地驶向远方。阳光洒在水面上,闪烁着金色的光芒,为这壮丽的场景增添了一抹温暖。镜头拉近,可以看到船上的旗帜迎风飘扬,象征着不屈的精神与冒险的勇气。这段画面充满力量,激励人心,展现了面对挑战时的无畏与执着。"
negative_prompt = "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"

# Generate 81 frames conditioned on the first frame and the camera trajectory.
video = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=height,
    width=width,
    num_frames=81,
    seed=43,
    tiled=True,
    control_camera_video=control_camera_video,
    input_image=input_image,
)
save_video(video, "video.mp4", fps=15, quality=5)
|
||||||
Reference in New Issue
Block a user