rearrange examples

This commit is contained in:
Artiprocher
2024-06-06 18:50:07 +08:00
parent f6de5eef4d
commit 4d4a095420
20 changed files with 140 additions and 45 deletions

View File

@@ -0,0 +1,9 @@
# Text to Video
In DiffSynth Studio, we can use AnimateDiff and SVD to generate videos. However, these models usually generate low-quality content. We do not recommend using these models until a more powerful video model emerges.
### Example 7: Text to Video
Generate a video using a Stable Diffusion model and an AnimateDiff model. This pipeline can go beyond the usual frame-count limit! See [sd_text_to_video.py](./sd_text_to_video.py).
https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/8f556355-4079-4445-9b48-e9da77699437

View File

@@ -0,0 +1,47 @@
from diffsynth import ModelManager, SDImagePipeline, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, save_frames
from diffsynth.extensions.RIFE import RIFEInterpolater
import torch

# Example: text -> image -> long video, then RIFE frame interpolation.
#
# Required checkpoints (download before running):
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)

# Load every checkpoint through a single ModelManager in fp16 on the GPU.
manager = ModelManager(torch_dtype=torch.float16, device="cuda")
manager.load_models([
    "models/stable_diffusion/dreamshaper_8.safetensors",
    "models/AnimateDiff/mm_sd_v15_v2.ckpt",
    "models/RIFE/flownet.pkl"
])

# Stage 1: text -> still image. The seed is fixed for reproducibility.
image_pipeline = SDImagePipeline.from_model_manager(manager)
torch.manual_seed(0)
still = image_pipeline(
    prompt="lightning storm, sea",
    negative_prompt="",
    cfg_scale=7.5,
    num_inference_steps=30, height=512, width=768,
)

# Stage 2: text + image -> video (6GB VRAM is enough!). The still image is
# repeated for every frame and lightly re-noised (denoising_strength=0.9).
video_pipeline = SDVideoPipeline.from_model_manager(manager)
frames = video_pipeline(
    prompt="lightning storm, sea",
    negative_prompt="",
    cfg_scale=7.5,
    num_frames=64,
    num_inference_steps=10, height=512, width=768,
    animatediff_batch_size=16, animatediff_stride=1,
    input_frames=[still] * 64, denoising_strength=0.9,
    vram_limit_level=0,
)

# Stage 3: RIFE interpolation — num_iter=3 doubles the frame rate three times.
interpolator = RIFEInterpolater.from_model_manager(manager)
frames = interpolator.interpolate(frames, num_iter=3)

# Persist the high-fps result.
save_video(frames, "output_video.mp4", fps=120)

View File

@@ -0,0 +1,28 @@
from diffsynth import ModelManager, SDXLVideoPipeline, save_video
import torch

# Example: text -> video with SDXL + the AnimateDiff SDXL motion module.
#
# Required checkpoints (download before running):
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
# `models/AnimateDiff/mm_sdxl_v10_beta.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sdxl_v10_beta.ckpt)

# Load both checkpoints in fp16 on the GPU.
manager = ModelManager(torch_dtype=torch.float16, device="cuda")
manager.load_models([
    "models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
    "models/AnimateDiff/mm_sdxl_v10_beta.ckpt"
])
video_pipeline = SDXLVideoPipeline.from_model_manager(manager)

prompt = "A panda standing on a surfboard in the ocean in sunset, 4k, high resolution.Realistic, Cinematic, high resolution"
negative_prompt = ""

# Fixed seed for reproducibility.
torch.manual_seed(0)
frames = video_pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    cfg_scale=8.5,
    height=1024, width=1024, num_frames=16,
    num_inference_steps=100,
)
save_video(frames, "video.mp4", fps=16)

View File

@@ -0,0 +1,37 @@
from diffsynth import save_video, SDXLImagePipeline, ModelManager, SVDVideoPipeline
import torch

# Example: text -> image with SDXL, then image -> video with SVD.
# (Fix: the original script imported ModelManager twice; the duplicate
# `from diffsynth import ModelManager` line has been removed.)
#
# Required checkpoints (download before running):
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
# `models/stable_video_diffusion/svd_xt.safetensors`: [link](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/resolve/main/svd_xt.safetensors)

prompt = "cloud, wind"
torch.manual_seed(0)  # fixed seed for reproducibility

# 1. Text-to-image using SD-XL (fp16 on the GPU).
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_models(["models/stable_diffusion_xl/sd_xl_base_1.0.safetensors"])
pipe = SDXLImagePipeline.from_model_manager(model_manager)
image = pipe(
    prompt=prompt,
    negative_prompt="",
    cfg_scale=6,
    height=1024, width=1024, num_inference_steps=50,
)
# Move the SDXL pipeline off the GPU to free VRAM before loading SVD.
pipe.to("cpu")
torch.cuda.empty_cache()

# 2. Image-to-video using SVD.
# NOTE(review): this ModelManager is built with default dtype/device, unlike
# the fp16/cuda one above — presumably intentional for SVD; confirm upstream.
model_manager = ModelManager()
model_manager.load_models(["models/stable_video_diffusion/svd_xt.safetensors"])
pipe = SVDVideoPipeline.from_model_manager(model_manager)
video = pipe(
    input_image=image,
    num_frames=25, fps=15, height=1024, width=1024,
    motion_bucket_id=127,
    num_inference_steps=50
)
save_video(video, "video.mp4", fps=15)