rearrange examples

This commit is contained in:
Artiprocher
2024-06-06 18:50:07 +08:00
parent f6de5eef4d
commit 4d4a095420
20 changed files with 140 additions and 45 deletions

View File

@@ -0,0 +1,9 @@
# Text to Video
In DiffSynth Studio, we can use AnimateDiff and SVD to generate videos. However, these models usually generate low-quality content. We do not recommend using these models until a more powerful video model emerges.
### Example 7: Text to Video
Generate a video using a Stable Diffusion model and an AnimateDiff model. This pipeline can go beyond the usual frame-count limit! See [sd_text_to_video.py](./sd_text_to_video.py).
https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/8f556355-4079-4445-9b48-e9da77699437

View File

@@ -0,0 +1,47 @@
from diffsynth import ModelManager, SDImagePipeline, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, save_frames
from diffsynth.extensions.RIFE import RIFEInterpolater
import torch

# Example: text -> image -> long video, then RIFE frame interpolation.
#
# Required checkpoints (download before running):
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)

# Load every checkpoint through a single ModelManager in fp16 on the GPU.
manager = ModelManager(torch_dtype=torch.float16, device="cuda")
manager.load_models([
    "models/stable_diffusion/dreamshaper_8.safetensors",
    "models/AnimateDiff/mm_sd_v15_v2.ckpt",
    "models/RIFE/flownet.pkl"
])

# Stage 1: text -> still image. The seed is fixed for reproducibility.
image_pipeline = SDImagePipeline.from_model_manager(manager)
torch.manual_seed(0)
still = image_pipeline(
    prompt="lightning storm, sea",
    negative_prompt="",
    cfg_scale=7.5,
    num_inference_steps=30, height=512, width=768,
)

# Stage 2: text + image -> video (6GB VRAM is enough!). The still image is
# repeated for every frame and lightly re-noised (denoising_strength=0.9).
video_pipeline = SDVideoPipeline.from_model_manager(manager)
frames = video_pipeline(
    prompt="lightning storm, sea",
    negative_prompt="",
    cfg_scale=7.5,
    num_frames=64,
    num_inference_steps=10, height=512, width=768,
    animatediff_batch_size=16, animatediff_stride=1,
    input_frames=[still] * 64, denoising_strength=0.9,
    vram_limit_level=0,
)

# Stage 3: RIFE interpolation — num_iter=3 doubles the frame rate three times.
interpolator = RIFEInterpolater.from_model_manager(manager)
frames = interpolator.interpolate(frames, num_iter=3)

# Persist the high-fps result.
save_video(frames, "output_video.mp4", fps=120)

View File

@@ -0,0 +1,28 @@
from diffsynth import ModelManager, SDXLVideoPipeline, save_video
import torch

# Example: text -> video with SDXL + the AnimateDiff SDXL motion module.
#
# Required checkpoints (download before running):
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
# `models/AnimateDiff/mm_sdxl_v10_beta.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sdxl_v10_beta.ckpt)

# Load both checkpoints in fp16 on the GPU.
manager = ModelManager(torch_dtype=torch.float16, device="cuda")
manager.load_models([
    "models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
    "models/AnimateDiff/mm_sdxl_v10_beta.ckpt"
])
video_pipeline = SDXLVideoPipeline.from_model_manager(manager)

prompt = "A panda standing on a surfboard in the ocean in sunset, 4k, high resolution.Realistic, Cinematic, high resolution"
negative_prompt = ""

# Fixed seed for reproducibility.
torch.manual_seed(0)
frames = video_pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    cfg_scale=8.5,
    height=1024, width=1024, num_frames=16,
    num_inference_steps=100,
)
save_video(frames, "video.mp4", fps=16)

View File

@@ -0,0 +1,37 @@
from diffsynth import save_video, SDXLImagePipeline, ModelManager, SVDVideoPipeline
import torch

# Example: text -> image with SDXL, then image -> video with SVD.
# (Fix: the original script imported ModelManager twice; the duplicate
# `from diffsynth import ModelManager` line has been removed.)
#
# Required checkpoints (download before running):
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
# `models/stable_video_diffusion/svd_xt.safetensors`: [link](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/resolve/main/svd_xt.safetensors)

prompt = "cloud, wind"
torch.manual_seed(0)  # fixed seed for reproducibility

# 1. Text-to-image using SD-XL (fp16 on the GPU).
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_models(["models/stable_diffusion_xl/sd_xl_base_1.0.safetensors"])
pipe = SDXLImagePipeline.from_model_manager(model_manager)
image = pipe(
    prompt=prompt,
    negative_prompt="",
    cfg_scale=6,
    height=1024, width=1024, num_inference_steps=50,
)
# Move the SDXL pipeline off the GPU to free VRAM before loading SVD.
pipe.to("cpu")
torch.cuda.empty_cache()

# 2. Image-to-video using SVD.
# NOTE(review): this ModelManager is built with default dtype/device, unlike
# the fp16/cuda one above — presumably intentional for SVD; confirm upstream.
model_manager = ModelManager()
model_manager.load_models(["models/stable_video_diffusion/svd_xt.safetensors"])
pipe = SVDVideoPipeline.from_model_manager(model_manager)
video = pipe(
    input_image=image,
    num_frames=25, fps=15, height=1024, width=1024,
    motion_bucket_id=127,
    num_inference_steps=50
)
save_video(video, "video.mp4", fps=15)