update readme

2026-03-22 16:50:47 +00:00 · 2025-07-22 20:02:21 +08:00
parent ff95c56884
commit ebeda32215
5 changed files with 462 additions and 42 deletions
--- a/examples/CogVideoX/README.md
+++ b/examples/CogVideoX/README.md
@@ -0,0 +1,39 @@
+# CogVideoX
+
+### Example: Text-to-Video using CogVideoX-5B (Experimental)
+
+See [cogvideo_text_to_video.py](cogvideo_text_to_video.py).
+
+First, we generate a video using the prompt "an astronaut riding a horse on Mars".
+
+https://github.com/user-attachments/assets/4c91c1cd-e4a0-471a-bd8d-24d761262941
+
+Then, we convert the astronaut into a robot.
+
+https://github.com/user-attachments/assets/225a00a4-2bc8-4740-8e86-a64b460a29ec
+
+Next, we upscale the video using the model itself.
+
+https://github.com/user-attachments/assets/c02cb30c-de60-473c-8242-32c67b3155ad
+
+Finally, we make the video look smoother by interpolating frames.
+
+https://github.com/user-attachments/assets/f0e465b4-45df-4435-ab10-7a084ca2b0a0
+
+Here is another example.
+
+First, we generate a video using the prompt "a dog is running".
+
+https://github.com/user-attachments/assets/e3696297-99f5-4d0c-a5ca-1d1566db85b4
+
+Then, we add a blue collar to the dog.
+
+https://github.com/user-attachments/assets/7ff22be7-4390-4d33-ae6c-53f6f056e18d
+
+Next, we upscale the video using the model itself.
+
+https://github.com/user-attachments/assets/a909c32c-0b7d-495c-a53c-d23a99a3d3e9
+
+Finally, we make the video look smoother by interpolating frames.
+
+https://github.com/user-attachments/assets/ea37c150-97a0-4858-8003-0c2e5eef3331
--- a/examples/CogVideoX/cogvideo_text_to_video.py
+++ b/examples/CogVideoX/cogvideo_text_to_video.py
@@ -0,0 +1,73 @@
+from diffsynth import ModelManager, save_video, VideoData, download_models, CogVideoPipeline
+from diffsynth.extensions.RIFE import RIFEInterpolater
+import torch, os
+os.environ["TOKENIZERS_PARALLELISM"] = "True"
+
+
+
+def text_to_video(model_manager, prompt, seed, output_path):  # Run CogVideoPipeline on a text prompt at 480x720 and hand the result to save_video.
+    pipe = CogVideoPipeline.from_model_manager(model_manager)  # a new pipeline object is built on every call
+    torch.manual_seed(seed)  # seed torch's global RNG immediately before sampling
+    video = pipe(
+        prompt=prompt,
+        height=480, width=720,
+        cfg_scale=7.0, num_inference_steps=200
+    )
+    save_video(video, output_path, fps=8, quality=5)  # fps=8 is reused by the edit/upscale helpers in this script
+
+
+def edit_video(model_manager, prompt, seed, input_path, output_path):  # Run CogVideoPipeline on an existing video file with a new prompt and save the result.
+    pipe = CogVideoPipeline.from_model_manager(model_manager)  # a new pipeline object is built on every call
+    input_video = VideoData(video_file=input_path)  # NOTE(review): passed as a VideoData object; self_upscale and interpolate_video call .raw_data() first
+    torch.manual_seed(seed)  # seed torch's global RNG immediately before sampling
+    video = pipe(
+        prompt=prompt,
+        height=480, width=720,
+        cfg_scale=7.0, num_inference_steps=200,
+        input_video=input_video, denoising_strength=0.7  # presumably a strength below 1.0 retains part of the input video; confirm in CogVideoPipeline
+    )
+    save_video(video, output_path, fps=8, quality=5)
+
+
+def self_upscale(model_manager, prompt, seed, input_path, output_path):  # Run CogVideoPipeline on an existing video at twice the base size (960x1440) and save the result.
+    pipe = CogVideoPipeline.from_model_manager(model_manager)  # a new pipeline object is built on every call
+    input_video = VideoData(video_file=input_path, height=480*2, width=720*2).raw_data()  # presumably frames are resized to 960x1440 on load; confirm in VideoData
+    torch.manual_seed(seed)  # seed torch's global RNG immediately before sampling
+    video = pipe(
+        prompt=prompt,
+        height=480*2, width=720*2,
+        cfg_scale=7.0, num_inference_steps=30,  # 30 steps here versus 200 in text_to_video and edit_video
+        input_video=input_video, denoising_strength=0.4, tiled=True  # NOTE(review): tiled=True presumably limits memory at this resolution; confirm in CogVideoPipeline
+    )
+    save_video(video, output_path, fps=8, quality=7)  # quality=7 here versus 5 in the other helpers
+
+
+def interpolate_video(model_manager, input_path, output_path):  # Pass a video file through RIFEInterpolater and save the result at 32 fps.
+    rife = RIFEInterpolater.from_model_manager(model_manager)
+    video = VideoData(video_file=input_path).raw_data()
+    video = rife.interpolate(video, num_iter=2)  # presumably each iteration roughly doubles the frame count, matching 8 fps -> 32 fps below; confirm in RIFEInterpolater
+    save_video(video, output_path, fps=32, quality=5)  # 4x the fps=8 used by the other helpers
+
+
+
+download_models(["CogVideoX-5B", "RIFE"])  # presumably fetches the weights that are loaded from models/ below; confirm in download_models
+
+model_manager = ModelManager(torch_dtype=torch.bfloat16)  # one manager is shared by every helper call below
+model_manager.load_models([
+    "models/CogVideo/CogVideoX-5b/text_encoder",
+    "models/CogVideo/CogVideoX-5b/transformer",
+    "models/CogVideo/CogVideoX-5b/vae/diffusion_pytorch_model.safetensors",
+    "models/RIFE/flownet.pkl",
+])
+
+# Example 1: generate -> edit -> upscale -> interpolate; each step reads the file written by the previous step
+text_to_video(model_manager, "an astronaut riding a horse on Mars.", 0, "1_video_1.mp4")
+edit_video(model_manager, "a white robot riding a horse on Mars.", 1, "1_video_1.mp4", "1_video_2.mp4")
+self_upscale(model_manager, "a white robot riding a horse on Mars.", 2, "1_video_2.mp4", "1_video_3.mp4")
+interpolate_video(model_manager, "1_video_3.mp4", "1_video_4.mp4")
+
+# Example 2: the same four-step chain with a different prompt and different seeds
+text_to_video(model_manager, "a dog is running.", 1, "2_video_1.mp4")
+edit_video(model_manager, "a dog with blue collar.", 2, "2_video_1.mp4", "2_video_2.mp4")
+self_upscale(model_manager, "a dog with blue collar.", 3, "2_video_2.mp4", "2_video_3.mp4")
+interpolate_video(model_manager, "2_video_3.mp4", "2_video_4.mp4")