mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-20 15:48:20 +00:00
Merge branch 'main' into qwen
This commit is contained in:
24
examples/image_synthesis/omost_flux_text_to_image.py
Normal file
24
examples/image_synthesis/omost_flux_text_to_image.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import torch
|
||||
from diffsynth import download_models, ModelManager, OmostPromter, FluxImagePipeline
|
||||
|
||||
|
||||
# Fetch the checkpoints referenced by the paths loaded below.
download_models(["OmostPrompt"])
download_models(["FLUX.1-dev"])

# Load the Omost prompt model together with the FLUX.1-dev components in bfloat16.
model_manager = ModelManager(torch_dtype=torch.bfloat16)
model_manager.load_models(
    [
        "models/OmostPrompt/omost-llama-3-8b-4bits",
        "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors",
        "models/FLUX/FLUX.1-dev/text_encoder_2",
        "models/FLUX/FLUX.1-dev/ae.safetensors",
        "models/FLUX/FLUX.1-dev/flux1-dev.safetensors",
    ]
)

# Build the FLUX pipeline with OmostPromter registered as a prompt extender.
pipe = FluxImagePipeline.from_model_manager(
    model_manager,
    prompt_extender_classes=[OmostPromter],
)

# Seed the global torch RNG immediately before sampling.
torch.manual_seed(0)
image = pipe(
    prompt="an image of a witch who is releasing ice and fire magic",
    num_inference_steps=30,
    embedded_guidance=3.5,
)
image.save("image_omost.jpg")
|
||||
@@ -1,8 +1,46 @@
|
||||
# Text to Video
|
||||
|
||||
In DiffSynth Studio, we can use AnimateDiff and SVD to generate videos. However, these models usually produce poor-quality content. We do not recommend using these models until a more powerful video model emerges.
|
||||
In DiffSynth Studio, we can use some video models to generate videos.
|
||||
|
||||
### Example 7: Text to Video
|
||||
### Example: Text-to-Video using CogVideoX-5B (Experimental)
|
||||
|
||||
See [cogvideo_text_to_video.py](cogvideo_text_to_video.py).
|
||||
|
||||
First, we generate a video using the prompt "an astronaut riding a horse on Mars".
|
||||
|
||||
https://github.com/user-attachments/assets/4c91c1cd-e4a0-471a-bd8d-24d761262941
|
||||
|
||||
Then, we convert the astronaut to a robot.
|
||||
|
||||
https://github.com/user-attachments/assets/225a00a4-2bc8-4740-8e86-a64b460a29ec
|
||||
|
||||
Upscale the video using the model itself.
|
||||
|
||||
https://github.com/user-attachments/assets/c02cb30c-de60-473c-8242-32c67b3155ad
|
||||
|
||||
Make the video look smoother by interpolating frames.
|
||||
|
||||
https://github.com/user-attachments/assets/f0e465b4-45df-4435-ab10-7a084ca2b0a0
|
||||
|
||||
Here is another example.
|
||||
|
||||
First, we generate a video using the prompt "a dog is running".
|
||||
|
||||
https://github.com/user-attachments/assets/e3696297-99f5-4d0c-a5ca-1d1566db85b4
|
||||
|
||||
Then, we add a blue collar to the dog.
|
||||
|
||||
https://github.com/user-attachments/assets/7ff22be7-4390-4d33-ae6c-53f6f056e18d
|
||||
|
||||
Upscale the video using the model itself.
|
||||
|
||||
https://github.com/user-attachments/assets/a909c32c-0b7d-495c-a53c-d23a99a3d3e9
|
||||
|
||||
Make the video look smoother by interpolating frames.
|
||||
|
||||
https://github.com/user-attachments/assets/ea37c150-97a0-4858-8003-0c2e5eef3331
|
||||
|
||||
### Example: Text-to-Video using AnimateDiff
|
||||
|
||||
Generate a video using a Stable Diffusion model and an AnimateDiff model. We can break the limit on the number of frames! See [sd_text_to_video.py](./sd_text_to_video.py).
|
||||
|
||||
|
||||
73
examples/video_synthesis/cogvideo_text_to_video.py
Normal file
73
examples/video_synthesis/cogvideo_text_to_video.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from diffsynth import ModelManager, save_video, VideoData, download_models, CogVideoPipeline
|
||||
from diffsynth.extensions.RIFE import RIFEInterpolater
|
||||
import torch, os
|
||||
# Set TOKENIZERS_PARALLELISM for this process before any pipeline is built.
# NOTE(review): presumably consumed by the Hugging Face tokenizers library — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
||||
|
||||
|
||||
|
||||
def text_to_video(model_manager, prompt, seed, output_path):
    """Generate a video from a text prompt and save it to ``output_path``.

    Args:
        model_manager: Model manager handed to ``CogVideoPipeline.from_model_manager``.
        prompt: Text prompt passed to the pipeline.
        seed: Value given to ``torch.manual_seed`` right before sampling.
        output_path: Destination passed to ``save_video``.
    """
    pipeline = CogVideoPipeline.from_model_manager(model_manager)

    # Seed after the pipeline is built so sampling starts from a known RNG state.
    torch.manual_seed(seed)
    sampling_options = {
        "height": 480,
        "width": 720,
        "cfg_scale": 7.0,
        "num_inference_steps": 200,
    }
    frames = pipeline(prompt=prompt, **sampling_options)

    save_video(frames, output_path, fps=8, quality=5)
|
||||
|
||||
|
||||
def edit_video(model_manager, prompt, seed, input_path, output_path):
    """Re-render an existing video under a new prompt and save the result.

    The video at ``input_path`` is passed to the pipeline as ``input_video``
    with ``denoising_strength=0.7``.

    Args:
        model_manager: Model manager handed to ``CogVideoPipeline.from_model_manager``.
        prompt: Text prompt describing the edited video.
        seed: Value given to ``torch.manual_seed`` right before sampling.
        input_path: Path of the video file to edit.
        output_path: Destination passed to ``save_video``.
    """
    pipeline = CogVideoPipeline.from_model_manager(model_manager)
    source_clip = VideoData(video_file=input_path)

    torch.manual_seed(seed)
    edited_frames = pipeline(
        prompt=prompt,
        input_video=source_clip,
        denoising_strength=0.7,
        height=480,
        width=720,
        cfg_scale=7.0,
        num_inference_steps=200,
    )

    save_video(edited_frames, output_path, fps=8, quality=5)
|
||||
|
||||
|
||||
def self_upscale(model_manager, prompt, seed, input_path, output_path):
    """Upscale a video by re-running the pipeline at twice the base resolution.

    The input is loaded at 960x1440 (2 x 480 by 2 x 720) and passed to the
    pipeline as ``input_video`` with ``denoising_strength=0.4`` and
    ``tiled=True``.

    Args:
        model_manager: Model manager handed to ``CogVideoPipeline.from_model_manager``.
        prompt: Text prompt passed to the pipeline.
        seed: Value given to ``torch.manual_seed`` right before sampling.
        input_path: Path of the video file to upscale.
        output_path: Destination passed to ``save_video``.
    """
    target_height, target_width = 480 * 2, 720 * 2

    pipeline = CogVideoPipeline.from_model_manager(model_manager)
    resized_clip = VideoData(
        video_file=input_path,
        height=target_height,
        width=target_width,
    )
    source_frames = resized_clip.raw_data()

    torch.manual_seed(seed)
    upscaled_frames = pipeline(
        prompt=prompt,
        input_video=source_frames,
        denoising_strength=0.4,
        tiled=True,
        height=target_height,
        width=target_width,
        cfg_scale=7.0,
        num_inference_steps=30,
    )

    save_video(upscaled_frames, output_path, fps=8, quality=7)
|
||||
|
||||
|
||||
def interpolate_video(model_manager, input_path, output_path):
    """Interpolate frames of a video with RIFE and save the result at 32 fps.

    Args:
        model_manager: Model manager handed to ``RIFEInterpolater.from_model_manager``.
        input_path: Path of the video file to interpolate.
        output_path: Destination passed to ``save_video``.
    """
    interpolater = RIFEInterpolater.from_model_manager(model_manager)
    source_frames = VideoData(video_file=input_path).raw_data()

    # NOTE(review): 32 fps is 4x the 8 fps used by the other steps in this file;
    # presumably two interpolation passes quadruple the frame count — confirm.
    smoothed_frames = interpolater.interpolate(source_frames, num_iter=2)

    save_video(smoothed_frames, output_path, fps=32, quality=5)
|
||||
|
||||
|
||||
|
||||
# Fetch the checkpoints referenced by the paths loaded below.
download_models(["CogVideoX-5B", "RIFE"])

# One shared manager holds the CogVideoX components and the RIFE flow network.
model_manager = ModelManager(torch_dtype=torch.bfloat16)
model_manager.load_models(
    [
        "models/CogVideo/CogVideoX-5b/text_encoder",
        "models/CogVideo/CogVideoX-5b/transformer",
        "models/CogVideo/CogVideoX-5b/vae/diffusion_pytorch_model.safetensors",
        "models/RIFE/flownet.pkl",
    ]
)

# Example 1
# Each stage reads the file written by the previous one:
# generate -> edit -> upscale -> interpolate.
text_to_video(
    model_manager,
    prompt="an astronaut riding a horse on Mars.",
    seed=0,
    output_path="1_video_1.mp4",
)
edit_video(
    model_manager,
    prompt="a white robot riding a horse on Mars.",
    seed=1,
    input_path="1_video_1.mp4",
    output_path="1_video_2.mp4",
)
self_upscale(
    model_manager,
    prompt="a white robot riding a horse on Mars.",
    seed=2,
    input_path="1_video_2.mp4",
    output_path="1_video_3.mp4",
)
interpolate_video(
    model_manager,
    input_path="1_video_3.mp4",
    output_path="1_video_4.mp4",
)

# Example 2
text_to_video(
    model_manager,
    prompt="a dog is running.",
    seed=1,
    output_path="2_video_1.mp4",
)
edit_video(
    model_manager,
    prompt="a dog with blue collar.",
    seed=2,
    input_path="2_video_1.mp4",
    output_path="2_video_2.mp4",
)
self_upscale(
    model_manager,
    prompt="a dog with blue collar.",
    seed=3,
    input_path="2_video_2.mp4",
    output_path="2_video_3.mp4",
)
interpolate_video(
    model_manager,
    input_path="2_video_3.mp4",
    output_path="2_video_4.mp4",
)
|
||||
Reference in New Issue
Block a user