From 7b1fe47199e96e52aef5f32f1b0ee93343f18bf1 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Mon, 5 Feb 2024 13:59:56 +0800 Subject: [PATCH] add diffutoon editing example --- README.md | 2 +- diffsynth/pipelines/stable_diffusion_video.py | 28 ++- diffsynth/processors/sequencial_processor.py | 32 ++- ...utoon_toon_shading_with_editing_signals.py | 196 ++++++++++++++++++ 4 files changed, 250 insertions(+), 8 deletions(-) create mode 100644 examples/diffutoon_toon_shading_with_editing_signals.py diff --git a/README.md b/README.md index 20b9adf..3778c1e 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-47 ### Example 5: Toon Shading with Editing Signals (Diffutoon) -Coming soon. +This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224), supporting video editing signals. See `examples/diffutoon_toon_shading_with_editing_signals.py`. https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/20528af5-5100-474a-8cdc-440b9efdd86c diff --git a/diffsynth/pipelines/stable_diffusion_video.py b/diffsynth/pipelines/stable_diffusion_video.py index 36c3127..eed4da3 100644 --- a/diffsynth/pipelines/stable_diffusion_video.py +++ b/diffsynth/pipelines/stable_diffusion_video.py @@ -4,6 +4,7 @@ from ..prompts import SDPrompter from ..schedulers import EnhancedDDIMScheduler from ..data import VideoData, save_frames, save_video from .dancer import lets_dance +from ..processors.sequencial_processor import SequencialProcessor from typing import List import torch, os, json from tqdm import tqdm @@ -251,6 +252,10 @@ class SDVideoPipeline(torch.nn.Module): # Decode image output_frames = self.decode_images(latents) + # Post-process + if smoother is not None and (num_inference_steps in smoother_progress_ids or -1 in smoother_progress_ids): + output_frames = smoother(output_frames, original_frames=input_frames) + return output_frames @@ -278,21 +283,30 @@ class 
SDVideoPipelineRunner: return model_manager, pipe - def synthesize_video(self, model_manager, pipe, seed, **pipeline_inputs): + def load_smoother(self, model_manager, smoother_configs): + smoother = SequencialProcessor.from_model_manager(model_manager, smoother_configs) + return smoother + + + def synthesize_video(self, model_manager, pipe, seed, smoother, **pipeline_inputs): torch.manual_seed(seed) if self.in_streamlit: import streamlit as st progress_bar_st = st.progress(0.0) - output_video = pipe(**pipeline_inputs, progress_bar_st=progress_bar_st) + output_video = pipe(**pipeline_inputs, smoother=smoother, progress_bar_st=progress_bar_st) progress_bar_st.progress(1.0) else: - output_video = pipe(**pipeline_inputs) + output_video = pipe(**pipeline_inputs, smoother=smoother) model_manager.to("cpu") return output_video def load_video(self, video_file, image_folder, height, width, start_frame_id, end_frame_id): video = VideoData(video_file=video_file, image_folder=image_folder, height=height, width=width) + if start_frame_id is None: + start_frame_id = 0 + if end_frame_id is None: + end_frame_id = len(video) frames = [video[i] for i in range(start_frame_id, end_frame_id)] return frames @@ -325,8 +339,14 @@ class SDVideoPipelineRunner: if self.in_streamlit: st.markdown("Loading models ...") model_manager, pipe = self.load_pipeline(**config["models"]) if self.in_streamlit: st.markdown("Loading models ... done!") + if "smoother_configs" in config: + if self.in_streamlit: st.markdown("Loading smoother ...") + smoother = self.load_smoother(model_manager, config["smoother_configs"]) + if self.in_streamlit: st.markdown("Loading smoother ... 
done!") + else: + smoother = None if self.in_streamlit: st.markdown("Synthesizing videos ...") - output_video = self.synthesize_video(model_manager, pipe, config["pipeline"]["seed"], **config["pipeline"]["pipeline_inputs"]) + output_video = self.synthesize_video(model_manager, pipe, config["pipeline"]["seed"], smoother, **config["pipeline"]["pipeline_inputs"]) if self.in_streamlit: st.markdown("Synthesizing videos ... done!") if self.in_streamlit: st.markdown("Saving videos ...") self.save_output(output_video, config["data"]["output_folder"], config["data"]["fps"], config) diff --git a/diffsynth/processors/sequencial_processor.py b/diffsynth/processors/sequencial_processor.py index 6f6b440..9b5bc94 100644 --- a/diffsynth/processors/sequencial_processor.py +++ b/diffsynth/processors/sequencial_processor.py @@ -1,15 +1,41 @@ from .base import VideoProcessor +class AutoVideoProcessor(VideoProcessor): + def __init__(self): + pass + + @staticmethod + def from_model_manager(model_manager, processor_type, **kwargs): + if processor_type == "FastBlend": + from .FastBlend import FastBlendSmoother + return FastBlendSmoother.from_model_manager(model_manager, **kwargs) + elif processor_type == "Contrast": + from .PILEditor import ContrastEditor + return ContrastEditor.from_model_manager(model_manager, **kwargs) + elif processor_type == "Sharpness": + from .PILEditor import SharpnessEditor + return SharpnessEditor.from_model_manager(model_manager, **kwargs) + elif processor_type == "RIFE": + from .RIFE import RIFESmoother + return RIFESmoother.from_model_manager(model_manager, **kwargs) + else: + raise ValueError(f"invalid processor_type: {processor_type}") + + class SequencialProcessor(VideoProcessor): def __init__(self, processors=[]): self.processors = processors @staticmethod - def from_model_manager(model_manager, **kwargs): - return SequencialProcessor(**kwargs) + def from_model_manager(model_manager, configs): + processors = [ + 
AutoVideoProcessor.from_model_manager(model_manager, config["processor_type"], **config["config"]) + for config in configs + ] + return SequencialProcessor(processors) def __call__(self, rendered_frames, **kwargs): for processor in self.processors: rendered_frames = processor(rendered_frames, **kwargs) - return rendered_frames \ No newline at end of file + return rendered_frames diff --git a/examples/diffutoon_toon_shading_with_editing_signals.py b/examples/diffutoon_toon_shading_with_editing_signals.py new file mode 100644 index 0000000..428867e --- /dev/null +++ b/examples/diffutoon_toon_shading_with_editing_signals.py @@ -0,0 +1,196 @@ +from diffsynth import SDVideoPipelineRunner +import os + + +# Download models +# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575) +# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt) +# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth) +# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth) +# `models/ControlNet/control_v11f1p_sd15_depth.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth) +# `models/ControlNet/control_v11p_sd15_softedge.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth) +# `models/Annotators/dpt_hybrid-midas-501f0c75.pt`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt) +# `models/Annotators/ControlNetHED.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth) +# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth) +# 
`models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth) +# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16) + +# The original video in the example is https://www.bilibili.com/video/BV1zu4y1s7Ec/. + +config_stage_1 = { + "models": { + "model_list": [ + "models/stable_diffusion/aingdiffusion_v12.safetensors", + "models/ControlNet/control_v11p_sd15_softedge.pth", + "models/ControlNet/control_v11f1p_sd15_depth.pth" + ], + "textual_inversion_folder": "models/textual_inversion", + "device": "cuda", + "lora_alphas": [], + "controlnet_units": [ + { + "processor_id": "softedge", + "model_path": "models/ControlNet/control_v11p_sd15_softedge.pth", + "scale": 0.5 + }, + { + "processor_id": "depth", + "model_path": "models/ControlNet/control_v11f1p_sd15_depth.pth", + "scale": 0.5 + } + ] + }, + "data": { + "input_frames": { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 512, + "width": 512, + "start_frame_id": 0, + "end_frame_id": 30 + }, + "controlnet_frames": [ + { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 512, + "width": 512, + "start_frame_id": 0, + "end_frame_id": 30 + }, + { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 512, + "width": 512, + "start_frame_id": 0, + "end_frame_id": 30 + } + ], + "output_folder": "data/examples/diffutoon_edit/color_video", + "fps": 25 + }, + "smoother_configs": [ + { + "processor_type": "FastBlend", + "config": {} + } + ], + "pipeline": { + "seed": 0, + "pipeline_inputs": { + "prompt": "best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings", + "negative_prompt": "verybadimagenegative_v1.3", + "cfg_scale": 7.0, + "clip_skip": 1, + 
"denoising_strength": 0.9, + "num_inference_steps": 20, + "animatediff_batch_size": 8, + "animatediff_stride": 4, + "unet_batch_size": 8, + "controlnet_batch_size": 8, + "cross_frame_attention": True, + "smoother_progress_ids": [-1], + # The following parameters will be overwritten. You don't need to modify them. + "input_frames": [], + "num_frames": 30, + "width": 512, + "height": 512, + "controlnet_frames": [] + } + } +} + + +config_stage_2 = { + "models": { + "model_list": [ + "models/stable_diffusion/aingdiffusion_v12.safetensors", + "models/AnimateDiff/mm_sd_v15_v2.ckpt", + "models/ControlNet/control_v11f1e_sd15_tile.pth", + "models/ControlNet/control_v11p_sd15_lineart.pth" + ], + "textual_inversion_folder": "models/textual_inversion", + "device": "cuda", + "lora_alphas": [], + "controlnet_units": [ + { + "processor_id": "tile", + "model_path": "models/ControlNet/control_v11f1e_sd15_tile.pth", + "scale": 0.5 + }, + { + "processor_id": "lineart", + "model_path": "models/ControlNet/control_v11p_sd15_lineart.pth", + "scale": 0.5 + } + ] + }, + "data": { + "input_frames": { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 1536, + "width": 1536, + "start_frame_id": 0, + "end_frame_id": 30 + }, + "controlnet_frames": [ + { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 1536, + "width": 1536, + "start_frame_id": 0, + "end_frame_id": 30 + }, + { + "video_file": "data/examples/diffutoon_edit/input_video.mp4", + "image_folder": None, + "height": 1536, + "width": 1536, + "start_frame_id": 0, + "end_frame_id": 30 + } + ], + "output_folder": "data/examples/diffutoon_edit/output", + "fps": 30 + }, + "pipeline": { + "seed": 0, + "pipeline_inputs": { + "prompt": "best quality, perfect anime illustration, light, a girl is dancing, smile, solo", + "negative_prompt": "verybadimagenegative_v1.3", + "cfg_scale": 7.0, + "clip_skip": 2, + "denoising_strength": 1.0, + 
"num_inference_steps": 10, + "animatediff_batch_size": 16, + "animatediff_stride": 8, + "unet_batch_size": 1, + "controlnet_batch_size": 1, + "cross_frame_attention": False, + # The following parameters will be overwritten. You don't need to modify them. + "input_frames": [], + "num_frames": 30, + "width": 1536, + "height": 1536, + "controlnet_frames": [] + } + } +} + + +runner = SDVideoPipelineRunner() +runner.run(config_stage_1) + +# Replace the color video with the synthesized video +config_stage_2["data"]["controlnet_frames"][0] = { + "video_file": os.path.join(config_stage_1["data"]["output_folder"], "video.mp4"), + "image_folder": None, + "height": config_stage_2["data"]["input_frames"]["height"], + "width": config_stage_2["data"]["input_frames"]["width"], + "start_frame_id": None, + "end_frame_id": None +} +runner.run(config_stage_2)