mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-23 00:58:11 +00:00
rearrange examples
This commit is contained in:
512
examples/Diffutoon/Diffutoon.ipynb
Normal file
512
examples/Diffutoon/Diffutoon.ipynb
Normal file
@@ -0,0 +1,512 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "8ObdI5jCB8xy"
|
||||
},
|
||||
"source": [
|
||||
"# DiffSynth Studio\n",
|
||||
"\n",
|
||||
"Welcome to DiffSynth Studio! This is an example of Diffutoon."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "XSkKX7O2BwuM"
|
||||
},
|
||||
"source": [
|
||||
"## Install"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "msCpt0pLnT8W",
|
||||
"outputId": "35d93b35-451b-4760-d1ee-ef7ff190916e"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n",
|
||||
"!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops cupy-cuda12x\n",
|
||||
"%cd /content/DiffSynth-Studio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5eCu_rlKB3kK"
|
||||
},
|
||||
"source": [
|
||||
"## Download Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "9znMkpVj3qZ1"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def download_model(url, file_path):\n",
|
||||
" model_file = requests.get(url, allow_redirects=True)\n",
|
||||
" with open(file_path, \"wb\") as f:\n",
|
||||
" f.write(model_file.content)\n",
|
||||
"\n",
|
||||
"download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n",
|
||||
"download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth\", \"models/ControlNet/control_v11f1p_sd15_depth.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth\", \"models/ControlNet/control_v11p_sd15_softedge.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt\", \"models/Annotators/dpt_hybrid-midas-501f0c75.pt\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth\", \"models/Annotators/ControlNetHED.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n",
|
||||
"download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n",
|
||||
"download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "iwOq2lWtKVYS"
|
||||
},
|
||||
"source": [
|
||||
"## Run Diffutoon"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "tII_XRY-PJeo"
|
||||
},
|
||||
"source": [
|
||||
"### Config Template"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "vsd2alA3PrGe"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config_stage_1_template = {\n",
|
||||
" \"models\": {\n",
|
||||
" \"model_list\": [\n",
|
||||
" \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n",
|
||||
" \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n",
|
||||
" \"models/ControlNet/control_v11f1p_sd15_depth.pth\"\n",
|
||||
" ],\n",
|
||||
" \"textual_inversion_folder\": \"models/textual_inversion\",\n",
|
||||
" \"device\": \"cuda\",\n",
|
||||
" \"lora_alphas\": [],\n",
|
||||
" \"controlnet_units\": [\n",
|
||||
" {\n",
|
||||
" \"processor_id\": \"softedge\",\n",
|
||||
" \"model_path\": \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n",
|
||||
" \"scale\": 0.5\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"processor_id\": \"depth\",\n",
|
||||
" \"model_path\": \"models/ControlNet/control_v11f1p_sd15_depth.pth\",\n",
|
||||
" \"scale\": 0.5\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" \"data\": {\n",
|
||||
" \"input_frames\": {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 512,\n",
|
||||
" \"width\": 512,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" },\n",
|
||||
" \"controlnet_frames\": [\n",
|
||||
" {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 512,\n",
|
||||
" \"width\": 512,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 512,\n",
|
||||
" \"width\": 512,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"output_folder\": \"data/examples/diffutoon_edit/color_video\",\n",
|
||||
" \"fps\": 25\n",
|
||||
" },\n",
|
||||
" \"smoother_configs\": [\n",
|
||||
" {\n",
|
||||
" \"processor_type\": \"FastBlend\",\n",
|
||||
" \"config\": {}\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"pipeline\": {\n",
|
||||
" \"seed\": 0,\n",
|
||||
" \"pipeline_inputs\": {\n",
|
||||
" \"prompt\": \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\",\n",
|
||||
" \"negative_prompt\": \"verybadimagenegative_v1.3\",\n",
|
||||
" \"cfg_scale\": 7.0,\n",
|
||||
" \"clip_skip\": 1,\n",
|
||||
" \"denoising_strength\": 0.9,\n",
|
||||
" \"num_inference_steps\": 20,\n",
|
||||
" \"animatediff_batch_size\": 8,\n",
|
||||
" \"animatediff_stride\": 4,\n",
|
||||
" \"unet_batch_size\": 8,\n",
|
||||
" \"controlnet_batch_size\": 8,\n",
|
||||
" \"cross_frame_attention\": True,\n",
|
||||
" \"smoother_progress_ids\": [-1],\n",
|
||||
" # The following parameters will be overwritten. You don't need to modify them.\n",
|
||||
" \"input_frames\": [],\n",
|
||||
" \"num_frames\": 30,\n",
|
||||
" \"width\": 512,\n",
|
||||
" \"height\": 512,\n",
|
||||
" \"controlnet_frames\": []\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"config_stage_2_template = {\n",
|
||||
" \"models\": {\n",
|
||||
" \"model_list\": [\n",
|
||||
" \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n",
|
||||
" \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n",
|
||||
" \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
|
||||
" \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n",
|
||||
" ],\n",
|
||||
" \"textual_inversion_folder\": \"models/textual_inversion\",\n",
|
||||
" \"device\": \"cuda\",\n",
|
||||
" \"lora_alphas\": [],\n",
|
||||
" \"controlnet_units\": [\n",
|
||||
" {\n",
|
||||
" \"processor_id\": \"tile\",\n",
|
||||
" \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
|
||||
" \"scale\": 0.5\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"processor_id\": \"lineart\",\n",
|
||||
" \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n",
|
||||
" \"scale\": 0.5\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" \"data\": {\n",
|
||||
" \"input_frames\": {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 1024,\n",
|
||||
" \"width\": 1024,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" },\n",
|
||||
" \"controlnet_frames\": [\n",
|
||||
" {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 1024,\n",
|
||||
" \"width\": 1024,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 1024,\n",
|
||||
" \"width\": 1024,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"output_folder\": \"/content/output\",\n",
|
||||
" \"fps\": 25\n",
|
||||
" },\n",
|
||||
" \"pipeline\": {\n",
|
||||
" \"seed\": 0,\n",
|
||||
" \"pipeline_inputs\": {\n",
|
||||
" \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n",
|
||||
" \"negative_prompt\": \"verybadimagenegative_v1.3\",\n",
|
||||
" \"cfg_scale\": 7.0,\n",
|
||||
" \"clip_skip\": 2,\n",
|
||||
" \"denoising_strength\": 1.0,\n",
|
||||
" \"num_inference_steps\": 10,\n",
|
||||
" \"animatediff_batch_size\": 16,\n",
|
||||
" \"animatediff_stride\": 8,\n",
|
||||
" \"unet_batch_size\": 1,\n",
|
||||
" \"controlnet_batch_size\": 1,\n",
|
||||
" \"cross_frame_attention\": False,\n",
|
||||
" # The following parameters will be overwritten. You don't need to modify them.\n",
|
||||
" \"input_frames\": [],\n",
|
||||
" \"num_frames\": 30,\n",
|
||||
" \"width\": 1536,\n",
|
||||
" \"height\": 1536,\n",
|
||||
" \"controlnet_frames\": []\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "113QAmNHP6T_"
|
||||
},
|
||||
"source": [
|
||||
"### Upload Input Video\n",
|
||||
"\n",
|
||||
"Before you run the following code, please upload your input video to `/content/input_video.mp4`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "CyqAsj1o5U9B"
|
||||
},
|
||||
"source": [
|
||||
"### Toon Shading\n",
|
||||
"\n",
|
||||
"Render your video in an anime style.\n",
|
||||
"\n",
|
||||
"We highly recommend you to use a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. If you don't have enough VRAM, 1024x1024 is also acceptable.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "761nbrgeKMvj",
|
||||
"outputId": "c0d47d5f-16e9-4a65-e664-9bd5fc491111"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from diffsynth import SDVideoPipelineRunner\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"config = config_stage_2_template.copy()\n",
|
||||
"config[\"data\"][\"input_frames\"] = {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 1024,\n",
|
||||
" \"width\": 1024,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
"}\n",
|
||||
"config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]\n",
|
||||
"config[\"data\"][\"output_folder\"] = \"/content/toon_video\"\n",
|
||||
"config[\"data\"][\"fps\"] = 25\n",
|
||||
"\n",
|
||||
"runner = SDVideoPipelineRunner()\n",
|
||||
"runner.run(config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "9wujhGUmDIwY"
|
||||
},
|
||||
"source": [
|
||||
"Let's see the video!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 420
|
||||
},
|
||||
"id": "TBNAigacAq6h",
|
||||
"outputId": "8f57c3b4-982b-4643-f3dc-53c51bd85a4b"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import HTML\n",
|
||||
"from base64 import b64encode\n",
|
||||
"\n",
|
||||
"mp4 = open(\"/content/toon_video/video.mp4\", \"rb\").read()\n",
|
||||
"data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
|
||||
"HTML(\"\"\"\n",
|
||||
"<video width=400 controls>\n",
|
||||
"<source src=\"%s\" type=\"video/mp4\">\n",
|
||||
"</video>\n",
|
||||
"\"\"\" % data_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "48hQfX--5YGi"
|
||||
},
|
||||
"source": [
|
||||
"### Toon Shading with Editing Signals"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "bAQ9Zq-3-MH6"
|
||||
},
|
||||
"source": [
|
||||
"In stage 1, input your prompt, and diffutoon will generate the editing signals in the format of low-resolution color video."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "BtDzYgIq5bgg",
|
||||
"outputId": "bb27b7b9-7979-4409-f476-f25f0a164ef4"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from diffsynth import SDVideoPipelineRunner\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"config_stage_1 = config_stage_1_template.copy()\n",
|
||||
"config_stage_1[\"data\"][\"input_frames\"] = {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 512,\n",
|
||||
" \"width\": 512,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
"}\n",
|
||||
"config_stage_1[\"data\"][\"controlnet_frames\"] = [config_stage_1[\"data\"][\"input_frames\"], config_stage_1[\"data\"][\"input_frames\"]]\n",
|
||||
"config_stage_1[\"data\"][\"output_folder\"] = \"/content/color_video\"\n",
|
||||
"config_stage_1[\"data\"][\"fps\"] = 25\n",
|
||||
"config_stage_1[\"pipeline\"][\"pipeline_inputs\"][\"prompt\"] = \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\"\n",
|
||||
"\n",
|
||||
"runner = SDVideoPipelineRunner()\n",
|
||||
"runner.run(config_stage_1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "D9_AWwhi-pA9"
|
||||
},
|
||||
"source": [
|
||||
"In stage 2, diffutoon will rerender the whole video according to the editing signals."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "JFysCk7y51i_",
|
||||
"outputId": "475050d3-c72e-4e08-b55c-d59ed86b5497"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from diffsynth import SDVideoPipelineRunner\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"config_stage_2 = config_stage_2_template.copy()\n",
|
||||
"config_stage_2[\"data\"][\"input_frames\"] = {\n",
|
||||
" \"video_file\": \"/content/input_video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": 1024,\n",
|
||||
" \"width\": 1024,\n",
|
||||
" \"start_frame_id\": 0,\n",
|
||||
" \"end_frame_id\": 30\n",
|
||||
"}\n",
|
||||
"config_stage_2[\"data\"][\"controlnet_frames\"][0] = {\n",
|
||||
" \"video_file\": \"/content/color_video/video.mp4\",\n",
|
||||
" \"image_folder\": None,\n",
|
||||
" \"height\": config_stage_2[\"data\"][\"input_frames\"][\"height\"],\n",
|
||||
" \"width\": config_stage_2[\"data\"][\"input_frames\"][\"width\"],\n",
|
||||
" \"start_frame_id\": None,\n",
|
||||
" \"end_frame_id\": None\n",
|
||||
"}\n",
|
||||
"config_stage_2[\"data\"][\"controlnet_frames\"][1] = config_stage_2[\"data\"][\"input_frames\"]\n",
|
||||
"config_stage_2[\"data\"][\"output_folder\"] = \"/content/edit_video\"\n",
|
||||
"config_stage_2[\"data\"][\"fps\"] = 25\n",
|
||||
"\n",
|
||||
"runner = SDVideoPipelineRunner()\n",
|
||||
"runner.run(config_stage_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "HIPrCAIS_Im0"
|
||||
},
|
||||
"source": [
|
||||
"Let's see the video!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 420
|
||||
},
|
||||
"id": "Y2nz7rew-7VI",
|
||||
"outputId": "fbcbadc6-4045-4aac-dfb0-80bacec003bf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import HTML\n",
|
||||
"from base64 import b64encode\n",
|
||||
"\n",
|
||||
"mp4 = open(\"/content/edit_video/video.mp4\", \"rb\").read()\n",
|
||||
"data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
|
||||
"HTML(\"\"\"\n",
|
||||
"<video width=400 controls>\n",
|
||||
"<source src=\"%s\" type=\"video/mp4\">\n",
|
||||
"</video>\n",
|
||||
"\"\"\" % data_url)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [
|
||||
"tII_XRY-PJeo"
|
||||
],
|
||||
"gpuType": "T4",
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
21
examples/Diffutoon/README.md
Normal file
21
examples/Diffutoon/README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Diffutoon
|
||||
|
||||
[Diffutoon](https://arxiv.org/abs/2401.16224) is a toon shading approach. This approach is adept at rendering high-resolution videos with rapid motion.
|
||||
|
||||
## Example: Toon Shading (Diffutoon)
|
||||
|
||||
Directly render realistic videos in a flat style. In this example, you can easily modify the parameters in the config dict. See [`diffutoon_toon_shading.py`](./diffutoon_toon_shading.py). We also provide [an example on Colab](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon/Diffutoon.ipynb).
|
||||
|
||||
https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-4709-be5e-b39af82404dd
|
||||
|
||||
## Example: Toon Shading with Editing Signals (Diffutoon)
|
||||
|
||||
This example supports video editing signals. See [`diffutoon_toon_shading_with_editing_signals.py`](./diffutoon_toon_shading_with_editing_signals.py). The editing feature is also supported in the [Colab example](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon/Diffutoon.ipynb).
|
||||
|
||||
https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/20528af5-5100-474a-8cdc-440b9efdd86c
|
||||
|
||||
## Example: Toon Shading (in native Python code)
|
||||
|
||||
This example is provided for developers. If you don't want to use the config to manage parameters, you can see [`sd_toon_shading.py`](./sd_toon_shading.py) to learn how to use it in native Python code.
|
||||
|
||||
https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/607c199b-6140-410b-a111-3e4ffb01142c
|
||||
94
examples/Diffutoon/diffutoon_toon_shading.py
Normal file
94
examples/Diffutoon/diffutoon_toon_shading.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from diffsynth import SDVideoPipelineRunner
|
||||
|
||||
|
||||
# Download models
|
||||
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||
# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
|
||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||
|
||||
# The original video in the example is https://www.bilibili.com/video/BV1iG411a7sQ/.
|
||||
|
||||
config = {
|
||||
"models": {
|
||||
"model_list": [
|
||||
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
||||
"models/AnimateDiff/mm_sd_v15_v2.ckpt",
|
||||
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
"models/ControlNet/control_v11p_sd15_lineart.pth"
|
||||
],
|
||||
"textual_inversion_folder": "models/textual_inversion",
|
||||
"device": "cuda",
|
||||
"lora_alphas": [],
|
||||
"controlnet_units": [
|
||||
{
|
||||
"processor_id": "tile",
|
||||
"model_path": "models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
"scale": 0.5
|
||||
},
|
||||
{
|
||||
"processor_id": "lineart",
|
||||
"model_path": "models/ControlNet/control_v11p_sd15_lineart.pth",
|
||||
"scale": 0.5
|
||||
}
|
||||
]
|
||||
},
|
||||
"data": {
|
||||
"input_frames": {
|
||||
"video_file": "data/examples/diffutoon/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
"controlnet_frames": [
|
||||
{
|
||||
"video_file": "data/examples/diffutoon/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
{
|
||||
"video_file": "data/examples/diffutoon/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
}
|
||||
],
|
||||
"output_folder": "data/examples/diffutoon/output",
|
||||
"fps": 30
|
||||
},
|
||||
"pipeline": {
|
||||
"seed": 0,
|
||||
"pipeline_inputs": {
|
||||
"prompt": "best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
|
||||
"negative_prompt": "verybadimagenegative_v1.3",
|
||||
"cfg_scale": 7.0,
|
||||
"clip_skip": 2,
|
||||
"denoising_strength": 1.0,
|
||||
"num_inference_steps": 10,
|
||||
"animatediff_batch_size": 16,
|
||||
"animatediff_stride": 8,
|
||||
"unet_batch_size": 1,
|
||||
"controlnet_batch_size": 1,
|
||||
"cross_frame_attention": False,
|
||||
# The following parameters will be overwritten. You don't need to modify them.
|
||||
"input_frames": [],
|
||||
"num_frames": 30,
|
||||
"width": 1536,
|
||||
"height": 1536,
|
||||
"controlnet_frames": []
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
runner = SDVideoPipelineRunner()
|
||||
runner.run(config)
|
||||
@@ -0,0 +1,196 @@
|
||||
from diffsynth import SDVideoPipelineRunner
|
||||
import os
|
||||
|
||||
|
||||
# Download models
|
||||
# `models/stable_diffusion/aingdiffusion_v12.safetensors`: [link](https://civitai.com/api/download/models/229575)
|
||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||
# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
|
||||
# `models/ControlNet/control_v11f1p_sd15_depth.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth)
|
||||
# `models/ControlNet/control_v11p_sd15_softedge.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth)
|
||||
# `models/Annotators/dpt_hybrid-midas-501f0c75.pt`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt)
|
||||
# `models/Annotators/ControlNetHED.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth)
|
||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||
|
||||
# The original video in the example is https://www.bilibili.com/video/BV1zu4y1s7Ec/.
|
||||
|
||||
config_stage_1 = {
|
||||
"models": {
|
||||
"model_list": [
|
||||
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
||||
"models/ControlNet/control_v11p_sd15_softedge.pth",
|
||||
"models/ControlNet/control_v11f1p_sd15_depth.pth"
|
||||
],
|
||||
"textual_inversion_folder": "models/textual_inversion",
|
||||
"device": "cuda",
|
||||
"lora_alphas": [],
|
||||
"controlnet_units": [
|
||||
{
|
||||
"processor_id": "softedge",
|
||||
"model_path": "models/ControlNet/control_v11p_sd15_softedge.pth",
|
||||
"scale": 0.5
|
||||
},
|
||||
{
|
||||
"processor_id": "depth",
|
||||
"model_path": "models/ControlNet/control_v11f1p_sd15_depth.pth",
|
||||
"scale": 0.5
|
||||
}
|
||||
]
|
||||
},
|
||||
"data": {
|
||||
"input_frames": {
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 512,
|
||||
"width": 512,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
"controlnet_frames": [
|
||||
{
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 512,
|
||||
"width": 512,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
{
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 512,
|
||||
"width": 512,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
}
|
||||
],
|
||||
"output_folder": "data/examples/diffutoon_edit/color_video",
|
||||
"fps": 25
|
||||
},
|
||||
"smoother_configs": [
|
||||
{
|
||||
"processor_type": "FastBlend",
|
||||
"config": {}
|
||||
}
|
||||
],
|
||||
"pipeline": {
|
||||
"seed": 0,
|
||||
"pipeline_inputs": {
|
||||
"prompt": "best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings",
|
||||
"negative_prompt": "verybadimagenegative_v1.3",
|
||||
"cfg_scale": 7.0,
|
||||
"clip_skip": 1,
|
||||
"denoising_strength": 0.9,
|
||||
"num_inference_steps": 20,
|
||||
"animatediff_batch_size": 8,
|
||||
"animatediff_stride": 4,
|
||||
"unet_batch_size": 8,
|
||||
"controlnet_batch_size": 8,
|
||||
"cross_frame_attention": True,
|
||||
"smoother_progress_ids": [-1],
|
||||
# The following parameters will be overwritten. You don't need to modify them.
|
||||
"input_frames": [],
|
||||
"num_frames": 30,
|
||||
"width": 512,
|
||||
"height": 512,
|
||||
"controlnet_frames": []
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
config_stage_2 = {
|
||||
"models": {
|
||||
"model_list": [
|
||||
"models/stable_diffusion/aingdiffusion_v12.safetensors",
|
||||
"models/AnimateDiff/mm_sd_v15_v2.ckpt",
|
||||
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
"models/ControlNet/control_v11p_sd15_lineart.pth"
|
||||
],
|
||||
"textual_inversion_folder": "models/textual_inversion",
|
||||
"device": "cuda",
|
||||
"lora_alphas": [],
|
||||
"controlnet_units": [
|
||||
{
|
||||
"processor_id": "tile",
|
||||
"model_path": "models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
"scale": 0.5
|
||||
},
|
||||
{
|
||||
"processor_id": "lineart",
|
||||
"model_path": "models/ControlNet/control_v11p_sd15_lineart.pth",
|
||||
"scale": 0.5
|
||||
}
|
||||
]
|
||||
},
|
||||
"data": {
|
||||
"input_frames": {
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
"controlnet_frames": [
|
||||
{
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
},
|
||||
{
|
||||
"video_file": "data/examples/diffutoon_edit/input_video.mp4",
|
||||
"image_folder": None,
|
||||
"height": 1536,
|
||||
"width": 1536,
|
||||
"start_frame_id": 0,
|
||||
"end_frame_id": 30
|
||||
}
|
||||
],
|
||||
"output_folder": "data/examples/diffutoon_edit/output",
|
||||
"fps": 30
|
||||
},
|
||||
"pipeline": {
|
||||
"seed": 0,
|
||||
"pipeline_inputs": {
|
||||
"prompt": "best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
|
||||
"negative_prompt": "verybadimagenegative_v1.3",
|
||||
"cfg_scale": 7.0,
|
||||
"clip_skip": 2,
|
||||
"denoising_strength": 1.0,
|
||||
"num_inference_steps": 10,
|
||||
"animatediff_batch_size": 16,
|
||||
"animatediff_stride": 8,
|
||||
"unet_batch_size": 1,
|
||||
"controlnet_batch_size": 1,
|
||||
"cross_frame_attention": False,
|
||||
# The following parameters will be overwritten. You don't need to modify them.
|
||||
"input_frames": [],
|
||||
"num_frames": 30,
|
||||
"width": 1536,
|
||||
"height": 1536,
|
||||
"controlnet_frames": []
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
runner = SDVideoPipelineRunner()
|
||||
runner.run(config_stage_1)
|
||||
|
||||
# Replace the color video with the synthesized video
|
||||
config_stage_2["data"]["controlnet_frames"][0] = {
|
||||
"video_file": os.path.join(config_stage_1["data"]["output_folder"], "video.mp4"),
|
||||
"image_folder": None,
|
||||
"height": config_stage_2["data"]["input_frames"]["height"],
|
||||
"width": config_stage_2["data"]["input_frames"]["width"],
|
||||
"start_frame_id": None,
|
||||
"end_frame_id": None
|
||||
}
|
||||
runner.run(config_stage_2)
|
||||
65
examples/Diffutoon/sd_toon_shading.py
Normal file
65
examples/Diffutoon/sd_toon_shading.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video, save_frames
|
||||
from diffsynth.extensions.RIFE import RIFESmoother
|
||||
import torch
|
||||
|
||||
|
||||
# Download models
|
||||
# `models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors`: [link](https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16)
|
||||
# `models/AnimateDiff/mm_sd_v15_v2.ckpt`: [link](https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt)
|
||||
# `models/ControlNet/control_v11p_sd15_lineart.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth)
|
||||
# `models/ControlNet/control_v11f1e_sd15_tile.pth`: [link](https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth)
|
||||
# `models/Annotators/sk_model.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth)
|
||||
# `models/Annotators/sk_model2.pth`: [link](https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth)
|
||||
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
|
||||
# `models/RIFE/flownet.pkl`: [link](https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing)
|
||||
|
||||
|
||||
# Load models
|
||||
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
|
||||
model_manager.load_textual_inversions("models/textual_inversion")
|
||||
model_manager.load_models([
|
||||
"models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
|
||||
"models/AnimateDiff/mm_sd_v15_v2.ckpt",
|
||||
"models/ControlNet/control_v11p_sd15_lineart.pth",
|
||||
"models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
"models/RIFE/flownet.pkl"
|
||||
])
|
||||
pipe = SDVideoPipeline.from_model_manager(
|
||||
model_manager,
|
||||
[
|
||||
ControlNetConfigUnit(
|
||||
processor_id="lineart",
|
||||
model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
|
||||
scale=0.5
|
||||
),
|
||||
ControlNetConfigUnit(
|
||||
processor_id="tile",
|
||||
model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
|
||||
scale=0.5
|
||||
)
|
||||
]
|
||||
)
|
||||
smoother = RIFESmoother.from_model_manager(model_manager)
|
||||
|
||||
# Load video (we only use 60 frames for quick testing)
|
||||
# The original video is here: https://www.bilibili.com/video/BV19w411A7YJ/
|
||||
video = VideoData(
|
||||
video_file="data/bilibili_videos/៸៸᳐_⩊_៸៸᳐ 66 微笑调查队🌻/៸៸᳐_⩊_៸៸᳐ 66 微笑调查队🌻 - 1.66 微笑调查队🌻(Av278681824,P1).mp4",
|
||||
height=1024, width=1024)
|
||||
input_video = [video[i] for i in range(40*60, 41*60)]
|
||||
|
||||
# Toon shading (20G VRAM)
|
||||
torch.manual_seed(0)
|
||||
output_video = pipe(
|
||||
prompt="best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
|
||||
negative_prompt="verybadimagenegative_v1.3",
|
||||
cfg_scale=3, clip_skip=2,
|
||||
controlnet_frames=input_video, num_frames=len(input_video),
|
||||
num_inference_steps=10, height=1024, width=1024,
|
||||
animatediff_batch_size=32, animatediff_stride=16,
|
||||
vram_limit_level=0,
|
||||
)
|
||||
output_video = smoother(output_video)
|
||||
|
||||
# Save video
|
||||
save_video(output_video, "output_video.mp4", fps=60)
|
||||
Reference in New Issue
Block a user