From 79fb9fe6c4efc96eaa5d431589289972835efdae Mon Sep 17 00:00:00 2001
From: Artiprocher <wangye87v5@hotmail.com>
Date: Mon, 5 Feb 2024 15:27:35 +0800
Subject: [PATCH] update colab example

---
 README.md                             |   4 +-
 examples/Diffutoon.ipynb              | 512 ++++++++++++++++++++++++++
 examples/Diffutoon_toon_shading.ipynb | 282 --------------
 3 files changed, 514 insertions(+), 284 deletions(-)
 create mode 100644 examples/Diffutoon.ipynb
 delete mode 100644 examples/Diffutoon_toon_shading.ipynb

diff --git a/README.md b/README.md
index 3778c1e..b231fee 100644
--- a/README.md
+++ b/README.md
@@ -56,13 +56,13 @@ Generate images with Stable Diffusion XL Turbo. You can see `examples/sdxl_turbo
 
 ### Example 4: Toon Shading (Diffutoon)
 
-This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224). This approach is adept for rendering high-resoluton videos with rapid motion. You can easily modify the parameters in the config dict. See `examples/diffutoon_toon_shading.py`. We also provide [an example on Colab](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon_toon_shading.ipynb).
+This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224). This approach is adept at rendering high-resolution videos with rapid motion. You can easily modify the parameters in the config dict. See `examples/diffutoon_toon_shading.py`. We also provide [an example on Colab](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon.ipynb).
 
 https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-4709-be5e-b39af82404dd
 
 ### Example 5: Toon Shading with Editing Signals (Diffutoon)
 
-This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224), supporting video editing signals. See `examples\diffutoon_toon_shading_with_editing_signals.py`.
+This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224), supporting video editing signals. See `examples/diffutoon_toon_shading_with_editing_signals.py`. The editing feature is also supported in the [Colab example](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon.ipynb).
 
 https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/20528af5-5100-474a-8cdc-440b9efdd86c
 
diff --git a/examples/Diffutoon.ipynb b/examples/Diffutoon.ipynb
new file mode 100644
index 0000000..302e03d
--- /dev/null
+++ b/examples/Diffutoon.ipynb
@@ -0,0 +1,512 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8ObdI5jCB8xy"
+      },
+      "source": [
+        "# DiffSynth Studio\n",
+        "\n",
+        "Welcome to DiffSynth Studio! This is an example of Diffutoon."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XSkKX7O2BwuM"
+      },
+      "source": [
+        "## Install"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "msCpt0pLnT8W",
+        "outputId": "35d93b35-451b-4760-d1ee-ef7ff190916e"
+      },
+      "outputs": [],
+      "source": [
+        "!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n",
+        "!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops cupy-cuda12x\n",
+        "%cd /content/DiffSynth-Studio"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5eCu_rlKB3kK"
+      },
+      "source": [
+        "## Download Models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9znMkpVj3qZ1"
+      },
+      "outputs": [],
+      "source": [
+        "import requests\n",
+        "\n",
+        "def download_model(url, file_path):  # fetch `url` and write the response body to `file_path`\n",
+        "  model_file = requests.get(url, allow_redirects=True)\n",
+        "  model_file.raise_for_status()  # fail loudly rather than saving an HTTP error page as a model file\n",
+        "  with open(file_path, \"wb\") as f:\n",
+        "    f.write(model_file.content)\n",
+        "\n",
+        "download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n",
+        "download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth\", \"models/ControlNet/control_v11f1p_sd15_depth.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth\", \"models/ControlNet/control_v11p_sd15_softedge.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt\", \"models/Annotators/dpt_hybrid-midas-501f0c75.pt\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth\", \"models/Annotators/ControlNetHED.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n",
+        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n",
+        "download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "iwOq2lWtKVYS"
+      },
+      "source": [
+        "## Run Diffutoon"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tII_XRY-PJeo"
+      },
+      "source": [
+        "### Config Template"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "vsd2alA3PrGe"
+      },
+      "outputs": [],
+      "source": [
+        "config_stage_1_template = {\n",
+        "    \"models\": {\n",
+        "        \"model_list\": [\n",
+        "            \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n",
+        "            \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n",
+        "            \"models/ControlNet/control_v11f1p_sd15_depth.pth\"\n",
+        "        ],\n",
+        "        \"textual_inversion_folder\": \"models/textual_inversion\",\n",
+        "        \"device\": \"cuda\",\n",
+        "        \"lora_alphas\": [],\n",
+        "        \"controlnet_units\": [\n",
+        "            {\n",
+        "                \"processor_id\": \"softedge\",\n",
+        "                \"model_path\": \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n",
+        "                \"scale\": 0.5\n",
+        "            },\n",
+        "            {\n",
+        "                \"processor_id\": \"depth\",\n",
+        "                \"model_path\": \"models/ControlNet/control_v11f1p_sd15_depth.pth\",\n",
+        "                \"scale\": 0.5\n",
+        "            }\n",
+        "        ]\n",
+        "    },\n",
+        "    \"data\": {\n",
+        "        \"input_frames\": {\n",
+        "            \"video_file\": \"/content/input_video.mp4\",\n",
+        "            \"image_folder\": None,\n",
+        "            \"height\": 512,\n",
+        "            \"width\": 512,\n",
+        "            \"start_frame_id\": 0,\n",
+        "            \"end_frame_id\": 30\n",
+        "        },\n",
+        "        \"controlnet_frames\": [\n",
+        "            {\n",
+        "                \"video_file\": \"/content/input_video.mp4\",\n",
+        "                \"image_folder\": None,\n",
+        "                \"height\": 512,\n",
+        "                \"width\": 512,\n",
+        "                \"start_frame_id\": 0,\n",
+        "                \"end_frame_id\": 30\n",
+        "            },\n",
+        "            {\n",
+        "                \"video_file\": \"/content/input_video.mp4\",\n",
+        "                \"image_folder\": None,\n",
+        "                \"height\": 512,\n",
+        "                \"width\": 512,\n",
+        "                \"start_frame_id\": 0,\n",
+        "                \"end_frame_id\": 30\n",
+        "            }\n",
+        "        ],\n",
+        "        \"output_folder\": \"data/examples/diffutoon_edit/color_video\",\n",
+        "        \"fps\": 25\n",
+        "    },\n",
+        "    \"smoother_configs\": [\n",
+        "        {\n",
+        "            \"processor_type\": \"FastBlend\",\n",
+        "            \"config\": {}\n",
+        "        }\n",
+        "    ],\n",
+        "    \"pipeline\": {\n",
+        "        \"seed\": 0,\n",
+        "        \"pipeline_inputs\": {\n",
+        "            \"prompt\": \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\",\n",
+        "            \"negative_prompt\": \"verybadimagenegative_v1.3\",\n",
+        "            \"cfg_scale\": 7.0,\n",
+        "            \"clip_skip\": 1,\n",
+        "            \"denoising_strength\": 0.9,\n",
+        "            \"num_inference_steps\": 20,\n",
+        "            \"animatediff_batch_size\": 8,\n",
+        "            \"animatediff_stride\": 4,\n",
+        "            \"unet_batch_size\": 8,\n",
+        "            \"controlnet_batch_size\": 8,\n",
+        "            \"cross_frame_attention\": True,\n",
+        "            \"smoother_progress_ids\": [-1],\n",
+        "            # The following parameters will be overwritten. You don't need to modify them.\n",
+        "            \"input_frames\": [],\n",
+        "            \"num_frames\": 30,\n",
+        "            \"width\": 512,\n",
+        "            \"height\": 512,\n",
+        "            \"controlnet_frames\": []\n",
+        "        }\n",
+        "    }\n",
+        "}\n",
+        "\n",
+        "config_stage_2_template = {\n",
+        "    \"models\": {\n",
+        "        \"model_list\": [\n",
+        "            \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n",
+        "            \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n",
+        "            \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
+        "            \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n",
+        "        ],\n",
+        "        \"textual_inversion_folder\": \"models/textual_inversion\",\n",
+        "        \"device\": \"cuda\",\n",
+        "        \"lora_alphas\": [],\n",
+        "        \"controlnet_units\": [\n",
+        "            {\n",
+        "                \"processor_id\": \"tile\",\n",
+        "                \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
+        "                \"scale\": 0.5\n",
+        "            },\n",
+        "            {\n",
+        "                \"processor_id\": \"lineart\",\n",
+        "                \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n",
+        "                \"scale\": 0.5\n",
+        "            }\n",
+        "        ]\n",
+        "    },\n",
+        "    \"data\": {\n",
+        "        \"input_frames\": {\n",
+        "            \"video_file\": \"/content/input_video.mp4\",\n",
+        "            \"image_folder\": None,\n",
+        "            \"height\": 1024,\n",
+        "            \"width\": 1024,\n",
+        "            \"start_frame_id\": 0,\n",
+        "            \"end_frame_id\": 30\n",
+        "        },\n",
+        "        \"controlnet_frames\": [\n",
+        "            {\n",
+        "                \"video_file\": \"/content/input_video.mp4\",\n",
+        "                \"image_folder\": None,\n",
+        "                \"height\": 1024,\n",
+        "                \"width\": 1024,\n",
+        "                \"start_frame_id\": 0,\n",
+        "                \"end_frame_id\": 30\n",
+        "            },\n",
+        "            {\n",
+        "                \"video_file\": \"/content/input_video.mp4\",\n",
+        "                \"image_folder\": None,\n",
+        "                \"height\": 1024,\n",
+        "                \"width\": 1024,\n",
+        "                \"start_frame_id\": 0,\n",
+        "                \"end_frame_id\": 30\n",
+        "            }\n",
+        "        ],\n",
+        "        \"output_folder\": \"/content/output\",\n",
+        "        \"fps\": 25\n",
+        "    },\n",
+        "    \"pipeline\": {\n",
+        "        \"seed\": 0,\n",
+        "        \"pipeline_inputs\": {\n",
+        "            \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n",
+        "            \"negative_prompt\": \"verybadimagenegative_v1.3\",\n",
+        "            \"cfg_scale\": 7.0,\n",
+        "            \"clip_skip\": 2,\n",
+        "            \"denoising_strength\": 1.0,\n",
+        "            \"num_inference_steps\": 10,\n",
+        "            \"animatediff_batch_size\": 16,\n",
+        "            \"animatediff_stride\": 8,\n",
+        "            \"unet_batch_size\": 1,\n",
+        "            \"controlnet_batch_size\": 1,\n",
+        "            \"cross_frame_attention\": False,\n",
+        "            # The following parameters will be overwritten. You don't need to modify them.\n",
+        "            \"input_frames\": [],\n",
+        "            \"num_frames\": 30,\n",
+        "            \"width\": 1536,\n",
+        "            \"height\": 1536,\n",
+        "            \"controlnet_frames\": []\n",
+        "        }\n",
+        "    }\n",
+        "}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "113QAmNHP6T_"
+      },
+      "source": [
+        "### Upload Input Video\n",
+        "\n",
+        "Before you run the following code, please upload your input video to `/content/input_video.mp4`."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CyqAsj1o5U9B"
+      },
+      "source": [
+        "### Toon Shading\n",
+        "\n",
+        "Render your video in an anime style.\n",
+        "\n",
+        "We highly recommend using a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. If you don't have enough VRAM, 1024x1024 is also acceptable.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "761nbrgeKMvj",
+        "outputId": "c0d47d5f-16e9-4a65-e664-9bd5fc491111"
+      },
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "from diffsynth import SDVideoPipelineRunner\n",
+        "\n",
+        "config = copy.deepcopy(config_stage_2_template)  # deep copy so the edits below never leak into the shared template\n",
+        "config[\"data\"][\"input_frames\"] = {\n",
+        "    \"video_file\": \"/content/input_video.mp4\",\n",
+        "    \"image_folder\": None,\n",
+        "    \"height\": 1024,\n",
+        "    \"width\": 1024,\n",
+        "    \"start_frame_id\": 0,\n",
+        "    \"end_frame_id\": 30\n",
+        "}\n",
+        "config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]  # both ControlNet inputs reuse the source-video settings\n",
+        "config[\"data\"][\"output_folder\"] = \"/content/toon_video\"\n",
+        "config[\"data\"][\"fps\"] = 25\n",
+        "\n",
+        "runner = SDVideoPipelineRunner()\n",
+        "runner.run(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9wujhGUmDIwY"
+      },
+      "source": [
+        "Let's see the video!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 420
+        },
+        "id": "TBNAigacAq6h",
+        "outputId": "8f57c3b4-982b-4643-f3dc-53c51bd85a4b"
+      },
+      "outputs": [],
+      "source": [
+        "from IPython.display import HTML\n",
+        "from base64 import b64encode\n",
+        "\n",
+        "mp4 = open(\"/content/toon_video/video.mp4\", \"rb\").read()\n",
+        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+        "HTML(\"\"\"\n",
+        "<video width=400 controls>\n",
+        "<source src=\"%s\" type=\"video/mp4\">\n",
+        "</video>\n",
+        "\"\"\" % data_url)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "48hQfX--5YGi"
+      },
+      "source": [
+        "### Toon Shading with Editing Signals"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bAQ9Zq-3-MH6"
+      },
+      "source": [
+        "In stage 1, input your prompt, and Diffutoon will generate the editing signals in the form of a low-resolution color video."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "BtDzYgIq5bgg",
+        "outputId": "bb27b7b9-7979-4409-f476-f25f0a164ef4"
+      },
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "from diffsynth import SDVideoPipelineRunner\n",
+        "\n",
+        "config_stage_1 = copy.deepcopy(config_stage_1_template)  # deep copy so the edits below never leak into the shared template\n",
+        "config_stage_1[\"data\"][\"input_frames\"] = {\n",
+        "    \"video_file\": \"/content/input_video.mp4\",\n",
+        "    \"image_folder\": None,\n",
+        "    \"height\": 512,\n",
+        "    \"width\": 512,\n",
+        "    \"start_frame_id\": 0,\n",
+        "    \"end_frame_id\": 30\n",
+        "}\n",
+        "config_stage_1[\"data\"][\"controlnet_frames\"] = [config_stage_1[\"data\"][\"input_frames\"], config_stage_1[\"data\"][\"input_frames\"]]  # both ControlNet inputs reuse the source-video settings\n",
+        "config_stage_1[\"data\"][\"output_folder\"] = \"/content/color_video\"  # stage 2 reads video.mp4 from this folder\n",
+        "config_stage_1[\"data\"][\"fps\"] = 25\n",
+        "config_stage_1[\"pipeline\"][\"pipeline_inputs\"][\"prompt\"] = \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\"\n",
+        "\n",
+        "runner = SDVideoPipelineRunner()\n",
+        "runner.run(config_stage_1)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "D9_AWwhi-pA9"
+      },
+      "source": [
+        "In stage 2, Diffutoon will re-render the whole video according to the editing signals."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "JFysCk7y51i_",
+        "outputId": "475050d3-c72e-4e08-b55c-d59ed86b5497"
+      },
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "from diffsynth import SDVideoPipelineRunner\n",
+        "\n",
+        "config_stage_2 = copy.deepcopy(config_stage_2_template)  # deep copy so the edits below never leak into the shared template\n",
+        "config_stage_2[\"data\"][\"input_frames\"] = {\n",
+        "    \"video_file\": \"/content/input_video.mp4\",\n",
+        "    \"image_folder\": None,\n",
+        "    \"height\": 1024,\n",
+        "    \"width\": 1024,\n",
+        "    \"start_frame_id\": 0,\n",
+        "    \"end_frame_id\": 30\n",
+        "}\n",
+        "config_stage_2[\"data\"][\"controlnet_frames\"][0] = {  # editing signal: the color video written by stage 1\n",
+        "    \"video_file\": \"/content/color_video/video.mp4\",\n",
+        "    \"image_folder\": None,\n",
+        "    \"height\": config_stage_2[\"data\"][\"input_frames\"][\"height\"],\n",
+        "    \"width\": config_stage_2[\"data\"][\"input_frames\"][\"width\"],\n",
+        "    \"start_frame_id\": None,\n",
+        "    \"end_frame_id\": None\n",
+        "}\n",
+        "config_stage_2[\"data\"][\"controlnet_frames\"][1] = config_stage_2[\"data\"][\"input_frames\"]  # second ControlNet input reuses the source-video settings\n",
+        "config_stage_2[\"data\"][\"output_folder\"] = \"/content/edit_video\"\n",
+        "config_stage_2[\"data\"][\"fps\"] = 25\n",
+        "\n",
+        "runner = SDVideoPipelineRunner()\n",
+        "runner.run(config_stage_2)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HIPrCAIS_Im0"
+      },
+      "source": [
+        "Let's see the video!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 420
+        },
+        "id": "Y2nz7rew-7VI",
+        "outputId": "fbcbadc6-4045-4aac-dfb0-80bacec003bf"
+      },
+      "outputs": [],
+      "source": [
+        "from IPython.display import HTML\n",
+        "from base64 import b64encode\n",
+        "\n",
+        "mp4 = open(\"/content/edit_video/video.mp4\", \"rb\").read()\n",
+        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+        "HTML(\"\"\"\n",
+        "<video width=400 controls>\n",
+        "<source src=\"%s\" type=\"video/mp4\">\n",
+        "</video>\n",
+        "\"\"\" % data_url)"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [
+        "tII_XRY-PJeo"
+      ],
+      "gpuType": "T4",
+      "provenance": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/examples/Diffutoon_toon_shading.ipynb b/examples/Diffutoon_toon_shading.ipynb
deleted file mode 100644
index bfcedbd..0000000
--- a/examples/Diffutoon_toon_shading.ipynb
+++ /dev/null
@@ -1,282 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "gpuType": "T4",
-      "collapsed_sections": [
-        "tII_XRY-PJeo"
-      ]
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# DiffSynth Studio\n",
-        "\n",
-        "Welcome to DiffSynth Studio! This is an example of Diffutoon."
-      ],
-      "metadata": {
-        "id": "8ObdI5jCB8xy"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Install"
-      ],
-      "metadata": {
-        "id": "XSkKX7O2BwuM"
-      }
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 1,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "msCpt0pLnT8W",
-        "outputId": "48e084bc-c5ad-4d99-e5d9-8be686a57675"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Cloning into 'DiffSynth-Studio'...\n",
-            "remote: Enumerating objects: 259, done.\u001b[K\n",
-            "remote: Counting objects: 100% (259/259), done.\u001b[K\n",
-            "remote: Compressing objects: 100% (168/168), done.\u001b[K\n",
-            "remote: Total 259 (delta 128), reused 203 (delta 81), pack-reused 0\u001b[K\n",
-            "Receiving objects: 100% (259/259), 967.07 KiB | 3.58 MiB/s, done.\n",
-            "Resolving deltas: 100% (128/128), done.\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m202.4/202.4 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.4/196.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.1/82.1 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Building wheel for controlnet-aux (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "/content/DiffSynth-Studio\n"
-          ]
-        }
-      ],
-      "source": [
-        "!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n",
-        "!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops\n",
-        "%cd /content/DiffSynth-Studio"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Download Models"
-      ],
-      "metadata": {
-        "id": "5eCu_rlKB3kK"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "import requests\n",
-        "\n",
-        "\n",
-        "def download_model(url, file_path):\n",
-        "  model_file = requests.get(url, allow_redirects=True)\n",
-        "  with open(file_path, \"wb\") as f:\n",
-        "    f.write(model_file.content)\n",
-        "\n",
-        "download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n",
-        "download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n",
-        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n",
-        "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n",
-        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n",
-        "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n",
-        "download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")"
-      ],
-      "metadata": {
-        "id": "9znMkpVj3qZ1"
-      },
-      "execution_count": 2,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Run Diffutoon"
-      ],
-      "metadata": {
-        "id": "iwOq2lWtKVYS"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Config Template"
-      ],
-      "metadata": {
-        "id": "tII_XRY-PJeo"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "config_template = {\n",
-        "    \"models\": {\n",
-        "        \"model_list\": [\n",
-        "            \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n",
-        "            \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n",
-        "            \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
-        "            \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n",
-        "        ],\n",
-        "        \"textual_inversion_folder\": \"models/textual_inversion\",\n",
-        "        \"device\": \"cuda\",\n",
-        "        \"lora_alphas\": [],\n",
-        "        \"controlnet_units\": [\n",
-        "            {\n",
-        "                \"processor_id\": \"tile\",\n",
-        "                \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n",
-        "                \"scale\": 0.5\n",
-        "            },\n",
-        "            {\n",
-        "                \"processor_id\": \"lineart\",\n",
-        "                \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n",
-        "                \"scale\": 0.5\n",
-        "            }\n",
-        "        ]\n",
-        "    },\n",
-        "    \"data\": {\n",
-        "        \"input_frames\": {\n",
-        "            \"video_file\": \"/content/video_guide.mp4\",\n",
-        "            \"image_folder\": None,\n",
-        "            \"height\": 1024,\n",
-        "            \"width\": 1024,\n",
-        "            \"start_frame_id\": 0,\n",
-        "            \"end_frame_id\": 30\n",
-        "        },\n",
-        "        \"controlnet_frames\": [\n",
-        "            {\n",
-        "                \"video_file\": \"/content/video_guide.mp4\",\n",
-        "                \"image_folder\": None,\n",
-        "                \"height\": 1024,\n",
-        "                \"width\": 1024,\n",
-        "                \"start_frame_id\": 0,\n",
-        "                \"end_frame_id\": 30\n",
-        "            },\n",
-        "            {\n",
-        "                \"video_file\": \"/content/video_guide.mp4\",\n",
-        "                \"image_folder\": None,\n",
-        "                \"height\": 1024,\n",
-        "                \"width\": 1024,\n",
-        "                \"start_frame_id\": 0,\n",
-        "                \"end_frame_id\": 30\n",
-        "            }\n",
-        "        ],\n",
-        "        \"output_folder\": \"/content/output\",\n",
-        "        \"fps\": 30\n",
-        "    },\n",
-        "    \"pipeline\": {\n",
-        "        \"seed\": 0,\n",
-        "        \"pipeline_inputs\": {\n",
-        "            \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n",
-        "            \"negative_prompt\": \"verybadimagenegative_v1.3\",\n",
-        "            \"cfg_scale\": 7.0,\n",
-        "            \"clip_skip\": 2,\n",
-        "            \"denoising_strength\": 1.0,\n",
-        "            \"num_inference_steps\": 10,\n",
-        "            \"animatediff_batch_size\": 16,\n",
-        "            \"animatediff_stride\": 8,\n",
-        "            \"unet_batch_size\": 1,\n",
-        "            \"controlnet_batch_size\": 1,\n",
-        "            \"cross_frame_attention\": False,\n",
-        "            # The following parameters will be overwritten. You don't need to modify them.\n",
-        "            \"input_frames\": [],\n",
-        "            \"num_frames\": 30,\n",
-        "            \"width\": 1536,\n",
-        "            \"height\": 1536,\n",
-        "            \"controlnet_frames\": []\n",
-        "        }\n",
-        "    }\n",
-        "}"
-      ],
-      "metadata": {
-        "id": "vsd2alA3PrGe"
-      },
-      "execution_count": 3,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Run\n",
-        "\n",
-        "Before you run the following code, please upload your input video.\n",
-        "\n",
-        "We highly recommend you to use a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. If you don't have enough VRAM, 1024x1024 is also acceptable."
-      ],
-      "metadata": {
-        "id": "113QAmNHP6T_"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "from diffsynth import SDVideoPipelineRunner\n",
-        "\n",
-        "\n",
-        "config = config_template.copy()\n",
-        "config[\"data\"][\"input_frames\"] = {\n",
-        "    \"video_file\": \"/content/input_video.mp4\",\n",
-        "    \"image_folder\": None,\n",
-        "    \"height\": 1024,\n",
-        "    \"width\": 1024,\n",
-        "    \"start_frame_id\": 0,\n",
-        "    \"end_frame_id\": 16\n",
-        "}\n",
-        "config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]\n",
-        "config[\"data\"][\"output_folder\"] = \"/content/output\"\n",
-        "config[\"data\"][\"fps\"] = 30\n",
-        "\n",
-        "runner = SDVideoPipelineRunner()\n",
-        "runner.run(config)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "761nbrgeKMvj",
-        "outputId": "aea6f1fe-8485-4eb1-ac23-9c1023b3b9cd"
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "100%|██████████| 16/16 [00:00<00:00, 82.74it/s]\n",
-            "100%|██████████| 16/16 [00:04<00:00,  3.71it/s]\n",
-            "100%|██████████| 10/10 [05:17<00:00, 31.78s/it]\n",
-            "Saving images: 100%|██████████| 16/16 [00:06<00:00,  2.38it/s]\n",
-            "Saving video: 100%|██████████| 16/16 [00:00<00:00, 31.93it/s]\n"
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file