From 79fb9fe6c4efc96eaa5d431589289972835efdae Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Mon, 5 Feb 2024 15:27:35 +0800 Subject: [PATCH] update colab example --- README.md | 4 +- examples/Diffutoon.ipynb | 512 ++++++++++++++++++++++++++ examples/Diffutoon_toon_shading.ipynb | 282 -------------- 3 files changed, 514 insertions(+), 284 deletions(-) create mode 100644 examples/Diffutoon.ipynb delete mode 100644 examples/Diffutoon_toon_shading.ipynb diff --git a/README.md b/README.md index 3778c1e..b231fee 100644 --- a/README.md +++ b/README.md @@ -56,13 +56,13 @@ Generate images with Stable Diffusion XL Turbo. You can see `examples/sdxl_turbo ### Example 4: Toon Shading (Diffutoon) -This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224). This approach is adept for rendering high-resoluton videos with rapid motion. You can easily modify the parameters in the config dict. See `examples/diffutoon_toon_shading.py`. We also provide [an example on Colab](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon_toon_shading.ipynb). +This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224). This approach is adept at rendering high-resolution videos with rapid motion. You can easily modify the parameters in the config dict. See `examples/diffutoon_toon_shading.py`. We also provide [an example on Colab](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon.ipynb). https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-4709-be5e-b39af82404dd ### Example 5: Toon Shading with Editing Signals (Diffutoon) -This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224), supporting video editing signals. See `examples\diffutoon_toon_shading_with_editing_signals.py`. +This example is implemented based on [Diffutoon](https://arxiv.org/abs/2401.16224), supporting video editing signals. 
See `examples\diffutoon_toon_shading_with_editing_signals.py`. The editing feature is also supported in the [Colab example](https://colab.research.google.com/github/Artiprocher/DiffSynth-Studio/blob/main/examples/Diffutoon.ipynb). https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/20528af5-5100-474a-8cdc-440b9efdd86c diff --git a/examples/Diffutoon.ipynb b/examples/Diffutoon.ipynb new file mode 100644 index 0000000..302e03d --- /dev/null +++ b/examples/Diffutoon.ipynb @@ -0,0 +1,524 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "8ObdI5jCB8xy" + }, + "source": [ + "# DiffSynth Studio\n", + "\n", + "Welcome to DiffSynth Studio! This is an example of Diffutoon." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XSkKX7O2BwuM" + }, + "source": [ + "## Install" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "msCpt0pLnT8W", + "outputId": "35d93b35-451b-4760-d1ee-ef7ff190916e" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n", + "!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops cupy-cuda12x\n", + "%cd /content/DiffSynth-Studio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5eCu_rlKB3kK" + }, + "source": [ + "## Download Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9znMkpVj3qZ1" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import requests\n", + "\n", + "\n", + "def download_model(url, file_path):\n", + "    \"\"\"Download `url` and save the response body to `file_path`.\n", + "\n", + "    Raises requests.HTTPError if the server responds with an error status.\n", + "    \"\"\"\n", + "    # Create the destination folder if it is missing.\n", + "    os.makedirs(os.path.dirname(file_path) or \".\", exist_ok=True)\n", + "    with requests.get(url, allow_redirects=True, stream=True) as response:\n", + "        # Fail loudly instead of saving an HTTP error page as a model file.\n", + "        response.raise_for_status()\n", + "        with open(file_path, \"wb\") as f:\n", + "            # Stream in 1 MiB chunks so a large model file is never held fully in memory.\n", + "            for chunk in response.iter_content(chunk_size=1024 * 1024):\n", + "                f.write(chunk)\n", + "\n", + "download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n", +
"download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n", + "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1p_sd15_depth.pth\", \"models/ControlNet/control_v11f1p_sd15_depth.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_softedge.pth\", \"models/ControlNet/control_v11p_sd15_softedge.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/dpt_hybrid-midas-501f0c75.pt\", \"models/Annotators/dpt_hybrid-midas-501f0c75.pt\")\n", + "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth\", \"models/Annotators/ControlNetHED.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n", + "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n", + "download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iwOq2lWtKVYS" + }, + "source": [ + "## Run Diffutoon" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tII_XRY-PJeo" + }, + "source": [ + "### Config Template" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vsd2alA3PrGe" + }, + "outputs": [], + "source": [ + "config_stage_1_template = 
{\n", + " \"models\": {\n", + " \"model_list\": [\n", + " \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n", + " \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n", + " \"models/ControlNet/control_v11f1p_sd15_depth.pth\"\n", + " ],\n", + " \"textual_inversion_folder\": \"models/textual_inversion\",\n", + " \"device\": \"cuda\",\n", + " \"lora_alphas\": [],\n", + " \"controlnet_units\": [\n", + " {\n", + " \"processor_id\": \"softedge\",\n", + " \"model_path\": \"models/ControlNet/control_v11p_sd15_softedge.pth\",\n", + " \"scale\": 0.5\n", + " },\n", + " {\n", + " \"processor_id\": \"depth\",\n", + " \"model_path\": \"models/ControlNet/control_v11f1p_sd15_depth.pth\",\n", + " \"scale\": 0.5\n", + " }\n", + " ]\n", + " },\n", + " \"data\": {\n", + " \"input_frames\": {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 512,\n", + " \"width\": 512,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " },\n", + " \"controlnet_frames\": [\n", + " {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 512,\n", + " \"width\": 512,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " },\n", + " {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 512,\n", + " \"width\": 512,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " }\n", + " ],\n", + " \"output_folder\": \"data/examples/diffutoon_edit/color_video\",\n", + " \"fps\": 25\n", + " },\n", + " \"smoother_configs\": [\n", + " {\n", + " \"processor_type\": \"FastBlend\",\n", + " \"config\": {}\n", + " }\n", + " ],\n", + " \"pipeline\": {\n", + " \"seed\": 0,\n", + " \"pipeline_inputs\": {\n", + " \"prompt\": \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\",\n", + " \"negative_prompt\": \"verybadimagenegative_v1.3\",\n", + " 
\"cfg_scale\": 7.0,\n", + " \"clip_skip\": 1,\n", + " \"denoising_strength\": 0.9,\n", + " \"num_inference_steps\": 20,\n", + " \"animatediff_batch_size\": 8,\n", + " \"animatediff_stride\": 4,\n", + " \"unet_batch_size\": 8,\n", + " \"controlnet_batch_size\": 8,\n", + " \"cross_frame_attention\": True,\n", + " \"smoother_progress_ids\": [-1],\n", + " # The following parameters will be overwritten. You don't need to modify them.\n", + " \"input_frames\": [],\n", + " \"num_frames\": 30,\n", + " \"width\": 512,\n", + " \"height\": 512,\n", + " \"controlnet_frames\": []\n", + " }\n", + " }\n", + "}\n", + "\n", + "config_stage_2_template = {\n", + " \"models\": {\n", + " \"model_list\": [\n", + " \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n", + " \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n", + " \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", + " \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n", + " ],\n", + " \"textual_inversion_folder\": \"models/textual_inversion\",\n", + " \"device\": \"cuda\",\n", + " \"lora_alphas\": [],\n", + " \"controlnet_units\": [\n", + " {\n", + " \"processor_id\": \"tile\",\n", + " \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", + " \"scale\": 0.5\n", + " },\n", + " {\n", + " \"processor_id\": \"lineart\",\n", + " \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n", + " \"scale\": 0.5\n", + " }\n", + " ]\n", + " },\n", + " \"data\": {\n", + " \"input_frames\": {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 1024,\n", + " \"width\": 1024,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " },\n", + " \"controlnet_frames\": [\n", + " {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 1024,\n", + " \"width\": 1024,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " },\n", + " {\n", + " \"video_file\": 
\"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 1024,\n", + " \"width\": 1024,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + " }\n", + " ],\n", + " \"output_folder\": \"/content/output\",\n", + " \"fps\": 25\n", + " },\n", + " \"pipeline\": {\n", + " \"seed\": 0,\n", + " \"pipeline_inputs\": {\n", + " \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n", + " \"negative_prompt\": \"verybadimagenegative_v1.3\",\n", + " \"cfg_scale\": 7.0,\n", + " \"clip_skip\": 2,\n", + " \"denoising_strength\": 1.0,\n", + " \"num_inference_steps\": 10,\n", + " \"animatediff_batch_size\": 16,\n", + " \"animatediff_stride\": 8,\n", + " \"unet_batch_size\": 1,\n", + " \"controlnet_batch_size\": 1,\n", + " \"cross_frame_attention\": False,\n", + " # The following parameters will be overwritten. You don't need to modify them.\n", + " \"input_frames\": [],\n", + " \"num_frames\": 30,\n", + " \"width\": 1536,\n", + " \"height\": 1536,\n", + " \"controlnet_frames\": []\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "113QAmNHP6T_" + }, + "source": [ + "### Upload Input Video\n", + "\n", + "Before you run the following code, please upload your input video to `/content/input_video.mp4`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CyqAsj1o5U9B" + }, + "source": [ + "### Toon Shading\n", + "\n", + "Render your video in an anime style.\n", + "\n", + "We highly recommend you to use a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. 
If you don't have enough VRAM, 1024x1024 is also acceptable.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "761nbrgeKMvj", + "outputId": "c0d47d5f-16e9-4a65-e664-9bd5fc491111" + }, + "outputs": [], + "source": [ + "from diffsynth import SDVideoPipelineRunner\n", + "\n", + "\n", + "config = config_stage_2_template.copy()  # NOTE: shallow copy - the nested data dict stays shared with the template, so the assignments below also change the template\n", + "config[\"data\"][\"input_frames\"] = {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 1024,\n", + " \"width\": 1024,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + "}\n", + "config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]  # both ControlNet entries reuse the input-frame settings\n", + "config[\"data\"][\"output_folder\"] = \"/content/toon_video\"\n", + "config[\"data\"][\"fps\"] = 25\n", + "\n", + "runner = SDVideoPipelineRunner()\n", + "runner.run(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9wujhGUmDIwY" + }, + "source": [ + "Let's see the video!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 420 + }, + "id": "TBNAigacAq6h", + "outputId": "8f57c3b4-982b-4643-f3dc-53c51bd85a4b" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML\n", + "from base64 import b64encode\n", + "\n", + "mp4 = open(\"/content/toon_video/video.mp4\", \"rb\").read()\n", + "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()  # embed the video bytes as a base64 data URL\n", + "HTML(\"\"\"\n", + "<video width=400 controls>\n", + "      <source src=\"%s\" type=\"video/mp4\">\n", + "</video>\n", + "\"\"\" % data_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "48hQfX--5YGi" + }, + "source": [ + "### Toon Shading with Editing Signals" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bAQ9Zq-3-MH6" + }, + "source": [ + "In stage 1, input your prompt, and diffutoon will generate the editing signals in the format of low-resolution color video." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BtDzYgIq5bgg", + "outputId": "bb27b7b9-7979-4409-f476-f25f0a164ef4" + }, + "outputs": [], + "source": [ + "from diffsynth import SDVideoPipelineRunner\n", + "\n", + "\n", + "config_stage_1 = config_stage_1_template.copy()\n", + "config_stage_1[\"data\"][\"input_frames\"] = {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 512,\n", + " \"width\": 512,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + "}\n", + "config_stage_1[\"data\"][\"controlnet_frames\"] = [config_stage_1[\"data\"][\"input_frames\"], config_stage_1[\"data\"][\"input_frames\"]]\n", + "config_stage_1[\"data\"][\"output_folder\"] = \"/content/color_video\"\n", + "config_stage_1[\"data\"][\"fps\"] = 25\n", + "config_stage_1[\"pipeline\"][\"pipeline_inputs\"][\"prompt\"] = \"best quality, perfect anime illustration, orange clothes, night, a girl is dancing, smile, solo, black silk stockings\"\n", 
+ "\n", + "runner = SDVideoPipelineRunner()\n", + "runner.run(config_stage_1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D9_AWwhi-pA9" + }, + "source": [ + "In stage 2, diffutoon will rerender the whole video according to the editing signals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JFysCk7y51i_", + "outputId": "475050d3-c72e-4e08-b55c-d59ed86b5497" + }, + "outputs": [], + "source": [ + "from diffsynth import SDVideoPipelineRunner\n", + "\n", + "\n", + "config_stage_2 = config_stage_2_template.copy()  # shallow copy: the nested data dict is shared with the template\n", + "config_stage_2[\"data\"][\"input_frames\"] = {\n", + " \"video_file\": \"/content/input_video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": 1024,\n", + " \"width\": 1024,\n", + " \"start_frame_id\": 0,\n", + " \"end_frame_id\": 30\n", + "}\n", + "config_stage_2[\"data\"][\"controlnet_frames\"][0] = {  # index 0: the color video written by stage 1\n", + " \"video_file\": \"/content/color_video/video.mp4\",\n", + " \"image_folder\": None,\n", + " \"height\": config_stage_2[\"data\"][\"input_frames\"][\"height\"],\n", + " \"width\": config_stage_2[\"data\"][\"input_frames\"][\"width\"],\n", + " \"start_frame_id\": None,\n", + " \"end_frame_id\": None\n", + "}\n", + "config_stage_2[\"data\"][\"controlnet_frames\"][1] = config_stage_2[\"data\"][\"input_frames\"]  # index 1: the original input video\n", + "config_stage_2[\"data\"][\"output_folder\"] = \"/content/edit_video\"\n", + "config_stage_2[\"data\"][\"fps\"] = 25\n", + "\n", + "runner = SDVideoPipelineRunner()\n", + "runner.run(config_stage_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HIPrCAIS_Im0" + }, + "source": [ + "Let's see the video!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 420 + }, + "id": "Y2nz7rew-7VI", + "outputId": "fbcbadc6-4045-4aac-dfb0-80bacec003bf" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML\n", + "from base64 import b64encode\n", + "\n", + "mp4 = open(\"/content/edit_video/video.mp4\", \"rb\").read()\n", + "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()  # embed the video bytes as a base64 data URL\n", + "HTML(\"\"\"\n", + "<video width=400 controls>\n", + "      <source src=\"%s\" type=\"video/mp4\">\n", + "</video>\n", + "\"\"\" % data_url)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "tII_XRY-PJeo" + ], + "gpuType": "T4", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/Diffutoon_toon_shading.ipynb b/examples/Diffutoon_toon_shading.ipynb deleted file mode 100644 index bfcedbd..0000000 --- a/examples/Diffutoon_toon_shading.ipynb +++ /dev/null @@ -1,282 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "collapsed_sections": [ - "tII_XRY-PJeo" - ] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# DiffSynth Studio\n", - "\n", - "Welcome to DiffSynth Studio! This is an example of Diffutoon." 
- ], - "metadata": { - "id": "8ObdI5jCB8xy" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Install" - ], - "metadata": { - "id": "XSkKX7O2BwuM" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "msCpt0pLnT8W", - "outputId": "48e084bc-c5ad-4d99-e5d9-8be686a57675" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'DiffSynth-Studio'...\n", - "remote: Enumerating objects: 259, done.\u001b[K\n", - "remote: Counting objects: 100% (259/259), done.\u001b[K\n", - "remote: Compressing objects: 100% (168/168), done.\u001b[K\n", - "remote: Total 259 (delta 128), reused 203 (delta 81), pack-reused 0\u001b[K\n", - "Receiving objects: 100% (259/259), 967.07 KiB | 3.58 MiB/s, done.\n", - "Resolving deltas: 100% (128/128), done.\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m202.4/202.4 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.4/196.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.1/82.1 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for controlnet-aux (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "/content/DiffSynth-Studio\n" - ] - } - ], - "source": [ - "!git clone https://github.com/Artiprocher/DiffSynth-Studio.git\n", - "!pip install -q transformers controlnet-aux==0.0.7 streamlit streamlit-drawable-canvas imageio imageio[ffmpeg] safetensors einops\n", - "%cd /content/DiffSynth-Studio" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Download Models" - ], - "metadata": { - "id": "5eCu_rlKB3kK" - } - }, - { - "cell_type": "code", - "source": [ - "import requests\n", - "\n", - "\n", - "def download_model(url, file_path):\n", - " model_file = requests.get(url, allow_redirects=True)\n", - " with open(file_path, \"wb\") as f:\n", - " f.write(model_file.content)\n", - "\n", - "download_model(\"https://civitai.com/api/download/models/229575\", \"models/stable_diffusion/aingdiffusion_v12.safetensors\")\n", - "download_model(\"https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt\", \"models/AnimateDiff/mm_sd_v15_v2.ckpt\")\n", - "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth\", \"models/ControlNet/control_v11p_sd15_lineart.pth\")\n", - "download_model(\"https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth\", \"models/ControlNet/control_v11f1e_sd15_tile.pth\")\n", - "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth\", \"models/Annotators/sk_model.pth\")\n", - "download_model(\"https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth\", \"models/Annotators/sk_model2.pth\")\n", - "download_model(\"https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16\", \"models/textual_inversion/verybadimagenegative_v1.3.pt\")" - ], - "metadata": { - "id": "9znMkpVj3qZ1" - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Run Diffutoon" - ], - "metadata": { - "id": 
"iwOq2lWtKVYS" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Config Template" - ], - "metadata": { - "id": "tII_XRY-PJeo" - } - }, - { - "cell_type": "code", - "source": [ - "config_template = {\n", - " \"models\": {\n", - " \"model_list\": [\n", - " \"models/stable_diffusion/aingdiffusion_v12.safetensors\",\n", - " \"models/AnimateDiff/mm_sd_v15_v2.ckpt\",\n", - " \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", - " \"models/ControlNet/control_v11p_sd15_lineart.pth\"\n", - " ],\n", - " \"textual_inversion_folder\": \"models/textual_inversion\",\n", - " \"device\": \"cuda\",\n", - " \"lora_alphas\": [],\n", - " \"controlnet_units\": [\n", - " {\n", - " \"processor_id\": \"tile\",\n", - " \"model_path\": \"models/ControlNet/control_v11f1e_sd15_tile.pth\",\n", - " \"scale\": 0.5\n", - " },\n", - " {\n", - " \"processor_id\": \"lineart\",\n", - " \"model_path\": \"models/ControlNet/control_v11p_sd15_lineart.pth\",\n", - " \"scale\": 0.5\n", - " }\n", - " ]\n", - " },\n", - " \"data\": {\n", - " \"input_frames\": {\n", - " \"video_file\": \"/content/video_guide.mp4\",\n", - " \"image_folder\": None,\n", - " \"height\": 1024,\n", - " \"width\": 1024,\n", - " \"start_frame_id\": 0,\n", - " \"end_frame_id\": 30\n", - " },\n", - " \"controlnet_frames\": [\n", - " {\n", - " \"video_file\": \"/content/video_guide.mp4\",\n", - " \"image_folder\": None,\n", - " \"height\": 1024,\n", - " \"width\": 1024,\n", - " \"start_frame_id\": 0,\n", - " \"end_frame_id\": 30\n", - " },\n", - " {\n", - " \"video_file\": \"/content/video_guide.mp4\",\n", - " \"image_folder\": None,\n", - " \"height\": 1024,\n", - " \"width\": 1024,\n", - " \"start_frame_id\": 0,\n", - " \"end_frame_id\": 30\n", - " }\n", - " ],\n", - " \"output_folder\": \"/content/output\",\n", - " \"fps\": 30\n", - " },\n", - " \"pipeline\": {\n", - " \"seed\": 0,\n", - " \"pipeline_inputs\": {\n", - " \"prompt\": \"best quality, perfect anime illustration, light, a girl is dancing, smile, solo\",\n", 
- " \"negative_prompt\": \"verybadimagenegative_v1.3\",\n", - " \"cfg_scale\": 7.0,\n", - " \"clip_skip\": 2,\n", - " \"denoising_strength\": 1.0,\n", - " \"num_inference_steps\": 10,\n", - " \"animatediff_batch_size\": 16,\n", - " \"animatediff_stride\": 8,\n", - " \"unet_batch_size\": 1,\n", - " \"controlnet_batch_size\": 1,\n", - " \"cross_frame_attention\": False,\n", - " # The following parameters will be overwritten. You don't need to modify them.\n", - " \"input_frames\": [],\n", - " \"num_frames\": 30,\n", - " \"width\": 1536,\n", - " \"height\": 1536,\n", - " \"controlnet_frames\": []\n", - " }\n", - " }\n", - "}" - ], - "metadata": { - "id": "vsd2alA3PrGe" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Run\n", - "\n", - "Before you run the following code, please upload your input video.\n", - "\n", - "We highly recommend you to use a higher resolution for better visual quality. The default resolution of Diffutoon is 1536x1536, which requires 22GB VRAM. If you don't have enough VRAM, 1024x1024 is also acceptable." 
- ], - "metadata": { - "id": "113QAmNHP6T_" - } - }, - { - "cell_type": "code", - "source": [ - "from diffsynth import SDVideoPipelineRunner\n", - "\n", - "\n", - "config = config_template.copy()\n", - "config[\"data\"][\"input_frames\"] = {\n", - " \"video_file\": \"/content/input_video.mp4\",\n", - " \"image_folder\": None,\n", - " \"height\": 1024,\n", - " \"width\": 1024,\n", - " \"start_frame_id\": 0,\n", - " \"end_frame_id\": 16\n", - "}\n", - "config[\"data\"][\"controlnet_frames\"] = [config[\"data\"][\"input_frames\"], config[\"data\"][\"input_frames\"]]\n", - "config[\"data\"][\"output_folder\"] = \"/content/output\"\n", - "config[\"data\"][\"fps\"] = 30\n", - "\n", - "runner = SDVideoPipelineRunner()\n", - "runner.run(config)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "761nbrgeKMvj", - "outputId": "aea6f1fe-8485-4eb1-ac23-9c1023b3b9cd" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "100%|██████████| 16/16 [00:00<00:00, 82.74it/s]\n", - "100%|██████████| 16/16 [00:04<00:00, 3.71it/s]\n", - "100%|██████████| 10/10 [05:17<00:00, 31.78s/it]\n", - "Saving images: 100%|██████████| 16/16 [00:06<00:00, 2.38it/s]\n", - "Saving video: 100%|██████████| 16/16 [00:00<00:00, 31.93it/s]\n" - ] - } - ] - } - ] -} \ No newline at end of file