From d6d14859e331c0bd9bf79dcef0319304f30f2417 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Wed, 21 Aug 2024 16:57:56 +0800 Subject: [PATCH 1/3] update UI --- apps/gradio/DiffSynth_Studio.py | 235 ++++++++++++++++++ .../streamlit/DiffSynth_Studio.py | 0 .../streamlit/pages}/1_Image_Creator.py | 13 +- .../streamlit/pages}/2_Video_Creator.py | 0 mask.jpg | Bin 0 -> 17011 bytes 5 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 apps/gradio/DiffSynth_Studio.py rename DiffSynth_Studio.py => apps/streamlit/DiffSynth_Studio.py (100%) rename {pages => apps/streamlit/pages}/1_Image_Creator.py (95%) rename {pages => apps/streamlit/pages}/2_Video_Creator.py (100%) create mode 100644 mask.jpg diff --git a/apps/gradio/DiffSynth_Studio.py b/apps/gradio/DiffSynth_Studio.py new file mode 100644 index 0000000..05dfba3 --- /dev/null +++ b/apps/gradio/DiffSynth_Studio.py @@ -0,0 +1,235 @@ +import gradio as gr +from diffsynth import ModelManager, SDImagePipeline, SDXLImagePipeline, SD3ImagePipeline, HunyuanDiTImagePipeline, FluxImagePipeline +import os, torch +from PIL import Image +import numpy as np + + +config = { + "Stable Diffusion": { + "model_folder": "models/stable_diffusion", + "pipeline_class": SDImagePipeline, + "default_parameters": { + "height": 512, + "width": 512, + } + }, + "Stable Diffusion XL": { + "model_folder": "models/stable_diffusion_xl", + "pipeline_class": SDXLImagePipeline, + "default_parameters": {} + }, + "Stable Diffusion 3": { + "model_folder": "models/stable_diffusion_3", + "pipeline_class": SD3ImagePipeline, + "default_parameters": {} + }, + "Stable Diffusion XL Turbo": { + "model_folder": "models/stable_diffusion_xl_turbo", + "pipeline_class": SDXLImagePipeline, + "default_parameters": { + "negative_prompt": "", + "cfg_scale": 1.0, + "num_inference_steps": 1, + "height": 512, + "width": 512, + } + }, + "Kolors": { + "model_folder": "models/kolors", + "pipeline_class": SDXLImagePipeline, + "default_parameters": {} + }, + "HunyuanDiT": { + "model_folder": "models/HunyuanDiT", + "pipeline_class": HunyuanDiTImagePipeline, + "default_parameters": {} + }, + "FLUX": { + "model_folder": "models/FLUX", + "pipeline_class": FluxImagePipeline, + "default_parameters": { + "cfg_scale": 1.0, + } + } +} +MAX_NUM_PAINTER_LAYERS = 8 + + +def load_model_list(model_type): + if model_type is None: + return [] + folder = config[model_type]["model_folder"] + file_list = [i for i in os.listdir(folder) if i.endswith(".safetensors")] + if model_type in ["HunyuanDiT", "Kolors", "FLUX"]: + file_list += [i for i in os.listdir(folder) if os.path.isdir(os.path.join(folder, i))] + file_list = sorted(file_list) + return file_list + + +def load_model(model_type, model_path): + model_path = os.path.join(config[model_type]["model_folder"], model_path) + model_manager = ModelManager() + if model_type == "HunyuanDiT": + model_manager.load_models([ + os.path.join(model_path, "clip_text_encoder/pytorch_model.bin"), + os.path.join(model_path, "mt5/pytorch_model.bin"), + os.path.join(model_path, "model/pytorch_model_ema.pt"), + os.path.join(model_path, "sdxl-vae-fp16-fix/diffusion_pytorch_model.bin"), + ]) + elif model_type == "Kolors": + model_manager.load_models([ + os.path.join(model_path, "text_encoder"), + os.path.join(model_path, "unet/diffusion_pytorch_model.safetensors"), + os.path.join(model_path, "vae/diffusion_pytorch_model.safetensors"), + ]) + elif model_type == "FLUX": + model_manager.torch_dtype = torch.bfloat16 + file_list = [ + os.path.join(model_path, "text_encoder/model.safetensors"), + os.path.join(model_path, "text_encoder_2"), + ] + for file_name in os.listdir(model_path): + if file_name.endswith(".safetensors"): + file_list.append(os.path.join(model_path, file_name)) + model_manager.load_models(file_list) + else: + model_manager.load_model(model_path) + pipe = config[model_type]["pipeline_class"].from_model_manager(model_manager) + return model_manager, pipe + + + +model_manager: ModelManager = None +pipe = None + +with gr.Blocks() as app: + gr.Markdown("# DiffSynth-Studio Painter") + with gr.Row(): + with gr.Column(scale=382, min_width=100): + + with gr.Accordion(label="Model"): + model_type = gr.Dropdown(choices=[i for i in config], label="Model type") + model_path = gr.Dropdown(choices=[], interactive=True, label="Model path") + + @gr.on(inputs=model_type, outputs=model_path, triggers=model_type.change) + def model_type_to_model_path(model_type): + return gr.Dropdown(choices=load_model_list(model_type)) + + with gr.Accordion(label="Prompt"): + prompt = gr.Textbox(label="Prompt", lines=3) + negative_prompt = gr.Textbox(label="Negative prompt", lines=1) + cfg_scale = gr.Slider(minimum=1.0, maximum=10.0, value=7.0, step=0.1, interactive=True, label="Classifier-free guidance scale") + embedded_guidance = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, interactive=True, label="Embedded guidance scale (only for FLUX)") + + with gr.Accordion(label="Image"): + num_inference_steps = gr.Slider(minimum=1, maximum=100, value=20, step=1, interactive=True, label="Inference steps") + height = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, interactive=True, label="Height") + width = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, interactive=True, label="Width") + with gr.Column(): + use_fixed_seed = gr.Checkbox(value=True, interactive=False, label="Use fixed seed") + seed = gr.Number(minimum=0, maximum=10**9, value=0, interactive=True, label="Random seed", show_label=False) + + @gr.on( + inputs=[model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width], + outputs=[prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width], + triggers=model_path.change + ) + def model_path_to_default_params(model_type, model_path, prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width): + global model_manager, pipe + if isinstance(model_manager, ModelManager): + model_manager.to("cpu") + torch.cuda.empty_cache() + model_manager, pipe = load_model(model_type, model_path) + cfg_scale = config[model_type]["default_parameters"].get("cfg_scale", cfg_scale) + embedded_guidance = config[model_type]["default_parameters"].get("embedded_guidance", embedded_guidance) + num_inference_steps = config[model_type]["default_parameters"].get("num_inference_steps", num_inference_steps) + height = config[model_type]["default_parameters"].get("height", height) + width = config[model_type]["default_parameters"].get("width", width) + return prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width + + + with gr.Column(scale=618, min_width=100): + with gr.Accordion(label="Painter"): + enable_local_prompt_list = [] + local_prompt_list = [] + mask_scale_list = [] + canvas_list = [] + for painter_layer_id in range(MAX_NUM_PAINTER_LAYERS): + with gr.Tab(label=f"Layer {painter_layer_id}"): + enable_local_prompt = gr.Checkbox(label="Enable", value=False, key=f"enable_local_prompt_{painter_layer_id}") + local_prompt = gr.Textbox(label="Local prompt", key=f"local_prompt_{painter_layer_id}") + mask_scale = gr.Slider(minimum=0.0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Mask scale", key=f"mask_scale_{painter_layer_id}") + canvas = gr.ImageEditor(canvas_size=(512, 1), sources=None, layers=False, interactive=True, image_mode="RGBA", + brush=gr.Brush(default_size=100, default_color="#000000", colors=["#000000"]), + label="Painter", key=f"canvas_{painter_layer_id}") + @gr.on(inputs=[height, width, canvas], outputs=canvas, triggers=[height.change, width.change, canvas.clear, enable_local_prompt.change], show_progress="hidden") + def resize_canvas(height, width, canvas): + h, w = canvas["background"].shape[:2] + if h != height or width != w: + return np.ones((height, width, 3), dtype=np.uint8) * 255 + else: + return canvas + + enable_local_prompt_list.append(enable_local_prompt) + local_prompt_list.append(local_prompt) + mask_scale_list.append(mask_scale) + canvas_list.append(canvas) + with gr.Accordion(label="Results"): + run_button = gr.Button(value="Generate", variant="primary") + output_image = gr.Image(sources=None, show_label=False, interactive=False, type="pil") + with gr.Row(): + with gr.Column(): + output_to_painter_button = gr.Button(value="Set as painter's background") + with gr.Column(): + output_to_input_button = gr.Button(value="Set as input image") + painter_background = gr.State(None) + input_background = gr.State(None) + @gr.on( + inputs=[prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width, seed] + enable_local_prompt_list + local_prompt_list + mask_scale_list + canvas_list, + outputs=[output_image], + triggers=run_button.click + ) + def generate_image(prompt, negative_prompt, cfg_scale, embedded_guidance, num_inference_steps, height, width, seed, *args, progress=gr.Progress()): + global pipe + input_params = { + "prompt": prompt, + "negative_prompt": negative_prompt, + "cfg_scale": cfg_scale, + "num_inference_steps": num_inference_steps, + "height": height, + "width": width, + "progress_bar_cmd": progress.tqdm, + } + if isinstance(pipe, FluxImagePipeline): + input_params["embedded_guidance"] = embedded_guidance + enable_local_prompt_list, local_prompt_list, mask_scale_list, canvas_list = ( + args[0 * MAX_NUM_PAINTER_LAYERS: 1 * MAX_NUM_PAINTER_LAYERS], + args[1 * MAX_NUM_PAINTER_LAYERS: 2 * MAX_NUM_PAINTER_LAYERS], + args[2 * MAX_NUM_PAINTER_LAYERS: 3 * MAX_NUM_PAINTER_LAYERS], + args[3 * MAX_NUM_PAINTER_LAYERS: 4 * MAX_NUM_PAINTER_LAYERS] + ) + local_prompts, masks, mask_scales = [], [], [] + for enable_local_prompt, local_prompt, mask_scale, canvas in zip( + enable_local_prompt_list, local_prompt_list, mask_scale_list, canvas_list + ): + if enable_local_prompt: + local_prompts.append(local_prompt) + masks.append(Image.fromarray(canvas["layers"][0][:, :, -1]).convert("RGB")) + mask_scales.append(mask_scale) + input_params.update({ + "local_prompts": local_prompts, + "masks": masks, + "mask_scales": mask_scales, + }) + torch.manual_seed(seed) + image = pipe(**input_params) + return image + + @gr.on(inputs=[output_image] + canvas_list, outputs=canvas_list, triggers=output_to_painter_button.click) + def send_output_to_painter_background(output_image, *canvas_list): + for canvas in canvas_list: + h, w = canvas["background"].shape[:2] + canvas["background"] = output_image.resize((w, h)) + return tuple(canvas_list) +app.launch() diff --git a/DiffSynth_Studio.py b/apps/streamlit/DiffSynth_Studio.py similarity index 100% rename from DiffSynth_Studio.py rename to apps/streamlit/DiffSynth_Studio.py diff --git a/pages/1_Image_Creator.py b/apps/streamlit/pages/1_Image_Creator.py similarity index 95% rename from pages/1_Image_Creator.py rename to apps/streamlit/pages/1_Image_Creator.py index 3b8ad45..732d219 100644 --- a/pages/1_Image_Creator.py +++ b/apps/streamlit/pages/1_Image_Creator.py @@ -1,4 +1,4 @@ -import torch, os, io +import torch, os, io, json, time import numpy as np from PIL import Image import streamlit as st @@ -275,6 +275,7 @@ with column_input: num_painter_layer = st.number_input("Number of painter layers", min_value=0, max_value=10, step=1, value=0) local_prompts, masks, mask_scales = [], [], [] white_board = Image.fromarray(np.ones((512, 512, 3), dtype=np.uint8) * 255) + painter_layers_json_data = [] for painter_tab_id in range(num_painter_layer): with st.expander(f"Painter layer {painter_tab_id}", expanded=True): enable_local_prompt = st.checkbox(f"Enable prompt {painter_tab_id}", value=True) @@ -293,6 +294,9 @@ with column_input: drawing_mode="freedraw", key=f"canvas_{painter_tab_id}" ) + if canvas_result_local.json_data is not None: + painter_layers_json_data.append(canvas_result_local.json_data.copy()) + painter_layers_json_data[-1]["prompt"] = local_prompt if enable_local_prompt: local_prompts.append(local_prompt) if canvas_result_local.image_data is not None: @@ -302,6 +306,13 @@ with column_input: mask = Image.fromarray(255 - np.array(mask)) masks.append(mask) mask_scales.append(mask_scale) + save_painter_layers = st.button("Save painter layers") + if save_painter_layers: + os.makedirs("data/painter_layers", exist_ok=True) + json_file_path = f"data/painter_layers/{time.time_ns()}.json" + with open(json_file_path, "w") as f: + json.dump(painter_layers_json_data, f, indent=4) + st.markdown(f"Painter layers are saved in {json_file_path}.") with column_output: diff --git a/pages/2_Video_Creator.py b/apps/streamlit/pages/2_Video_Creator.py similarity index 100% rename from pages/2_Video_Creator.py rename to apps/streamlit/pages/2_Video_Creator.py diff --git a/mask.jpg b/mask.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b003d9a16d1d4dbe630cae4962a8d844ec1d3310 GIT binary patch literal 17011 zcmeIwIZzW(9LMqZ_PxE5&A=uBLnatG0M{ zys<;Ys}jYt!xOL(Z>_)^3)c-^GgfM2^ZUPf)xO!;@4k=KYds>FRjJApp_CA6FJe6; z6-1FFSrQdlmQ_`8XkM4*bZR+nPlh+w9~_@dV~F(a9wnV5@nL7x>gR?dyS^nHYHNt(Un!xGZ;`}fPfBu+QykM+3enN3c>BNeu(^AuCR94NLHG9t7dGi-6T(o$}(q+q6tX#EvP2<{i z>o;!Nyk+aQ?JYZY?%KU)@4o#94jn#n^w{we?I%y2KGS*j-1!R^FJI}tdhPm+o40P? zxqI*agWkUWfyYmtK70P+<*V02Z{EIp|Ka1O;m_$VYIpvaZ(+Z6>2{aEIpcD=iwceQ zW;z!`3CUenE!Wn2!bX$gEpKh>=u#uen!&8Ph8{qV>+2ahQGAL4+_}$2QUBwFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p z126ysFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p126ys PFaQHE@LvsNSbbjsIC>6U literal 0 HcmV?d00001 From 66f1ff43e974569bf661f254114cd234cd95d0ca Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Thu, 22 Aug 2024 10:35:58 +0800 Subject: [PATCH 2/3] update examples --- README.md | 60 +++++++++++++----- examples/image_synthesis/README.md | 4 +- .../image_synthesis/flux_text_to_image.py | 10 +-- mask.jpg | Bin 17011 -> 0 bytes 4 files changed, 51 insertions(+), 23 deletions(-) delete mode 100644 mask.jpg diff --git a/README.md b/README.md index 7624597..a57fa45 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ DiffSynth Studio is a Diffusion engine. We have restructured architectures inclu Until now, DiffSynth Studio has supported the following models: +* [FLUX](https://huggingface.co/black-forest-labs/FLUX.1-dev) * [ExVideo](https://huggingface.co/ECNU-CILab/ExVideo-SVD-128f-v1) * [Kolors](https://huggingface.co/Kwai-Kolors/Kolors) * [Stable Diffusion 3](https://huggingface.co/stabilityai/stable-diffusion-3-medium) @@ -30,6 +31,8 @@ Until now, DiffSynth Studio has supported the following models: ## News +- **August 22, 2024** We have implemented an interesting painter that supports all text-to-image models. Now you can create stunning images using the painter, with assistance from AI! + - Use it in our [WebUI](#usage-in-webui). - **June 21, 2024.** 🔥🔥🔥 We propose ExVideo, a post-tuning technique aimed at enhancing the capability of video generation models. We have extended Stable Video Diffusion to achieve the generation of long videos up to 128 frames. - [Project Page](https://ecnu-cilab.github.io/ExVideoProjectPage/) @@ -90,27 +93,16 @@ pip install diffsynth The Python examples are in [`examples`](./examples/). We provide an overview here. -### Long Video Synthesis +### Video Synthesis + +#### Long Video Synthesis We trained an extended video synthesis model, which can generate 128 frames. [`examples/ExVideo`](./examples/ExVideo/) https://github.com/modelscope/DiffSynth-Studio/assets/35051019/d97f6aa9-8064-4b5b-9d49-ed6001bb9acc -### Image Synthesis -Generate high-resolution images, by breaking the limitation of diffusion models! [`examples/image_synthesis`](./examples/image_synthesis/). - -LoRA fine-tuning is supported in [`examples/train`](./examples/train/). - -|Model|Example| -|-|-| -|Stable Diffusion|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/6fc84611-8da6-4a1f-8fee-9a34eba3b4a5)| -|Stable Diffusion XL|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/67687748-e738-438c-aee5-96096f09ac90)| -|Stable Diffusion 3|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4df346db-6f91-420a-b4c1-26e205376098)| -|Kolors|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/53ef6f41-da11-4701-8665-9f64392607bf)| -|Hunyuan-DiT|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/60b022c8-df3f-4541-95ab-bf39f2fa8bb5)| - -### Toon Shading +#### Toon Shading Render realistic videos in a flatten style and enable video editing features. [`examples/Diffutoon`](./examples/Diffutoon/) @@ -118,16 +110,50 @@ https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-47 https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/20528af5-5100-474a-8cdc-440b9efdd86c -### Video Stylization +#### Video Stylization Video stylization without video models. [`examples/diffsynth`](./examples/diffsynth/) https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/59fb2f7b-8de0-4481-b79f-0c3a7361a1ea +#### Image Synthesis + +Generate high-resolution images, by breaking the limitation of diffusion models! [`examples/image_synthesis`](./examples/image_synthesis/). + +LoRA fine-tuning is supported in [`examples/train`](./examples/train/). + +|FLUX|Stable Diffusion 3| +|-|-| +|![image_1024_cfg](https://github.com/user-attachments/assets/6af5b106-0673-4e58-9213-cd9157eef4c0)|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4df346db-6f91-420a-b4c1-26e205376098)| + +|Kolors|Hunyuan-DiT| +|-|-| +|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/53ef6f41-da11-4701-8665-9f64392607bf)|![image_1024](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/60b022c8-df3f-4541-95ab-bf39f2fa8bb5)| + +|Stable Diffusion|Stable Diffusion XL| +|-|-| +|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/6fc84611-8da6-4a1f-8fee-9a34eba3b4a5)|![1024](https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/67687748-e738-438c-aee5-96096f09ac90)| + ## Usage (in WebUI) +Create stunning images using the painter, with assistance from AI! + +https://github.com/user-attachments/assets/95265d21-cdd6-4125-a7cb-9fbcf6ceb7b0 + +**This video is not rendered in real-time.** + +* `Gradio` version + ``` -python -m streamlit run DiffSynth_Studio.py +python apps/gradio/DiffSynth_Studio.py +``` + +![20240822102002](https://github.com/user-attachments/assets/59613157-de51-4109-99b3-97cbffd88076) + +* `Streamlit` version + +``` +python -m streamlit run apps/streamlit/DiffSynth_Studio.py ``` https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/93085557-73f3-4eee-a205-9829591ef954 diff --git a/examples/image_synthesis/README.md b/examples/image_synthesis/README.md index 2c751d0..04ec0e7 100644 --- a/examples/image_synthesis/README.md +++ b/examples/image_synthesis/README.md @@ -6,9 +6,11 @@ Image synthesis is the base feature of DiffSynth Studio. We can generate images Example script: [`flux_text_to_image.py`](./flux_text_to_image.py) +The original version of FLUX doesn't support classifier-free guidance; however, we believe that this guidance mechanism is an important feature for synthesizing beautiful images. You can enable it using the parameter `cfg_scale`, and the extra guidance scale introduced by FLUX is `embedded_guidance`. + |1024*1024 (original)|1024*1024 (classifier-free guidance)|2048*2048 (highres-fix)| |-|-|-| -|![image_1024](https://github.com/user-attachments/assets/d8e66872-8739-43e4-8c2b-eda9daba0450)|![image_1024_cfg](https://github.com/user-attachments/assets/1073c70d-018f-47e4-9342-bc580b4c7c59)|![image_2048_highres](https://github.com/user-attachments/assets/8719c1a8-b341-48c1-a085-364c3a7d25f0)| +|![image_1024](https://github.com/user-attachments/assets/ce01327f-068f-45f5-aba9-0fa45eb26199)|![image_1024_cfg](https://github.com/user-attachments/assets/6af5b106-0673-4e58-9213-cd9157eef4c0)|![image_2048_highres](https://github.com/user-attachments/assets/a4bb776f-d9f0-4450-968c-c5d090a3ab4c)| ### Example: Stable Diffusion diff --git a/examples/image_synthesis/flux_text_to_image.py b/examples/image_synthesis/flux_text_to_image.py index 775c684..a2e5199 100644 --- a/examples/image_synthesis/flux_text_to_image.py +++ b/examples/image_synthesis/flux_text_to_image.py @@ -12,14 +12,14 @@ model_manager.load_models([ ]) pipe = FluxImagePipeline.from_model_manager(model_manager) -prompt = "A captivating fantasy magic woman portrait set in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin." -negative_prompt = "worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, dim, fuzzy, depth of Field, nsfw," +prompt = "CG. Full body. A captivating fantasy magic woman portrait in the deep sea. The woman, with blue spaghetti strap silk dress, swims in the sea. Her flowing silver hair shimmers with every color of the rainbow and cascades down, merging with the floating flora around her. Smooth, delicate and fair skin." +negative_prompt = "dark, worst quality, low quality, monochrome, zombie, interlocked fingers, Aissist, dim, fuzzy, depth of Field, nsfw," # Disable classifier-free guidance (consistent with the original implementation of FLUX.1) torch.manual_seed(6) image = pipe( prompt=prompt, - num_inference_steps=30, + num_inference_steps=30, embedded_guidance=3.5 ) image.save("image_1024.jpg") @@ -27,7 +27,7 @@ image.save("image_1024.jpg") torch.manual_seed(6) image = pipe( prompt=prompt, negative_prompt=negative_prompt, - num_inference_steps=30, cfg_scale=2.0 + num_inference_steps=30, cfg_scale=2.0, embedded_guidance=3.5 ) image.save("image_1024_cfg.jpg") @@ -35,7 +35,7 @@ image.save("image_1024_cfg.jpg") torch.manual_seed(7) image = pipe( prompt=prompt, - num_inference_steps=30, + num_inference_steps=30, embedded_guidance=3.5, input_image=image.resize((2048, 2048)), height=2048, width=2048, denoising_strength=0.6, tiled=True ) image.save("image_2048_highres.jpg") diff --git a/mask.jpg b/mask.jpg deleted file mode 100644 index b003d9a16d1d4dbe630cae4962a8d844ec1d3310..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17011 zcmeIwIZzW(9LMqZ_PxE5&A=uBLnatG0M{ zys<;Ys}jYt!xOL(Z>_)^3)c-^GgfM2^ZUPf)xO!;@4k=KYds>FRjJApp_CA6FJe6; z6-1FFSrQdlmQ_`8XkM4*bZR+nPlh+w9~_@dV~F(a9wnV5@nL7x>gR?dyS^nHYHNt(Un!xGZ;`}fPfBu+QykM+3enN3c>BNeu(^AuCR94NLHG9t7dGi-6T(o$}(q+q6tX#EvP2<{i z>o;!Nyk+aQ?JYZY?%KU)@4o#94jn#n^w{we?I%y2KGS*j-1!R^FJI}tdhPm+o40P? zxqI*agWkUWfyYmtK70P+<*V02Z{EIp|Ka1O;m_$VYIpvaZ(+Z6>2{aEIpcD=iwceQ zW;z!`3CUenE!Wn2!bX$gEpKh>=u#uen!&8Ph8{qV>+2ahQGAL4+_}$2QUBwFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p z126ysFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p126ysFaQHE00S@p126ys PFaQHE@LvsNSbbjsIC>6U From 66e1b382cdaaf212b06c416be26bcd2c3f6fd948 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Thu, 22 Aug 2024 10:37:30 +0800 Subject: [PATCH 3/3] update examples --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a57fa45..e8618f2 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ Video stylization without video models. [`examples/diffsynth`](./examples/diffsy https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/59fb2f7b-8de0-4481-b79f-0c3a7361a1ea -#### Image Synthesis +### Image Synthesis Generate high-resolution images, by breaking the limitation of diffusion models! [`examples/image_synthesis`](./examples/image_synthesis/).