update examples and downloaders

This commit is contained in:
Artiprocher
2024-06-27 19:43:50 +08:00
parent 0af60b9c73
commit 0b1704976a
21 changed files with 409 additions and 100 deletions

View File

@@ -1,3 +1,44 @@
# IP-Adapter
The features of IP-Adapter in DiffSynth Studio are not complete yet. Please wait for us.
IP-Adapter is an interesting model, which can adopt the content or style of another image to generate a new image.
## Example: Content Controlling in Stable Diffusion
Based on Stable Diffusion, we can transfer the object to another scene. See [`sd_ipadapter.py`](./sd_ipadapter.py).
|First, we generate a car. The prompt is "masterpiece, best quality, a car".|Next, utilizing IP-Adapter, we move the car to the road. The prompt is "masterpiece, best quality, a car running on the road".|
|-|-|
|![car](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/8530a2f0-f610-4269-a22c-ac6c2f21fc18)|![car_on_the_road](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/b8ccddb2-c423-46d8-bd1a-327fcc074a36)|
## Example: Content and Style Controlling in Stable Diffusion XL
The IP-Adapter model based on Stable Diffusion XL is more powerful. You have the option to use the content or style. See [`sdxl_ipadapter.py`](./sdxl_ipadapter.py).
* Content controlling (original usage of IP-Adapter)
|First, we generate a rabbit.|Next, enable IP-Adapter and let the rabbit jump.|For comparison, disable IP-Adapter to see the generated image.|
|-|-|-|
|![rabbit](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4b452634-ec57-414f-897a-f8c50c74a650)|![rabbit_to_jumping_rabbit](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/b93c5495-0b77-4d97-bcd3-3942858288f2)|![rabbit_to_jumping_rabbit_without_ipa](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/52f37195-65b3-4a38-8d9b-73df37311c15)|
* Style controlling (InstantStyle)
|First, we generate a rabbit.|Next, enable InstantStyle and convert the rabbit to a cat.|For comparison, disable IP-Adapter to see the generated image.|
|-|-|-|
|![rabbit](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/4b452634-ec57-414f-897a-f8c50c74a650)|![rabbit_to_cat](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/a006b281-f643-4ea9-b0da-712289c96059)|![rabbit_to_cat_without_ipa](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/189bd11e-7a10-4c09-8554-0eebde9150fd)|
## Example: Image Fusing (Experimental)
Since IP-Adapter can control the content based on more than one image, we can do something interesting. See [`sdxl_ipadapter_multi_reference.py`](sdxl_ipadapter_multi_reference.py).
We have two Pokémon here:
|Charizard|Pikachu|
|-|-|
|![](https://media.52poke.com/wiki/7/7e/006Charizard.png)|![](https://media.52poke.com/wiki/0/0d/025Pikachu.png)|
Fuse!
|Pikazard ???|
|-|
|![Pikazard](https://github.com/modelscope/DiffSynth-Studio/assets/35051019/807cdb31-94f5-4cc2-a978-3c6a7ffedc5b)|

View File

@@ -0,0 +1,38 @@
from diffsynth import ModelManager, SDImagePipeline, download_models
import torch
# Download models (automatically)
# `models/stable_diffusion/dreamshaper_8.safetensors`: [link](https://civitai.com/api/download/models/128713?type=Model&format=SafeTensor&size=pruned&fp=fp16)
# `models/IpAdapter/stable_diffusion/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors)
# `models/IpAdapter/stable_diffusion/ip-adapter_sd15.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter_sd15.bin)
# `models/textual_inversion/verybadimagenegative_v1.3.pt`: [link](https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16)
download_models(["DreamShaper_8", "IP-Adapter-SD", "TextualInversion_VeryBadImageNegative_v1.3"])

# Load models
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_textual_inversions("models/textual_inversion")
model_manager.load_models([
    # Load the checkpoint that `download_models` fetched above ("DreamShaper_8"),
    # so the script runs without any manually downloaded base model.
    "models/stable_diffusion/dreamshaper_8.safetensors",
    "models/IpAdapter/stable_diffusion/image_encoder/model.safetensors",
    "models/IpAdapter/stable_diffusion/ip-adapter_sd15.bin",
])
pipe = SDImagePipeline.from_model_manager(model_manager)

# Step 1: generate the reference image (a car) from text only.
torch.manual_seed(1)
style_image = pipe(
    prompt="masterpiece, best quality, a car",
    negative_prompt="verybadimagenegative_v1.3",
    cfg_scale=7, clip_skip=2,
    height=512, width=512, num_inference_steps=50,
)
style_image.save("car.jpg")

# Step 2: generate again with a new prompt, passing the image from step 1 as the
# IP-Adapter reference (`ipadapter_images`) at full strength (`ipadapter_scale=1.0`).
image = pipe(
    prompt="masterpiece, best quality, a car running on the road",
    negative_prompt="verybadimagenegative_v1.3",
    cfg_scale=7, clip_skip=2,
    height=512, width=512, num_inference_steps=50,
    ipadapter_images=[style_image], ipadapter_scale=1.0,
)
image.save("car_on_the_road.jpg")

View File

@@ -1,36 +1,61 @@
from diffsynth import ModelManager, SDXLImagePipeline
from diffsynth import ModelManager, SDXLImagePipeline, download_models
import torch
# NOTE(review): this span is a rendered diff hunk (`@@ -1,36 +1,61 @@`). Removed and
# added lines appear next to each other without +/- markers, so several statements
# below carry both their old and new form (duplicated keyword arguments, repeated
# `save` calls). It is not runnable as shown; the comments only orient the reader.
# Download models
# Download models (automatically)
# `models/stable_diffusion_xl/sd_xl_base_1.0.safetensors`: [link](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors)
# `models/IpAdapter/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
# `models/IpAdapter/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.safetensors)
# `models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
# `models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.safetensors)
download_models(["StableDiffusionXL_v1", "IP-Adapter-SDXL"])
# Load models
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
# The list below mixes the old IP-Adapter paths (`models/IpAdapter/...`) with the
# new ones (`models/IpAdapter/stable_diffusion_xl/...`).
model_manager.load_models([
"models/stable_diffusion_xl/sd_xl_base_1.0.safetensors",
"models/IpAdapter/image_encoder/model.safetensors",
"models/IpAdapter/ip-adapter_sdxl.bin"
"models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors",
"models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin"
])
pipe = SDXLImagePipeline.from_model_manager(model_manager)
pipe.ipadapter.set_less_adapter()
torch.manual_seed(0)
torch.manual_seed(123456)
# Reference image: generated from text only and reused as `ipadapter_images` below.
# Old and new prompt / negative prompt / step count are both listed.
style_image = pipe(
prompt="Starry Night, blue sky, by van Gogh",
negative_prompt="dark, gray",
prompt="a rabbit in a garden, colorful flowers",
negative_prompt="anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
cfg_scale=5,
height=1024, width=1024, num_inference_steps=30,
height=1024, width=1024, num_inference_steps=50,
)
style_image.save("style_image.jpg")
style_image.save("rabbit.jpg")
# "a cat" with the reference image; the newer argument line sets
# `ipadapter_use_instant_style=True`.
image = pipe(
prompt="a cat",
negative_prompt="",
cfg_scale=5,
height=1024, width=1024, num_inference_steps=30,
ipadapter_images=[style_image]
height=1024, width=1024, num_inference_steps=50,
ipadapter_images=[style_image], ipadapter_use_instant_style=True
)
image.save("transferred_image.jpg")
image.save("rabbit_to_cat.jpg")
# "a rabbit is jumping" with the reference image, `ipadapter_use_instant_style=False`
# and `ipadapter_scale=0.5`.
image = pipe(
prompt="a rabbit is jumping",
negative_prompt="",
cfg_scale=5,
height=1024, width=1024, num_inference_steps=50,
ipadapter_images=[style_image], ipadapter_use_instant_style=False, ipadapter_scale=0.5
)
image.save("rabbit_to_jumping_rabbit.jpg")
# Same two prompts again without `ipadapter_images`, saved with a `_without_ipa`
# suffix for comparison.
image = pipe(
prompt="a cat",
negative_prompt="",
cfg_scale=5,
height=1024, width=1024, num_inference_steps=50,
)
image.save("rabbit_to_cat_without_ipa.jpg")
image = pipe(
prompt="a rabbit is jumping",
negative_prompt="",
cfg_scale=5,
height=1024, width=1024, num_inference_steps=50,
)
image.save("rabbit_to_jumping_rabbit_without_ipa.jpg")

View File

@@ -0,0 +1,34 @@
from diffsynth import ModelManager, SDXLImagePipeline, download_models
import torch, requests
from PIL import Image
# Download models (automatically)
# `models/stable_diffusion_xl/bluePencilXL_v200.safetensors`: [link](https://civitai.com/api/download/models/245614?type=Model&format=SafeTensor&size=pruned&fp=fp16)
# `models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors)
# `models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin`: [link](https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter_sdxl.safetensors)
download_models(["BluePencilXL_v200", "IP-Adapter-SDXL"])

# Load models
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_models([
    "models/stable_diffusion_xl/bluePencilXL_v200.safetensors",
    "models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors",
    "models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin",
])
pipe = SDXLImagePipeline.from_model_manager(model_manager)


def _fetch_reference_image(url, size=(1024, 1024), timeout=30):
    """Download the image at `url` and return it as an RGB image resized to `size`.

    Raises `requests.HTTPError` on a non-success HTTP status, so a failed download
    is reported as such instead of surfacing later as an image decoding error.
    """
    # A timeout keeps the example from hanging forever on an unresponsive host.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # `convert` forces the image to be fully decoded while the stream is open.
        image = Image.open(response.raw).convert("RGB")
    return image.resize(size)


# Fetch the two reference images and keep local copies next to the output.
image_1 = _fetch_reference_image("https://media.52poke.com/wiki/7/7e/006Charizard.png")
image_1.save("Charizard.jpg")
image_2 = _fetch_reference_image("https://media.52poke.com/wiki/0/0d/025Pikachu.png")
image_2.save("Pikachu.jpg")

# Generate one image conditioned on both references at once.
torch.manual_seed(0)
image = pipe(
    prompt="a pokemon, maybe Charizard, maybe Pikachu",
    negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
    cfg_scale=5,
    height=1024, width=1024, num_inference_steps=50,
    ipadapter_images=[image_1, image_2], ipadapter_use_instant_style=False, ipadapter_scale=0.7,
)
image.save("Pikazard.jpg")