mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
support i2L
This commit is contained in:
@@ -31,6 +31,38 @@ qwen_image_series = [
|
||||
"model_class": "diffsynth.models.qwen_image_controlnet.QwenImageBlockWiseControlNet",
|
||||
"extra_kwargs": {"additional_in_dim": 4},
|
||||
},
|
||||
{
|
||||
# Example: ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors")
|
||||
"model_hash": "469c78b61e3e31bc9eec0d0af3d3f2f8",
|
||||
"model_name": "siglip2_image_encoder",
|
||||
"model_class": "diffsynth.models.siglip2_image_encoder.Siglip2ImageEncoder",
|
||||
},
|
||||
{
|
||||
# Example: ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors")
|
||||
"model_hash": "5722b5c873720009de96422993b15682",
|
||||
"model_name": "dinov3_image_encoder",
|
||||
"model_class": "diffsynth.models.dinov3_image_encoder.DINOv3ImageEncoder",
|
||||
},
|
||||
{
|
||||
# Example:
|
||||
"model_hash": "a166c33455cdbd89c0888a3645ca5c0f",
|
||||
"model_name": "qwen_image_image2lora_coarse",
|
||||
"model_class": "diffsynth.models.qwen_image_image2lora.QwenImageImage2LoRAModel",
|
||||
},
|
||||
{
|
||||
# Example:
|
||||
"model_hash": "a5476e691767a4da6d3a6634a10f7408",
|
||||
"model_name": "qwen_image_image2lora_fine",
|
||||
"model_class": "diffsynth.models.qwen_image_image2lora.QwenImageImage2LoRAModel",
|
||||
"extra_kwargs": {"residual_length": 37*37+7, "residual_mid_dim": 64}
|
||||
},
|
||||
{
|
||||
# Example:
|
||||
"model_hash": "0aad514690602ecaff932c701cb4b0bb",
|
||||
"model_name": "qwen_image_image2lora_style",
|
||||
"model_class": "diffsynth.models.qwen_image_image2lora.QwenImageImage2LoRAModel",
|
||||
"extra_kwargs": {"compress_dim": 64, "use_residual": False}
|
||||
},
|
||||
]
|
||||
|
||||
wan_series = [
|
||||
|
||||
@@ -32,6 +32,25 @@ VRAM_MANAGEMENT_MODULE_MAPS = {
|
||||
"diffsynth.models.qwen_image_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
|
||||
},
|
||||
"diffsynth.models.siglip2_image_encoder.Siglip2ImageEncoder": {
|
||||
"transformers.models.siglip.modeling_siglip.SiglipVisionEmbeddings": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"transformers.models.siglip.modeling_siglip.SiglipMultiheadAttentionPoolingHead": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
|
||||
},
|
||||
"diffsynth.models.dinov3_image_encoder.DINOv3ImageEncoder": {
|
||||
"transformers.models.dinov3_vit.modeling_dinov3_vit.DINOv3ViTLayerScale": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"transformers.models.dinov3_vit.modeling_dinov3_vit.DINOv3ViTRopePositionEmbedding": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"transformers.models.dinov3_vit.modeling_dinov3_vit.DINOv3ViTEmbeddings": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
|
||||
},
|
||||
"diffsynth.models.qwen_image_image2lora.QwenImageImage2LoRAModel": {
|
||||
"torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
|
||||
},
|
||||
"diffsynth.models.wan_video_animate_adapter.WanAnimateAdapter": {
|
||||
"diffsynth.models.wan_video_animate_adapter.FaceEncoder": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
"diffsynth.models.wan_video_animate_adapter.EqualLinear": "diffsynth.core.vram.layers.AutoWrappedModule",
|
||||
|
||||
Reference in New Issue
Block a user