mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-19 06:48:12 +00:00
115 lines
8.1 KiB
Python
115 lines
8.1 KiB
Python
# Per-model lookup tables, keyed by dotted class path.
#
# Structure:
#   {
#       "<dotted path of a model class>": {
#           "<dotted path of a submodule class>": "<dotted path of a wrapper class>",
#           ...
#       },
#       ...
#   }
#
# Every key and value is a plain string holding a dotted import path; nothing
# is imported here.  All wrapper paths point into ``diffsynth.core.vram.layers``
# (``AutoWrappedModule``, ``AutoWrappedLinear`` or ``AutoWrappedNonRecurseModule``).
#
# NOTE(review): the code that consumes this table is not in this file.  It
# presumably resolves the strings to classes and swaps matching submodules for
# the listed wrapper -- confirm against the loader before relying on details
# such as exact-type vs. isinstance matching or entry order.
VRAM_MANAGEMENT_MODULE_MAPS = {
    # ------------------------------------------------------------------
    # Qwen-Image models
    # ------------------------------------------------------------------
    "diffsynth.models.qwen_image_dit.QwenImageDiT": {
        "diffsynth.models.qwen_image_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
    },
    "diffsynth.models.qwen_image_text_encoder.QwenImageTextEncoder": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
        "transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2_5_VLRotaryEmbedding": "diffsynth.core.vram.layers.AutoWrappedModule",
        "transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2_5_VisionPatchEmbed": "diffsynth.core.vram.layers.AutoWrappedModule",
        "transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2_5_VisionRotaryEmbedding": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.qwen_image_vae.QwenImageVAE": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.qwen_image_vae.QwenImageRMS_norm": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.qwen_image_controlnet.BlockWiseControlBlock": {
        "diffsynth.models.qwen_image_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
    },
    # ------------------------------------------------------------------
    # Wan video models
    # ------------------------------------------------------------------
    "diffsynth.models.wan_video_animate_adapter.WanAnimateAdapter": {
        "diffsynth.models.wan_video_animate_adapter.FaceEncoder": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_animate_adapter.EqualLinear": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_animate_adapter.ConvLayer": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_animate_adapter.FusedLeakyReLU": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_animate_adapter.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv1d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_dit_s2v.WanS2VModel": {
        "diffsynth.models.wan_video_dit.Head": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_dit_s2v.WanS2VDiTBlock": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_dit_s2v.CausalAudioEncoder": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_dit.WanModel": {
        "diffsynth.models.wan_video_dit.MLP": "diffsynth.core.vram.layers.AutoWrappedModule",
        # NOTE(review): this is the only entry in the table that uses
        # AutoWrappedNonRecurseModule; VaceWanModel below maps the same
        # DiTBlock class to AutoWrappedModule.  Left as-is -- confirm the
        # difference is intentional.
        "diffsynth.models.wan_video_dit.DiTBlock": "diffsynth.core.vram.layers.AutoWrappedNonRecurseModule",
        "diffsynth.models.wan_video_dit.Head": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_image_encoder.WanImageEncoder": {
        "diffsynth.models.wan_video_image_encoder.VisionTransformer": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_mot.MotWanModel": {
        "diffsynth.models.wan_video_mot.MotWanAttentionBlock": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_motion_controller.WanMotionControllerModel": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
    },
    "diffsynth.models.wan_video_text_encoder.WanTextEncoder": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_text_encoder.T5RelativeEmbedding": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_text_encoder.T5LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_vace.VaceWanModel": {
        "diffsynth.models.wan_video_dit.DiTBlock": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    # WanVideoVAE and WanVideoVAE38 carry identical tables; they are kept as
    # two separate literals so each model can be adjusted independently.
    "diffsynth.models.wan_video_vae.WanVideoVAE": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.RMS_norm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.CausalConv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.Upsample": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.SiLU": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Dropout": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wan_video_vae.WanVideoVAE38": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.RMS_norm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.CausalConv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.wan_video_vae.Upsample": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.SiLU": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Dropout": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    "diffsynth.models.wav2vec.WanS2VAudioEncoder": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
        "torch.nn.Conv1d": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
    # ------------------------------------------------------------------
    # LongCat video model
    # ------------------------------------------------------------------
    "diffsynth.models.longcat_video_dit.LongCatVideoTransformer3DModel": {
        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
        "torch.nn.Conv3d": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.longcat_video_dit.RMSNorm_FP32": "diffsynth.core.vram.layers.AutoWrappedModule",
        "diffsynth.models.longcat_video_dit.LayerNorm_FP32": "diffsynth.core.vram.layers.AutoWrappedModule",
    },
}