update flux

This commit is contained in:
Artiprocher
2025-11-21 16:04:50 +08:00
parent 96daa30bcc
commit 0b7dd55ff3
22 changed files with 87 additions and 45 deletions

View File

@@ -1,3 +1,14 @@
+flux_general_vram_config = {
+    "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
+    "torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "torch.nn.Conv2d": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "torch.nn.GroupNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "diffsynth.models.general_modules.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "diffsynth.models.flux_lora_encoder.LoRALayerBlock": "diffsynth.core.vram.layers.AutoWrappedModule",
+    "diffsynth.models.flux_lora_patcher.LoraMerger": "diffsynth.core.vram.layers.AutoWrappedModule",
+}
+
 VRAM_MANAGEMENT_MODULE_MAPS = {
     "diffsynth.models.qwen_image_dit.QwenImageDiT": {
         "diffsynth.models.qwen_image_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
@@ -115,4 +126,28 @@ VRAM_MANAGEMENT_MODULE_MAPS = {
         "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
         "diffsynth.models.flux_dit.RMSNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
     },
+    "diffsynth.models.flux_text_encoder_clip.FluxTextEncoderClip": flux_general_vram_config,
+    "diffsynth.models.flux_vae.FluxVAEEncoder": flux_general_vram_config,
+    "diffsynth.models.flux_vae.FluxVAEDecoder": flux_general_vram_config,
+    "diffsynth.models.flux_controlnet.FluxControlNet": flux_general_vram_config,
+    "diffsynth.models.flux_infiniteyou.InfiniteYouImageProjector": flux_general_vram_config,
+    "diffsynth.models.flux_ipadapter.FluxIpAdapter": flux_general_vram_config,
+    "diffsynth.models.flux_lora_patcher.FluxLoraPatcher": flux_general_vram_config,
+    "diffsynth.models.step1x_connector.Qwen2Connector": flux_general_vram_config,
+    "diffsynth.models.flux_lora_encoder.FluxLoRAEncoder": flux_general_vram_config,
+    "diffsynth.models.flux_text_encoder_t5.FluxTextEncoderT5": {
+        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
+        "torch.nn.Embedding": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "transformers.models.t5.modeling_t5.T5LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "transformers.models.t5.modeling_t5.T5DenseActDense": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "transformers.models.t5.modeling_t5.T5DenseGatedActDense": "diffsynth.core.vram.layers.AutoWrappedModule",
+    },
+    "diffsynth.models.flux_ipadapter.SiglipVisionModelSO400M": {
+        "transformers.models.siglip.modeling_siglip.SiglipVisionEmbeddings": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "transformers.models.siglip.modeling_siglip.SiglipEncoder": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "transformers.models.siglip.modeling_siglip.SiglipMultiheadAttentionPoolingHead": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "torch.nn.MultiheadAttention": "diffsynth.core.vram.layers.AutoWrappedModule",
+        "torch.nn.Linear": "diffsynth.core.vram.layers.AutoWrappedLinear",
+        "torch.nn.LayerNorm": "diffsynth.core.vram.layers.AutoWrappedModule",
+    },
 }
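
The map values are dotted class paths rather than class objects, pairing each module to wrap with its AutoWrapped* replacement so both sides can be imported lazily. A minimal sketch of resolving such a path at runtime; the `resolve_class` helper is hypothetical and not code from this repository:

```python
import importlib

def resolve_class(dotted_path: str):
    # Split "pkg.module.ClassName" into module path and attribute name,
    # import the module, and return the class object.
    module_name, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_name), class_name)

# e.g. resolve_class("torch.nn.Linear") is torch.nn.Linear; applying the same
# lookup to a map value yields the corresponding AutoWrapped* replacement class.
```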

View File

@@ -17,11 +17,19 @@ def load_model(model_class, path, config=None, torch_dtype=torch.bfloat16, devic
     if module_map is not None:
         devices = [vram_config["offload_device"], vram_config["onload_device"], vram_config["preparing_device"], vram_config["computation_device"]]
         device = [d for d in devices if d != "disk"][0]
-        disk_map = DiskMap(path, device, state_dict_converter=state_dict_converter)
+        dtypes = [vram_config["offload_dtype"], vram_config["onload_dtype"], vram_config["preparing_dtype"], vram_config["computation_dtype"]]
+        dtype = [d for d in dtypes if d != "disk"][0]
         if vram_config["offload_device"] != "disk":
-            state_dict = {i: disk_map[i].to(vram_config["offload_dtype"]) for i in disk_map}
+            state_dict = DiskMap(path, device, torch_dtype=dtype)
+            if state_dict_converter is not None:
+                state_dict = state_dict_converter(state_dict)
+            else:
+                state_dict = {i: state_dict[i] for i in state_dict}
             model.load_state_dict(state_dict, assign=True)
-        model = enable_vram_management(model, module_map, vram_config=vram_config, disk_map=disk_map, vram_limit=vram_limit)
+            model = enable_vram_management(model, module_map, vram_config=vram_config, disk_map=None, vram_limit=vram_limit)
+        else:
+            disk_map = DiskMap(path, device, state_dict_converter=state_dict_converter)
+            model = enable_vram_management(model, module_map, vram_config=vram_config, disk_map=disk_map, vram_limit=vram_limit)
     else:
         # Why do we use `DiskMap`?
         # Sometimes a model file contains multiple models,
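
The new branch materializes the whole state dict (applying `state_dict_converter` if one is given) whenever weights are not offloaded to disk, and only keeps a `DiskMap` for `enable_vram_management` when `offload_device == "disk"`. A hedged illustration of the two `vram_config` shapes this distinguishes; the field names come from the code above, the concrete values are made up:

```python
import torch

# Illustrative only: CPU offload keeps weights in RAM, so the loader builds the
# full state dict once and calls enable_vram_management(..., disk_map=None).
vram_config_cpu_offload = {
    "offload_device": "cpu",      "offload_dtype": torch.bfloat16,
    "onload_device": "cuda",      "onload_dtype": torch.bfloat16,
    "preparing_device": "cuda",   "preparing_dtype": torch.bfloat16,
    "computation_device": "cuda", "computation_dtype": torch.bfloat16,
}

# Illustrative only: disk offload takes the other branch, where the DiskMap is
# handed to enable_vram_management so weights can be re-read lazily.
vram_config_disk_offload = {
    **vram_config_cpu_offload,
    "offload_device": "disk",
    "offload_dtype": "disk",
}
```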

View File

@@ -30,12 +30,6 @@ class SingleValueEncoder(torch.nn.Module):
         self.positional_embedding = torch.nn.Parameter(
             torch.randn(self.prefer_len, dim_out)
         )
-        self._initialize_weights()
-
-    def _initialize_weights(self):
-        last_linear = self.prefer_value_embedder[-1]
-        torch.nn.init.zeros_(last_linear.weight)
-        torch.nn.init.zeros_(last_linear.bias)

     def forward(self, value, dtype):
         value = value * 1000

View File

@@ -105,6 +105,8 @@ class FluxImagePipeline(BasePipeline):
         self.lora_loader = FluxLoRALoader

     def enable_lora_merger(self):
+        if not (hasattr(self.dit, "vram_management_enabled") and getattr(self.dit, "vram_management_enabled")):
+            raise ValueError("DiT VRAM management is not enabled.")
         if self.lora_patcher is not None:
             for name, module in self.dit.named_modules():
                 if isinstance(module, AutoWrappedLinear):
@@ -141,7 +143,9 @@ class FluxImagePipeline(BasePipeline):
         pipe.tokenizer_2 = T5TokenizerFast.from_pretrained(tokenizer_2_config.path)
         value_controllers = model_pool.fetch_model("flux_value_controller")
-        if value_controllers is not None: pipe.value_controller = MultiValueEncoder(value_controllers)
+        if value_controllers is not None:
+            pipe.value_controller = MultiValueEncoder(value_controllers)
+            pipe.value_controller.vram_management_enabled = pipe.value_controller.encoders[0].vram_management_enabled
         controlnets = model_pool.fetch_model("flux_controlnet")
         if controlnets is not None: pipe.controlnet = MultiControlNet(controlnets)
         pipe.ipadapter = model_pool.fetch_model("flux_ipadapter")

View File

@@ -8,13 +8,13 @@ def merge_lora_weight(tensors_A, tensors_B):
     return lora_A, lora_B


-def merge_lora(loras: List[Dict[str, torch.Tensor]]):
+def merge_lora(loras: List[Dict[str, torch.Tensor]], alpha=1):
     lora_merged = {}
     keys = [i for i in loras[0].keys() if ".lora_A." in i]
     for key in keys:
         tensors_A = [lora[key] for lora in loras]
         tensors_B = [lora[key.replace(".lora_A.", ".lora_B.")] for lora in loras]
         lora_A, lora_B = merge_lora_weight(tensors_A, tensors_B)
-        lora_merged[key] = lora_A
+        lora_merged[key] = lora_A * alpha
         lora_merged[key.replace(".lora_A.", ".lora_B.")] = lora_B
     return lora_merged
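
The new `alpha` argument scales only the merged `lora_A` factors, and since a LoRA delta is `lora_B @ lora_A`, this scales the whole merged update by `alpha`. A small usage sketch; the tensors are made up, `merge_lora` is the function above (assumed to be importable from this module), and how `merge_lora_weight` combines the per-LoRA factors is not shown in this hunk:

```python
import torch

# Two toy LoRA state dicts for the same layer, with rank-4 and rank-2 factors.
lora_1 = {
    "blocks.0.attn.to_q.lora_A.weight": torch.randn(4, 64),
    "blocks.0.attn.to_q.lora_B.weight": torch.randn(64, 4),
}
lora_2 = {
    "blocks.0.attn.to_q.lora_A.weight": torch.randn(2, 64),
    "blocks.0.attn.to_q.lora_B.weight": torch.randn(64, 2),
}

# Assumes merge_lora (defined above) is in scope.
merged = merge_lora([lora_1, lora_2], alpha=0.5)
# Every merged ".lora_A." tensor is pre-scaled by 0.5, so the effective
# update lora_B @ lora_A carried by the merged weights is halved.
```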