Support flux-fp8

This commit is contained in:
Artiprocher
2024-09-19 10:32:16 +08:00
parent a9fbfa108f
commit 091df1f1e7
4 changed files with 91 additions and 125 deletions

View File

@@ -404,6 +404,77 @@ class FluxDiT(torch.nn.Module):
hidden_states = self.unpatchify(hidden_states, height, width)
return hidden_states
def quantize(self):
    """Wrap this model's Linear and RMSNorm sub-modules in casting proxies.

    The wrappers keep the original (e.g. fp8-stored) parameters untouched and
    cast weight/bias to the incoming activation's dtype and device on every
    forward pass, so low-precision storage coexists with higher-precision
    compute. Mutates the module tree in place; returns None.
    """

    def cast_to(weight, dtype=None, device=None, copy=False):
        # Return `weight` as dtype/device, avoiding a copy when nothing changes.
        if device is None or weight.device == device:
            if not copy:
                if dtype is None or weight.dtype == dtype:
                    return weight
            return weight.to(dtype=dtype, copy=copy)
        # Cross-device move: allocate on the target then copy (allows async copy).
        r = torch.empty_like(weight, dtype=dtype, device=device)
        r.copy_(weight)
        return r

    def cast_weight(s, input=None, dtype=None, device=None):
        # Cast module `s`'s weight to match `input`'s dtype/device unless overridden.
        if input is not None:
            if dtype is None:
                dtype = input.dtype
            if device is None:
                device = input.device
        return cast_to(s.weight, dtype, device)

    def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
        # Cast both weight and (optional) bias of module `s` to match `input`.
        if input is not None:
            if dtype is None:
                dtype = input.dtype
            if bias_dtype is None:
                bias_dtype = dtype
            if device is None:
                device = input.device
        weight = cast_to(s.weight, dtype, device)
        # Fix: layers built with bias=False store bias as None; casting None
        # would raise AttributeError, so pass it through unchanged.
        bias = None if s.bias is None else cast_to(s.bias, bias_dtype, device)
        return weight, bias

    class quantized_layer:
        class Linear(torch.nn.Module):
            """Proxy around a Linear that casts its parameters per call."""

            def __init__(self, module):
                super().__init__()
                self.module = module

            def forward(self, input, **kwargs):
                weight, bias = cast_bias_weight(self.module, input)
                return torch.nn.functional.linear(input, weight, bias)

        class RMSNorm(torch.nn.Module):
            """Proxy around an RMSNorm that casts its weight per call."""

            def __init__(self, module):
                super().__init__()
                self.module = module

            def forward(self, hidden_states, **kwargs):
                weight = cast_weight(self.module, hidden_states)
                input_dtype = hidden_states.dtype
                # Compute the variance term in fp32 for numerical stability,
                # then return to the activation dtype before scaling.
                variance = hidden_states.to(torch.float32).square().mean(-1, keepdim=True)
                hidden_states = hidden_states * torch.rsqrt(variance + self.module.eps)
                return hidden_states.to(input_dtype) * weight

    def replace_layer(model):
        # Depth-first swap of supported layers; other modules are recursed into.
        # NOTE(review): `RMSNorm` is the project's norm class imported elsewhere
        # in this file — confirm it exposes `.weight` and `.eps` as used above.
        for name, module in model.named_children():
            if isinstance(module, torch.nn.Linear):
                setattr(model, name, quantized_layer.Linear(module))
            elif isinstance(module, RMSNorm):
                setattr(model, name, quantized_layer.RMSNorm(module))
            else:
                replace_layer(module)

    replace_layer(self)
@staticmethod

View File

@@ -415,8 +415,10 @@ class ModelManager:
break
def load_model(self, file_path, model_names=None):
def load_model(self, file_path, model_names=None, device=None, torch_dtype=None):
print(f"Loading models from: {file_path}")
if device is None: device = self.device
if torch_dtype is None: torch_dtype = self.torch_dtype
if os.path.isfile(file_path):
state_dict = load_state_dict(file_path)
else:
@@ -425,7 +427,7 @@ class ModelManager:
if model_detector.match(file_path, state_dict):
model_names, models = model_detector.load(
file_path, state_dict,
device=self.device, torch_dtype=self.torch_dtype,
device=device, torch_dtype=torch_dtype,
allowed_model_names=model_names, model_manager=self
)
for model_name, model in zip(model_names, models):
@@ -438,9 +440,9 @@ class ModelManager:
print(f" We cannot detect the model type. No models are loaded.")
def load_models(self, file_path_list, model_names=None, device=None, torch_dtype=None):
    """Load models from every file path in `file_path_list`.

    Args:
        file_path_list: iterable of checkpoint paths to load in order.
        model_names: optional allow-list of model names, forwarded to load_model.
        device: optional placement override; when None, load_model falls back
            to the manager's default device.
        torch_dtype: optional precision override; when None, load_model falls
            back to the manager's default dtype.
    """
    for file_path in file_path_list:
        # Forward the per-call overrides so each file honors the requested
        # placement/precision instead of silently using manager defaults.
        self.load_model(file_path, model_names, device=device, torch_dtype=torch_dtype)
def fetch_model(self, model_name, file_path=None, require_model_path=False):