Mirror of https://github.com/modelscope/DiffSynth-Studio.git
Diffusion Templates framework
@@ -9,7 +9,7 @@ from ..utils.lora import GeneralLoRALoader
 from ..models.model_loader import ModelPool
 from ..utils.controlnet import ControlNetInput
 from ..core.device import get_device_name, IS_NPU_AVAILABLE
-from .skills import load_skill_model, load_skill_data_processor
+from .template import load_template_model, load_template_data_processor


 class PipelineUnit:
@@ -320,14 +320,21 @@ class BasePipeline(torch.nn.Module):
     def cfg_guided_model_fn(self, model_fn, cfg_scale, inputs_shared, inputs_posi, inputs_nega, **inputs_others):
         # Positive side forward
+        if inputs_shared.get("positive_only_lora", None) is not None:
+            self.clear_lora(verbose=0)
+            self.load_lora(self.dit, state_dict=inputs_shared["positive_only_lora"], verbose=0)
         noise_pred_posi = model_fn(**inputs_posi, **inputs_shared, **inputs_others)
+        if inputs_shared.get("positive_only_lora", None) is not None:
+            self.clear_lora(verbose=0)

         if cfg_scale != 1.0:
+            if inputs_shared.get("positive_only_lora", None) is not None:
+                self.clear_lora(verbose=0)
             # Negative side forward
             if inputs_shared.get("negative_only_lora", None) is not None:
                 self.load_lora(self.dit, state_dict=inputs_shared["negative_only_lora"], verbose=0)
             noise_pred_nega = model_fn(**inputs_nega, **inputs_shared, **inputs_others)
             if inputs_shared.get("negative_only_lora", None) is not None:
                 self.clear_lora(verbose=0)

         if isinstance(noise_pred_posi, tuple):
             # Separately handling different output types of latents, e.g. video and audio latents.
             noise_pred = tuple(
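Editor's note on the hunk above: the new `positive_only_lora` / `negative_only_lora` entries let a LoRA be hot-swapped onto the DiT for only one branch of classifier-free guidance, with `clear_lora` ensuring it never leaks into the other branch. The combination step itself is truncated out of the hunk; below is a minimal, self-contained sketch of the common CFG arithmetic that consumes the two predictions (the tensors are random stand-ins, not real model outputs):

import torch

# Stand-in predictions; in the pipeline these come from the two model_fn calls above.
noise_pred_posi = torch.randn(1, 16, 32, 32)
noise_pred_nega = torch.randn(1, 16, 32, 32)
cfg_scale = 7.5

# Classifier-free guidance in its common form: start from the negative
# prediction and extrapolate toward the positive one. When cfg_scale == 1.0
# the negative forward is skipped entirely, matching the `if cfg_scale != 1.0`
# branch in the hunk.
noise_pred = noise_pred_nega + cfg_scale * (noise_pred_posi - noise_pred_nega)
print(noise_pred.shape)  # torch.Size([1, 16, 32, 32])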
@@ -341,11 +348,11 @@ class BasePipeline(torch.nn.Module):
         return noise_pred


-    def load_training_skill_model(self, model_config: ModelConfig = None):
+    def load_training_template_model(self, model_config: ModelConfig = None):
         if model_config is not None:
             model_config.download_if_necessary()
-            self.skill_model = load_skill_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device)
-            self.skill_data_processor = load_skill_data_processor(model_config.path)()
+            self.template_model = load_template_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device)
+            self.template_data_processor = load_template_data_processor(model_config.path)()



@@ -60,8 +60,8 @@ def add_gradient_config(parser: argparse.ArgumentParser):
     parser.add_argument("--gradient_accumulation_steps", type=int, default=1, help="Gradient accumulation steps.")
     return parser

-def add_skill_model_config(parser: argparse.ArgumentParser):
-    parser.add_argument("--skill_model_id_or_path", type=str, default=None, help="Model ID or path of skill models.")
+def add_template_model_config(parser: argparse.ArgumentParser):
+    parser.add_argument("--template_model_id_or_path", type=str, default=None, help="Model ID or path of template models.")
     return parser

 def add_general_config(parser: argparse.ArgumentParser):
@@ -71,5 +71,5 @@ def add_general_config(parser: argparse.ArgumentParser):
     parser = add_output_config(parser)
     parser = add_lora_config(parser)
     parser = add_gradient_config(parser)
-    parser = add_skill_model_config(parser)
+    parser = add_template_model_config(parser)
     return parser
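Editor's note: to see the renamed flag end to end, here is a self-contained rerun of the helper from the hunk above (the argument value is a placeholder):

import argparse

# Re-statement of the renamed helper so the flag can be exercised standalone.
def add_template_model_config(parser: argparse.ArgumentParser):
    parser.add_argument("--template_model_id_or_path", type=str, default=None, help="Model ID or path of template models.")
    return parser

parser = add_template_model_config(argparse.ArgumentParser())
args = parser.parse_args(["--template_model_id_or_path", "models/my_template"])
print(args.template_model_id_or_path)  # prints: models/my_template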
diffsynth/diffusion/skills.py (deleted file, 137 lines)
@@ -1,137 +0,0 @@
-import torch, os, importlib, warnings, json
-from typing import Dict, List, Tuple, Union
-from ..core import ModelConfig, load_model
-from ..core.device.npu_compatible_device import get_device_type
-
-
-SkillCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]]
-
-
-class SkillModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-
-    @torch.no_grad()
-    def process_inputs(self, pipe=None, **kwargs):
-        return {}
-
-    def forward(self, **kwargs) -> SkillCache:
-        raise NotImplementedError()
-
-
-class MultiSkillModel(SkillModel):
-    def __init__(self, models: List[SkillModel]):
-        super().__init__()
-        if not isinstance(models, list):
-            models = [models]
-        self.models = torch.nn.ModuleList(models)
-
-    def merge(self, kv_cache_list: List[SkillCache]) -> SkillCache:
-        names = {}
-        for kv_cache in kv_cache_list:
-            for name in kv_cache:
-                names[name] = None
-        kv_cache_merged = {}
-        for name in names:
-            kv_list = [kv_cache.get(name) for kv_cache in kv_cache_list]
-            kv_list = [kv for kv in kv_list if kv is not None]
-            if len(kv_list) > 0:
-                k = torch.concat([kv[0] for kv in kv_list], dim=1)
-                v = torch.concat([kv[1] for kv in kv_list], dim=1)
-                kv_cache_merged[name] = (k, v)
-        return kv_cache_merged
-
-    @torch.no_grad()
-    def process_inputs(self, pipe=None, inputs: List[Dict] = None, **kwargs):
-        return [(i["model_id"], self.models[i["model_id"]].process_inputs(pipe=pipe, **i)) for i in inputs]
-
-    def forward(self, inputs: List[Tuple[int, Dict]], **kwargs) -> SkillCache:
-        kv_cache_list = []
-        for model_id, model_inputs in inputs:
-            kv_cache = self.models[model_id](**model_inputs)
-            kv_cache_list.append(kv_cache)
-        return self.merge(kv_cache_list)
-
-
-def load_skill_model(path, torch_dtype=torch.bfloat16, device="cuda", verbose=1):
-    spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py"))
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    model = load_model(
-        model_class=getattr(module, 'SKILL_MODEL'),
-        config=getattr(module, 'SKILL_MODEL_CONFIG') if hasattr(module, 'SKILL_MODEL_CONFIG') else None,
-        path=os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')),
-        torch_dtype=torch_dtype,
-        device=device,
-    )
-    if verbose > 0:
-        metadata = {
-            "model_architecture": getattr(module, 'SKILL_MODEL').__name__,
-            "code_path": os.path.join(path, "model.py"),
-            "weight_path": os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')),
-        }
-        print(f"Skill model loaded: {json.dumps(metadata, indent=4)}")
-    return model
-
-
-def load_skill_data_processor(path):
-    spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py"))
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    if hasattr(module, 'SKILL_DATA_PROCESSOR'):
-        processor = getattr(module, 'SKILL_DATA_PROCESSOR')
-        return processor
-    else:
-        return None
-
-
-class SkillsPipeline(MultiSkillModel):
-    def __init__(self, models: List[SkillModel]):
-        super().__init__(models)
-
-    @staticmethod
-    def check_vram_config(model_config: ModelConfig):
-        params = [
-            model_config.offload_device, model_config.offload_dtype,
-            model_config.onload_device, model_config.onload_dtype,
-            model_config.preparing_device, model_config.preparing_dtype,
-            model_config.computation_device, model_config.computation_dtype,
-        ]
-        for param in params:
-            if param is not None:
-                warnings.warn("SkillsPipeline doesn't support VRAM management. VRAM config will be ignored.")
-
-    @staticmethod
-    def from_pretrained(
-        torch_dtype: torch.dtype = torch.bfloat16,
-        device: Union[str, torch.device] = get_device_type(),
-        model_configs: list[ModelConfig] = [],
-    ):
-        models = []
-        for model_config in model_configs:
-            SkillsPipeline.check_vram_config(model_config)
-            model_config.download_if_necessary()
-            model = load_skill_model(model_config.path, torch_dtype=torch_dtype, device=device)
-            models.append(model)
-        pipe = SkillsPipeline(models)
-        return pipe
-
-    def call_single_side(self, pipe = None, inputs: List[Dict] = None):
-        inputs = self.process_inputs(pipe=pipe, inputs=inputs)
-        skill_cache = self.forward(inputs)
-        return skill_cache
-
-    @torch.no_grad()
-    def __call__(
-        self,
-        pipe = None,
-        inputs: List[Dict] = None,
-        positive_inputs: List[Dict] = None,
-        negative_inputs: List[Dict] = None,
-    ):
-        shared_cache = self.call_single_side(pipe=pipe, inputs=inputs or [])
-        positive_cache = self.call_single_side(pipe=pipe, inputs=positive_inputs or [])
-        negative_cache = self.call_single_side(pipe=pipe, inputs=negative_inputs or [])
-        positive_cache = self.merge([positive_cache, shared_cache])
-        negative_cache = self.merge([negative_cache, shared_cache])
-        return {"skill_cache": positive_cache, "negative_skill_cache": negative_cache}
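Editor's note: the KV-cache merge logic in the deleted `MultiSkillModel.merge` survives unchanged as `TemplatePipeline.merge_kv_cache` in the new file below. A self-contained toy run of that merge rule, with hypothetical (batch, seq, dim) cache entries:

import torch

# Standalone re-statement of the merge rule: per-name (K, V) pairs from several
# caches are concatenated along the sequence dimension (dim=1).
def merge_kv_cache(kv_cache_list):
    names = {name for kv_cache in kv_cache_list for name in kv_cache}
    merged = {}
    for name in sorted(names):
        kv_list = [kv_cache[name] for kv_cache in kv_cache_list if name in kv_cache]
        k = torch.concat([kv[0] for kv in kv_list], dim=1)
        v = torch.concat([kv[1] for kv in kv_list], dim=1)
        merged[name] = (k, v)
    return merged

# Two hypothetical caches holding tensors for the same block name:
cache_a = {"block_0": (torch.zeros(1, 4, 8), torch.zeros(1, 4, 8))}
cache_b = {"block_0": (torch.ones(1, 2, 8), torch.ones(1, 2, 8))}
print(merge_kv_cache([cache_a, cache_b])["block_0"][0].shape)  # torch.Size([1, 6, 8])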
diffsynth/diffusion/template.py (new file, 176 lines)
@@ -0,0 +1,176 @@
+import torch, os, importlib, warnings, json, inspect
+from typing import Dict, List, Tuple, Union
+from ..core import ModelConfig, load_model
+from ..core.device.npu_compatible_device import get_device_type
+
+
+KVCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]]
+
+
+class TemplateModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    @torch.no_grad()
+    def process_inputs(self, **kwargs):
+        return {}
+
+    def forward(self, **kwargs):
+        raise NotImplementedError()
+
+
+def check_template_model_format(model):
+    if not hasattr(model, "process_inputs"):
+        raise NotImplementedError("`process_inputs` is not implemented in the Template model.")
+    if "kwargs" not in inspect.signature(model.process_inputs).parameters:
+        raise NotImplementedError("`**kwargs` is not included in `process_inputs`.")
+    if not hasattr(model, "forward"):
+        raise NotImplementedError("`forward` is not implemented in the Template model.")
+    if "kwargs" not in inspect.signature(model.forward).parameters:
+        raise NotImplementedError("`**kwargs` is not included in `forward`.")
+
+
+def load_template_model(path, torch_dtype=torch.bfloat16, device="cuda", verbose=1):
+    spec = importlib.util.spec_from_file_location("template_model", os.path.join(path, "model.py"))
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    template_model_path = getattr(module, 'TEMPLATE_MODEL_PATH') if hasattr(module, 'TEMPLATE_MODEL_PATH') else None
+    if template_model_path is not None:
+        # With `TEMPLATE_MODEL_PATH`, a pretrained model will be loaded.
+        model = load_model(
+            model_class=getattr(module, 'TEMPLATE_MODEL'),
+            config=getattr(module, 'TEMPLATE_MODEL_CONFIG') if hasattr(module, 'TEMPLATE_MODEL_CONFIG') else None,
+            path=os.path.join(path, getattr(module, 'TEMPLATE_MODEL_PATH')),
+            torch_dtype=torch_dtype,
+            device=device,
+        )
+    else:
+        # Without `TEMPLATE_MODEL_PATH`, a randomly initialized model or a non-model module will be loaded.
+        model = module.TEMPLATE_MODEL()
+        if hasattr(model, "to"):
+            model = model.to(dtype=torch_dtype, device=device)
+        if hasattr(model, "eval"):
+            model = model.eval()
+    check_template_model_format(model)
+    if verbose > 0:
+        metadata = {
+            "model_architecture": getattr(module, 'TEMPLATE_MODEL').__name__,
+            "code_path": os.path.join(path, "model.py"),
+            "weight_path": template_model_path,
+        }
+        print(f"Template model loaded: {json.dumps(metadata, indent=4)}")
+    return model
+
+
+def load_template_data_processor(path):
+    spec = importlib.util.spec_from_file_location("template_model", os.path.join(path, "model.py"))
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    if hasattr(module, 'TEMPLATE_DATA_PROCESSOR'):
+        processor = getattr(module, 'TEMPLATE_DATA_PROCESSOR')
+        return processor
+    else:
+        return None
+
+
+class TemplatePipeline(torch.nn.Module):
+    def __init__(self, models: List[TemplateModel]):
+        super().__init__()
+        self.models = torch.nn.ModuleList(models)
+
+    def merge_kv_cache(self, kv_cache_list: List[KVCache]) -> KVCache:
+        names = {}
+        for kv_cache in kv_cache_list:
+            for name in kv_cache:
+                names[name] = None
+        kv_cache_merged = {}
+        for name in names:
+            kv_list = [kv_cache.get(name) for kv_cache in kv_cache_list]
+            kv_list = [kv for kv in kv_list if kv is not None]
+            if len(kv_list) > 0:
+                k = torch.concat([kv[0] for kv in kv_list], dim=1)
+                v = torch.concat([kv[1] for kv in kv_list], dim=1)
+                kv_cache_merged[name] = (k, v)
+        return kv_cache_merged
+
+    def merge_template_cache(self, template_cache_list):
+        params = sorted(list(set(sum([list(template_cache.keys()) for template_cache in template_cache_list], []))))
+        template_cache_merged = {}
+        for param in params:
+            data = [template_cache[param] for template_cache in template_cache_list if param in template_cache]
+            if param == "kv_cache":
+                data = self.merge_kv_cache(data)
+            elif len(data) == 1:
+                data = data[0]
+            else:
+                print(f"Conflict detected: `{param}` appears in the outputs of multiple Template models. Only the first one will be retained.")
+                data = data[0]
+            template_cache_merged[param] = data
+        return template_cache_merged
+
+    @staticmethod
+    def check_vram_config(model_config: ModelConfig):
+        params = [
+            model_config.offload_device, model_config.offload_dtype,
+            model_config.onload_device, model_config.onload_dtype,
+            model_config.preparing_device, model_config.preparing_dtype,
+            model_config.computation_device, model_config.computation_dtype,
+        ]
+        for param in params:
+            if param is not None:
+                warnings.warn("TemplatePipeline doesn't support VRAM management. VRAM config will be ignored.")
+
+    @staticmethod
+    def from_pretrained(
+        torch_dtype: torch.dtype = torch.bfloat16,
+        device: Union[str, torch.device] = get_device_type(),
+        model_configs: list[ModelConfig] = [],
+    ):
+        models = []
+        for model_config in model_configs:
+            TemplatePipeline.check_vram_config(model_config)
+            model_config.download_if_necessary()
+            model = load_template_model(model_config.path, torch_dtype=torch_dtype, device=device)
+            models.append(model)
+        pipe = TemplatePipeline(models)
+        return pipe
+
+    @torch.no_grad()
+    def process_inputs(self, inputs: List[Dict], pipe=None, **kwargs):
+        return [(i.get("model_id", 0), self.models[i.get("model_id", 0)].process_inputs(pipe=pipe, **i)) for i in inputs]
+
+    def forward(self, inputs: List[Tuple[int, Dict]], pipe=None, **kwargs):
+        template_cache = []
+        for model_id, model_inputs in inputs:
+            kv_cache = self.models[model_id](pipe=pipe, **model_inputs)
+            template_cache.append(kv_cache)
+        return template_cache
+
+    def call_single_side(self, pipe=None, inputs: List[Dict] = None):
+        inputs = self.process_inputs(pipe=pipe, inputs=inputs)
+        template_cache = self.forward(pipe=pipe, inputs=inputs)
+        template_cache = self.merge_template_cache(template_cache)
+        return template_cache
+
+    @torch.no_grad()
+    def __call__(
+        self,
+        pipe=None,
+        template_inputs: List[Dict] = None,
+        negative_template_inputs: List[Dict] = None,
+        **kwargs,
+    ):
+        template_cache = self.call_single_side(pipe=pipe, inputs=template_inputs or [])
+        negative_template_cache = self.call_single_side(pipe=pipe, inputs=negative_template_inputs or [])
+        required_params = list(inspect.signature(pipe.__call__).parameters.keys())
+        for param in template_cache:
+            if param in required_params:
+                kwargs[param] = template_cache[param]
+            else:
+                print(f"`{param}` is not included in the inputs of `{pipe.__class__.__name__}`. This parameter will be ignored.")
+        for param in negative_template_cache:
+            if "negative_" + param in required_params:
+                kwargs["negative_" + param] = negative_template_cache[param]
+            else:
+                print(f"`{'negative_' + param}` is not included in the inputs of `{pipe.__class__.__name__}`. This parameter will be ignored.")
+        return pipe(**kwargs)
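Editor's note: the loader above treats `model.py` inside a template-model directory as a plug-in that exports `TEMPLATE_MODEL` (and optionally `TEMPLATE_MODEL_CONFIG`, `TEMPLATE_MODEL_PATH`, `TEMPLATE_DATA_PROCESSOR`). A minimal hypothetical `model.py` that satisfies `check_template_model_format`; the class name and its cache contents are illustrative, not from the commit:

# model.py -- a hypothetical plug-in directory entry for load_template_model().
import torch

class MyTemplateModel(torch.nn.Module):  # illustrative name, not from the commit
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(16, 16)

    @torch.no_grad()
    def process_inputs(self, pipe=None, **kwargs):
        # Must accept **kwargs; check_template_model_format enforces this.
        return {"x": torch.randn(1, 4, 16)}

    def forward(self, x=None, pipe=None, **kwargs):
        # Must accept **kwargs. Returning a dict whose "kv_cache" entry maps
        # names to (K, V) pairs lets TemplatePipeline.merge_kv_cache merge it.
        return {"kv_cache": {"block_0": (self.proj(x), self.proj(x))}}

TEMPLATE_MODEL = MyTemplateModel
# TEMPLATE_MODEL_PATH is omitted, so the loader takes the randomly-initialized
# branch; TEMPLATE_DATA_PROCESSOR is likewise optional.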
@@ -28,38 +28,45 @@ class GeneralUnit_RemoveCache(PipelineUnit):
         return inputs_shared, inputs_posi, inputs_nega


-class GeneralUnit_SkillProcessInputs(PipelineUnit):
+class GeneralUnit_TemplateProcessInputs(PipelineUnit):
     # Only used for training
     def __init__(self, data_processor):
         super().__init__(
-            input_params=("skill_inputs",),
-            output_params=("skill_inputs",),
+            input_params=("template_inputs",),
+            output_params=("template_inputs",),
         )
        self.data_processor = data_processor

-    def process(self, pipe, skill_inputs):
-        if not hasattr(pipe, "skill_model"):
+    def process(self, pipe, template_inputs):
+        if not hasattr(pipe, "template_model"):
             return {}
         if self.data_processor is not None:
-            skill_inputs = self.data_processor(**skill_inputs)
-        skill_inputs = pipe.skill_model.process_inputs(pipe=pipe, **skill_inputs)
-        return {"skill_inputs": skill_inputs}
+            template_inputs = self.data_processor(**template_inputs)
+        template_inputs = pipe.template_model.process_inputs(pipe=pipe, **template_inputs)
+        return {"template_inputs": template_inputs}


-class GeneralUnit_SkillForward(PipelineUnit):
+class GeneralUnit_TemplateForward(PipelineUnit):
     # Only used for training
-    def __init__(self):
+    def __init__(self, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False):
         super().__init__(
-            input_params=("skill_inputs",),
-            output_params=("skill_cache",),
-            onload_model_names=("skill_model",)
+            input_params=("template_inputs",),
+            output_params=("kv_cache",),
+            onload_model_names=("template_model",)
         )
+        self.use_gradient_checkpointing = use_gradient_checkpointing
+        self.use_gradient_checkpointing_offload = use_gradient_checkpointing_offload

-    def process(self, pipe, skill_inputs):
-        if not hasattr(pipe, "skill_model"):
+    def process(self, pipe, template_inputs):
+        if not hasattr(pipe, "template_model"):
             return {}
-        skill_cache = pipe.skill_model.forward(**skill_inputs)
-        return {"skill_cache": skill_cache}
+        template_cache = pipe.template_model.forward(
+            **template_inputs,
+            pipe=pipe,
+            use_gradient_checkpointing=self.use_gradient_checkpointing,
+            use_gradient_checkpointing_offload=self.use_gradient_checkpointing_offload,
+        )
+        return template_cache


 class DiffusionTrainingModule(torch.nn.Module):
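Editor's note: the two units above follow a simple contract, reading their `input_params` from a shared input dict and returning a dict that is merged back. A toy re-statement of that loop; this paraphrases the `PipelineUnit` semantics for intuition and is not code from the repo:

# Toy model of the unit dataflow assumed above.
def run_units(units, inputs_shared):
    for unit in units:
        outputs = unit.process(pipe=None, **{k: inputs_shared.get(k) for k in unit.input_params})
        inputs_shared.update(outputs)  # e.g. GeneralUnit_TemplateForward contributes "kv_cache"
    return inputs_shared

class StubTemplateForward:  # hypothetical stand-in for GeneralUnit_TemplateForward
    input_params = ("template_inputs",)
    def process(self, pipe=None, template_inputs=None):
        return {"kv_cache": {"block_0": "stub"}}

print(run_units([StubTemplateForward()], {"template_inputs": {}}))
# {'template_inputs': {}, 'kv_cache': {'block_0': 'stub'}}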
@@ -246,13 +253,13 @@ class DiffusionTrainingModule(torch.nn.Module):
         return lora_target_modules


-    def load_training_skill_model(self, pipe, path_or_model_id):
+    def load_training_template_model(self, pipe, path_or_model_id, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False):
         if path_or_model_id is None:
             return pipe
         model_config = self.parse_path_or_model_id(path_or_model_id)
-        pipe.load_training_skill_model(model_config)
-        pipe.units.append(GeneralUnit_SkillProcessInputs(pipe.skill_data_processor))
-        pipe.units.append(GeneralUnit_SkillForward())
+        pipe.load_training_template_model(model_config)
+        pipe.units.append(GeneralUnit_TemplateProcessInputs(pipe.template_data_processor))
+        pipe.units.append(GeneralUnit_TemplateForward(use_gradient_checkpointing, use_gradient_checkpointing_offload))
         return pipe


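Editor's note: to close, a hedged sketch of how the new inference-side pieces might be wired together. The import paths follow the new file's location; the model ID, the `ModelConfig` constructor arguments, the base `pipe`, and its `prompt` argument are assumptions, not part of this commit:

import torch
from diffsynth.core import ModelConfig                      # package path inferred from the `..core` imports
from diffsynth.diffusion.template import TemplatePipeline   # the new file above

# Model ID is a placeholder; ModelConfig arguments follow common
# DiffSynth-Studio usage and are an assumption.
template_pipe = TemplatePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[ModelConfig(model_id="org/my-template-model")],
)

# `pipe` stands in for an existing DiffSynth pipeline built elsewhere.
# TemplatePipeline.__call__ runs the template models, injects matching outputs
# (e.g. kv_cache / negative_kv_cache) into pipe's keyword arguments, and
# forwards everything else (such as `prompt`) untouched.
image = template_pipe(
    pipe=pipe,
    template_inputs=[{"model_id": 0}],
    prompt="a photo of a cat",
)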