mirror of
https://github.com/modelscope/DiffSynth-Studio.git
synced 2026-03-18 22:08:13 +00:00
add audio_vae, audio_vocoder, text_encoder, connector and upsampler for ltx2
This commit is contained in:
32
diffsynth/utils/state_dict_converters/ltx2_audio_vae.py
Normal file
32
diffsynth/utils/state_dict_converters/ltx2_audio_vae.py
Normal file
@@ -0,0 +1,32 @@
|
||||
def LTX2AudioEncoderStateDictConverter(state_dict):
    """Strip the ``audio_vae.`` wrapper from LTX-2 audio-VAE encoder weights.

    Keys under ``audio_vae.encoder.`` lose that prefix entirely; keys under
    ``audio_vae.per_channel_statistics.`` keep the ``per_channel_statistics.``
    part. Any other key is dropped.
    """
    # Not used
    prefix_map = (
        ("audio_vae.encoder.", ""),
        ("audio_vae.per_channel_statistics.", "per_channel_statistics."),
    )
    converted = {}
    for key, tensor in state_dict.items():
        for old_prefix, new_prefix in prefix_map:
            if key.startswith(old_prefix):
                converted[key.replace(old_prefix, new_prefix)] = tensor
                break
    return converted
|
||||
|
||||
|
||||
def LTX2AudioDecoderStateDictConverter(state_dict):
    """Strip the ``audio_vae.`` wrapper from LTX-2 audio-VAE decoder weights.

    ``audio_vae.decoder.*`` keys lose that prefix; the per-channel statistics
    keep their ``per_channel_statistics.`` prefix. Unmatched keys are dropped.
    """
    converted = {}
    for key, tensor in state_dict.items():
        if key.startswith("audio_vae.decoder."):
            new_key = key.replace("audio_vae.decoder.", "")
        elif key.startswith("audio_vae.per_channel_statistics."):
            new_key = key.replace(
                "audio_vae.per_channel_statistics.", "per_channel_statistics."
            )
        else:
            continue
        converted[new_key] = tensor
    return converted
|
||||
|
||||
|
||||
def LTX2VocoderStateDictConverter(state_dict):
    """Keep only ``vocoder.*`` weights, with the ``vocoder.`` prefix removed."""
    return {
        key.replace("vocoder.", ""): tensor
        for key, tensor in state_dict.items()
        if key.startswith("vocoder.")
    }
|
||||
31
diffsynth/utils/state_dict_converters/ltx2_text_encoder.py
Normal file
31
diffsynth/utils/state_dict_converters/ltx2_text_encoder.py
Normal file
@@ -0,0 +1,31 @@
|
||||
def LTX2TextEncoderStateDictConverter(state_dict):
    """Remap LTX-2 text-encoder checkpoint keys to the local module layout.

    ``language_model.model.*`` / ``vision_tower.*`` / ``multi_modal_projector.*``
    are moved under ``model.``, ``language_model.lm_head.*`` becomes
    ``lm_head.*``, and everything else is dropped. The LM head weight is tied
    to the input embeddings when those are present.
    """
    prefix_map = (
        ("language_model.model.", "model.language_model."),
        ("vision_tower.", "model.vision_tower."),
        ("multi_modal_projector.", "model.multi_modal_projector."),
        ("language_model.lm_head.", "lm_head."),
    )
    state_dict_ = {}
    for key in state_dict:
        for old_prefix, new_prefix in prefix_map:
            if key.startswith(old_prefix):
                state_dict_[key.replace(old_prefix, new_prefix)] = state_dict[key]
                break
    # Tie lm_head to the input embeddings, but only when the embedding weight
    # actually exists. The previous version assigned the result of ``.get``
    # unconditionally, inserting ``None`` (and clobbering any real lm_head
    # weight) whenever ``embed_tokens.weight`` was absent.
    embed_weight = state_dict_.get("model.language_model.embed_tokens.weight")
    if embed_weight is not None:
        state_dict_["lm_head.weight"] = embed_weight
    return state_dict_
|
||||
|
||||
|
||||
def LTX2TextEncoderPostModulesStateDictConverter(state_dict):
    """Remap the post-encoder module keys (feature projection + connectors).

    Keys outside the three known prefixes are dropped.
    """
    renames = {
        "text_embedding_projection.": "feature_extractor_linear.",
        "model.diffusion_model.video_embeddings_connector.": "embeddings_connector.",
        "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector.",
    }
    converted = {}
    for key, tensor in state_dict.items():
        for old_prefix, new_prefix in renames.items():
            if key.startswith(old_prefix):
                converted[key.replace(old_prefix, new_prefix)] = tensor
                break
    return converted
|
||||
@@ -1,22 +0,0 @@
|
||||
import torch
|
||||
from diffsynth.models.model_loader import ModelPool
|
||||
from diffsynth.core.loader import ModelConfig
|
||||
|
||||
|
||||
def test_model_loading(model_name,
                       model_config: ModelConfig,
                       vram_limit: float = None,
                       device="cpu",
                       torch_dtype=torch.bfloat16):
    """Download (if needed) and load one model through a ModelPool, then
    return the model registered under *model_name*.

    The computation dtype/device are forced into the config's VRAM settings
    before loading.
    """
    model_config.download_if_necessary()
    vram_config = model_config.vram_config()
    vram_config.update(
        computation_dtype=torch_dtype,
        computation_device=device,
    )
    pool = ModelPool()
    pool.auto_load_model(
        model_config.path,
        vram_config=vram_config,
        vram_limit=vram_limit,
        clear_parameters=model_config.clear_parameters,
    )
    return pool.fetch_model(model_name)
|
||||
Reference in New Issue
Block a user