2023-12-26 15:50:51 +00:00
|
|
|
import collections
|
|
|
|
import numpy
|
2023-08-16 15:07:58 +00:00
|
|
|
import os
|
|
|
|
import torch
|
2023-12-26 15:50:51 +00:00
|
|
|
from safetensors.torch import serialize_file, load_file
|
2023-08-16 15:07:58 +00:00
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
# Command-line interface: one input checkpoint path and one output path.
parser = argparse.ArgumentParser()
parser.add_argument("--input", help="Path to input pth model", type=str)
parser.add_argument(
    "--output",
    help="Path to output safetensors model",
    default="./converted.st",
    type=str,
)
# Parsed at module level; the __main__ section reads args.input / args.output.
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
2023-10-25 13:02:44 +00:00
|
|
|
def rename_key(rename, name):
    """Return ``name`` with every substring rule in ``rename`` applied.

    ``rename`` maps a substring to its replacement. Rules are applied one
    after another in the mapping's iteration order, each on the result of
    the previous one, so a later rule can rewrite text produced by an
    earlier rule.
    """
    renamed = name
    for old, new in rename.items():
        # str.replace leaves the string untouched when ``old`` is absent.
        renamed = renamed.replace(old, new)
    return renamed
|
|
|
|
|
|
|
|
|
2023-12-06 15:08:40 +00:00
|
|
|
def _detect_version(loaded, keys):
    """Guess the model version from tensor names and one tensor shape.

    Starts at 4 and only ever raises the value:

    * a name containing ``"ln_x"`` gives at least 5;
    * a name containing ``"gate.weight"`` gives at least 5.1;
    * an ``"att.time_decay"`` tensor with more than one dimension whose
      second dimension is larger than 1 gives at least 5.2, but only while
      the version found so far is 5.x;
    * a name containing ``"time_maa"`` gives at least 6.

    Because the 5.2 rule reads the version found so far, the result depends
    on the order of ``keys``; pass them in the checkpoint's own order.
    """
    version = 4
    for x in keys:
        if "ln_x" in x:
            version = max(5, version)
        if "gate.weight" in x:
            version = max(5.1, version)
        if int(version) == 5 and "att.time_decay" in x:
            shape = loaded[x].shape
            if len(shape) > 1 and shape[1] > 1:
                version = max(5.2, version)
        if "time_maa" in x:
            version = max(6, version)
    return version


def convert_file(pt_filename: str, sf_filename: str, rename=None, transpose_names=()):
    """Convert a PyTorch checkpoint into a float16 safetensors file.

    Every tensor is renamed, cast with ``.half()``, optionally transposed,
    and written to ``sf_filename`` with ``serialize_file``. One line per
    tensor (name, shape, dtype) and the detected model version are printed.

    Args:
        pt_filename: Path of the checkpoint, read with ``torch.load`` onto
            the CPU. If the loaded object has a ``"state_dict"`` entry, that
            entry is converted instead of the top-level object.
        sf_filename: Path of the safetensors file to write. Missing parent
            directories are created.
        rename: Mapping of substring -> replacement applied to every tensor
            name through ``rename_key``; the result is then lowercased.
            ``None`` (the default) means no renaming.
        transpose_names: Substrings; a tensor whose final name contains one
            of them has its last two dimensions swapped (once per matching
            entry).

    Raises:
        ValueError: If two tensors end up with the same output name.
    """
    rename = {} if rename is None else rename

    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only convert checkpoints that come from a trusted source.
    loaded: collections.OrderedDict = torch.load(pt_filename, map_location="cpu")
    if "state_dict" in loaded:
        loaded = loaded["state_dict"]

    # Snapshot the names: ``loaded`` is modified while we walk over them.
    kk = list(loaded.keys())

    version = _detect_version(loaded, kk)
    print(f"Model detected: v{version:.1f}")

    if version == 5.1:
        _, n_emb = loaded["emb.weight"].shape
        for k in kk:
            if "time_decay" in k or "time_faaaa" in k:
                # Add a second axis and repeat along it so the tensor holds
                # n_emb values in total (presumably these are 1-D here --
                # TODO confirm against real v5.1 checkpoints).
                loaded[k] = (
                    loaded[k].unsqueeze(1).repeat(1, n_emb // loaded[k].shape[0])
                )

    # Collect the converted entries in a separate dict so that a renamed
    # name can never be confused with a source tensor not yet processed.
    tensors = {}
    with torch.no_grad():
        for k in kk:
            new_k = rename_key(rename, k).lower()
            if new_k in tensors:
                raise ValueError(
                    f"Duplicate tensor name after renaming: {new_k!r} (from {k!r})"
                )
            # pop() drops the source reference so it can be freed right away.
            v = loaded.pop(k).half()
            for transpose_name in transpose_names:
                if transpose_name in new_k:
                    dims = len(v.shape)
                    v = v.transpose(dims - 2, dims - 1)
            print(f"{new_k}\t{v.shape}\t{v.dtype}")
            tensors[new_k] = {
                "dtype": str(v.dtype).split(".")[-1],
                "shape": v.shape,
                "data": v.numpy().tobytes(),
            }

    dirname = os.path.dirname(sf_filename)
    # dirname is "" for a bare file name; os.makedirs("") would raise.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    serialize_file(tensors, sf_filename, metadata={"format": "pt"})
    # To double-check the output, reload it with load_file(sf_filename) and
    # compare every tensor against the converted data with torch.equal.
|
2023-08-16 15:07:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Substring replacements applied to every tensor name during conversion.
    key_renames = {
        "time_faaaa": "time_first",
        "time_maa": "time_mix",
        "lora_A": "lora.0",
        "lora_B": "lora.1",
    }
    # Tensors whose converted name contains one of these get their last two
    # dimensions swapped.
    transposed_keys = [
        "time_mix_w1",
        "time_mix_w2",
        "time_decay_w1",
        "time_decay_w2",
    ]
    try:
        convert_file(
            args.input,
            args.output,
            rename=key_renames,
            transpose_names=transposed_keys,
        )
        print(f"Saved to {args.output}")
    except Exception as err:
        # Best-effort reporting: show the message and also leave it in
        # error.txt in the current working directory.
        print(err)
        with open("error.txt", "w") as report:
            report.write(str(err))
|