# RWKV-Runner/backend-python/convert_pytorch_to_ggml.py

# Converts an RWKV model checkpoint in PyTorch format to an rwkv.cpp compatible file.
# Usage: python convert_pytorch_to_ggml.py C:\RWKV-4-Pile-169M-20220807-8023.pth C:\rwkv.cpp-169M-FP16.bin FP16
# Get model checkpoints from https://huggingface.co/BlinkDL
# See FILE_FORMAT.md for the documentation on the file format.

import argparse
import struct
import torch
from typing import Dict


def parse_args() -> argparse.Namespace:
    """Parse the converter's command-line arguments.

    Returns:
        A namespace with ``src_path``, ``dest_path`` and ``data_type``.
        ``data_type`` is one of FP16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0 and
        falls back to FP16 when it is not given on the command line.
    """
    parser = argparse.ArgumentParser(
        description="Convert an RWKV model checkpoint in PyTorch format to an rwkv.cpp compatible file"
    )
    parser.add_argument("src_path", help="Path to PyTorch checkpoint file")
    parser.add_argument(
        "dest_path", help="Path to rwkv.cpp checkpoint file, will be overwritten"
    )
    parser.add_argument(
        "data_type",
        help="Data type, FP16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0",
        type=str,
        # A positional is mandatory unless nargs="?" is set; without it
        # argparse never applies the default declared below.
        nargs="?",
        choices=[
            "FP16",
            "Q4_0",
            "Q4_1",
            "Q5_0",
            "Q5_1",
            "Q8_0",
        ],
        default="FP16",
    )
    return parser.parse_args()


def get_layer_count(state_dict: Dict[str, torch.Tensor]) -> int:
    """Count the consecutive ``blocks.<i>.ln1.weight`` keys starting at i = 0.

    Args:
        state_dict: Checkpoint mapping from parameter name to tensor. Only
            the keys are inspected.

    Returns:
        The number of consecutive block indices found (always >= 1).

    Raises:
        ValueError: If ``blocks.0.ln1.weight`` is missing, i.e. no layer
            could be found at all.
    """
    n_layer: int = 0

    while f"blocks.{n_layer}.ln1.weight" in state_dict:
        n_layer += 1

    # Raise explicitly instead of using `assert`: asserts are stripped under
    # `python -O` and carry no message for the caller to report.
    if n_layer == 0:
        raise ValueError(
            "state_dict has no 'blocks.0.ln1.weight' entry, "
            "cannot determine the layer count"
        )

    return n_layer


def write_state_dict(
    state_dict: Dict[str, torch.Tensor], dest_path: str, data_type: str
) -> None:
    """Serialize ``state_dict`` into the binary file at ``dest_path``.

    The file starts with six unpadded int32 values: the magic number, the
    constant 101, ``emb.weight.shape[0]``, ``emb.weight.shape[1]``, the
    layer count and an FP16 flag. Every tensor then follows as: dimension
    count, encoded key length and dtype flag (int32 each), the dimensions
    in reversed order, the UTF-8 encoded key and the raw tensor data.

    Args:
        state_dict: Checkpoint tensors keyed by parameter name; must contain
            ``emb.weight``.
        dest_path: Output file path; an existing file is overwritten.
        data_type: ``"FP16"`` or ``"float16"`` stores multi-dimensional
            non-``.time_`` tensors as half precision; any other value keeps
            every tensor in FP32.
    """
    embedding: torch.Tensor = state_dict["emb.weight"]

    layer_total: int = get_layer_count(state_dict)
    vocab_size: int = embedding.shape[0]
    embed_size: int = embedding.shape[1]

    # The model generation is inferred from keys only newer versions have.
    has_ln_x: bool = "blocks.0.att.ln_x.weight" in state_dict
    has_gate: bool = "blocks.0.att.gate.weight" in state_dict

    if has_gate:
        print("Detected RWKV v5.2")
    elif has_ln_x:
        print("Detected RWKV v5.1")
    else:
        print("Detected RWKV v4")

    want_half: bool = data_type in ("FP16", "float16")

    # '=' in the struct formats below disables padding.
    file_header: bytes = struct.pack(
        "=iiiiii",
        0x67676D66,  # Magic: 'ggmf' in hex
        101,
        vocab_size,
        embed_size,
        layer_total,
        int(want_half),
    )

    with open(dest_path, "wb") as out_file:
        out_file.write(file_header)

        for name, source in state_dict.items():
            values: torch.Tensor = source.float()
            is_time_param: bool = ".time_" in name

            if is_time_param:
                values = values.squeeze()

            if has_ln_x:
                if ".time_decay" in name:
                    decay = torch.exp(-torch.exp(values))
                    if has_gate:
                        values = decay.unsqueeze(-1)
                    else:
                        values = decay.reshape(-1, 1, 1)

                if ".time_first" in name:
                    values = torch.exp(values).reshape(-1, 1, 1)

                if ".time_faaaa" in name:
                    values = values.unsqueeze(-1)
            elif ".time_decay" in name:
                values = -torch.exp(values)

            # 1-dim vectors and all ".time_" tensors stay in FP32.
            if want_half and values.dim() > 1 and not is_time_param:
                values = values.half()

            dims = values.shape

            print(f"Writing {name}, shape {dims}, type {values.dtype}")

            name_bytes: bytes = name.encode("utf-8")
            stored_as_half: bool = values.dtype == torch.float16

            out_file.write(
                struct.pack("=iii", len(dims), len(name_bytes), int(stored_as_half))
            )

            # Dimensions go out in reversed order: a PyTorch shape of
            # (x rows, y columns) is the ggml shape (y elements in a row,
            # x elements in a column). Both describe the same tensor.
            for extent in reversed(dims):
                out_file.write(struct.pack("=i", extent))

            out_file.write(name_bytes)

            values.numpy().tofile(out_file)


def main() -> None:
    """Convert the checkpoint named on the command line to an rwkv.cpp file.

    An FP16 file is always written first. For FP16 output that file is the
    destination itself. For a quantized ``data_type`` the FP16 file is an
    intermediate stored next to the destination, which the rwkv.cpp shared
    library then quantizes into ``dest_path``.
    """
    import os
    import re
    import sys

    args = parse_args()
    quantize: bool = args.data_type.startswith("Q")

    print(f"Reading {args.src_path}")

    # NOTE(review): torch.load unpickles the file, which can execute
    # arbitrary code; only convert checkpoints from a trusted source.
    state_dict: Dict[str, torch.Tensor] = torch.load(args.src_path, map_location="cpu")

    temp_output: str = args.dest_path
    temp_is_scratch: bool = False
    if quantize:
        # Rewrite only the file name so a directory that happens to contain
        # a quantization tag is left alone.
        dest_name: str = os.path.basename(args.dest_path)
        dest_prefix: str = args.dest_path[: len(args.dest_path) - len(dest_name)]
        fp16_name: str = re.sub(r"Q[458]_[01]", "fp16", dest_name)
        if fp16_name == dest_name:
            # No tag in the name: without a distinct path the quantizer
            # would be handed the same file as both input and output.
            fp16_name = dest_name + ".fp16.tmp"
            temp_is_scratch = True
        temp_output = dest_prefix + fp16_name

    write_state_dict(state_dict, temp_output, "FP16")

    if quantize:
        # Make the bundled rwkv_pip package importable when run as a script.
        sys.path.append(os.path.dirname(os.path.realpath(__file__)))
        from rwkv_pip.cpp import rwkv_cpp_shared_library

        library = rwkv_cpp_shared_library.load_rwkv_shared_library()
        try:
            library.rwkv_quantize_model_file(
                temp_output, args.dest_path, args.data_type
            )
        finally:
            # Only remove the intermediate file this function invented; a
            # "...fp16..." file derived from the destination name is kept,
            # as before.
            if temp_is_scratch and os.path.exists(temp_output):
                os.remove(temp_output)

    print("Done")


if __name__ == "__main__":
    # Script entry point. A failure is printed and its message is saved to
    # error.txt in the current working directory; the exception is not
    # re-raised.
    try:
        main()
    except Exception as err:
        message = str(err)
        print(message)
        with open("error.txt", "w") as error_file:
            error_file.write(message)
rwkv.cpp(ggml) support 2023-12-12 20:29:55 +08:00			`# Converts an RWKV model checkpoint in PyTorch format to an rwkv.cpp compatible file.`
			`# Usage: python convert_pytorch_to_ggml.py C:\RWKV-4-Pile-169M-20220807-8023.pth C:\rwkv.cpp-169M-FP16.bin FP16`
			`# Get model checkpoints from https://huggingface.co/BlinkDL`
			`# See FILE_FORMAT.md for the documentation on the file format.`

			`import argparse`
			`import struct`
			`import torch`
			`from typing import Dict`


			`def parse_args():`
			`parser = argparse.ArgumentParser(`
			`description="Convert an RWKV model checkpoint in PyTorch format to an rwkv.cpp compatible file"`
			`)`
			`parser.add_argument("src_path", help="Path to PyTorch checkpoint file")`
			`parser.add_argument(`
			`"dest_path", help="Path to rwkv.cpp checkpoint file, will be overwritten"`
			`)`
			`parser.add_argument(`
			`"data_type",`
			`help="Data type, FP16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0",`
			`type=str,`
			`choices=[`
			`"FP16",`
			`"Q4_0",`
			`"Q4_1",`
			`"Q5_0",`
			`"Q5_1",`
			`"Q8_0",`
			`],`
			`default="FP16",`
			`)`
			`return parser.parse_args()`


			`def get_layer_count(state_dict: Dict[str, torch.Tensor]) -> int:`
			`n_layer: int = 0`

			`while f"blocks.{n_layer}.ln1.weight" in state_dict:`
			`n_layer += 1`

			`assert n_layer > 0`

			`return n_layer`


			`def write_state_dict(`
			`state_dict: Dict[str, torch.Tensor], dest_path: str, data_type: str`
			`) -> None:`
			`emb_weight: torch.Tensor = state_dict["emb.weight"]`

			`n_layer: int = get_layer_count(state_dict)`
			`n_vocab: int = emb_weight.shape[0]`
			`n_embed: int = emb_weight.shape[1]`

			`is_v5_1_or_2: bool = "blocks.0.att.ln_x.weight" in state_dict`
			`is_v5_2: bool = "blocks.0.att.gate.weight" in state_dict`

			`if is_v5_2:`
			`print("Detected RWKV v5.2")`
			`elif is_v5_1_or_2:`
			`print("Detected RWKV v5.1")`
			`else:`
			`print("Detected RWKV v4")`

			`with open(dest_path, "wb") as out_file:`
			`is_FP16: bool = data_type == "FP16" or data_type == "float16"`

			`out_file.write(`
			`struct.pack(`
			`# Disable padding with '='`
			`"=iiiiii",`
			`# Magic: 'ggmf' in hex`
			`0x67676D66,`
			`101,`
			`n_vocab,`
			`n_embed,`
			`n_layer,`
			`1 if is_FP16 else 0,`
			`)`
			`)`

			`for k in state_dict.keys():`
			`tensor: torch.Tensor = state_dict[k].float()`

			`if ".time_" in k:`
			`tensor = tensor.squeeze()`

			`if is_v5_1_or_2:`
			`if ".time_decay" in k:`
			`if is_v5_2:`
			`tensor = torch.exp(-torch.exp(tensor)).unsqueeze(-1)`
			`else:`
			`tensor = torch.exp(-torch.exp(tensor)).reshape(-1, 1, 1)`

			`if ".time_first" in k:`
			`tensor = torch.exp(tensor).reshape(-1, 1, 1)`

			`if ".time_faaaa" in k:`
			`tensor = tensor.unsqueeze(-1)`
			`else:`
			`if ".time_decay" in k:`
			`tensor = -torch.exp(tensor)`

			`# Keep 1-dim vectors and small matrices in FP32`
			`if is_FP16 and len(tensor.shape) > 1 and ".time_" not in k:`
			`tensor = tensor.half()`

			`shape = tensor.shape`

			`print(f"Writing {k}, shape {shape}, type {tensor.dtype}")`

			`k_encoded: bytes = k.encode("utf-8")`

			`out_file.write(`
			`struct.pack(`
			`"=iii",`
			`len(shape),`
			`len(k_encoded),`
			`1 if tensor.dtype == torch.float16 else 0,`
			`)`
			`)`

			`# Dimension order is reversed here:`
			`# * PyTorch shape is (x rows, y columns)`
			`# * ggml shape is (y elements in a row, x elements in a column)`
			`# Both shapes represent the same tensor.`
			`for dim in reversed(tensor.shape):`
			`out_file.write(struct.pack("=i", dim))`

			`out_file.write(k_encoded)`

			`tensor.numpy().tofile(out_file)`


			`def main() -> None:`
			`args = parse_args()`

			`print(f"Reading {args.src_path}")`

			`state_dict: Dict[str, torch.Tensor] = torch.load(args.src_path, map_location="cpu")`

			`temp_output: str = args.dest_path`
			`if args.data_type.startswith("Q"):`
			`import re`

			`temp_output = re.sub(r"Q[4,5,8]_[0,1]", "fp16", temp_output)`
			`write_state_dict(state_dict, temp_output, "FP16")`
			`if args.data_type.startswith("Q"):`
			`import sys`
			`import os`

			`sys.path.append(os.path.dirname(os.path.realpath(__file__)))`
			`from rwkv_pip.cpp import rwkv_cpp_shared_library`

			`library = rwkv_cpp_shared_library.load_rwkv_shared_library()`
			`library.rwkv_quantize_model_file(temp_output, args.dest_path, args.data_type)`

			`print("Done")`


			`if __name__ == "__main__":`
			`try:`
			`main()`
			`except Exception as e:`
			`print(e)`
			`with open("error.txt", "w") as f:`
			`f.write(str(e))`