From ce26f49208ca027f4ff70bdf7adb176665fdeabe Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 27 Dec 2023 21:27:18 -0500 Subject: [PATCH] improve check-requirements.sh --- check-requirements.sh | 152 +++++++++++++++------------------ convert-hf-to-gguf.py | 71 ++++++++-------- convert-lora-to-ggml.py | 183 ++++++++++++++++++++-------------------- 3 files changed, 199 insertions(+), 207 deletions(-) diff --git a/check-requirements.sh b/check-requirements.sh index ac23b46cb7100..77ed98fcb1f22 100755 --- a/check-requirements.sh +++ b/check-requirements.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -euo pipefail # # check-requirements.sh checks all requirements files for each top-level # convert*.py script. @@ -8,7 +9,7 @@ # sized tmpfs /tmp or ramdisk is recommended if running this frequently. # # usage: ./check-requirements.sh [] -# ./check-requirements.sh 'nocleanup' [] +# ./check-requirements.sh nocleanup [] # # where: # - is a directory that can be used as the base for @@ -20,135 +21,108 @@ # - bash >= 3.2.57 # - shellcheck # -# For each script, it creates a fresh venv, `pip install -r` the -# requirements, and finally executes the python script with no arguments to -# check for a `ModuleNotFoundError`. +# For each script, it creates a fresh venv, `pip install`s the requirements, and +# finally imports the python script to check for `ImportError`. # log() { - local level="$1"; shift - local format="$1"; shift - # shellcheck disable=SC2059 - >&2 printf "$level: $format\n" "$@" + local level=$1 msg=$2 + printf >&2 '%s: %s\n' "$level" "$msg" } -debug () { - log 'DEBUG' "$@" +debug() { + log DEBUG "$@" } info() { - log 'INFO' "$@" + log INFO "$@" } fatal() { - log 'FATAL' "$@" + log FATAL "$@" exit 1 } cleanup() { if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then info "Removing $workdir" - ( - count=0 - rm -rfv "$workdir" | while read -r; do - if (( count++ > 750 )); then - printf '.' - count=0 - fi - done - printf '\n' - )& - wait $! - info "Removed '$workdir'" + local count=0 + rm -rfv -- "$workdir" | while read -r; do + if (( count++ > 750 )); then + printf . + count=0 + fi + done + printf '\n' + info "Removed $workdir" fi } -abort() { - cleanup - exit 1 -} - -if [[ $1 == nocleanup ]]; then - shift # discard nocleanup arg +if [[ ${1-} == nocleanup ]]; then + shift # discard nocleanup arg else - trap abort SIGINT SIGTERM SIGQUIT SIGABRT + trap exit INT TERM trap cleanup EXIT fi -set -eu -o pipefail -this="$(realpath "$0")"; readonly this +this=$(realpath -- "$0"); readonly this cd "$(dirname "$this")" shellcheck "$this" -readonly reqs_dir='./requirements' +readonly reqs_dir=requirements -workdir= -if [[ -n ${1+x} ]]; then - arg_dir="$(realpath "$1")" - if [[ ! ( -d $arg_dir && -w $arg_dir ) ]]; then - fatal "$arg_dir is not a valid directory" +if [[ ${1+x} ]]; then + tmp_dir=$(realpath -- "$1") + if [[ ! 
( -d $tmp_dir && -w $tmp_dir ) ]]; then + fatal "$tmp_dir is not a writable directory" fi - workdir="$(mktemp -d "$arg_dir/check-requirements.XXXX")" else - workdir="$(mktemp -d "/tmp/check-requirements.XXXX")" + tmp_dir=/tmp fi -readonly workdir +workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir info "Working directory: $workdir" -assert_arg_count() { - local argcount="$1"; shift - if (( $# != argcount )); then - fatal "${FUNCNAME[1]}: incorrect number of args" - fi -} - check_requirements() { - assert_arg_count 2 "$@" - local venv="$1" - local reqs="$2" + local reqs=$1 info "$reqs: beginning check" - ( - # shellcheck source=/dev/null - source "$venv/bin/activate" - pip --disable-pip-version-check install -q -r "$reqs" - ) + pip --disable-pip-version-check install -qr "$reqs" info "$reqs: OK" } check_convert_script() { - assert_arg_count 1 "$@" - local py="$1"; shift # e.g. ./convert-hf-to-gguf.py - local pyname; pyname="$(basename "$py")" # e.g. convert-hf-to-gguf.py - pyname="${pyname%.py}" # e.g. convert-hf-to-gguf + local py=$1 # e.g. ./convert-hf-to-gguf.py + local pyname=${py##*/} # e.g. convert-hf-to-gguf.py + pyname=${pyname%.py} # e.g. convert-hf-to-gguf info "$py: beginning check" local reqs="$reqs_dir/requirements-$pyname.txt" - if [[ ! -r "$reqs" ]]; then + if [[ ! -r $reqs ]]; then fatal "$py missing requirements. Expected: $reqs" fi local venv="$workdir/$pyname-venv" python3 -m venv "$venv" - check_requirements "$venv" "$reqs" - - # Because we mask the return value of the subshell, - # we don't need to use set +e/-e. - # shellcheck disable=SC2155 - local py_err=$( + ( # shellcheck source=/dev/null source "$venv/bin/activate" - python "$py" 2>&1 + + check_requirements "$reqs" + + python - "$py" "$pyname" <&2 < argparse.Namespace: return parser.parse_args() -args = parse_args() +def main() -> None: + args = parse_args() -dir_model = args.model -if not dir_model.is_dir(): - print(f'Error: {args.model} is not a directory', file=sys.stderr) - sys.exit(1) + dir_model = args.model + if not dir_model.is_dir(): + print(f'Error: {args.model} is not a directory', file=sys.stderr) + sys.exit(1) -ftype_map = { - "f32": gguf.GGMLQuantizationType.F32, - "f16": gguf.GGMLQuantizationType.F16, -} + ftype_map = { + "f32": gguf.GGMLQuantizationType.F32, + "f16": gguf.GGMLQuantizationType.F16, + } -if args.outfile is not None: - fname_out = args.outfile -else: - # output in the same directory as the model by default - fname_out = dir_model / f'ggml-model-{args.outtype}.gguf' + if args.outfile is not None: + fname_out = args.outfile + else: + # output in the same directory as the model by default + fname_out = dir_model / f'ggml-model-{args.outtype}.gguf' -print(f"Loading model: {dir_model.name}") + print(f"Loading model: {dir_model.name}") -hparams = Model.load_hparams(dir_model) + hparams = Model.load_hparams(dir_model) -with torch.inference_mode(): - model_class = Model.from_model_architecture(hparams["architectures"][0]) - model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian) + with torch.inference_mode(): + model_class = Model.from_model_architecture(hparams["architectures"][0]) + model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian) - print("Set model parameters") - model_instance.set_gguf_parameters() + print("Set model parameters") + model_instance.set_gguf_parameters() - print("Set model tokenizer") - model_instance.set_vocab() + print("Set model tokenizer") + model_instance.set_vocab() + 
+ if args.vocab_only: + print(f"Exporting model vocab to '{fname_out}'") + model_instance.write_vocab() + else: + print(f"Exporting model to '{fname_out}'") + model_instance.write() + + print(f"Model successfully exported to '{fname_out}'") - if args.vocab_only: - print(f"Exporting model vocab to '{fname_out}'") - model_instance.write_vocab() - else: - print(f"Exporting model to '{fname_out}'") - model_instance.write() - print(f"Model successfully exported to '{fname_out}'") +if __name__ == '__main__': + main() diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py index 53bb8a3d97a05..35ce152f4248d 100755 --- a/convert-lora-to-ggml.py +++ b/convert-lora-to-ggml.py @@ -47,95 +47,96 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty fout.seek((fout.tell() + 31) & -32) -if len(sys.argv) < 2: - print(f"Usage: python {sys.argv[0]} [arch]") - print( - "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'" - ) - print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)") - sys.exit(1) - -input_json = os.path.join(sys.argv[1], "adapter_config.json") -input_model = os.path.join(sys.argv[1], "adapter_model.bin") -output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin") - -model = torch.load(input_model, map_location="cpu") -arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama" - -if arch_name not in gguf.MODEL_ARCH_NAMES.values(): - print(f"Error: unsupported architecture {arch_name}") - sys.exit(1) - -arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)] -name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone - -with open(input_json, "r") as f: - params = json.load(f) - -if params["peft_type"] != "LORA": - print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") - sys.exit(1) - -if params["fan_in_fan_out"] is True: - print("Error: param fan_in_fan_out is not supported") - sys.exit(1) - -if params["bias"] is not None and params["bias"] != "none": - print("Error: param bias is not supported") - sys.exit(1) - -# TODO: these seem to be layers that have been trained but without lora. 
-# doesn't seem widely used but eventually should be supported -if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0: - print("Error: param modules_to_save is not supported") - sys.exit(1) - -with open(output_path, "wb") as fout: - fout.truncate() - - write_file_header(fout, params) - for k, v in model.items(): - orig_k = k - if k.endswith(".default.weight"): - k = k.replace(".default.weight", ".weight") - if k in ["llama_proj.weight", "llama_proj.bias"]: - continue - if k.endswith("lora_A.weight"): - if v.dtype != torch.float16 and v.dtype != torch.float32: +if __name__ == '__main__': + if len(sys.argv) < 2: + print(f"Usage: python {sys.argv[0]} [arch]") + print( + "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'" + ) + print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)") + sys.exit(1) + + input_json = os.path.join(sys.argv[1], "adapter_config.json") + input_model = os.path.join(sys.argv[1], "adapter_model.bin") + output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin") + + model = torch.load(input_model, map_location="cpu") + arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama" + + if arch_name not in gguf.MODEL_ARCH_NAMES.values(): + print(f"Error: unsupported architecture {arch_name}") + sys.exit(1) + + arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)] + name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone + + with open(input_json, "r") as f: + params = json.load(f) + + if params["peft_type"] != "LORA": + print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") + sys.exit(1) + + if params["fan_in_fan_out"] is True: + print("Error: param fan_in_fan_out is not supported") + sys.exit(1) + + if params["bias"] is not None and params["bias"] != "none": + print("Error: param bias is not supported") + sys.exit(1) + + # TODO: these seem to be layers that have been trained but without lora. + # doesn't seem widely used but eventually should be supported + if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0: + print("Error: param modules_to_save is not supported") + sys.exit(1) + + with open(output_path, "wb") as fout: + fout.truncate() + + write_file_header(fout, params) + for k, v in model.items(): + orig_k = k + if k.endswith(".default.weight"): + k = k.replace(".default.weight", ".weight") + if k in ["llama_proj.weight", "llama_proj.bias"]: + continue + if k.endswith("lora_A.weight"): + if v.dtype != torch.float16 and v.dtype != torch.float32: + v = v.float() + v = v.T + else: v = v.float() - v = v.T - else: - v = v.float() - - t = v.detach().numpy() - - prefix = "base_model.model." 
- if k.startswith(prefix): - k = k[len(prefix) :] - - lora_suffixes = (".lora_A.weight", ".lora_B.weight") - if k.endswith(lora_suffixes): - suffix = k[-len(lora_suffixes[0]):] - k = k[: -len(lora_suffixes[0])] - else: - print(f"Error: unrecognized tensor name {orig_k}") - sys.exit(1) - - tname = name_map.get_name(k) - if tname is None: - print(f"Error: could not map tensor name {orig_k}") - print(" Note: the arch parameter must be specified if the model is not llama") - sys.exit(1) - - if suffix == ".lora_A.weight": - tname += ".weight.loraA" - elif suffix == ".lora_B.weight": - tname += ".weight.loraB" - else: - assert False - - print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") - write_tensor_header(fout, tname, t.shape, t.dtype) - t.tofile(fout) - -print(f"Converted {input_json} and {input_model} to {output_path}") + + t = v.detach().numpy() + + prefix = "base_model.model." + if k.startswith(prefix): + k = k[len(prefix) :] + + lora_suffixes = (".lora_A.weight", ".lora_B.weight") + if k.endswith(lora_suffixes): + suffix = k[-len(lora_suffixes[0]):] + k = k[: -len(lora_suffixes[0])] + else: + print(f"Error: unrecognized tensor name {orig_k}") + sys.exit(1) + + tname = name_map.get_name(k) + if tname is None: + print(f"Error: could not map tensor name {orig_k}") + print(" Note: the arch parameter must be specified if the model is not llama") + sys.exit(1) + + if suffix == ".lora_A.weight": + tname += ".weight.loraA" + elif suffix == ".lora_B.weight": + tname += ".weight.loraB" + else: + assert False + + print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") + write_tensor_header(fout, tname, t.shape, t.dtype) + t.tofile(fout) + + print(f"Converted {input_json} and {input_model} to {output_path}")
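
Note on the import-based check: the heredoc fed to `python -` inside check_convert_script is truncated in this excerpt, so its exact body is not shown above. Per the updated header comment, the intent is to load each convert*.py inside its fresh venv and let a missing dependency surface as an ImportError instead of executing the script. A minimal standalone sketch of that idea using only the standard library (the helper name import_script and the use of importlib.util are illustrative here, not the patch's actual heredoc):

#!/usr/bin/env python3
# Sketch only: import a script by file path so that a missing dependency
# raises ImportError/ModuleNotFoundError without running its main().
import importlib.util
import sys


def import_script(path: str, name: str) -> None:
    # Build a module spec from the file path and execute the module body;
    # exec_module() re-raises any ImportError hit by the script's own imports.
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"cannot load {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)


if __name__ == '__main__':
    # e.g. python import_check.py ./convert-hf-to-gguf.py convert-hf-to-gguf
    import_script(sys.argv[1], sys.argv[2])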
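
The convert-hf-to-gguf.py and convert-lora-to-ggml.py hunks exist to support that check: code that previously ran at module level is moved under main() or an `if __name__ == '__main__':` block, so importing either script only evaluates its imports and definitions. A toy illustration of the pattern (hypothetical module, not part of the patch):

# All side effects live under main(); importing this module only runs the
# import statements and definitions at the top of the file.
import sys


def main() -> None:
    print("doing the real work", file=sys.stderr)


if __name__ == '__main__':
    main()  # not reached when the checker merely imports the module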
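
For reference, the tensor renaming that convert-lora-to-ggml.py applies to each PEFT key is: strip the "base_model.model." prefix, split off a ".lora_A.weight" or ".lora_B.weight" suffix (str.endswith accepts a tuple, so one call matches either), map the remaining base name through gguf.TensorNameMap, and append ".weight.loraA" or ".weight.loraB". A self-contained sketch with the gguf lookup replaced by a stub (the function name map_lora_name and the identity stub are illustrative):

# Sketch of the renaming step; both suffixes have the same length,
# so a single slice extracts whichever one matched.
LORA_SUFFIXES = (".lora_A.weight", ".lora_B.weight")
PREFIX = "base_model.model."


def map_lora_name(k: str, get_name) -> str:
    if k.startswith(PREFIX):
        k = k[len(PREFIX):]
    if not k.endswith(LORA_SUFFIXES):
        raise ValueError(f"unrecognized tensor name {k}")
    suffix = k[-len(LORA_SUFFIXES[0]):]
    base = k[:-len(LORA_SUFFIXES[0])]
    tname = get_name(base)  # stands in for gguf.TensorNameMap.get_name()
    return tname + (".weight.loraA" if suffix == ".lora_A.weight" else ".weight.loraB")


# identity stub instead of the real gguf mapping table:
print(map_lora_name("base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight",
                    lambda base: base))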