From 0ba23bad6f2169c90b94a605a4d72614821ad7cc Mon Sep 17 00:00:00 2001
From: ngxson
Date: Mon, 15 Jul 2024 15:35:19 +0200
Subject: [PATCH] change kv metadata

---
 convert_hf_to_gguf.py       |  1 +
 convert_lora_to_gguf.py     |  9 ++++-----
 gguf-py/gguf/constants.py   |  8 ++++++++
 gguf-py/gguf/gguf_writer.py |  3 +++
 src/llama.cpp               | 31 ++++++++++++++++++++-----------
 5 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index c5eb7bdbb9bce..a66228d71ed31 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -186,6 +186,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
         return new_name
 
     def set_gguf_parameters(self):
+        self.gguf_writer.add_type(gguf.GGUFType.MODEL)
         self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)
 
diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index 71d3e57f55720..bfd252d2e7ecd 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -359,17 +359,16 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             eager=args.no_lazy,
             model_name=None,
         )
-        logger.info("Set model parameters")
-        model_instance.set_gguf_parameters()
 
         with open(lora_config, "r") as f:
             lparams: dict[str, Any] = json.load(f)
 
         alpha = lparams["lora_alpha"]
 
-        model_instance.gguf_writer.add_string("training.type", "finetune_lora")
-        model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
-
+        model_instance.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[model_instance.model_arch])
+        model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
+        model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+        model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
         model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
         logger.info("Exporting model...")
         model_instance.write()
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a95a44237e348..390d2d1890e2a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -19,6 +19,7 @@
 
 class Keys:
     class General:
+        TYPE                 = "general.type"
         ARCHITECTURE         = "general.architecture"
         QUANTIZATION_VERSION = "general.quantization_version"
         ALIGNMENT            = "general.alignment"
@@ -120,10 +121,17 @@ class Tokenizer:
         MIDDLE_ID            = "tokenizer.ggml.middle_token_id"
         EOT_ID               = "tokenizer.ggml.eot_token_id"
 
+    class Adapter:
+        TYPE       = "adapter.type"
+        LORA_ALPHA = "adapter.lora.alpha"
+
 
 #
 # recommended mapping of model tensor names for storage in gguf
 #
 
+class GGUFType:
+    MODEL   = "model"
+    ADAPTER = "adapter"
 class MODEL_ARCH(IntEnum):
     LLAMA     = auto()
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index cf95541629032..b0197961d46a8 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -424,6 +424,9 @@ def close(self) -> None:
                 fout.close()
             self.fout = None
 
+    def add_type(self, type_name: str) -> None:
+        self.add_string(Keys.General.TYPE, type_name)
+
     def add_architecture(self) -> None:
         self.add_string(Keys.General.ARCHITECTURE, self.arch)
 
diff --git a/src/llama.cpp b/src/llama.cpp
index f94bee142fcff..07bb427135d8c 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -287,6 +287,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
 };
 
 enum llm_kv {
+    LLM_KV_GENERAL_TYPE,
     LLM_KV_GENERAL_ARCHITECTURE,
     LLM_KV_GENERAL_QUANTIZATION_VERSION,
     LLM_KV_GENERAL_ALIGNMENT,
@@ -378,11 +379,12 @@ enum llm_kv {
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,
 
-    LLM_KV_TRAINING_TYPE,
-    LLM_KV_TRAINING_LORA_ALPHA,
+    LLM_KV_ADAPTER_TYPE,
+    LLM_KV_ADAPTER_LORA_ALPHA,
 };
 
 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+    { LLM_KV_GENERAL_TYPE,                  "general.type"                 },
     { LLM_KV_GENERAL_ARCHITECTURE,          "general.architecture"         },
     { LLM_KV_GENERAL_QUANTIZATION_VERSION,  "general.quantization_version" },
     { LLM_KV_GENERAL_ALIGNMENT,             "general.alignment"            },
@@ -474,8 +476,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MIDDLE_ID,           "tokenizer.ggml.middle_token_id" },
     { LLM_KV_TOKENIZER_EOT_ID,              "tokenizer.ggml.eot_token_id"    },
 
-    { LLM_KV_TRAINING_TYPE,                 "training.type"                  },
-    { LLM_KV_TRAINING_LORA_ALPHA,           "training.lora.alpha"            },
+    { LLM_KV_ADAPTER_TYPE,                  "adapter.type"                   },
+    { LLM_KV_ADAPTER_LORA_ALPHA,            "adapter.lora.alpha"             },
 };
 
 struct LLM_KV {
@@ -18596,20 +18598,27 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
             return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
         };
         LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
-        auto lora_arch_name = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
-        auto lora_arch = llm_arch_from_string(lora_arch_name);
-        if (lora_arch != model->arch) {
+
+        auto general_type = get_kv_str(llm_kv(LLM_KV_GENERAL_TYPE));
+        if (general_type != "adapter") {
+            gguf_free(ctx_gguf);
+            throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
+        }
+
+        auto general_arch_str = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
+        auto general_arch = llm_arch_from_string(general_arch_str);
+        if (general_arch != model->arch) {
             gguf_free(ctx_gguf);
             throw std::runtime_error("model arch and LoRA arch mismatch");
         }
 
-        auto train_type = get_kv_str(llm_kv(LLM_KV_TRAINING_TYPE));
-        if (train_type != "finetune_lora") {
+        auto adapter_type = get_kv_str(llm_kv(LLM_KV_ADAPTER_TYPE));
+        if (adapter_type != "lora") {
             gguf_free(ctx_gguf);
-            throw std::runtime_error("expect training.type to be finetune_lora, but got: " + train_type);
+            throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
         }
 
-        adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
+        adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf);
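
For reference, the metadata layout this patch introduces can be emitted from Python with the patched gguf-py. This is a minimal sketch mirroring the calls in convert_lora_to_gguf.py above; the arch name "llama", the alpha value 16.0, and the output path adapter.gguf are placeholder assumptions, not part of the patch:

```python
from pathlib import Path
import gguf

# Writer is created without a path (as in the patched convert_lora_to_gguf.py);
# the arch must match the base model the adapter was trained against.
writer = gguf.GGUFWriter(path=None, arch="llama")

# New KV metadata introduced by this patch:
writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)  # general.type = "adapter"
writer.add_string(gguf.Keys.Adapter.TYPE, "lora")                 # adapter.type = "lora"
writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, 16.0)            # adapter.lora.alpha (placeholder value)
writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

# ... add adapter tensors here, then serialize:
writer.write_header_to_file(path=Path("adapter.gguf"))
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```

On the load side, src/llama.cpp now rejects any file whose general.type is not "adapter" or whose adapter.type is not "lora", replacing the old check for training.type == "finetune_lora".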