
Commit

change kv metadata
ngxson committed Jul 15, 2024
1 parent 9175f4b commit 0ba23ba
Showing 5 changed files with 36 additions and 16 deletions.
1 change: 1 addition & 0 deletions convert_hf_to_gguf.py
@@ -186,6 +186,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
return new_name

def set_gguf_parameters(self):
self.gguf_writer.add_type(gguf.GGUFType.MODEL)
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
self.gguf_writer.add_block_count(self.block_count)

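The single added call tags every converted model file with general.type = "model", so tools can tell full models and LoRA adapters apart from metadata alone. A minimal read-back sketch with gguf-py's GGUFReader (the path some.gguf is illustrative, and treating a missing key as a plain model is an assumption for files written before this change):

```python
import gguf

# Read general.type back from an existing GGUF file.
# "some.gguf" is an illustrative path, not a file from this commit.
reader = gguf.GGUFReader("some.gguf")
field = reader.get_field(gguf.Keys.General.TYPE)

if field is None:
    # Assumption: files written before this change lack the key,
    # so fall back to treating them as full models.
    file_type = gguf.GGUFType.MODEL
else:
    # String fields keep their UTF-8 bytes in the indexed data part.
    file_type = bytes(field.parts[field.data[0]]).decode("utf-8")

print("file type:", file_type)  # "model" or "adapter"
```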
9 changes: 4 additions & 5 deletions convert_lora_to_gguf.py
@@ -359,17 +359,16 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
eager=args.no_lazy,
model_name=None,
)
logger.info("Set model parameters")
model_instance.set_gguf_parameters()

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha = lparams["lora_alpha"]

model_instance.gguf_writer.add_string("training.type", "finetune_lora")
model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))

model_instance.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[model_instance.model_arch])
model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
logger.info("Exporting model...")
model_instance.write()
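Instead of the old training.type / training.lora.alpha pair, the converter now writes general.type, adapter.type, adapter.lora.alpha and the quantization version. A standalone sketch that produces the same metadata block with gguf-py, with no tensors attached (path, architecture name and alpha value are illustrative):

```python
import gguf

# Write a metadata-only GGUF carrying the same keys the LoRA converter
# now emits. Path, architecture and alpha are illustrative values.
writer = gguf.GGUFWriter("adapter-meta.gguf", arch="llama")
writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)  # general.type = "adapter"
writer.add_string(gguf.Keys.Adapter.TYPE, "lora")                 # adapter.type
writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, 16.0)            # adapter.lora.alpha
writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()  # nothing to write: no tensors were added
writer.close()
```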
8 changes: 8 additions & 0 deletions gguf-py/gguf/constants.py
@@ -19,6 +19,7 @@

class Keys:
class General:
TYPE = "general.type"
ARCHITECTURE = "general.architecture"
QUANTIZATION_VERSION = "general.quantization_version"
ALIGNMENT = "general.alignment"
@@ -120,10 +121,17 @@ class Tokenizer:
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
EOT_ID = "tokenizer.ggml.eot_token_id"

class Adapter:
TYPE = "adapter.type"
LORA_ALPHA = "adapter.lora.alpha"

#
# recommended mapping of model tensor names for storage in gguf
#

class GGUFType:
MODEL = "model"
ADAPTER = "adapter"

class MODEL_ARCH(IntEnum):
LLAMA = auto()
3 changes: 3 additions & 0 deletions gguf-py/gguf/gguf_writer.py
@@ -424,6 +424,9 @@ def close(self) -> None:
fout.close()
self.fout = None

def add_type(self, type_name: str) -> None:
self.add_string(Keys.General.TYPE, type_name)

def add_architecture(self) -> None:
self.add_string(Keys.General.ARCHITECTURE, self.arch)

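The new add_type helper is a thin wrapper over add_string with the Keys.General.TYPE key. A small usage sketch (path and architecture are illustrative; the long form is left commented out so the same key is not written twice):

```python
import gguf

writer = gguf.GGUFWriter("demo.gguf", arch="llama")  # illustrative path/arch
writer.add_type(gguf.GGUFType.MODEL)  # records general.type = "model"
# Equivalent long form, left commented out to avoid writing the key twice:
# writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.MODEL)
```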
31 changes: 20 additions & 11 deletions src/llama.cpp
@@ -287,6 +287,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
};

enum llm_kv {
LLM_KV_GENERAL_TYPE,
LLM_KV_GENERAL_ARCHITECTURE,
LLM_KV_GENERAL_QUANTIZATION_VERSION,
LLM_KV_GENERAL_ALIGNMENT,
@@ -378,11 +379,12 @@ enum llm_kv {
LLM_KV_TOKENIZER_MIDDLE_ID,
LLM_KV_TOKENIZER_EOT_ID,

LLM_KV_TRAINING_TYPE,
LLM_KV_TRAINING_LORA_ALPHA,
LLM_KV_ADAPTER_TYPE,
LLM_KV_ADAPTER_LORA_ALPHA,
};

static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_GENERAL_TYPE, "general.type" },
{ LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
{ LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
{ LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
@@ -474,8 +476,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
{ LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },

{ LLM_KV_TRAINING_TYPE, "training.type" },
{ LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
{ LLM_KV_ADAPTER_TYPE, "adapter.type" },
{ LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
};

struct LLM_KV {
@@ -18596,20 +18598,27 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
};
LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
auto lora_arch_name = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
auto lora_arch = llm_arch_from_string(lora_arch_name);
if (lora_arch != model->arch) {

auto general_type = get_kv_str(llm_kv(LLM_KV_GENERAL_TYPE));
if (general_type != "adapter") {
gguf_free(ctx_gguf);
throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
}

auto general_arch_str = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
auto general_arch = llm_arch_from_string(general_arch_str);
if (general_arch != model->arch) {
gguf_free(ctx_gguf);
throw std::runtime_error("model arch and LoRA arch mismatch");
}

auto train_type = get_kv_str(llm_kv(LLM_KV_TRAINING_TYPE));
if (train_type != "finetune_lora") {
auto adapter_type = get_kv_str(llm_kv(LLM_KV_ADAPTER_TYPE));
if (adapter_type != "lora") {
gguf_free(ctx_gguf);
throw std::runtime_error("expect training.type to be finetune_lora, but got: " + train_type);
throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
}

adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
}

int n_tensors = gguf_get_n_tensors(ctx_gguf);
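The loader now checks the adapter metadata in order: general.type must be "adapter", general.architecture must match the base model, adapter.type must be "lora", and only then is adapter.lora.alpha read. A rough Python mirror of that pre-flight check using gguf-py, not the actual C++ loader (the function name and paths are illustrative):

```python
import gguf

def check_lora_adapter(path: str, model_arch: str) -> float:
    """Illustrative mirror of the llama.cpp checks; returns the LoRA alpha."""
    reader = gguf.GGUFReader(path)

    def get_str(key: str) -> str:
        field = reader.get_field(key)
        return "" if field is None else bytes(field.parts[field.data[0]]).decode("utf-8")

    general_type = get_str(gguf.Keys.General.TYPE)
    if general_type != gguf.GGUFType.ADAPTER:
        raise ValueError(f"expect general.type to be 'adapter', but got: {general_type}")

    if get_str(gguf.Keys.General.ARCHITECTURE) != model_arch:
        raise ValueError("model arch and LoRA arch mismatch")

    adapter_type = get_str(gguf.Keys.Adapter.TYPE)
    if adapter_type != "lora":
        raise ValueError(f"expect adapter.type to be 'lora', but got: {adapter_type}")

    alpha = reader.get_field(gguf.Keys.Adapter.LORA_ALPHA)
    return 0.0 if alpha is None else float(alpha.parts[alpha.data[0]][0])

# Example call (illustrative): check_lora_adapter("adapter.gguf", "llama")
```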
