From a5e87bf4386eb8bad20d662a214472ad37aade1e Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 18:30:28 +0200
Subject: [PATCH 1/4] llama: fixed n_vocab for `no_vocab` models

---
 src/llama.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 0da764f9d1186..be2d8d3057098 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6061,8 +6061,13 @@ static void llm_load_vocab(
             vocab.special_mask_id = -1;
             vocab.linefeed_id     = -1;
 
+            // read vocab size from metadata
+            ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab);
+
             return;
-        } else if (tokenizer_model == "llama") {
+        }
+
+        if (tokenizer_model == "llama") {
             vocab.type = LLAMA_VOCAB_TYPE_SPM;
 
             // default special tokens

From 544b26640d0857844d9f4630ae8ffec7ce79f95e Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 18:35:57 +0200
Subject: [PATCH 2/4] llama: updated error output for `llama_decode_internal`
 and `llama_encode_internal`

---
 src/llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index be2d8d3057098..64074efdf5c51 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16582,7 +16582,7 @@ static int llama_decode_internal(
 
     const uint32_t n_tokens_all = batch_all.n_tokens;
 
     if (n_tokens_all == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0", __func__);
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
@@ -16595,7 +16595,7 @@ static int llama_decode_internal(
     if (batch_all.token) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
             if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
-                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch_all.token[i]);
                 return -1;
             }
         }
@@ -16883,7 +16883,7 @@ static int llama_encode_internal(
 
     const uint32_t n_tokens = batch.n_tokens;
 
     if (n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0", __func__);
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
@@ -16896,7 +16896,7 @@ static int llama_encode_internal(
     if (batch.token) {
         for (uint32_t i = 0; i < n_tokens; ++i) {
             if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
-                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]);
                 return -1;
             }
         }

From 9704f0e92822696a3652b8907eafbb654937c872 Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 19:30:07 +0200
Subject: [PATCH 3/4] llama: log warning if there's no vocab_size in metadata

---
 src/llama.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 64074efdf5c51..d7855717a19ef 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6062,8 +6062,9 @@ static void llm_load_vocab(
             vocab.linefeed_id     = -1;
 
             // read vocab size from metadata
-            ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab);
-
+            if (!ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab, false)) {
+                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to 0\n", __func__);
+            }
             return;
         }
 

From 93ef595b4bb7860b9c7438e6cfdf300488ff1380 Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Tue, 17 Sep 2024 11:23:52 +0200
Subject: [PATCH 4/4] llama: correct vocab size for logging

Co-authored-by: Georgi Gerganov
---
 src/llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index d7855717a19ef..1d8bdbe725c5d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6063,7 +6063,8 @@ static void llm_load_vocab(
 
             // read vocab size from metadata
             if (!ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab, false)) {
-                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to 0\n", __func__);
+                vocab.n_vocab = 0;
+                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to %u\n", __func__, vocab.n_vocab);
             }
             return;
         }