From a5e87bf4386eb8bad20d662a214472ad37aade1e Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 18:30:28 +0200
Subject: [PATCH 1/4] llama: fixed n_vocab for `no_vocab` models

---
 src/llama.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 0da764f9d1186..be2d8d3057098 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6061,8 +6061,13 @@ static void llm_load_vocab(
             vocab.special_mask_id = -1;
             vocab.linefeed_id     = -1;
 
+            // read vocab size from metadata
+            ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab);
+
             return;
-        } else if (tokenizer_model == "llama") {
+        }
+
+        if (tokenizer_model == "llama") {
             vocab.type = LLAMA_VOCAB_TYPE_SPM;
 
             // default special tokens

From 544b26640d0857844d9f4630ae8ffec7ce79f95e Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 18:35:57 +0200
Subject: [PATCH 2/4] llama: updated error output for `llama_decode_internal`
 and `llama_encode_internal`

---
 src/llama.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index be2d8d3057098..64074efdf5c51 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16582,7 +16582,7 @@ static int llama_decode_internal(
 
     const uint32_t n_tokens_all = batch_all.n_tokens;
 
     if (n_tokens_all == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0", __func__);
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
@@ -16595,7 +16595,7 @@ static int llama_decode_internal(
     if (batch_all.token) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
             if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
-                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch_all.token[i]);
                 return -1;
             }
         }
@@ -16883,7 +16883,7 @@ static int llama_encode_internal(
 
     const uint32_t n_tokens = batch.n_tokens;
 
     if (n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0", __func__);
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
@@ -16896,7 +16896,7 @@ static int llama_encode_internal(
     if (batch.token) {
         for (uint32_t i = 0; i < n_tokens; ++i) {
             if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
-                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]);
                 return -1;
             }
         }

From 9704f0e92822696a3652b8907eafbb654937c872 Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Mon, 16 Sep 2024 19:30:07 +0200
Subject: [PATCH 3/4] llama: log warning if there's no vocab_size in metadata

---
 src/llama.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 64074efdf5c51..d7855717a19ef 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6062,8 +6062,9 @@ static void llm_load_vocab(
             vocab.linefeed_id     = -1;
 
             // read vocab size from metadata
-            ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab);
-
+            if (!ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab, false)) {
+                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to 0\n", __func__);
+            }
             return;
         }
 

From 93ef595b4bb7860b9c7438e6cfdf300488ff1380 Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy
Date: Tue, 17 Sep 2024 11:23:52 +0200
Subject: [PATCH 4/4] llama: correct vocab size for logging

Co-authored-by: Georgi Gerganov
---
 src/llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index d7855717a19ef..1d8bdbe725c5d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6063,7 +6063,8 @@ static void llm_load_vocab(
 
             // read vocab size from metadata
             if (!ml.get_key(LLM_KV_VOCAB_SIZE, vocab.n_vocab, false)) {
-                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to 0\n", __func__);
+                vocab.n_vocab = 0;
+                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata, vocab.n_vocab will be set to %u\n", __func__, vocab.n_vocab);
             }
             return;
         }