From 086e7f6ebccbce9379ac64a51307507e5d196c36 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 10:06:42 +0300
Subject: [PATCH 1/3] llama : disambiguate API

ggml-ci
---
 examples/embedding/embedding.cpp     |  4 ++--
 examples/perplexity/perplexity.cpp   |  2 +-
 examples/retrieval/retrieval.cpp     |  2 +-
 examples/server/server.cpp           |  2 +-
 examples/speculative/speculative.cpp |  4 ++--
 include/llama.h                      | 12 ++++++++----
 src/llama.cpp                        |  8 ++++----
 tests/test-tokenizer-1-bpe.cpp       |  4 ++--
 tests/test-tokenizer-1-spm.cpp       |  4 ++--
 9 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index b05aa006e7da5..f03f7d616f75c 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -31,7 +31,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector & toke
 }

 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     const struct llama_model * model = llama_get_model(ctx);

     // clear previous kv_cache values (irrelevant for embeddings)
@@ -114,7 +114,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx       = llama_n_ctx(ctx);

-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);

     if (llama_model_has_encoder(model) && llama_model_has_decoder(model)) {
         fprintf(stderr, "%s: error: computing embeddings in encoder-decoder models is not supported\n", __func__);
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 484dd589109c7..75fedc75a6397 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -796,7 +796,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     size_t hs_task_count = prompt_lines.size()/6;
     fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);

-    const bool is_spm = llama_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
+    const bool is_spm = llama_get_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
     fprintf(stderr, "================================= is_spm = %d\n", is_spm);

     // The tasks should be randomized so the score stabilizes quickly.
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index aab9d81058af9..8fb99ac597d29 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -162,7 +162,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx       = llama_n_ctx(ctx);

-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
         fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
         return 1;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cc938e80d6a6d..99364297a80ff 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2450,7 +2450,7 @@ struct server_context {

     json model_meta() const {
         return json {
-            {"vocab_type",  llama_vocab_type    (model)},
+            {"vocab_type",  llama_get_vocab_type(model)},
             {"n_vocab",     llama_n_vocab       (model)},
             {"n_ctx_train", llama_n_ctx_train   (model)},
             {"n_embd",      llama_n_embd        (model)},
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 1616edecbbef6..91721a4c5aa09 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -82,10 +82,10 @@ int main(int argc, char ** argv) {
     model_dft = llama_init_dft.model;
     ctx_dft   = llama_init_dft.context;

-    const bool vocab_type_tgt = llama_vocab_type(model_tgt);
+    const bool vocab_type_tgt = llama_get_vocab_type(model_tgt);
     LOG("vocab_type tgt: %d\n", vocab_type_tgt);

-    const bool vocab_type_dft = llama_vocab_type(model_dft);
+    const bool vocab_type_dft = llama_get_vocab_type(model_dft);
     LOG("vocab_type dft: %d\n", vocab_type_dft);

     if (vocab_type_tgt != vocab_type_dft) {
diff --git a/include/llama.h b/include/llama.h
index bfc37e88bbb74..92e81f2971f7f 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -467,10 +467,14 @@ extern "C" {
     LLAMA_API uint32_t llama_n_ubatch   (const struct llama_context * ctx);
     LLAMA_API uint32_t llama_n_seq_max  (const struct llama_context * ctx);

-    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
-
-    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
-    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);
+    LLAMA_API enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx);
+    LLAMA_API enum llama_vocab_type   llama_get_vocab_type  (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type    llama_get_rope_type   (const struct llama_model * model);
+
+    // DEPRECATED: use the API above
+    //LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
+    //LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
+    //LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
diff --git a/src/llama.cpp b/src/llama.cpp
index 2113c72f3c90b..b4cf7f5483927 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5945,7 +5945,7 @@ static void llm_load_hparams(
         hparams.use_alibi = true;
     }

-    hparams.rope_type = llama_rope_type(&model);
+    hparams.rope_type = llama_get_rope_type(&model);
 }

 static void llm_load_vocab(
@@ -18469,11 +18469,11 @@ uint32_t llama_n_seq_max(const struct llama_context * ctx) {
     return ctx->kv_self.size;
 }

-enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
+enum llama_vocab_type llama_get_vocab_type(const struct llama_model * model) {
     return model->vocab.type;
 }

-enum llama_rope_type llama_rope_type(const struct llama_model * model) {
+enum llama_rope_type llama_get_rope_type(const struct llama_model * model) {
     switch (model->arch) {
         // these models do not use RoPE
         case LLM_ARCH_GPT2:
@@ -18536,7 +18536,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
     return LLAMA_ROPE_TYPE_NONE;
 }

-enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
+enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx) {
     return ctx->cparams.pooling_type;
 }

diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index 9498387e0f212..4755b36d326ca 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -64,8 +64,8 @@ int main(int argc, char **argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+    //GGML_ASSERT(llama_get_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
+    if (llama_get_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
         return 99;
     }

diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp
index 7ca9e2ca6a671..ebcedf3e7e2d6 100644
--- a/tests/test-tokenizer-1-spm.cpp
+++ b/tests/test-tokenizer-1-spm.cpp
@@ -52,8 +52,8 @@ int main(int argc, char ** argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+    //GGML_ASSERT(llama_get_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+    if (llama_get_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
         return 99;
     }


From 4e379017e6f91d6da8963af87002fb97a709e79e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 18:32:11 +0300
Subject: [PATCH 2/3] llama : fix comment

---
 include/llama.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/llama.h b/include/llama.h
index 92e81f2971f7f..18916cde23da0 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -471,7 +471,7 @@ extern "C" {
     LLAMA_API enum llama_vocab_type   llama_get_vocab_type  (const struct llama_model * model);
     LLAMA_API enum llama_rope_type    llama_get_rope_type   (const struct llama_model * model);

-    // DEPRECATED: use the API above
+    // OLD API (use the new API above: https://github.com/ggerganov/llama.cpp/pull/9270)
     //LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
     //LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
     //LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

From 40fa68cb46fd0610420921dcf22562a88bf818bb Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 18:32:24 +0300
Subject: [PATCH 3/3] readme : add API change notice

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index bb2b93a35021f..769f2f81d55d9 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

 ## Recent API changes

+- [2024 Sep 02] Rename `llama_xxx_type` to `llama_get_xxx_type` https://github.com/ggerganov/llama.cpp/pull/9270
 - [2024 Jun 26] The source code and CMake build scripts have been restructured https://github.com/ggerganov/llama.cpp/pull/8006
 - [2024 Apr 21] `llama_token_to_piece` can now optionally render special tokens https://github.com/ggerganov/llama.cpp/pull/6807
 - [2024 Apr 4] State and session file functions reorganized under `llama_state_*` https://github.com/ggerganov/llama.cpp/pull/6341
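
Usage sketch (hypothetical caller, not part of the patch series above): the snippet below illustrates how downstream code would move to the getters renamed in https://github.com/ggerganov/llama.cpp/pull/9270. Only llama_get_vocab_type, llama_get_rope_type and llama_get_pooling_type come from this change; the setup and teardown calls (llama_backend_init, llama_load_model_from_file, llama_new_context_with_model, llama_free, llama_free_model, llama_backend_free) are assumed to match the llama.h of that period, and the model path is a placeholder.

    // hypothetical migration example -- a sketch, not code from the patches above
    #include "llama.h"

    #include <cstdio>

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
        if (model == nullptr) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // old API (now commented out in llama.h)  ->  new API from this patch series
        //   llama_vocab_type  (model)             ->  llama_get_vocab_type  (model)
        //   llama_rope_type   (model)             ->  llama_get_rope_type   (model)
        //   llama_pooling_type(ctx)               ->  llama_get_pooling_type(ctx)
        const enum llama_vocab_type   vt = llama_get_vocab_type  (model);
        const enum llama_rope_type    rt = llama_get_rope_type   (model);
        const enum llama_pooling_type pt = llama_get_pooling_type(ctx);

        printf("vocab_type = %d, rope_type = %d, pooling_type = %d\n", (int) vt, (int) rt, (int) pt);

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();

        return 0;
    }

Note that the old declarations are commented out rather than kept as deprecated aliases, so callers that still use the old names fail at compile time and have to switch to the llama_get_* spellings.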