From 086e7f6ebccbce9379ac64a51307507e5d196c36 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 10:06:42 +0300
Subject: [PATCH 1/3] llama : disambiguate API

ggml-ci
---
 examples/embedding/embedding.cpp     |  4 ++--
 examples/perplexity/perplexity.cpp   |  2 +-
 examples/retrieval/retrieval.cpp     |  2 +-
 examples/server/server.cpp           |  2 +-
 examples/speculative/speculative.cpp |  4 ++--
 include/llama.h                      | 12 ++++++++----
 src/llama.cpp                        |  8 ++++----
 tests/test-tokenizer-1-bpe.cpp       |  4 ++--
 tests/test-tokenizer-1-spm.cpp       |  4 ++--
 9 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index b05aa006e7da5..f03f7d616f75c 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -31,7 +31,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector & toke
 }

 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     const struct llama_model * model = llama_get_model(ctx);

     // clear previous kv_cache values (irrelevant for embeddings)
@@ -114,7 +114,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx       = llama_n_ctx(ctx);

-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);

     if (llama_model_has_encoder(model) && llama_model_has_decoder(model)) {
         fprintf(stderr, "%s: error: computing embeddings in encoder-decoder models is not supported\n", __func__);
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 484dd589109c7..75fedc75a6397 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -796,7 +796,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     size_t hs_task_count = prompt_lines.size()/6;
     fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);

-    const bool is_spm = llama_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
+    const bool is_spm = llama_get_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
     fprintf(stderr, "================================= is_spm = %d\n", is_spm);

     // The tasks should be randomized so the score stabilizes quickly.
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index aab9d81058af9..8fb99ac597d29 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -162,7 +162,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx       = llama_n_ctx(ctx);

-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
         fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
         return 1;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cc938e80d6a6d..99364297a80ff 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2450,7 +2450,7 @@ struct server_context {

     json model_meta() const {
         return json {
-            {"vocab_type",  llama_vocab_type    (model)},
+            {"vocab_type",  llama_get_vocab_type(model)},
             {"n_vocab",     llama_n_vocab       (model)},
             {"n_ctx_train", llama_n_ctx_train   (model)},
             {"n_embd",      llama_n_embd        (model)},
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 1616edecbbef6..91721a4c5aa09 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -82,10 +82,10 @@ int main(int argc, char ** argv) {
     model_dft = llama_init_dft.model;
     ctx_dft   = llama_init_dft.context;

-    const bool vocab_type_tgt = llama_vocab_type(model_tgt);
+    const bool vocab_type_tgt = llama_get_vocab_type(model_tgt);
     LOG("vocab_type tgt: %d\n", vocab_type_tgt);

-    const bool vocab_type_dft = llama_vocab_type(model_dft);
+    const bool vocab_type_dft = llama_get_vocab_type(model_dft);
     LOG("vocab_type dft: %d\n", vocab_type_dft);

     if (vocab_type_tgt != vocab_type_dft) {
diff --git a/include/llama.h b/include/llama.h
index bfc37e88bbb74..92e81f2971f7f 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -467,10 +467,14 @@ extern "C" {
     LLAMA_API uint32_t llama_n_ubatch   (const struct llama_context * ctx);
     LLAMA_API uint32_t llama_n_seq_max  (const struct llama_context * ctx);

-    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
-
-    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
-    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);
+    LLAMA_API enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx);
+    LLAMA_API enum llama_vocab_type   llama_get_vocab_type  (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type    llama_get_rope_type   (const struct llama_model * model);
+
+    // DEPRECATED: use the API above
+    //LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
+    //LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
+    //LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
diff --git a/src/llama.cpp b/src/llama.cpp
index 2113c72f3c90b..b4cf7f5483927 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5945,7 +5945,7 @@ static void llm_load_hparams(
         hparams.use_alibi = true;
     }

-    hparams.rope_type = llama_rope_type(&model);
+    hparams.rope_type = llama_get_rope_type(&model);
 }

 static void llm_load_vocab(
@@ -18469,11 +18469,11 @@ uint32_t llama_n_seq_max(const struct llama_context * ctx) {
     return ctx->kv_self.size;
 }

-enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
+enum llama_vocab_type llama_get_vocab_type(const struct llama_model * model) {
     return model->vocab.type;
 }

-enum llama_rope_type llama_rope_type(const struct llama_model * model) {
+enum llama_rope_type llama_get_rope_type(const struct llama_model * model) {
     switch (model->arch) {
         // these models do not use RoPE
         case LLM_ARCH_GPT2:
@@ -18536,7 +18536,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
     return LLAMA_ROPE_TYPE_NONE;
 }

-enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
+enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx) {
     return ctx->cparams.pooling_type;
 }

diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index 9498387e0f212..4755b36d326ca 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -64,8 +64,8 @@ int main(int argc, char **argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+    //GGML_ASSERT(llama_get_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
+    if (llama_get_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
         return 99;
     }

diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp
index 7ca9e2ca6a671..ebcedf3e7e2d6 100644
--- a/tests/test-tokenizer-1-spm.cpp
+++ b/tests/test-tokenizer-1-spm.cpp
@@ -52,8 +52,8 @@ int main(int argc, char ** argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+    //GGML_ASSERT(llama_get_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+    if (llama_get_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
         return 99;
     }


From 4e379017e6f91d6da8963af87002fb97a709e79e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 18:32:11 +0300
Subject: [PATCH 2/3] llama : fix comment

---
 include/llama.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/llama.h b/include/llama.h
index 92e81f2971f7f..18916cde23da0 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -471,7 +471,7 @@ extern "C" {
     LLAMA_API enum llama_vocab_type   llama_get_vocab_type  (const struct llama_model * model);
     LLAMA_API enum llama_rope_type    llama_get_rope_type   (const struct llama_model * model);

-    // DEPRECATED: use the API above
+    // OLD API (use the new API above: https://github.com/ggerganov/llama.cpp/pull/9270)
     //LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
     //LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
     //LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

From 40fa68cb46fd0610420921dcf22562a88bf818bb Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 2 Sep 2024 18:32:24 +0300
Subject: [PATCH 3/3] readme : add API change notice

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index bb2b93a35021f..769f2f81d55d9 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

 ## Recent API changes

+- [2024 Sep 02] Rename `llama_xxx_type` to `llama_get_xxx_type` https://github.com/ggerganov/llama.cpp/pull/9270
 - [2024 Jun 26] The source code and CMake build scripts have been restructured https://github.com/ggerganov/llama.cpp/pull/8006
 - [2024 Apr 21] `llama_token_to_piece` can now optionally render special tokens https://github.com/ggerganov/llama.cpp/pull/6807
 - [2024 Apr 4] State and session file functions reorganized under `llama_state_*` https://github.com/ggerganov/llama.cpp/pull/6341
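
Usage sketch (hypothetical caller, not part of the patch series above): the snippet below illustrates how downstream code would move to the getters renamed in https://github.com/ggerganov/llama.cpp/pull/9270. Only llama_get_vocab_type, llama_get_rope_type and llama_get_pooling_type come from this change; the setup and teardown calls (llama_backend_init, llama_load_model_from_file, llama_new_context_with_model, llama_free, llama_free_model, llama_backend_free) are assumed to match the llama.h of that period, and the model path is a placeholder.

    // hypothetical migration example -- a sketch, not code from the patches above
    #include "llama.h"

    #include <cstdio>

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
        if (model == nullptr) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // old API (now commented out in llama.h)  ->  new API from this patch series
        //   llama_vocab_type  (model)             ->  llama_get_vocab_type  (model)
        //   llama_rope_type   (model)             ->  llama_get_rope_type   (model)
        //   llama_pooling_type(ctx)               ->  llama_get_pooling_type(ctx)
        const enum llama_vocab_type   vt = llama_get_vocab_type  (model);
        const enum llama_rope_type    rt = llama_get_rope_type   (model);
        const enum llama_pooling_type pt = llama_get_pooling_type(ctx);

        printf("vocab_type = %d, rope_type = %d, pooling_type = %d\n", (int) vt, (int) rt, (int) pt);

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();

        return 0;
    }

Note that the old declarations are commented out rather than kept as deprecated aliases, so callers that still use the old names fail at compile time and have to switch to the llama_get_* spellings.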