From afa6800eb1dbb50f0868ca008cfb1940ab966357 Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Sun, 11 Aug 2024 17:03:41 -0700
Subject: [PATCH] feat: whitelist jina bert v2 for llama-server embedding

---
 examples/server/server.cpp | 5 ++++-
 src/llama.cpp              | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 360f571e42867..e576b65a0eb17 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -693,7 +693,10 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);
 
         add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+
+        if (!llama_model_has_encoder(model)) {
+            GGML_ASSERT(llama_add_eos_token(model) != 1);
+        }
 
         return true;
     }
diff --git a/src/llama.cpp b/src/llama.cpp
index aaf8db496ecbd..3ea82265271f6 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17190,6 +17190,7 @@ bool llama_model_has_encoder(const struct llama_model * model) {
     switch (model->arch) {
         case LLM_ARCH_T5:        return true;
         case LLM_ARCH_T5ENCODER: return true;
+        case LLM_ARCH_JINA_BERT_V2: return true;
         default:                 return false;
     }
 }