From 18e109d3f40ce0a3bb6af3fc2df429191145002f Mon Sep 17 00:00:00 2001
From: slaren
Date: Fri, 15 Mar 2024 22:02:53 +0100
Subject: [PATCH] llama : fix Baichuan2 13B

---
 llama.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 8e185d4bfe773..0e42c1b8a9ba3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5903,7 +5903,7 @@ struct llm_build_context {
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
 
         // inp_pos - contains the positions
-        struct ggml_tensor * inp_pos = build_inp_pos();
+        struct ggml_tensor * inp_pos = model.type == MODEL_7B ? build_inp_pos() : nullptr;
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
@@ -5953,7 +5953,6 @@ struct llm_build_context {
                 cb(Qcur, "Qcur", il);
                 cb(Kcur, "Kcur", il);
 
-
                 cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
                         model.layers[il].wo, NULL,
                         Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
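
For context on the first hunk: Baichuan2 7B applies RoPE, which consumes the inp_pos positions tensor, while the 13B variant uses ALiBi, which consumes KQ_pos instead. Building inp_pos for 13B therefore creates a graph input that no node consumes, which is presumably what broke 13B before this patch. Below is a minimal, self-contained sketch of the guard the patch adds; model_type, tensor, and select_pos_input are hypothetical stand-ins for the llama.cpp/ggml internals, not the library's API.

    // Sketch of the guard added by this patch, under the assumption stated
    // above; the stand-in types here are hypothetical, not ggml's.
    #include <cstdio>

    enum model_type { MODEL_7B, MODEL_13B };

    struct tensor { const char * name; };

    static tensor g_inp_pos = { "inp_pos" }; // positions consumed by RoPE (7B path)
    static tensor g_kq_pos  = { "KQ_pos"  }; // positions consumed by ALiBi (13B path)

    static tensor * build_inp_pos()    { return &g_inp_pos; }
    static tensor * build_inp_KQ_pos() { return &g_kq_pos;  }

    // Only the 7B graph applies RoPE and therefore consumes inp_pos; building
    // the tensor for 13B would leave a graph input that nothing uses.
    static tensor * select_pos_input(model_type type) {
        return type == MODEL_7B ? build_inp_pos() : nullptr;
    }

    int main() {
        for (model_type t : { MODEL_7B, MODEL_13B }) {
            tensor * pos = select_pos_input(t);
            tensor * alt = (pos == nullptr) ? build_inp_KQ_pos() : nullptr;
            std::printf("%s -> %s\n", t == MODEL_7B ? "7B" : "13B",
                        pos ? pos->name : alt->name);
        }
        return 0;
    }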