From 3222aae43de83c6d3fd085c57c79011d219d4711 Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Fri, 6 Sep 2024 03:44:19 +0000
Subject: [PATCH] only enable sgemm for prompt processing

---
 ggml/src/llamafile/sgemm.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml/src/llamafile/sgemm.cpp b/ggml/src/llamafile/sgemm.cpp
index f0988ba7cd24c..d0c2bb284509b 100644
--- a/ggml/src/llamafile/sgemm.cpp
+++ b/ggml/src/llamafile/sgemm.cpp
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     assert(nth > 0);
     assert(ith < nth);
 
+    // only enable sgemm for prompt processing
+    if (n < 2)
+        return false;
+
     if (Ctype != GGML_TYPE_F32)
         return false;
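
Note (editor's sketch, not part of the patch): in this mat-mul, n is the number of columns of B, i.e. the number of tokens processed at once, so n == 1 corresponds to single-token generation and n >= 2 to a prompt batch. The standalone function below only illustrates the gate the patch adds; its name and structure are hypothetical, and only the n < 2 check mirrors llamafile_sgemm().

    #include <cstdint>

    // Hypothetical dispatch helper: decide whether to attempt the tiled sgemm
    // path for an m x n output with inner dimension k.
    static bool try_sgemm_for_mul_mat(int64_t m, int64_t n, int64_t k) {
        (void)m; (void)k; // only n matters for this check

        // only enable sgemm for prompt processing: with a single token
        // (n == 1) the product is a matrix-vector multiply, which falls
        // back to the regular ggml path instead.
        if (n < 2)
            return false;

        // ... further checks (dtypes, dimensions, etc.) would follow here,
        // as they do in llamafile_sgemm().
        return true;
    }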