From e7f5c7d924f2f38f3c4f5e9e5607abc079182cfd Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Sat, 7 Sep 2024 19:02:26 +0000
Subject: [PATCH] llamafile : disable sgemm for batch-size 1 (#9330)

---
 ggml/src/llamafile/sgemm.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml/src/llamafile/sgemm.cpp b/ggml/src/llamafile/sgemm.cpp
index f0988ba7cd24c..d0c2bb284509b 100644
--- a/ggml/src/llamafile/sgemm.cpp
+++ b/ggml/src/llamafile/sgemm.cpp
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     assert(nth > 0);
     assert(ith < nth);
 
+    // only enable sgemm for prompt processing; skip it for batch size 1 (n < 2)
+    if (n < 2)
+        return false;
+
     if (Ctype != GGML_TYPE_F32)
         return false;