llamafile : disable sgemm for batch-size 1 (#9330)

2025-06-27 03:55:20 +00:00 · 2024-09-07 19:02:26 +00:00
parent 1b9ae5189c
commit e536426ded
1 changed file with 4 additions and 0 deletions
--- a/ggml/src/llamafile/sgemm.cpp
+++ b/ggml/src/llamafile/sgemm.cpp
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
    assert(nth > 0);
    assert(ith < nth);

+    // only enable sgemm for prompt processing
+    if (n < 2)
+        return false;
+
    if (Ctype != GGML_TYPE_F32)
        return false;