mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 03:55:20 +00:00
llamafile : disable sgemm for batch-size 1 (#9330)
This commit is contained in:
@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
||||
assert(nth > 0);
|
||||
assert(ith < nth);
|
||||
|
||||
// only enable sgemm for prompt processing
|
||||
if (n < 2)
|
||||
return false;
|
||||
|
||||
if (Ctype != GGML_TYPE_F32)
|
||||
return false;
|
||||
|
||||
|
Reference in New Issue
Block a user