metal : use FA-vec kernel up to batch size 20

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-05-13 08:52:34 +03:00
parent fdfc7de7fc
commit 78d70223c3

View File

@@ -4358,7 +4358,7 @@ static bool ggml_metal_encode_node(
// TODO: add vec kernels for (ne00%64 == 0) and maybe also for (ne00%32 == 0)
// for now avoiding mainly to keep the number of templates/kernels a bit lower
// these are now trivial to add after: https://github.com/ggml-org/llama.cpp/pull/12612
if (ne01 >= 4 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
switch (src1->type) {
case GGML_TYPE_F16:
{