From c252e0c4097b34666e5a81db9d0450d71fa3098f Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 13 May 2025 18:04:00 +0300
Subject: [PATCH] metal : optimize multi-sequence FA vec kernel (#13493)

* batched-bench : fix pp batch contents

* metal : optimize multi-sequence FA vec kernel

ggml-ci
---
 ggml/src/ggml-metal/ggml-metal.metal | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 9cfddf450..122ae5973 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
                 sm[tiisg] = pm[ic + tiisg];
             }
 
+            // skip -INF blocks
+            if (simd_max(sm[tiisg]) == -INFINITY) {
+                continue;
+            }
+
             // Q*K^T
             {
                 // each simdgroup processes 1 query and NE (NW/NL) head elements