llama : fix non-causal mask for gemma 3 (#12615)

2025-06-29 20:45:04 +00:00 · 2025-03-30 00:07:37 +01:00
parent 0bb2919335
commit af6ae1efb2
2 changed files with 72 additions and 106 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1317,8 +1317,8 @@ int llama_context::decode(llama_batch & inp_batch) {
            n_outputs = n_outputs_new;
        }

-        // non-causal masks do not use the KV cache
-        if (hparams.causal_attn) {
+        // find KV slot
+        {
            kv_self_update();

            // if we have enough unused cells before the current head ->