mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 20:45:04 +00:00
llama : fix non-causal mask for gemma 3 (#12615)
This commit is contained in:
@ -1317,8 +1317,8 @@ int llama_context::decode(llama_batch & inp_batch) {
|
||||
n_outputs = n_outputs_new;
|
||||
}
|
||||
|
||||
// non-causal masks do not use the KV cache
|
||||
if (hparams.causal_attn) {
|
||||
// find KV slot
|
||||
{
|
||||
kv_self_update();
|
||||
|
||||
// if we have enough unused cells before the current head ->
|
||||
|
Reference in New Issue
Block a user