Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-07-30 14:13:57 -04:00
kv-cache : fix split_equal handling in unified implementation (#14130)
ggml-ci
@@ -877,6 +877,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         memcpy(cross.v_embd.data(), embd, ggml_nbytes(t_embd));

         // remember the sequence ids used during the encoding - needed for cross attention later
+        // TODO: the sequence indexing here is likely not correct in the general case
+        //       probably works only for split_simple
         cross.seq_ids_enc.resize(n_tokens);
         for (int32_t i = 0; i < n_tokens; i++) {
             cross.seq_ids_enc[i].clear();
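The hunk cuts off inside the loop. Presumably the body goes on to insert each token's sequence ids into cross.seq_ids_enc[i]; the standalone C++ sketch below mirrors that bookkeeping with local stand-in data (the containers here are mocks for illustration, not llama.cpp's real batch structs) and restates the TODO's concern in a comment.

// Minimal runnable sketch of the seq_ids_enc bookkeeping; all names below are
// local stand-ins, not the real llama.cpp types.
#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

int main() {
    // per-token list of sequence ids, as a ubatch would carry them
    const std::vector<std::vector<int32_t>> seq_id = {{0}, {0}, {1}, {1, 2}};
    const int32_t n_tokens = (int32_t) seq_id.size();

    // one set of sequence ids per encoded token - consulted later by cross attention
    std::vector<std::set<int32_t>> seq_ids_enc(n_tokens);
    for (int32_t i = 0; i < n_tokens; i++) {
        seq_ids_enc[i].clear();
        for (int32_t s : seq_id[i]) {
            seq_ids_enc[i].insert(s);
        }
    }

    // NOTE: indexing by i assumes the ubatch preserves the original batch
    // order, which holds for split_simple but not necessarily for split_equal,
    // where tokens can be regrouped by sequence - the concern the TODO in the
    // diff points at.
    for (int32_t i = 0; i < n_tokens; i++) {
        printf("token %d -> %zu seq id(s)\n", i, seq_ids_enc[i].size());
    }
    return 0;
}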