kv-cache : fix split_equal handling in unified implementation (#14130)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-12 10:02:15 +03:00
committed by GitHub
parent a20b2b05bc
commit 9596506965
3 changed files with 128 additions and 71 deletions

View File

@@ -877,6 +877,8 @@ int llama_context::encode(llama_batch & inp_batch) {
memcpy(cross.v_embd.data(), embd, ggml_nbytes(t_embd));
// remember the sequence ids used during the encoding - needed for cross attention later
// TODO: the sequence indexing here is likely not correct in the general case
// probably works only for split_simple
cross.seq_ids_enc.resize(n_tokens);
for (int32_t i = 0; i < n_tokens; i++) {
cross.seq_ids_enc[i].clear();