Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-07-30 14:13:57 -04:00
kv-cache : fix split_equal handling in unified implementation (#14130)
ggml-ci
@@ -877,6 +877,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         memcpy(cross.v_embd.data(), embd, ggml_nbytes(t_embd));

         // remember the sequence ids used during the encoding - needed for cross attention later
+        // TODO: the sequence indexing here is likely not correct in the general case
+        //       probably works only for split_simple
         cross.seq_ids_enc.resize(n_tokens);
         for (int32_t i = 0; i < n_tokens; i++) {
             cross.seq_ids_enc[i].clear();
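The hunk cuts off inside the loop. Presumably the body goes on to insert each token's sequence ids into cross.seq_ids_enc[i]; the standalone C++ sketch below mirrors that bookkeeping with local stand-in data (the containers here are mocks for illustration, not llama.cpp's real batch structs) and restates the TODO's concern in a comment.

// Minimal runnable sketch of the seq_ids_enc bookkeeping; all names below are
// local stand-ins, not the real llama.cpp types.
#include <cstdint>
#include <cstdio>
#include <set>
#include <vector>

int main() {
    // per-token list of sequence ids, as a ubatch would carry them
    const std::vector<std::vector<int32_t>> seq_id = {{0}, {0}, {1}, {1, 2}};
    const int32_t n_tokens = (int32_t) seq_id.size();

    // one set of sequence ids per encoded token - consulted later by cross attention
    std::vector<std::set<int32_t>> seq_ids_enc(n_tokens);
    for (int32_t i = 0; i < n_tokens; i++) {
        seq_ids_enc[i].clear();
        for (int32_t s : seq_id[i]) {
            seq_ids_enc[i].insert(s);
        }
    }

    // NOTE: indexing by i assumes the ubatch preserves the original batch
    // order, which holds for split_simple but not necessarily for split_equal,
    // where tokens can be regrouped by sequence - the concern the TODO in the
    // diff points at.
    for (int32_t i = 0; i < n_tokens; i++) {
        printf("token %d -> %zu seq id(s)\n", i, seq_ids_enc[i].size());
    }
    return 0;
}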