kv-cells : fix tracking of seq_pos (#14339)

* kv-cells : fix tracking of seq_pos during cache reuse

ggml-ci

* cont : improve error message

ggml-ci

* cont : add more comments
This commit is contained in:
Georgi Gerganov
2025-06-23 12:27:35 +03:00
committed by GitHub
parent 3a9457df96
commit 7b50d589a8
5 changed files with 56 additions and 17 deletions

View File

@@ -3418,9 +3418,12 @@ struct server_context {
}
if (ret < -1) {
// TODO: update slot state based on llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
err = "Compute error.";
}
// TODO: handle ret == 2 (abort) when we start aborting
if (!err.empty()) {
SRV_ERR("%s, i = %d, n_batch = %d, ret = %d\n", err.c_str(), i, n_batch, ret);
for (auto & slot : slots) {