kv-cache : refactor the update/defrag mechanism (#13988)

* kv-cache : refactor update mechanism

ggml-ci

* memory : improve status handling

* defrag : reset head + add comments

ggml-ci

* cont : minor fixes

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-04 18:58:20 +03:00
committed by GitHub
parent 2589ad3704
commit 3e63a58ef7
11 changed files with 340 additions and 191 deletions

View File

@@ -52,7 +52,8 @@ struct llama_context {
// return true if the KV cache was updated
// TODO: remove
bool kv_self_update();
bool kv_self_update(bool optimize);
void kv_self_defrag_sched();
enum llama_pooling_type pooling_type() const;
@@ -231,6 +232,9 @@ private:
std::unique_ptr<llama_memory_i> memory;
// TODO: temporary, until the llama_kv_self_defrag() API is removed
bool memory_force_optimize = false;
// decode output (2-dimensional array: [n_outputs][n_vocab])
size_t logits_size = 0; // capacity (of floats) for logits
float * logits = nullptr;