kv-cache : refactor the update/defrag mechanism (#13988)

* kv-cache : refactor update mechanism ggml-ci
* memory : improve status handling
* defrag : reset head + add comments ggml-ci
* cont : minor fixes ggml-ci
2025-07-28 21:23:55 -04:00 · 2025-06-04 18:58:20 +03:00
parent 2589ad3704
commit 3e63a58ef7
11 changed files with 340 additions and 191 deletions
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -52,7 +52,8 @@ struct llama_context {

    // return true if the KV cache was updated
    // TODO: remove
-    bool kv_self_update();
+    bool kv_self_update(bool optimize);
+    void kv_self_defrag_sched();

    enum llama_pooling_type pooling_type() const;

@@ -231,6 +232,9 @@ private:

    std::unique_ptr<llama_memory_i> memory;

+    // TODO: temporary, until the llama_kv_self_defrag() API is removed
+    bool memory_force_optimize = false;
+
    // decode output (2-dimensional array: [n_outputs][n_vocab])
    size_t  logits_size = 0; // capacity (of floats) for logits
    float * logits      = nullptr;