llama : deprecate llama_kv_self_ API (#14030)

* llama : deprecate llama_kv_self_ API

ggml-ci

* llama : allow llama_memory_(nullptr)

ggml-ci

* memory : add flag for optional data clear in llama_memory_clear

ggml-ci
2025-06-29 04:35:05 +00:00 · 2025-06-06 14:11:15 +03:00
parent 487a5e0401
commit 745aa5319b
34 changed files with 206 additions and 127 deletions
--- a/src/llama-kv-cache-recurrent.cpp
+++ b/src/llama-kv-cache-recurrent.cpp
@@ -117,18 +117,21 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
    }
 }

-void llama_kv_cache_recurrent::clear() {
+void llama_kv_cache_recurrent::clear(bool data) {
    for (int32_t i = 0; i < (int32_t) size; ++i) {
        cells[i].pos = -1;
        cells[i].seq_id.clear();
        cells[i].src = -1;
        cells[i].tail = -1;
    }
+
    head = 0;
    used = 0;

-    for (auto & buf : bufs) {
-        ggml_backend_buffer_clear(buf.get(), 0);
+    if (data) {
+        for (auto & buf : bufs) {
+            ggml_backend_buffer_clear(buf.get(), 0);
+        }
    }
 }

@@ -723,7 +726,7 @@ void llama_kv_cache_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq

    if (!res) {
        if (seq_id == -1) {
-            clear();
+            clear(true);
        } else {
            seq_rm(seq_id, -1, -1);
        }
@@ -880,7 +883,7 @@ bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce
            return false;
        }

-        clear();
+        clear(true);

        for (uint32_t i = 0; i < cell_count; ++i) {
            kv_cell & cell = cells[i];