context : fix recurrent reserve

ggml-ci
2025-07-14 22:58:10 +00:00 · 2025-02-24 08:59:12 +02:00
parent 0699a44c83
commit a5a85a3bc0
2 changed files with 7 additions and 0 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;

 void llama_context_recurrent::reserve() {
+    // simulate full KV cache: make kv_self.n equal to the cache size (kv_self.size) before delegating to the base reserve()
+    kv_self.n = kv_self.size; // NOTE(review): presumably lets llama_context::reserve() below plan for the worst case - confirm
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n); // debug-log the kv_self.n value that was just set
+
    // TODO: implement recurrent-specific reserve logic
    llama_context::reserve();
 }
--- a/src/llama-context.h
+++ b/src/llama-context.h
@ -447,6 +447,7 @@ private:
        ggml_tensor * self_k_shift;         // I32 [kv_size]
    } inp;

+protected:
    //
    // graph
    //
@ -570,6 +571,7 @@ private:
        ggml_tensor * s_mask; // F32 [1, n_kv]
    } inp;

+protected:
    //
    // graph
    //