diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 7628cbc9b..f73d4b9bf 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;
 
 void llama_context_recurrent::reserve() {
+    // simulate full KV cache
+    kv_self.n = kv_self.size;
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n);
+
     // TODO: implement recurrent-specific reserve logic
     llama_context::reserve();
 }
diff --git a/src/llama-context.h b/src/llama-context.h
index 0e55aae1c..2945cbabe 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -447,6 +447,7 @@ private:
         ggml_tensor * self_k_shift; // I32 [kv_size]
     } inp;
 
+protected:
     //
     // graph
     //
@@ -570,6 +571,7 @@ private:
         ggml_tensor * s_mask; // F32 [1, n_kv]
     } inp;
 
+protected:
     //
     // graph
     //
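
For context, the reserve() override above applies a common pattern: before the base class sizes its compute buffers, kv_self.n is forced to kv_self.size so the reservation covers a completely full KV cache rather than the (typically tiny) state at initialization time. Below is a minimal, self-contained sketch of that idea; KvCache, Context, and graph_bytes are hypothetical stand-ins for illustration, not llama.cpp types.

    // Sketch of worst-case buffer reservation via a simulated full KV cache.
    // All names here are illustrative stand-ins, not llama.cpp API.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct KvCache {
        uint32_t n    = 0; // cells used by the batch currently being built
        uint32_t size = 0; // total cells in the cache
    };

    struct Context {
        KvCache kv_self;

        // pretend cost model: buffer demand grows with the active cell count
        size_t graph_bytes() const {
            return static_cast<size_t>(kv_self.n) * 4096;
        }

        // size the compute buffers for the worst case, not the current state
        size_t reserve() {
            kv_self.n = kv_self.size; // simulate full KV cache
            return graph_bytes();
        }
    };

    int main() {
        Context ctx;
        ctx.kv_self.size = 4096;
        ctx.kv_self.n    = 32; // only a few cells in use at the moment

        std::printf("current state needs %zu bytes\n", ctx.graph_bytes());
        std::printf("worst case reserves %zu bytes\n", ctx.reserve());
        return 0;
    }

The two protected: hunks in llama-context.h fit the same refactor: they appear to relax member visibility so the derived recurrent context can reach the base class's graph-building state; the sketch above ignores visibility for brevity.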