From a5a85a3bc0c45d4f31f8ef4bc16ef158b0a8d670 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 24 Feb 2025 08:59:12 +0200
Subject: [PATCH] context : fix recurrent reserve

ggml-ci
---
 src/llama-context.cpp | 5 +++++
 src/llama-context.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 7628cbc9b..f73d4b9bf 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;
 
 void llama_context_recurrent::reserve() {
+    // simulate full KV cache
+    kv_self.n = kv_self.size;
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n);
+
     // TODO: implement recurrent-specific reserve logic
     llama_context::reserve();
 }
diff --git a/src/llama-context.h b/src/llama-context.h
index 0e55aae1c..2945cbabe 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -447,6 +447,7 @@ private:
         ggml_tensor * self_k_shift; // I32 [kv_size]
     } inp;
 
+protected:
     //
     // graph
     //
@@ -570,6 +571,7 @@ private:
         ggml_tensor * s_mask; // F32 [1, n_kv]
     } inp;
 
+protected:
    //
    // graph
    //
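
Note (not part of the patch): a minimal standalone sketch of the idea behind
the fix. The reserve pass builds a worst-case compute graph so that backend
buffers are allocated once, up front; setting kv_self.n = kv_self.size
simulates a completely full KV cache so the reserved graph covers the maximal
case. The stub types below (kv_cache_stub, context_recurrent_stub) are
hypothetical stand-ins, not the real llama.cpp classes.

    #include <cstdint>
    #include <cstdio>

    // hypothetical stand-in for the recurrent KV cache state
    struct kv_cache_stub {
        uint32_t n    = 0;   // cells currently in use
        uint32_t size = 0;   // total cells allocated
    };

    // hypothetical stand-in for llama_context_recurrent
    struct context_recurrent_stub {
        kv_cache_stub kv_self;

        void reserve() {
            // simulate full KV cache so buffer reservation sees the worst case
            kv_self.n = kv_self.size;
            std::printf("reserve: kv_self.n = %u\n", kv_self.n);
            // ... a real implementation would now build and reserve the
            //     worst-case compute graph (the base-class reserve) ...
        }
    };

    int main() {
        context_recurrent_stub ctx;
        ctx.kv_self.size = 4096;  // hypothetical cache capacity
        ctx.reserve();            // prints: reserve: kv_self.n = 4096
    }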