From a5a85a3bc0c45d4f31f8ef4bc16ef158b0a8d670 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 24 Feb 2025 08:59:12 +0200
Subject: [PATCH] context : fix recurrent reserve

ggml-ci
---
 src/llama-context.cpp | 5 +++++
 src/llama-context.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 7628cbc9b..f73d4b9bf 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;
 
 void llama_context_recurrent::reserve() {
+    // simulate full KV cache
+    kv_self.n = kv_self.size;
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n);
+
     // TODO: implement recurrent-specific reserve logic
     llama_context::reserve();
 }
diff --git a/src/llama-context.h b/src/llama-context.h
index 0e55aae1c..2945cbabe 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -447,6 +447,7 @@ private:
         ggml_tensor * self_k_shift; // I32 [kv_size]
     } inp;
 
+protected:
     //
     // graph
     //
@@ -570,6 +571,7 @@ private:
         ggml_tensor * s_mask; // F32 [1, n_kv]
     } inp;
 
+protected:
    //
    // graph
    //
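
Note (not part of the patch): a minimal standalone sketch of the idea behind
the fix. The reserve pass builds a worst-case compute graph so that backend
buffers are allocated once, up front; setting kv_self.n = kv_self.size
simulates a completely full KV cache so the reserved graph covers the maximal
case. The stub types below (kv_cache_stub, context_recurrent_stub) are
hypothetical stand-ins, not the real llama.cpp classes.

    #include <cstdint>
    #include <cstdio>

    // hypothetical stand-in for the recurrent KV cache state
    struct kv_cache_stub {
        uint32_t n    = 0;   // cells currently in use
        uint32_t size = 0;   // total cells allocated
    };

    // hypothetical stand-in for llama_context_recurrent
    struct context_recurrent_stub {
        kv_cache_stub kv_self;

        void reserve() {
            // simulate full KV cache so buffer reservation sees the worst case
            kv_self.n = kv_self.size;
            std::printf("reserve: kv_self.n = %u\n", kv_self.n);
            // ... a real implementation would now build and reserve the
            //     worst-case compute graph (the base-class reserve) ...
        }
    };

    int main() {
        context_recurrent_stub ctx;
        ctx.kv_self.size = 4096;  // hypothetical cache capacity
        ctx.reserve();            // prints: reserve: kv_self.n = 4096
    }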