From 9db44a2a6303be84f5fc3c8df7f4d212d688b897 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Mon, 16 Jun 2025 13:34:25 -0600
Subject: [PATCH] fix: Fix resize vs reserve and skip null tensors in size
 computation

https://github.com/ggml-org/llama.cpp/pull/13979/files#r2149469788

Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart
Co-Authored-By: @younesbelkada
---
 src/llama-kv-cache-recurrent.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/llama-kv-cache-recurrent.cpp b/src/llama-kv-cache-recurrent.cpp
index be19edd31..802025e22 100644
--- a/src/llama-kv-cache-recurrent.cpp
+++ b/src/llama-kv-cache-recurrent.cpp
@@ -60,8 +60,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
         return it->second;
     };
 
-    k_l.reserve(n_layer);
-    v_l.reserve(n_layer);
+    k_l.resize(n_layer);
+    v_l.resize(n_layer);
 
     for (int i = 0; i < n_layer; i++) {
         if (filter && !filter(i)) {
@@ -647,7 +647,9 @@ size_t llama_kv_cache_recurrent::size_k_bytes() const {
     size_t size_k_bytes = 0;
 
     for (const auto & k : k_l) {
-        size_k_bytes += ggml_nbytes(k);
+        if (k != nullptr) {
+            size_k_bytes += ggml_nbytes(k);
+        }
     }
 
     return size_k_bytes;
@@ -657,7 +659,9 @@ size_t llama_kv_cache_recurrent::size_v_bytes() const {
     size_t size_v_bytes = 0;
 
     for (const auto & v : v_l) {
-        size_v_bytes += ggml_nbytes(v);
+        if (v != nullptr) {
+            size_v_bytes += ggml_nbytes(v);
+        }
     }
 
     return size_v_bytes;