Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-05 00:25:26 -04:00)
fix: Fix shift logic to defer to unified cache
Branch: HybridRecurrentCache
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported for recurrent
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
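
For readers without the full source tree at hand, here is a minimal self-contained sketch of the delegation pattern this commit adopts: the hybrid cache answers get_can_shift() by deferring to its attention (unified) child, since the recurrent half supports shifting trivially and so never constrains the answer. Apart from get_can_shift() and kv_attn, which appear in the diff above, all class and member names here are hypothetical simplifications, not the real llama.cpp API.

#include <cstdio>
#include <memory>

// Minimal stand-in interface; the real llama.cpp cache classes carry far
// more state. Everything below is an illustrative sketch, not the actual
// implementation.
struct kv_cache_i {
    virtual ~kv_cache_i() = default;
    virtual bool get_can_shift() const = 0;
};

// Attention (unified) cache: its own state decides whether shifting works.
struct kv_cache_unified : kv_cache_i {
    bool get_can_shift() const override { return true; }
};

// Recurrent cache: shifting is trivially supported, so it never constrains
// the hybrid cache's answer.
struct kv_cache_recurrent : kv_cache_i {
    bool get_can_shift() const override { return true; }
};

// Hybrid cache: defer the shift decision to the attention child, mirroring
// the fix in the diff above.
struct kv_cache_hybrid_recurrent : kv_cache_i {
    std::unique_ptr<kv_cache_unified>   kv_attn   = std::make_unique<kv_cache_unified>();
    std::unique_ptr<kv_cache_recurrent> kv_recurr = std::make_unique<kv_cache_recurrent>();

    bool get_can_shift() const override {
        // Only the attention (unified) cache can refuse a shift.
        return kv_attn->get_can_shift();
    }
};

int main() {
    kv_cache_hybrid_recurrent cache;
    std::printf("can_shift = %s\n", cache.get_can_shift() ? "true" : "false");
    return 0;
}

Returning false unconditionally, as the old code did, disabled context shifting for the whole hybrid model even though only the attention cache's capability is actually at stake.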