fix: Fix shift logic to defer to unified cache

Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
Gabe Goodhart
2025-06-03 16:29:40 -06:00
parent 6c6ec0003a
commit cf03d4ae5c

View File

@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
}
// Reports whether this hybrid cache can be shifted.
//
// The answer is delegated to the attention cache (kv_attn): the recurrent
// side is treated as trivially shiftable, so it never restricts the result.
//
// Returns whatever kv_attn->get_can_shift() returns.
bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
    // Shifting is trivially supported for recurrent
    return kv_attn->get_can_shift();
}
void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {