Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-05 00:25:26 -04:00)
fix: Fix shift logic to defer to unified cache
Branch: HybridRecurrentCache
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported for recurrent
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
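
For readers without the full source tree at hand, here is a minimal self-contained sketch of the delegation pattern this commit adopts: the hybrid cache answers get_can_shift() by deferring to its attention (unified) child, since the recurrent half supports shifting trivially and so never constrains the answer. Apart from get_can_shift() and kv_attn, which appear in the diff above, all class and member names here are hypothetical simplifications, not the real llama.cpp API.

#include <cstdio>
#include <memory>

// Minimal stand-in interface; the real llama.cpp cache classes carry far
// more state. Everything below is an illustrative sketch, not the actual
// implementation.
struct kv_cache_i {
    virtual ~kv_cache_i() = default;
    virtual bool get_can_shift() const = 0;
};

// Attention (unified) cache: its own state decides whether shifting works.
struct kv_cache_unified : kv_cache_i {
    bool get_can_shift() const override { return true; }
};

// Recurrent cache: shifting is trivially supported, so it never constrains
// the hybrid cache's answer.
struct kv_cache_recurrent : kv_cache_i {
    bool get_can_shift() const override { return true; }
};

// Hybrid cache: defer the shift decision to the attention child, mirroring
// the fix in the diff above.
struct kv_cache_hybrid_recurrent : kv_cache_i {
    std::unique_ptr<kv_cache_unified>   kv_attn   = std::make_unique<kv_cache_unified>();
    std::unique_ptr<kv_cache_recurrent> kv_recurr = std::make_unique<kv_cache_recurrent>();

    bool get_can_shift() const override {
        // Only the attention (unified) cache can refuse a shift.
        return kv_attn->get_can_shift();
    }
};

int main() {
    kv_cache_hybrid_recurrent cache;
    std::printf("can_shift = %s\n", cache.get_can_shift() ? "true" : "false");
    return 0;
}

Returning false unconditionally, as the old code did, disabled context shifting for the whole hybrid model even though only the attention cache's capability is actually at stake.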