Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-08-05 16:38:29 -04:00
fix: Fix shift logic to defer to unified cache
Branch: HybridRecurrentCache
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported for recurrent
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
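For context, here is a minimal sketch of the delegation pattern this fix adopts. It is not the actual llama.cpp implementation; the type and member names (attention_cache, recurrent_cache, hybrid_cache) are illustrative assumptions. The idea matches the diff above: the recurrent half keeps no per-position KV entries, so a shift is a no-op for it, and whether the hybrid cache can shift is decided entirely by the attention (unified) cache.

    // Sketch only: illustrative names, not llama.cpp's real types.
    #include <cstdio>
    #include <memory>

    struct attention_cache {
        // The unified/attention KV cache knows whether it can shift positions.
        bool get_can_shift() const { return true; }
    };

    struct recurrent_cache {
        // Recurrent state has no per-position KV entries, so shifting is
        // trivially supported and never constrains the hybrid cache.
    };

    struct hybrid_cache {
        std::unique_ptr<attention_cache> kv_attn;
        std::unique_ptr<recurrent_cache> kv_recurrent;

        // Defer to the attention cache instead of unconditionally
        // returning false, as the old code did.
        bool get_can_shift() const { return kv_attn->get_can_shift(); }
    };

    int main() {
        hybrid_cache cache{std::make_unique<attention_cache>(),
                           std::make_unique<recurrent_cache>()};
        std::printf("can shift: %s\n", cache.get_can_shift() ? "yes" : "no");
    }

With this delegation, the hybrid cache reports that it can shift exactly when the unified cache can actually perform the shift, rather than always refusing.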