mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-18 08:37:43 +00:00)
context : fix recurrent reserve
ggml-ci
@@ -3883,6 +3883,11 @@ llama_context_recurrent::llama_context_recurrent(
 llama_context_recurrent::~llama_context_recurrent() = default;
 
 void llama_context_recurrent::reserve() {
+    // simulate full KV cache
+    kv_self.n = kv_self.size;
+
+    LLAMA_LOG_DEBUG("%s: kv_self.n = %u\n", __func__, kv_self.n);
+
     // TODO: implement recurrent-specific reserve logic
     llama_context::reserve();
 }
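The hunk above makes the recurrent context's reserve pass pretend the KV cache is full before delegating to the base implementation, so the compute buffers get sized for the worst case rather than for the current occupancy. Below is a minimal, self-contained C++ sketch of that pattern; the kv_cache and context types, the byte cost model, and the save/restore of kv_self.n are hypothetical stand-ins for illustration, not llama.cpp code.

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct kv_cache {
    uint32_t n    = 0; // cells currently in use
    uint32_t size = 0; // total cells available
};

struct context {
    kv_cache kv_self;

    // stands in for building the worst-case compute graph and asking the
    // allocator how much memory it would need
    std::size_t worst_case_graph_bytes() const {
        return static_cast<std::size_t>(kv_self.n) * 1024; // placeholder cost model
    }

    void reserve() {
        const uint32_t n_prev = kv_self.n;

        // simulate full KV cache so buffers are sized for the worst case
        kv_self.n = kv_self.size;

        std::printf("%s: kv_self.n = %u\n", __func__, kv_self.n);
        std::printf("%s: reserving %zu bytes\n", __func__, worst_case_graph_bytes());

        // restore the real occupancy (an assumption of this sketch; the
        // real code may reset the cache state elsewhere instead)
        kv_self.n = n_prev;
    }
};

int main() {
    context ctx;
    ctx.kv_self.size = 4096;
    ctx.reserve(); // buffers are now sized as if every cell were occupied
}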
@@ -447,6 +447,7 @@ private:
         ggml_tensor * self_k_shift; // I32 [kv_size]
     } inp;
 
+protected:
     //
     // graph
     //
@@ -570,6 +571,7 @@ private:
         ggml_tensor * s_mask; // F32 [1, n_kv]
     } inp;
 
+protected:
     //
     // graph
     //
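Both header hunks apply the same one-line change: the unnamed inp structs holding per-context input tensors stay private, while the graph section that follows them becomes protected so derived contexts can reach it. A hypothetical sketch of that layout follows; the class names and the graph_build() hook are illustrative, not the actual llama.cpp API.

struct ggml_tensor; // opaque forward declaration, as in ggml

class context_base {
private:
    struct {
        ggml_tensor * self_k_shift = nullptr; // I32 [kv_size]
    } inp; // inputs remain an implementation detail of this class

protected:
    //
    // graph
    //
    virtual void graph_build() {} // derived contexts may extend the graph

public:
    virtual ~context_base() = default;
};

class context_recurrent : public context_base {
private:
    struct {
        ggml_tensor * s_mask = nullptr; // F32 [1, n_kv]
    } inp;

protected:
    void graph_build() override {
        context_base::graph_build(); // reuse the base graph, then add state ops
    }
};

int main() {
    context_recurrent ctx;
    (void)ctx;
}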