mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-28 21:23:55 -04:00
kv-cache : refactor the update/defrag mechanism (#13988)
* kv-cache : refactor update mechanism ggml-ci * memory : improve status handling * defrag : reset head + add comments ggml-ci * cont : minor fixes ggml-ci
This commit is contained in:
@@ -52,7 +52,8 @@ struct llama_context {
|
||||
|
||||
// return true of the KV cache was updated
|
||||
// TODO: remove
|
||||
bool kv_self_update();
|
||||
bool kv_self_update(bool optimize);
|
||||
void kv_self_defrag_sched();
|
||||
|
||||
enum llama_pooling_type pooling_type() const;
|
||||
|
||||
@@ -231,6 +232,9 @@ private:
|
||||
|
||||
std::unique_ptr<llama_memory_i> memory;
|
||||
|
||||
// TODO: temporary, until the llama_kv_self_defrag() API is removed
|
||||
bool memory_force_optimize = false;
|
||||
|
||||
// decode output (2-dimensional array: [n_outputs][n_vocab])
|
||||
size_t logits_size = 0; // capacity (of floats) for logits
|
||||
float * logits = nullptr;
|
||||
|
Reference in New Issue
Block a user