memory : migrate from llama_kv_cache to more generic llama_memory (#14006)

* memory : merge llama_kv_cache into llama_memory + new `llama_memory` API

ggml-ci

* context : fix casts

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-05 15:29:22 +03:00
committed by GitHub
parent 3a077146a4
commit 7f37b6cf1e
11 changed files with 324 additions and 220 deletions

View File

@@ -13,13 +13,12 @@
#include <vector>
struct llama_model;
struct llama_kv_cache;
class llama_io_read_i;
class llama_io_write_i;
class llama_memory_i;
class llama_memory_state_i;
struct llama_memory_i;
struct llama_memory_state_i;
struct llama_context {
// init scheduler and compute buffers, reserve worst-case graphs
@@ -47,8 +46,7 @@ struct llama_context {
uint32_t n_threads() const;
uint32_t n_threads_batch() const;
llama_kv_cache * get_kv_self();
const llama_kv_cache * get_kv_self() const;
llama_memory_t get_memory() const;
// return true if the KV cache was updated
// TODO: remove