memory : rename interface to llama_memory_context_i (#14296)

* memory : rename interface to llama_memory_context_i

ggml-ci

* cont : fix comments

* cont : use "mctx" for referencing a memory context

ggml-ci
Author: Georgi Gerganov
Date: 2025-06-21 08:03:46 +03:00
Committed by: GitHub
Parent: b23fa0b3f4
Commit: 692e3cdd0a

14 changed files with 339 additions and 341 deletions


@@ -3,7 +3,6 @@
 #include "llama.h"
 
 #include <memory>
-#include <vector>
 
 struct llama_ubatch;
 
@@ -28,23 +27,21 @@ enum llama_memory_status {
     LLAMA_MEMORY_STATUS_FAILED_COMPUTE,
 };
 
-// helper function for combining the status of two memory states
+// helper function for combining the status of two memory contexts
 // useful for implementing hybrid memory types (e.g. iSWA)
 llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
 
-// the interface for managing the memory state during batch processing
+// the interface for managing the memory context during batch processing
 // this interface is implemented per memory type. see:
-//   - llama_kv_cache_unified_state
-//   - llama_kv_cache_unified_iswa_state
+//   - llama_kv_cache_unified_context
+//   - llama_kv_cache_unified_iswa_context
 //   ...
 //
-// the only method that can mutate the memory and the memory state is llama_memory_i::apply()
-//
-// TODO: rename to llama_memory_context_i ?
-struct llama_memory_state_i {
-    virtual ~llama_memory_state_i() = default;
+// the only method that should mutate the memory and the memory context is llama_memory_i::apply()
+struct llama_memory_context_i {
+    virtual ~llama_memory_context_i() = default;
 
-    // consume the current ubatch from the state and proceed to the next one
+    // consume the current ubatch from the context and proceed to the next one
     // return false if we are done
     virtual bool next() = 0;
 
@@ -55,11 +52,11 @@ struct llama_memory_state_i {
     // get the current ubatch
     virtual const llama_ubatch & get_ubatch() const = 0;
 
-    // get the status of the memory state - used for error handling and checking if any updates would be applied
+    // get the status of the memory context - used for error handling and checking if any updates would be applied
     virtual llama_memory_status get_status() const = 0;
 };
 
-using llama_memory_state_ptr = std::unique_ptr<llama_memory_state_i>;
+using llama_memory_context_ptr = std::unique_ptr<llama_memory_context_i>;
 
 // general concept of LLM memory
 // the KV cache is a type of LLM memory, but there can be other types
@@ -67,19 +64,19 @@ struct llama_memory_i {
     virtual ~llama_memory_i() = default;
 
     // split the input batch into a set of ubatches and verify that they can fit into the cache
-    // return a state object containing the ubatches and KV cache state required to process them
-    // check the llama_memory_state_i::get_status() for the result
-    virtual llama_memory_state_ptr init_batch(
+    // return a context object containing the ubatches and memory state required to process them
+    // check the llama_memory_context_i::get_status() for the result
+    virtual llama_memory_context_ptr init_batch(
             llama_batch_allocr & balloc,
             uint32_t n_ubatch,
             bool embd_all) = 0;
 
     // simulate full cache, used for allocating worst-case compute buffers
-    virtual llama_memory_state_ptr init_full() = 0;
+    virtual llama_memory_context_ptr init_full() = 0;
 
     // prepare for any pending memory updates, such as shifts, defrags, etc.
     // status == LLAMA_MEMORY_STATUS_NO_UPDATE if there is nothing to update
-    virtual llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) = 0;
+    virtual llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) = 0;
 
     // getters
     virtual bool get_can_shift() const = 0;
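
For context, here is a minimal sketch of how a caller might drive the renamed interface: init_batch() produces a memory context ("mctx", per the commit message), the caller checks its status, then alternates apply() and next() until all ubatches are consumed. This is illustrative only, not code from this commit; it assumes LLAMA_MEMORY_STATUS_SUCCESS is the success value of llama_memory_status, that apply() is declared on llama_memory_context_i in the unchanged lines between the hunks above, and process_ubatch() is a hypothetical stand-in for the caller's compute step.

// illustrative sketch, not part of this commit
#include <cstdint>
#include "llama-memory.h" // assumed: the internal header shown in the diff above

// hypothetical compute step supplied by the caller
void process_ubatch(const llama_ubatch & ubatch);

bool process_batch(llama_memory_i & mem, llama_batch_allocr & balloc,
                   uint32_t n_ubatch, bool embd_all) {
    // split the batch into ubatches and obtain a memory context ("mctx")
    llama_memory_context_ptr mctx = mem.init_batch(balloc, n_ubatch, embd_all);

    // check the status before doing any work
    if (mctx->get_status() != LLAMA_MEMORY_STATUS_SUCCESS) {
        return false; // the ubatches could not be prepared
    }

    do {
        // apply() is the only call that mutates the memory and the context
        if (!mctx->apply()) {
            return false;
        }

        process_ubatch(mctx->get_ubatch()); // run the caller's compute step

    } while (mctx->next()); // consume the current ubatch, proceed to the next

    return true;
}

The same shape would apply to the other entry points: init_full() yields a context for worst-case compute-buffer allocation, and init_update() yields a context whose status may be LLAMA_MEMORY_STATUS_NO_UPDATE when there is nothing to do.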