mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-01 13:05:52 +00:00
memory : rename interface to llama_memory_context_i (#14296)
* memory : rename interface to llama_memory_context_i ggml-ci * cont : fix comments * cont : use "mctx" for referencing a memory context ggml-ci
This commit is contained in:
@ -3,7 +3,6 @@
|
||||
#include "llama.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct llama_ubatch;
|
||||
|
||||
@ -28,23 +27,21 @@ enum llama_memory_status {
|
||||
LLAMA_MEMORY_STATUS_FAILED_COMPUTE,
|
||||
};
|
||||
|
||||
// helper function for combining the status of two memory states
|
||||
// helper function for combining the status of two memory contexts
|
||||
// useful for implementing hybrid memory types (e.g. iSWA)
|
||||
llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
|
||||
|
||||
// the interface for managing the memory state during batch processing
|
||||
// the interface for managing the memory context during batch processing
|
||||
// this interface is implemented per memory type. see:
|
||||
// - llama_kv_cache_unified_state
|
||||
// - llama_kv_cache_unified_iswa_state
|
||||
// - llama_kv_cache_unified_context
|
||||
// - llama_kv_cache_unified_iswa_context
|
||||
// ...
|
||||
//
|
||||
// the only method that can mutate the memory and the memory state is llama_memory_i::apply()
|
||||
//
|
||||
// TODO: rename to llama_memory_context_i ?
|
||||
struct llama_memory_state_i {
|
||||
virtual ~llama_memory_state_i() = default;
|
||||
// the only method that should mutate the memory and the memory context is llama_memory_i::apply()
|
||||
struct llama_memory_context_i {
|
||||
virtual ~llama_memory_context_i() = default;
|
||||
|
||||
// consume the current ubatch from the state and proceed to the next one
|
||||
// consume the current ubatch from the context and proceed to the next one
|
||||
// return false if we are done
|
||||
virtual bool next() = 0;
|
||||
|
||||
@ -55,11 +52,11 @@ struct llama_memory_state_i {
|
||||
// get the current ubatch
|
||||
virtual const llama_ubatch & get_ubatch() const = 0;
|
||||
|
||||
// get the status of the memory state - used for error handling and checking if any updates would be applied
|
||||
// get the status of the memory context - used for error handling and checking if any updates would be applied
|
||||
virtual llama_memory_status get_status() const = 0;
|
||||
};
|
||||
|
||||
using llama_memory_state_ptr = std::unique_ptr<llama_memory_state_i>;
|
||||
using llama_memory_context_ptr = std::unique_ptr<llama_memory_context_i>;
|
||||
|
||||
// general concept of LLM memory
|
||||
// the KV cache is a type of LLM memory, but there can be other types
|
||||
@ -67,19 +64,19 @@ struct llama_memory_i {
|
||||
virtual ~llama_memory_i() = default;
|
||||
|
||||
// split the input batch into a set of ubatches and verify that they can fit into the cache
|
||||
// return a state object containing the ubatches and KV cache state required to process them
|
||||
// check the llama_memory_state_i::get_status() for the result
|
||||
virtual llama_memory_state_ptr init_batch(
|
||||
// return a context object containing the ubatches and memory state required to process them
|
||||
// check the llama_memory_context_i::get_status() for the result
|
||||
virtual llama_memory_context_ptr init_batch(
|
||||
llama_batch_allocr & balloc,
|
||||
uint32_t n_ubatch,
|
||||
bool embd_all) = 0;
|
||||
|
||||
// simulate full cache, used for allocating worst-case compute buffers
|
||||
virtual llama_memory_state_ptr init_full() = 0;
|
||||
virtual llama_memory_context_ptr init_full() = 0;
|
||||
|
||||
// prepare for any pending memory updates, such as shifts, defrags, etc.
|
||||
// status == LLAMA_MEMORY_STATUS_NO_UPDATE if there is nothing to update
|
||||
virtual llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) = 0;
|
||||
virtual llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) = 0;
|
||||
|
||||
// getters
|
||||
virtual bool get_can_shift() const = 0;
|
||||
|
Reference in New Issue
Block a user