mirror of https://github.com/ggml-org/llama.cpp.git
context : abstract constructor and init
ggml-ci
@@ -21,9 +21,16 @@ class llama_io_write_i;
 using llama_loras = std::unordered_map<struct llama_adapter_lora *, float>;
 
 struct llama_context : public llama_graph_i {
-    llama_context(const llama_model & model);
+    llama_context(
+            const llama_model & model,
+            const llama_context_params & params);
 
     virtual ~llama_context();
 
+    // init scheduler and compute buffers
+    // call once after the context is constructed
+    virtual void init();
+
     const llama_model & get_model() const;
     const llama_cparams & get_cparams() const;
 
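This first hunk splits construction in two: the constructor now also takes the context parameters, while allocation moves into a separate init() that, per the new comments, sets up the scheduler and compute buffers and is called once after construction. A minimal self-contained sketch of that two-phase pattern follows; the type names and members are illustrative stand-ins, not the actual llama_context internals.

// Two-phase construction/initialization sketch; model_t, params_t and the
// members below are hypothetical stand-ins for the real llama types.
#include <stdexcept>

struct model_t  {}; // stands in for llama_model
struct params_t {}; // stands in for llama_context_params

struct context_base {
    // phase 1: store references and validate parameters only; no backend
    // buffers are allocated yet, so construction stays cheap
    context_base(const model_t & model, const params_t & params)
        : model(model), params(params) {}

    virtual ~context_base() = default;

    // phase 2: allocate the scheduler and compute buffers; called once after
    // the (possibly derived) object is fully constructed, so virtual calls
    // made here dispatch to the most-derived overrides
    virtual void init() {
        if (initialized) {
            throw std::runtime_error("init() called twice");
        }
        // ... create backend scheduler, reserve compute buffers ...
        initialized = true;
    }

protected:
    const model_t  & model;
    const params_t & params;

    bool initialized = false;
};

int main() {
    model_t  model;
    params_t params;

    context_base ctx(model, params); // phase 1: construct
    ctx.init();                      // phase 2: allocate, exactly once
    return 0;
}

Keeping virtual setup out of the constructor matters because virtual dispatch inside a constructor resolves to the base class; deferring it to init() lets derived contexts participate in the setup.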
@@ -52,10 +59,6 @@ struct llama_context : public llama_graph_i {
 
     virtual int64_t n_pos_per_token() const; // vision
 
-    virtual ggml_context_ptr init();
-
-    virtual void synchronize();
-
     virtual void attach_threadpool(
             ggml_threadpool_t threadpool,
             ggml_threadpool_t threadpool_batch);
@@ -85,8 +88,14 @@ struct llama_context : public llama_graph_i {
             int32_t il_start,
             int32_t il_end);
 
+    ////
+
+    virtual void synchronize();
+
+    virtual ggml_context_ptr graph_init();
+
     // returns the result of ggml_backend_sched_graph_compute_async execution
-    virtual enum ggml_status compute_graph(
+    virtual enum ggml_status graph_compute(
             ggml_cgraph * graph,
             bool batched);
 
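With init() now reserved for context setup, the per-graph entry points get a consistent graph_ prefix: the old graph-building init() becomes graph_init() and compute_graph() becomes graph_compute(), with synchronize() moved alongside them. Below is a self-contained sketch of the resulting call sequence; the stand-in types (for ggml_context_ptr, ggml_cgraph, ggml_status) and the driver logic are assumptions, not the real API surface.

// Stand-ins so the sketch compiles on its own; the real counterparts are
// ggml_context, ggml_context_ptr, ggml_cgraph and ggml_status.
#include <memory>

enum class status_t { success, failed };

struct graph_ctx_t {};                                // ggml_context stand-in
using graph_ctx_ptr = std::unique_ptr<graph_ctx_t>;   // ggml_context_ptr stand-in
struct cgraph_t {};                                   // ggml_cgraph stand-in

struct context_t {
    void init() {}                                    // once: scheduler + buffers

    // per-evaluation graph hooks, grouped under the new graph_ prefix
    graph_ctx_ptr graph_init() { return std::make_unique<graph_ctx_t>(); }
    status_t graph_compute(cgraph_t * /*gf*/, bool /*batched*/) { return status_t::success; }
    void synchronize() {}                             // wait for async backends
};

int main() {
    context_t ctx;
    ctx.init();                                       // context setup, exactly once

    graph_ctx_ptr gctx = ctx.graph_init();            // fresh graph context
    cgraph_t gf;                                      // graph would be built in gctx
    status_t st = ctx.graph_compute(&gf, /*batched=*/true); // async-scheduled compute
    ctx.synchronize();                                // block until results are ready

    return st == status_t::success ? 0 : 1;
}

The split also mirrors the comment kept in the hunk: graph_compute() surfaces the ggml_backend_sched_graph_compute_async status, so a caller pairs it with synchronize() when it needs the results.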
@@ -297,7 +306,7 @@ public:
 
     virtual void kv_self_update() override;
 
-    virtual ggml_context_ptr init() override;
+    virtual ggml_context_ptr graph_init() override;
 
     virtual void input_set(const llama_ubatch & ubatch) override;
 
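In the derived context the change is mechanical: its override simply re-points from init() to graph_init(). A compressed, self-contained sketch of that override relationship follows; the class names and bodies are hypothetical stand-ins for the classes in this file.

// Override-pattern sketch; only graph_init/kv_self_update come from the diff,
// everything else here is an assumed stand-in.
#include <memory>

struct graph_ctx_t {};
using graph_ctx_ptr = std::unique_ptr<graph_ctx_t>;

struct context_iface {
    virtual ~context_iface() = default;
    virtual void kv_self_update() {}                  // default: no KV cache to maintain
    virtual graph_ctx_ptr graph_init() { return std::make_unique<graph_ctx_t>(); }
};

struct context_kv : context_iface {
    void kv_self_update() override { /* shift/defrag the cache */ }
    graph_ctx_ptr graph_init() override {
        // reset per-graph state that depends on the KV cache, then delegate
        // the common setup to the base implementation
        return context_iface::graph_init();
    }
};

int main() {
    context_kv ctx;
    context_iface & base = ctx;
    base.kv_self_update();                  // dispatches to the KV override
    graph_ctx_ptr gctx = base.graph_init(); // likewise virtual
    return gctx ? 0 : 1;
}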
@@ -312,7 +321,7 @@ public:
     // certain implementations could require a padding for the context size
     uint32_t get_ctx_padding(const llama_cparams & cparams) const;
 
-    // === unified KV cache ===
+    // === KV cache ===
 
     llama_kv_cache kv_self;
 