context : abstract constructor and init

ggml-ci
2025-07-16 07:38:28 +00:00 · 2025-02-13 17:13:42 +02:00
parent ed3cb55abe
commit 131743ff4f
3 changed files with 359 additions and 325 deletions
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -21,9 +21,16 @@ class llama_io_write_i;
 using llama_loras = std::unordered_map<struct llama_adapter_lora *, float>;

 struct llama_context : public llama_graph_i {
-    llama_context(const llama_model & model);
+    llama_context(
+            const llama_model & model,
+            const llama_context_params & params);
+
    virtual ~llama_context();

+    // init scheduler and compute buffers
+    // call once after the context is constructed
+    virtual void init();
+
    const llama_model   & get_model()   const;
    const llama_cparams & get_cparams() const;

@@ -52,10 +59,6 @@ struct llama_context : public llama_graph_i {

    virtual int64_t n_pos_per_token() const; // vision

-    virtual ggml_context_ptr init();
-
-    virtual void synchronize();
-
    virtual void attach_threadpool(
            ggml_threadpool_t   threadpool,
            ggml_threadpool_t   threadpool_batch);
@@ -85,8 +88,14 @@ struct llama_context : public llama_graph_i {
                int32_t   il_start,
                int32_t   il_end);

+    ////
+
+    virtual void synchronize();
+
+    virtual ggml_context_ptr graph_init();
+
    // returns the result of ggml_backend_sched_graph_compute_async execution
-    virtual enum ggml_status compute_graph(
+    virtual enum ggml_status graph_compute(
                ggml_cgraph * graph,
                       bool   batched);

@@ -297,7 +306,7 @@ public:

    virtual void kv_self_update() override;

-    virtual ggml_context_ptr init() override;
+    virtual ggml_context_ptr graph_init() override;

    virtual void input_set(const llama_ubatch & ubatch) override;

@@ -312,7 +321,7 @@ public:
    // certain implementations could require a padding for the context size
    uint32_t get_ctx_padding(const llama_cparams & cparams) const;

-    // === unified KV cache ===
+    // === KV cache ===

    llama_kv_cache kv_self;