context : store graph build function callback

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-02-02 10:17:42 +02:00
parent 5d3491e789
commit 3e23be7911
3 changed files with 41 additions and 8 deletions

View File

@@ -36,11 +36,13 @@ struct llama_batch_manager_i {
// TODO: make implementation details private
// TODO: become abstract base class, split the current implementation into different child classes
struct llama_context {
// TODO: store the worst-case graph build function and reuse it later
// TODO: tmp until llama-model starts implementing the graph build function
typedef std::function<ggml_cgraph *(llama_context &, const llama_ubatch &, bool worst_case)> build_graph_callback;
llama_context(
const llama_model & model,
const llama_context_params & params,
std::function<ggml_cgraph *(llama_context &, const llama_ubatch &)> fn_build_graph_worst);
build_graph_callback && cb_build_graph);
const struct llama_model & model;
@@ -49,6 +51,8 @@ struct llama_context {
llama_adapter_cvec cvec;
llama_loras loras;
build_graph_callback cb_build_graph;
std::vector<ggml_backend_ptr> backends;
std::vector<std::pair<ggml_backend_t, ggml_backend_set_n_threads_t>> set_n_threads_fns;