context : store graph build function callback

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-02-02 10:17:42 +02:00
parent 5d3491e789
commit 3e23be7911
3 changed files with 41 additions and 8 deletions

View File

@@ -36,11 +36,13 @@ struct llama_batch_manager_i {
// TODO: make implementation details private
// TODO: become abstract base class, split the current implementation into different child classes
struct llama_context {
// TODO: store the worst-case graph build function and reuse it later
// TODO: tmp until llama-model starts implementing the graph build function
typedef std::function<ggml_cgraph *(llama_context &, const llama_ubatch &, bool worst_case)> build_graph_callback;
llama_context(
const llama_model & model,
const llama_context_params & params,
std::function<ggml_cgraph *(llama_context &, const llama_ubatch &)> fn_build_graph_worst);
build_graph_callback && cb_build_graph);
const struct llama_model & model;
@@ -49,6 +51,8 @@ struct llama_context {
llama_adapter_cvec cvec;
llama_loras loras;
build_graph_callback cb_build_graph;
std::vector<ggml_backend_ptr> backends;
std::vector<std::pair<ggml_backend_t, ggml_backend_set_n_threads_t>> set_n_threads_fns;