graph : avoid huge warm-up graphs for MoE models (#14753)

* graph : avoid huge warm-up graphs for MoE models

ggml-ci

* cont : bump max nodes to 8x model tensors
This commit is contained in:
Georgi Gerganov
2025-07-18 14:31:15 +03:00
committed by GitHub
parent eacdeb5bfc
commit d498af3d5a
2 changed files with 6 additions and 3 deletions

View File

@@ -1312,7 +1312,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
//
uint32_t llama_context::graph_max_nodes() const {
-    return std::max<uint32_t>(65536u, 5u*model.n_tensors());
+    return std::max<uint32_t>(1024u, 8u*model.n_tensors());
}
llm_graph_result * llama_context::get_gf_res_reserve() const {