Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-29 05:33:37 -04:00)
graph : avoid huge warm-up graphs for MoE models (#14753)
* graph : avoid huge warm-up graphs for MoE models

ggml-ci

* cont : bump max nodes to 8x model tensors
@@ -1312,7 +1312,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
 //
 
 uint32_t llama_context::graph_max_nodes() const {
-    return std::max<uint32_t>(65536u, 5u*model.n_tensors());
+    return std::max<uint32_t>(1024u, 8u*model.n_tensors());
 }
 
 llm_graph_result * llama_context::get_gf_res_reserve() const {