Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-29 05:33:37 -04:00)
graph : avoid huge warm-up graphs for MoE models (#14753)
* graph : avoid huge warm-up graphs for MoE models

ggml-ci

* cont : bump max nodes to 8x model tensors
@@ -1312,7 +1312,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
 //
 
 uint32_t llama_context::graph_max_nodes() const {
-    return std::max<uint32_t>(65536u, 5u*model.n_tensors());
+    return std::max<uint32_t>(1024u, 8u*model.n_tensors());
 }
 
 llm_graph_result * llama_context::get_gf_res_reserve() const {