model : fix llama4 graph (#13663)

ggml-ci
Georgi Gerganov
2025-05-20 19:21:04 +03:00
committed by GitHub
parent a4090d1174
commit be0239693c

@@ -4803,8 +4803,21 @@ struct llm_build_llama_iswa : public llm_graph_context {
             ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
             cb(ffn_inp, "ffn_inp", il);
 
-            {
-                // llama4 MoE
+            // feed-forward network (non-MoE)
+            if (model.layers[il].ffn_gate_inp == nullptr) {
+                cur = build_norm(ffn_inp,
+                        model.layers[il].ffn_norm, NULL,
+                        LLM_NORM_RMS, il);
+                cb(cur, "ffn_norm", il);
+
+                cur = build_ffn(cur,
+                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
+                        model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
+                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
+                        NULL,
+                        LLM_FFN_SILU, LLM_FFN_PAR, il);
+                cb(cur, "ffn_out", il);
+            } else {
                 ggml_tensor * ffn_inp_normed = build_norm(ffn_inp,
                         model.layers[il].ffn_norm, NULL,
                         LLM_NORM_RMS, il);
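
The crux of the change is the nullptr check on ffn_gate_inp: before this commit the builder ran the llama4 MoE path unconditionally, which breaks layers that have no expert-gating tensor; now those layers get a plain dense FFN and only gated layers go through the MoE code. The following is a minimal standalone sketch of that dispatch, not llama.cpp code — the names FakeLayer and pick_ffn_branch are invented for illustration, and the strings stand in for the build_ffn(...) and build_moe_ffn(...) calls in the real graph.

// Sketch: per-layer FFN dispatch as done in the fixed graph builder.
#include <cstdio>
#include <vector>

struct FakeLayer {
    const float * ffn_gate_inp = nullptr; // stands in for model.layers[il].ffn_gate_inp
};

static const char * pick_ffn_branch(const FakeLayer & layer) {
    if (layer.ffn_gate_inp == nullptr) {
        return "dense FFN";  // corresponds to build_ffn(..., LLM_FFN_SILU, LLM_FFN_PAR, il)
    }
    return "llama4 MoE";     // corresponds to the build_moe_ffn(...) path kept in the else branch
}

int main() {
    // Llama 4 Maverick, for example, interleaves dense and MoE layers,
    // so a real model exercises both branches.
    float gate = 0.0f;
    std::vector<FakeLayer> layers = { {nullptr}, {&gate}, {nullptr}, {&gate} };

    for (size_t il = 0; il < layers.size(); ++il) {
        std::printf("layer %zu -> %s\n", il, pick_ffn_branch(layers[il]));
    }
    return 0;
}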