@@ -4803,8 +4803,21 @@ struct llm_build_llama_iswa : public llm_graph_context {
             ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
             cb(ffn_inp, "ffn_inp", il);
 
-            // llama4 MoE
-            {
+            // feed-forward network (non-MoE)
+            if (model.layers[il].ffn_gate_inp == nullptr) {
+                cur = build_norm(ffn_inp,
+                        model.layers[il].ffn_norm, NULL,
+                        LLM_NORM_RMS, il);
+                cb(cur, "ffn_norm", il);
+
+                cur = build_ffn(cur,
+                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
+                        model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
+                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
+                        NULL,
+                        LLM_FFN_SILU, LLM_FFN_PAR, il);
+                cb(cur, "ffn_out", il);
+            } else {
                 ggml_tensor * ffn_inp_normed = build_norm(ffn_inp,
                         model.layers[il].ffn_norm, NULL,
                         LLM_NORM_RMS, il);
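The key change is the per-layer dispatch: llama4 interleaves dense and MoE feed-forward blocks, and the builder now distinguishes them by whether the layer has router weights (ffn_gate_inp). Below is a minimal standalone sketch of that dispatch pattern; Layer and ffn_kind are simplified stand-ins for illustration, not llama.cpp/ggml types.

    // ffn_dispatch.cpp -- illustrative only; not the real llama.cpp API.
    #include <cstdio>
    #include <vector>

    // Hypothetical per-layer weights: a layer is MoE iff it carries a
    // router ("gate_inp") tensor; dense layers leave it null, mirroring
    // the `ffn_gate_inp == nullptr` check in the diff above.
    struct Layer {
        const float * ffn_gate_inp = nullptr; // router weights (MoE layers only)
    };

    const char * ffn_kind(const Layer & layer) {
        // Same dispatch as the diff: null router -> dense FFN branch,
        // otherwise the MoE branch.
        return layer.ffn_gate_inp == nullptr ? "dense FFN" : "MoE FFN";
    }

    int main() {
        float router[1] = {0.0f};
        // llama4-style interleaving: some layers dense, some MoE.
        std::vector<Layer> layers = { {nullptr}, {router}, {nullptr}, {router} };
        for (size_t il = 0; il < layers.size(); ++il) {
            std::printf("layer %zu: %s\n", il, ffn_kind(layers[il]));
        }
        return 0;
    }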
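For reference, the build_ffn call with LLM_FFN_SILU and LLM_FFN_PAR is the SiLU-gated (SwiGLU-style) feed-forward: the gate and up projections both read the normalized input in parallel, the gate branch passes through SiLU, and their elementwise product feeds the down projection. Here is a scalar sketch of that computation under stand-in weight names; the real code operates on ggml tensors, not floats.

    // swiglu_scalar.cpp -- scalar sketch of down(silu(gate(x)) * up(x)).
    #include <cmath>
    #include <cstdio>

    // SiLU activation: x * sigmoid(x).
    static float silu(float x) {
        return x / (1.0f + std::exp(-x));
    }

    // Parallel SiLU-gated FFN on one scalar "hidden unit": gate and up
    // both read the same input (LLM_FFN_PAR), the gate branch goes
    // through SiLU (LLM_FFN_SILU), and the product is down-projected.
    static float ffn_silu_par(float x, float w_gate, float w_up, float w_down) {
        float gate = silu(w_gate * x); // gate branch: silu(gate(x))
        float up   = w_up * x;         // up branch:   up(x)
        return w_down * (gate * up);   // down(silu(gate(x)) * up(x))
    }

    int main() {
        std::printf("ffn(1.0) = %f\n", ffn_silu_par(1.0f, 0.5f, 2.0f, 1.5f));
        return 0;
    }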