mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-29 12:35:16 +00:00)
llama : fix incorrect Qwen2Moe ffn_moe_out graph callback (#12631)
@@ -6323,7 +6323,7 @@ struct llm_build_qwen2moe : public llm_graph_context {
                     false, 0.0,
                     LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
                     il);
-            cb(cur, "ffn_moe_out", il);
+            cb(moe_out, "ffn_moe_out", il);
 
             // FFN shared expert
             {
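For context: cb attaches a name/debug callback to a tensor in the compute graph. The old call labeled cur, which at this point still holds the input feeding the experts, as ffn_moe_out; the tensor that actually carries the routed-experts output is moe_out, returned by build_moe_ffn. The sketch below shows roughly where the changed line sits. It is an illustration only; the member names and the exact build_moe_ffn parameter list are assumptions inferred from the diff context, not a verbatim excerpt of the repository code.

// Abridged sketch of the FFN section in llm_build_qwen2moe; names and the
// exact build_moe_ffn argument list are assumptions for illustration, not a
// verbatim copy of the repository code.
ggml_tensor * moe_out = build_moe_ffn(cur,
        model.layers[il].ffn_gate_inp,
        model.layers[il].ffn_up_exps,
        model.layers[il].ffn_gate_exps,
        model.layers[il].ffn_down_exps,
        nullptr,
        n_expert, n_expert_used,
        LLM_FFN_SILU, false,
        false, 0.0,
        LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
        il);

// The fix: name the routed-experts output. The previous call passed cur,
// which at this point still holds the experts' input, so the ffn_moe_out
// label ended up on the wrong graph node.
cb(moe_out, "ffn_moe_out", il);

// FFN shared expert (details omitted in this sketch): build_ffn() produces
// ffn_shexp from the same input, and the layer output combines both paths:
//     cur = ggml_add(ctx0, moe_out, ffn_shexp);
//     cb(cur, "ffn_out", il);

Since the callback only sets a tensor name, the mislabel mainly affected graph debugging and any tooling that matches tensors by name, not the numerical result of the layer.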
|