mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
llama-graph : use ggml_repeat_4d (#13998)
This commit is contained in:
@@ -769,9 +769,8 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
|
|||||||
cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
|
cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
|
||||||
|
|
||||||
if (weight_before_ffn) {
|
if (weight_before_ffn) {
|
||||||
// TODO: this is a workaround as we don't yet have a repeat op that takes custom dim (ggml_repeat_4d)
|
// repeat cur to [n_embd, n_expert_used, n_tokens]
|
||||||
ggml_tensor * repeated = ggml_new_tensor_3d(ctx0, cur->type, n_embd, n_expert_used, n_tokens);
|
ggml_tensor * repeated = ggml_repeat_4d(ctx0, cur, n_embd, n_expert_used, n_tokens, 1);
|
||||||
repeated = ggml_repeat(ctx0, cur, repeated); // [n_embd, n_expert_used, n_tokens]
|
|
||||||
cur = ggml_mul(ctx0, repeated, weights);
|
cur = ggml_mul(ctx0, repeated, weights);
|
||||||
cb(cur, "ffn_moe_weighted", il);
|
cb(cur, "ffn_moe_weighted", il);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user