graph : fix geglu (#14077)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-09 17:17:31 +03:00
committed by GitHub
parent e21d2d4ae2
commit 201b31dc2e

View File

@@ -663,22 +663,14 @@ ggml_tensor * llm_graph_context::build_ffn(
{ {
// Split into two equal parts // Split into two equal parts
int64_t split_point = cur->ne[0] / 2; int64_t split_point = cur->ne[0] / 2;
ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d( // TODO: these conts should not be needed
ctx0, cur, split_point, ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
cur->ne[1], cur->nb[1], 0 ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));
));
ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
ctx0, cur, split_point,
cur->ne[1], cur->nb[1],
split_point * ggml_element_size(cur)
));
// Apply GELU activation function to the first part x0 = ggml_gelu(ctx0, x0);
output_ffn_up = ggml_gelu(ctx0, output_ffn_up); cb(x0, "ffn_gelu", il);
cb(output_ffn_up, "ffn_gelu", il);
// Element-wise multiplication between the activated part and the gate part cur = ggml_mul(ctx0, x0, x1);
cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
cb(cur, "ffn_geglu", il); cb(cur, "ffn_geglu", il);
} break; } break;
} }