llama : support GEGLU for jina-bert-v2 (#14090)

2025-08-13 11:57:43 -04:00 · 2025-06-10 18:02:08 +02:00
parent 652b70e667
commit 3678b838bb
4 changed files with 9 additions and 33 deletions
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -650,6 +650,7 @@ ggml_tensor * llm_graph_context::build_ffn(
            {
                // Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
                int64_t split_point = cur->ne[0] / 2;
+                // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
                ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
                ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));

@@ -663,7 +664,7 @@ ggml_tensor * llm_graph_context::build_ffn(
            {
                // Split into two equal parts
                int64_t split_point = cur->ne[0] / 2;
-                // TODO: these conts should not be needed
+                // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
                ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
                ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));