implement swapped variants (cpu/cuda)

2025-08-13 20:07:41 -04:00 · 2025-06-13 22:48:53 +02:00
parent f8705a2399
commit 0b2703fc57
7 changed files with 117 additions and 45 deletions
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1101,23 +1101,37 @@ extern "C" {
    // gated linear unit ops
    // A: n columns, r rows,
    // result is n / 2 columns, r rows,
+    // expects the gate in the second half of each row; if swapped is true, the gate is in the first half instead
    GGML_API struct ggml_tensor * ggml_glu(
            struct ggml_context * ctx,
             struct ggml_tensor * a,
-             enum ggml_glu_op op);
+             enum ggml_glu_op     op,
+             bool                 swapped);

    GGML_API struct ggml_tensor * ggml_reglu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_reglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
    GGML_API struct ggml_tensor * ggml_geglu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_geglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
    GGML_API struct ggml_tensor * ggml_swiglu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_swiglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
    // normalize along rows
    GGML_API struct ggml_tensor * ggml_norm(
            struct ggml_context * ctx,