Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 20:05:20 +00:00)
tighten constraints again
commit f8c20809de (parent a1a7b6dfa9), committed by Akarshan
@@ -3211,8 +3211,8 @@ static void ggml_compute_forward_reglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
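The same two-line change repeats in every gated-op kernel below: dst must now match the expected output shape exactly instead of merely being large enough. As context, here is a minimal standalone sketch of the shape relationship the tightened asserts enforce; shape2d and check_gated_shapes are hypothetical stand-ins, not ggml code. The fused input packs value and gate halves side by side in each row, so the output has exactly src0->ne[0]/2 columns and the same number of rows.

// minimal sketch, not ggml code: shape2d and check_gated_shapes are hypothetical
#include <cassert>
#include <cstdint>
#include <cstdio>

struct shape2d {
    int64_t ne0;    // row length (first dimension)
    int64_t nrows;  // total number of rows
};

static void check_gated_shapes(const shape2d & src0, const shape2d & dst) {
    const int64_t nc = src0.ne0 / 2;   // each output row is half of the fused input row
    assert(dst.ne0   == nc);           // was '>=' before this commit
    assert(dst.nrows == src0.nrows);   // was '>=' before this commit
}

int main() {
    const shape2d src0 = { 8192, 32 }; // e.g. a fused up+gate projection output
    const shape2d dst  = { 4096, 32 };
    check_gated_shapes(src0, dst);
    printf("dst shape matches: %lld x %lld\n", (long long) dst.ne0, (long long) dst.nrows);
    return 0;
}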
@@ -3252,8 +3252,8 @@ static void ggml_compute_forward_reglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
    const int dr = (nr + nth - 1)/nth;
@@ -3318,8 +3318,8 @@ static void ggml_compute_forward_geglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
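The reglu, geglu and swiglu hunks are otherwise identical; the three ops differ only in the activation applied to the value half of each row before multiplying by the gate half. For reference, hedged scalar versions of the three gates (the helper names are hypothetical, and exact-erf GELU is shown here even though ggml may use a tanh-based approximation):

// minimal sketch of the three gate activations, for reference only
#include <cmath>
#include <cstdio>

static float op_reglu (float x, float g) { return (x > 0.0f ? x : 0.0f) * g; }                          // relu(x) * g
static float op_geglu (float x, float g) { return 0.5f*x*(1.0f + std::erf(x/std::sqrt(2.0f))) * g; }    // gelu(x) * g
static float op_swiglu(float x, float g) { return (x / (1.0f + std::exp(-x))) * g; }                    // silu(x) * g

int main() {
    const float x = 1.5f, g = 2.0f;
    printf("reglu=%f geglu=%f swiglu=%f\n", op_reglu(x, g), op_geglu(x, g), op_swiglu(x, g));
    return 0;
}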
@@ -3359,8 +3359,8 @@ static void ggml_compute_forward_geglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
@@ -3425,8 +3425,8 @@ static void ggml_compute_forward_swiglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
@@ -3466,8 +3466,8 @@ static void ggml_compute_forward_swiglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
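All of the CPU paths above split work by rows with the same ceiling division, dr = (nr + nth - 1)/nth. A small standalone sketch of that partitioning; the ir0/ir1 bounds follow the usual ggml pattern and are not part of this diff:

// minimal sketch of ceil-division row partitioning across nth threads
#include <algorithm>
#include <cstdio>

int main() {
    const int nr  = 10; // total rows (example value)
    const int nth = 4;  // number of threads (example value)

    const int dr = (nr + nth - 1)/nth; // rows per thread, rounded up

    for (int ith = 0; ith < nth; ++ith) {
        const int ir0 = dr*ith;                  // first row for this thread
        const int ir1 = std::min(ir0 + dr, nr);  // one past the last row
        printf("thread %d: rows [%d, %d)\n", ith, ir0, ir1);
    }
    return 0;
}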
@@ -230,8 +230,8 @@ void ggml_cuda_op_unary_gated(ggml_backend_cuda_context & ctx, ggml_tensor * dst
     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
     GGML_ASSERT(src0->type == dst->type);
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= ggml_nrows(src0));
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == ggml_nrows(src0));
 
     if (src0->type == GGML_TYPE_F16) {
         unary_gated_cuda<op>((const half *)src0_d, (half *)dst_d, ggml_nelements(dst), nc, src0->nb[1] / sizeof(half), stream);
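On the CUDA side the call passes the flat element count of dst together with nc and the src0 row stride in elements (src0->nb[1] / sizeof(half)), presumably so the kernel can map a flat dst index back into the wider src0 rows. A hedged host-side sketch of that index arithmetic, not the actual kernel: gated_ref is hypothetical, the gate is assumed to occupy the second half of each src0 row, and REGLU is used as the example activation.

// minimal host-side sketch: k = number of dst elements,
// n = dst columns (nc), o = src0 row stride in elements
#include <cstdint>
#include <cstdio>

static void gated_ref(const float * x, float * dst, int64_t k, int64_t n, int64_t o) {
    for (int64_t i = 0; i < k; ++i) {       // a CUDA kernel would handle one i per thread
        const int64_t row = i / n;
        const int64_t col = i % n;
        const float v = x[row*o + col];     // value half of the src0 row
        const float g = x[row*o + col + n]; // gate half (assumed layout)
        dst[i] = (v > 0.0f ? v : 0.0f) * g; // REGLU as the example activation
    }
}

int main() {
    const int64_t n = 4, rows = 2, o = 2*n;
    float x[rows*o];
    float dst[rows*n];
    for (int64_t i = 0; i < rows*o; ++i) {
        x[i] = (float) i - 6.0f;
    }
    gated_ref(x, dst, rows*n, n, o);
    for (int64_t i = 0; i < rows*n; ++i) {
        printf("%g ", dst[i]);
    }
    printf("\n");
    return 0;
}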