tighten constraints again

This commit is contained in:
Sigbjørn Skjæret
2025-06-13 09:00:30 +02:00
committed by Akarshan
parent a1a7b6dfa9
commit f8c20809de
2 changed files with 14 additions and 14 deletions

View File

@@ -3211,8 +3211,8 @@ static void ggml_compute_forward_reglu_f32(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;
@@ -3252,8 +3252,8 @@ static void ggml_compute_forward_reglu_f16(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;
@@ -3318,8 +3318,8 @@ static void ggml_compute_forward_geglu_f32(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;
@@ -3359,8 +3359,8 @@ static void ggml_compute_forward_geglu_f16(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;
@@ -3425,8 +3425,8 @@ static void ggml_compute_forward_swiglu_f32(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;
@@ -3466,8 +3466,8 @@ static void ggml_compute_forward_swiglu_f16(
 const int nc = src0->ne[0] / 2;
 const int nr = ggml_nrows(src0);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= nr);
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == nr);
 // rows per thread
 const int dr = (nr + nth - 1)/nth;

View File

@@ -230,8 +230,8 @@ void ggml_cuda_op_unary_gated(ggml_backend_cuda_context & ctx, ggml_tensor * dst
 GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
 GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
 GGML_ASSERT(src0->type == dst->type);
-GGML_ASSERT(dst->ne[0] >= nc);
-GGML_ASSERT(ggml_nrows(dst) >= ggml_nrows(src0));
+GGML_ASSERT(dst->ne[0] == nc);
+GGML_ASSERT(ggml_nrows(dst) == ggml_nrows(src0));
 if (src0->type == GGML_TYPE_F16) {
 unary_gated_cuda<op>((const half *)src0_d, (half *)dst_d, ggml_nelements(dst), nc, src0->nb[1] / sizeof(half), stream);