Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 20:05:20 +00:00)
tighten constraints again
commit f8c20809de (parent a1a7b6dfa9), committed by Akarshan
@@ -3211,8 +3211,8 @@ static void ggml_compute_forward_reglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
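The same two-line change repeats in every gated-op kernel below: dst must now match the expected output shape exactly instead of merely being large enough. As context, here is a minimal standalone sketch of the shape relationship the tightened asserts enforce; shape2d and check_gated_shapes are hypothetical stand-ins, not ggml code. The fused input packs value and gate halves side by side in each row, so the output has exactly src0->ne[0]/2 columns and the same number of rows.

// minimal sketch, not ggml code: shape2d and check_gated_shapes are hypothetical
#include <cassert>
#include <cstdint>
#include <cstdio>

struct shape2d {
    int64_t ne0;    // row length (first dimension)
    int64_t nrows;  // total number of rows
};

static void check_gated_shapes(const shape2d & src0, const shape2d & dst) {
    const int64_t nc = src0.ne0 / 2;   // each output row is half of the fused input row
    assert(dst.ne0   == nc);           // was '>=' before this commit
    assert(dst.nrows == src0.nrows);   // was '>=' before this commit
}

int main() {
    const shape2d src0 = { 8192, 32 }; // e.g. a fused up+gate projection output
    const shape2d dst  = { 4096, 32 };
    check_gated_shapes(src0, dst);
    printf("dst shape matches: %lld x %lld\n", (long long) dst.ne0, (long long) dst.nrows);
    return 0;
}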
@@ -3252,8 +3252,8 @@ static void ggml_compute_forward_reglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
    const int dr = (nr + nth - 1)/nth;
@@ -3318,8 +3318,8 @@ static void ggml_compute_forward_geglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
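The reglu, geglu and swiglu hunks are otherwise identical; the three ops differ only in the activation applied to the value half of each row before multiplying by the gate half. For reference, hedged scalar versions of the three gates (the helper names are hypothetical, and exact-erf GELU is shown here even though ggml may use a tanh-based approximation):

// minimal sketch of the three gate activations, for reference only
#include <cmath>
#include <cstdio>

static float op_reglu (float x, float g) { return (x > 0.0f ? x : 0.0f) * g; }                          // relu(x) * g
static float op_geglu (float x, float g) { return 0.5f*x*(1.0f + std::erf(x/std::sqrt(2.0f))) * g; }    // gelu(x) * g
static float op_swiglu(float x, float g) { return (x / (1.0f + std::exp(-x))) * g; }                    // silu(x) * g

int main() {
    const float x = 1.5f, g = 2.0f;
    printf("reglu=%f geglu=%f swiglu=%f\n", op_reglu(x, g), op_geglu(x, g), op_swiglu(x, g));
    return 0;
}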
@@ -3359,8 +3359,8 @@ static void ggml_compute_forward_geglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
@@ -3425,8 +3425,8 @@ static void ggml_compute_forward_swiglu_f32(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
@@ -3466,8 +3466,8 @@ static void ggml_compute_forward_swiglu_f16(
     const int nc = src0->ne[0] / 2;
     const int nr = ggml_nrows(src0);
 
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= nr);
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
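All of the CPU paths above split work by rows with the same ceiling division, dr = (nr + nth - 1)/nth. A small standalone sketch of that partitioning; the ir0/ir1 bounds follow the usual ggml pattern and are not part of this diff:

// minimal sketch of ceil-division row partitioning across nth threads
#include <algorithm>
#include <cstdio>

int main() {
    const int nr  = 10; // total rows (example value)
    const int nth = 4;  // number of threads (example value)

    const int dr = (nr + nth - 1)/nth; // rows per thread, rounded up

    for (int ith = 0; ith < nth; ++ith) {
        const int ir0 = dr*ith;                  // first row for this thread
        const int ir1 = std::min(ir0 + dr, nr);  // one past the last row
        printf("thread %d: rows [%d, %d)\n", ith, ir0, ir1);
    }
    return 0;
}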
@@ -230,8 +230,8 @@ void ggml_cuda_op_unary_gated(ggml_backend_cuda_context & ctx, ggml_tensor * dst
     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
     GGML_ASSERT(src0->type == dst->type);
-    GGML_ASSERT(dst->ne[0] >= nc);
-    GGML_ASSERT(ggml_nrows(dst) >= ggml_nrows(src0));
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == ggml_nrows(src0));
 
     if (src0->type == GGML_TYPE_F16) {
         unary_gated_cuda<op>((const half *)src0_d, (half *)dst_d, ggml_nelements(dst), nc, src0->nb[1] / sizeof(half), stream);
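On the CUDA side the call passes the flat element count of dst together with nc and the src0 row stride in elements (src0->nb[1] / sizeof(half)), presumably so the kernel can map a flat dst index back into the wider src0 rows. A hedged host-side sketch of that index arithmetic, not the actual kernel: gated_ref is hypothetical, the gate is assumed to occupy the second half of each src0 row, and REGLU is used as the example activation.

// minimal host-side sketch: k = number of dst elements,
// n = dst columns (nc), o = src0 row stride in elements
#include <cstdint>
#include <cstdio>

static void gated_ref(const float * x, float * dst, int64_t k, int64_t n, int64_t o) {
    for (int64_t i = 0; i < k; ++i) {       // a CUDA kernel would handle one i per thread
        const int64_t row = i / n;
        const int64_t col = i % n;
        const float v = x[row*o + col];     // value half of the src0 row
        const float g = x[row*o + col + n]; // gate half (assumed layout)
        dst[i] = (v > 0.0f ? v : 0.0f) * g; // REGLU as the example activation
    }
}

int main() {
    const int64_t n = 4, rows = 2, o = 2*n;
    float x[rows*o];
    float dst[rows*n];
    for (int64_t i = 0; i < rows*o; ++i) {
        x[i] = (float) i - 6.0f;
    }
    gated_ref(x, dst, rows*n, n, o);
    for (int64_t i = 0; i < rows*n; ++i) {
        printf("%g ", dst[i]);
    }
    printf("\n");
    return 0;
}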