From cfa9c7a47a1ec7f1e44662fef31de2c22831008c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Fri, 13 Jun 2025 16:10:03 +0200 Subject: [PATCH] add CUDA_GLU_BLOCK_SIZE [no ci] --- ggml/src/ggml-cuda/unary.cu | 4 ++-- ggml/src/ggml-cuda/unary.cuh | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu index 77ef81545..bb048ba4b 100644 --- a/ggml/src/ggml-cuda/unary.cu +++ b/ggml/src/ggml-cuda/unary.cu @@ -213,8 +213,8 @@ static __global__ void unary_gated_op_kernel(const T * x, T * dst, const int k, template static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) { - const int num_blocks = (k + CUDA_NEG_BLOCK_SIZE - 1) / CUDA_NEG_BLOCK_SIZE; - unary_gated_op_kernel<<>>(x, dst, k, n, o); + const int num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE; + unary_gated_op_kernel<<>>(x, dst, k, n, o); } template diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh index d4533d24e..9094f1d0b 100644 --- a/ggml/src/ggml-cuda/unary.cuh +++ b/ggml/src/ggml-cuda/unary.cuh @@ -15,6 +15,7 @@ #define CUDA_SQRT_BLOCK_SIZE 256 #define CUDA_SIN_BLOCK_SIZE 256 #define CUDA_COS_BLOCK_SIZE 256 +#define CUDA_GLU_BLOCK_SIZE 256 void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);