mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-30 12:55:17 +00:00
add CUDA_GLU_BLOCK_SIZE [no ci]
This commit is contained in:
committed by
Akarshan
parent
d9ddeb9dfd
commit
cfa9c7a47a
@ -213,8 +213,8 @@ static __global__ void unary_gated_op_kernel(const T * x, T * dst, const int k,
|
|||||||
|
|
||||||
template <float (*op)(float), typename T>
|
template <float (*op)(float), typename T>
|
||||||
static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) {
|
static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) {
|
||||||
const int num_blocks = (k + CUDA_NEG_BLOCK_SIZE - 1) / CUDA_NEG_BLOCK_SIZE;
|
const int num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE;
|
||||||
unary_gated_op_kernel<op><<<num_blocks, CUDA_NEG_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
|
unary_gated_op_kernel<op><<<num_blocks, CUDA_GLU_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <float (*op)(float)>
|
template <float (*op)(float)>
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#define CUDA_SQRT_BLOCK_SIZE 256
|
#define CUDA_SQRT_BLOCK_SIZE 256
|
||||||
#define CUDA_SIN_BLOCK_SIZE 256
|
#define CUDA_SIN_BLOCK_SIZE 256
|
||||||
#define CUDA_COS_BLOCK_SIZE 256
|
#define CUDA_COS_BLOCK_SIZE 256
|
||||||
|
#define CUDA_GLU_BLOCK_SIZE 256
|
||||||
|
|
||||||
void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user