CUDA: tuned mul_mat_q kernels (#2546)

This commit is contained in:
Johannes Gäßler
2023-08-09 09:42:34 +02:00
committed by GitHub
parent f5bfea0580
commit 25d43e0eb5
3 changed files with 676 additions and 386 deletions

View File

@@ -253,11 +253,6 @@ ifdef LLAMA_CUDA_KQUANTS_ITER
else
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
# Append the GGML_CUDA_MMQ_Y preprocessor define to NVCCFLAGS:
# use the value of LLAMA_CUDA_MMQ_Y when that variable is defined (non-empty),
# otherwise fall back to 64.
ifdef LLAMA_CUDA_MMQ_Y
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
else
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
endif # LLAMA_CUDA_MMQ_Y
#ifdef LLAMA_CUDA_CUBLAS
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
#endif # LLAMA_CUDA_CUBLAS