CUDA: Fix models with output size != 32000 (#2480)

2025-06-26 19:55:04 +00:00 · 2023-08-02 16:48:10 +02:00
parent 220d931864
commit 4f6b60c776
2 changed files with 249 additions and 75 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -280,8 +280,8 @@ if (LLAMA_CUBLAS)
        # 52 == lowest CUDA 12 standard
        # 60 == f16 CUDA intrinsics
        # 61 == integer CUDA intrinsics
-        # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
-        if (LLAMA_CUDA_DMMV_F16)
+        # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+        if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
            set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
        else()
            set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics