CUDA: add option to compile without FlashAttention (#12025)
@@ -69,6 +69,10 @@ if (CUDAToolkit_FOUND)
         add_compile_definitions(GGML_CUDA_NO_VMM)
     endif()
 
+    if (NOT GGML_CUDA_FA)
+        add_compile_definitions(GGML_CUDA_NO_FA)
+    endif()
+
     if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
         add_compile_definitions(GGML_CUDA_F16)
     endif()
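
With this change, skipping the FlashAttention CUDA kernels becomes a configure-time choice: when the GGML_CUDA_FA option is disabled, GGML_CUDA_NO_FA is defined and FlashAttention-specific code paths can be compiled out. A minimal usage sketch, assuming GGML_CUDA_FA is exposed as a ggml CMake option that defaults to ON:

    # configure a CUDA build with the FlashAttention kernels disabled
    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_FA=OFF
    cmake --build build --config Release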