CUDA: add option to compile without FlashAttention (#12025)
@@ -3203,7 +3203,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_FLASH_ATTN_EXT: {
 #ifndef FLASH_ATTN_AVAILABLE
             return false;
-#endif
+#endif // FLASH_ATTN_AVAILABLE
             if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) {
                 return false;
             }
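
For context, below is a minimal self-contained sketch of the compile-time gate pattern this hunk belongs to: when the FlashAttention kernels are compiled out, the backend's supports_op check reports GGML_OP_FLASH_ATTN_EXT as unsupported so the scheduler can route the op to another backend instead of invoking a missing kernel. The names op_kind, tensor and device_supports_op are hypothetical stand-ins for illustration, not the real ggml API; only the FLASH_ATTN_AVAILABLE macro name is taken from the diff.

// Sketch only: illustrates the #ifndef FLASH_ATTN_AVAILABLE guard pattern,
// not the actual ggml-cuda sources.
#include <cstdio>

enum class op_kind { flash_attn_ext, mul_mat };   // hypothetical op identifiers

struct tensor {
    op_kind op;                                   // hypothetical stand-in for ggml_tensor
};

static bool device_supports_op(const tensor & t) {
    switch (t.op) {
        case op_kind::flash_attn_ext:
#ifndef FLASH_ATTN_AVAILABLE
            // FlashAttention kernels were excluded from this build:
            // report the op as unsupported so it can run elsewhere.
            return false;
#endif // FLASH_ATTN_AVAILABLE
            // Additional runtime checks (e.g. rejecting BF16 inputs,
            // as in the hunk above) would go here.
            return true;
        default:
            return true;
    }
}

int main() {
    tensor t{op_kind::flash_attn_ext};
    std::printf("flash_attn_ext supported: %s\n", device_supports_op(t) ? "yes" : "no");
}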