diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
index 109253838..afef815ce 100644
--- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently, LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, int parallel_blocks, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, int parallel_blocks, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
index 2cf2e408e..3595e2969 100644
--- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently, LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, int parallel_blocks, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, int parallel_blocks, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
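
For context on the warning being silenced: -Wpass-failed is the clang diagnostic emitted when an optimization pass cannot honor an explicit request such as #pragma unroll. On the amdgcn target, a loop containing a break that depends on runtime data falls into this category, so every such pragma-unrolled loop in these kernels would otherwise produce a warning. The sketch below is a minimal standalone reproduction of the pattern under that assumption; scan_until is a hypothetical kernel for illustration, not code from this patch.

// Hypothetical kernel reproducing the warning this patch suppresses.
// Compiled for amdgcn (e.g. with hipcc), the unroll pass gives up on the
// data-dependent break and emits -Wpass-failed ("loop not unrolled").
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__

__global__ void scan_until(const int * __restrict__ x, int * __restrict__ out, int n) {
#pragma unroll
    for (int i = 0; i < n; ++i) { // trip count is only known at runtime
        if (x[i] < 0) {
            break;                // break cannot be resolved at compile time
        }
        out[i] = x[i];
    }
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

Scoping the suppression with push/pop, as the patch does around the two kernels, restores -Wpass-failed for the rest of the translation unit, so genuine unrolling regressions elsewhere still surface.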