CUDA: fix race condition in FA vector kernels (#13742)

2025-06-26 19:55:04 +00:00 · 2025-05-24 11:46:19 +02:00
parent b775345d78
commit ffd0eae60b
2 changed files with 2 additions and 0 deletions
--- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
                }
            }
            if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                continue;
            }
 #endif // GGML_USE_HIP
--- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
                }
            }
            if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                continue;
            }
 #endif // GGML_USE_HIP