From 29c8fbe4e05fd23c44950d0958299e25fbeabc5c Mon Sep 17 00:00:00 2001 From: uvos Date: Wed, 13 Aug 2025 20:44:30 +0200 Subject: [PATCH] HIP: bump requirement to rocm 6.1 (#15296) --- .github/workflows/build.yml | 12 +----------- ggml/src/ggml-cuda/common.cuh | 14 +++++--------- ggml/src/ggml-cuda/ggml-cuda.cu | 24 ------------------------ ggml/src/ggml-cuda/vendors/hip.h | 16 ---------------- ggml/src/ggml-hip/CMakeLists.txt | 4 ++-- 5 files changed, 8 insertions(+), 62 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3d4f837e2..d4ed3ce7e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -443,7 +443,7 @@ jobs: ubuntu-22-cmake-hip: runs-on: ubuntu-22.04 - container: rocm/dev-ubuntu-22.04:6.0.2 + container: rocm/dev-ubuntu-22.04:6.1.2 steps: - name: Clone @@ -471,16 +471,6 @@ jobs: -DGGML_HIP=ON cmake --build build --config Release -j $(nproc) - - name: Build with legacy HIP support - id: cmake_build_legacy_hip - run: | - cmake -B build2 -S . \ - -DCMAKE_C_COMPILER=hipcc \ - -DCMAKE_CXX_COMPILER=hipcc \ - -DGGML_HIP_ROCWMMA_FATTN=ON \ - -DGGML_HIP=ON - cmake --build build2 --config Release -j $(nproc) - ubuntu-22-cmake-musa: runs-on: ubuntu-22.04 container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64 diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 5a2a3478d..2b14b30ac 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b } static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) { -#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000 +#if defined(GGML_USE_HIP) return half2(__hmax(a.x, b.x), __hmax(a.y, b.y)); -#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX +#elif CUDART_VERSION >= CUDART_HMAX return __hmax2(a, b); -#elif !defined(GGML_USE_HIP) +#else half2 ret; reinterpret_cast(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b))); reinterpret_cast(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b))); return ret; -#else - GGML_UNUSED(a); - GGML_UNUSED(b); - NO_DEVICE_CODE; #endif } template static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { -#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) +#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP) #pragma unroll for (int offset = width/2; offset > 0; offset >>= 1) { x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width)); @@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { #else GGML_UNUSED(x); NO_DEVICE_CODE; -#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) +#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP) } #if CUDART_VERSION < CUDART_HMASK diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index d9110491e..0d92901cb 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) { #endif // defined(GGML_USE_HIP) static ggml_cuda_device_info ggml_cuda_init() { -#if defined(GGML_USE_HIP) - // Workaround for a rocBLAS bug when using multiple graphics cards: - // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346 - { - int major_version = 0; - size_t version_length = 0; - if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) { - std::vector version(version_length+1, '\0'); - if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) { - version.resize(::strlen(version.data())); - int parsed_value = 0; - if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) { - major_version = parsed_value; - } - } - } - if (major_version < 4) { - GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n"); - rocblas_initialize(); - CUDA_CHECK(cudaDeviceSynchronize()); - } - } -#endif - ggml_cuda_device_info info = {}; cudaError_t err = cudaGetDeviceCount(&info.device_count); diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 96f8bc75e..ec1b59caa 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -5,8 +5,6 @@ #include #include #include -// for rocblas_initialize() -#include "rocblas/rocblas.h" #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT @@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne } return c; } - -#if HIP_VERSION < 50600000 -// __shfl_xor() for half2 was added in ROCm 5.6 -static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) { - typedef union half2_b32 { - half2 val; - int b32; - } half2_b32_t; - half2_b32_t tmp; - tmp.val = var; - tmp.b32 = __shfl_xor(tmp.b32, laneMask, width); - return tmp.val; -} -#endif // HIP_VERSION < 50600000 diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 852de9734..d327b90cc 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN) endif() endif() -if (${hip_VERSION} VERSION_LESS 5.5) - message(FATAL_ERROR "At least ROCM/HIP V5.5 is required") +if (${hip_VERSION} VERSION_LESS 6.1) + message(FATAL_ERROR "At least ROCM/HIP V6.1 is required") endif() message(STATUS "HIP and hipBLAS found")