HIP: bump requirement to rocm 6.1 (#15296)

This commit is contained in:
uvos
2025-08-13 20:44:30 +02:00
committed by GitHub
parent 1adc9812bd
commit 29c8fbe4e0
5 changed files with 8 additions and 62 deletions

View File

@@ -443,7 +443,7 @@ jobs:
ubuntu-22-cmake-hip: ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.0.2 container: rocm/dev-ubuntu-22.04:6.1.2
steps: steps:
- name: Clone - name: Clone
@@ -471,16 +471,6 @@ jobs:
-DGGML_HIP=ON -DGGML_HIP=ON
cmake --build build --config Release -j $(nproc) cmake --build build --config Release -j $(nproc)
- name: Build with legacy HIP support
id: cmake_build_legacy_hip
run: |
cmake -B build2 -S . \
-DCMAKE_C_COMPILER=hipcc \
-DCMAKE_CXX_COMPILER=hipcc \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DGGML_HIP=ON
cmake --build build2 --config Release -j $(nproc)
ubuntu-22-cmake-musa: ubuntu-22-cmake-musa:
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64 container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64

View File

@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
} }
static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) { static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000 #if defined(GGML_USE_HIP)
return half2(__hmax(a.x, b.x), __hmax(a.y, b.y)); return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX #elif CUDART_VERSION >= CUDART_HMAX
return __hmax2(a, b); return __hmax2(a, b);
#elif !defined(GGML_USE_HIP) #else
half2 ret; half2 ret;
reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b))); reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b)));
reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b))); reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
return ret; return ret;
#else
GGML_UNUSED(a);
GGML_UNUSED(b);
NO_DEVICE_CODE;
#endif #endif
} }
template<int width = WARP_SIZE> template<int width = WARP_SIZE>
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) #if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
#pragma unroll #pragma unroll
for (int offset = width/2; offset > 0; offset >>= 1) { for (int offset = width/2; offset > 0; offset >>= 1) {
x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width)); x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
#else #else
GGML_UNUSED(x); GGML_UNUSED(x);
NO_DEVICE_CODE; NO_DEVICE_CODE;
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) #endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
} }
#if CUDART_VERSION < CUDART_HMASK #if CUDART_VERSION < CUDART_HMASK

View File

@@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) {
#endif // defined(GGML_USE_HIP) #endif // defined(GGML_USE_HIP)
static ggml_cuda_device_info ggml_cuda_init() { static ggml_cuda_device_info ggml_cuda_init() {
#if defined(GGML_USE_HIP)
// Workaround for a rocBLAS bug when using multiple graphics cards:
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
{
int major_version = 0;
size_t version_length = 0;
if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
std::vector<char> version(version_length+1, '\0');
if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
version.resize(::strlen(version.data()));
int parsed_value = 0;
if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) {
major_version = parsed_value;
}
}
}
if (major_version < 4) {
GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
rocblas_initialize();
CUDA_CHECK(cudaDeviceSynchronize());
}
}
#endif
ggml_cuda_device_info info = {}; ggml_cuda_device_info info = {};
cudaError_t err = cudaGetDeviceCount(&info.device_count); cudaError_t err = cudaGetDeviceCount(&info.device_count);

View File

@@ -5,8 +5,6 @@
#include <hipblas/hipblas.h> #include <hipblas/hipblas.h>
#include <hip/hip_fp16.h> #include <hip/hip_fp16.h>
#include <hip/hip_bfloat16.h> #include <hip/hip_bfloat16.h>
// for rocblas_initialize()
#include "rocblas/rocblas.h"
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
@@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne
} }
return c; return c;
} }
#if HIP_VERSION < 50600000
// __shfl_xor() for half2 was added in ROCm 5.6
static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
typedef union half2_b32 {
half2 val;
int b32;
} half2_b32_t;
half2_b32_t tmp;
tmp.val = var;
tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
return tmp.val;
}
#endif // HIP_VERSION < 50600000

View File

@@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN)
endif() endif()
endif() endif()
if (${hip_VERSION} VERSION_LESS 5.5) if (${hip_VERSION} VERSION_LESS 6.1)
message(FATAL_ERROR "At least ROCM/HIP V5.5 is required") message(FATAL_ERROR "At least ROCM/HIP V6.1 is required")
endif() endif()
message(STATUS "HIP and hipBLAS found") message(STATUS "HIP and hipBLAS found")