mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-15 04:33:06 -04:00
HIP: bump requirement to rocm 6.1 (#15296)
This commit is contained in:
12
.github/workflows/build.yml
vendored
12
.github/workflows/build.yml
vendored
@@ -443,7 +443,7 @@ jobs:
|
|||||||
|
|
||||||
ubuntu-22-cmake-hip:
|
ubuntu-22-cmake-hip:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
container: rocm/dev-ubuntu-22.04:6.0.2
|
container: rocm/dev-ubuntu-22.04:6.1.2
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@@ -471,16 +471,6 @@ jobs:
|
|||||||
-DGGML_HIP=ON
|
-DGGML_HIP=ON
|
||||||
cmake --build build --config Release -j $(nproc)
|
cmake --build build --config Release -j $(nproc)
|
||||||
|
|
||||||
- name: Build with legacy HIP support
|
|
||||||
id: cmake_build_legacy_hip
|
|
||||||
run: |
|
|
||||||
cmake -B build2 -S . \
|
|
||||||
-DCMAKE_C_COMPILER=hipcc \
|
|
||||||
-DCMAKE_CXX_COMPILER=hipcc \
|
|
||||||
-DGGML_HIP_ROCWMMA_FATTN=ON \
|
|
||||||
-DGGML_HIP=ON
|
|
||||||
cmake --build build2 --config Release -j $(nproc)
|
|
||||||
|
|
||||||
ubuntu-22-cmake-musa:
|
ubuntu-22-cmake-musa:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64
|
container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64
|
||||||
|
@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
|
static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
|
||||||
#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
|
#if defined(GGML_USE_HIP)
|
||||||
return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
|
return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
|
||||||
#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
|
#elif CUDART_VERSION >= CUDART_HMAX
|
||||||
return __hmax2(a, b);
|
return __hmax2(a, b);
|
||||||
#elif !defined(GGML_USE_HIP)
|
#else
|
||||||
half2 ret;
|
half2 ret;
|
||||||
reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b)));
|
reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b)));
|
||||||
reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
|
reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
|
||||||
GGML_UNUSED(a);
|
|
||||||
GGML_UNUSED(b);
|
|
||||||
NO_DEVICE_CODE;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int width = WARP_SIZE>
|
template<int width = WARP_SIZE>
|
||||||
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
||||||
#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
|
#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int offset = width/2; offset > 0; offset >>= 1) {
|
for (int offset = width/2; offset > 0; offset >>= 1) {
|
||||||
x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
|
x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
|
||||||
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
|||||||
#else
|
#else
|
||||||
GGML_UNUSED(x);
|
GGML_UNUSED(x);
|
||||||
NO_DEVICE_CODE;
|
NO_DEVICE_CODE;
|
||||||
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
|
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CUDART_VERSION < CUDART_HMASK
|
#if CUDART_VERSION < CUDART_HMASK
|
||||||
|
@@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) {
|
|||||||
#endif // defined(GGML_USE_HIP)
|
#endif // defined(GGML_USE_HIP)
|
||||||
|
|
||||||
static ggml_cuda_device_info ggml_cuda_init() {
|
static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
#if defined(GGML_USE_HIP)
|
|
||||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
|
||||||
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
|
|
||||||
{
|
|
||||||
int major_version = 0;
|
|
||||||
size_t version_length = 0;
|
|
||||||
if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
|
|
||||||
std::vector<char> version(version_length+1, '\0');
|
|
||||||
if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
|
|
||||||
version.resize(::strlen(version.data()));
|
|
||||||
int parsed_value = 0;
|
|
||||||
if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) {
|
|
||||||
major_version = parsed_value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (major_version < 4) {
|
|
||||||
GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
|
|
||||||
rocblas_initialize();
|
|
||||||
CUDA_CHECK(cudaDeviceSynchronize());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ggml_cuda_device_info info = {};
|
ggml_cuda_device_info info = {};
|
||||||
|
|
||||||
cudaError_t err = cudaGetDeviceCount(&info.device_count);
|
cudaError_t err = cudaGetDeviceCount(&info.device_count);
|
||||||
|
16
ggml/src/ggml-cuda/vendors/hip.h
vendored
16
ggml/src/ggml-cuda/vendors/hip.h
vendored
@@ -5,8 +5,6 @@
|
|||||||
#include <hipblas/hipblas.h>
|
#include <hipblas/hipblas.h>
|
||||||
#include <hip/hip_fp16.h>
|
#include <hip/hip_fp16.h>
|
||||||
#include <hip/hip_bfloat16.h>
|
#include <hip/hip_bfloat16.h>
|
||||||
// for rocblas_initialize()
|
|
||||||
#include "rocblas/rocblas.h"
|
|
||||||
|
|
||||||
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
|
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
|
||||||
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
|
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
|
||||||
@@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne
|
|||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HIP_VERSION < 50600000
|
|
||||||
// __shfl_xor() for half2 was added in ROCm 5.6
|
|
||||||
static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
|
|
||||||
typedef union half2_b32 {
|
|
||||||
half2 val;
|
|
||||||
int b32;
|
|
||||||
} half2_b32_t;
|
|
||||||
half2_b32_t tmp;
|
|
||||||
tmp.val = var;
|
|
||||||
tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
|
|
||||||
return tmp.val;
|
|
||||||
}
|
|
||||||
#endif // HIP_VERSION < 50600000
|
|
||||||
|
@@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (${hip_VERSION} VERSION_LESS 5.5)
|
if (${hip_VERSION} VERSION_LESS 6.1)
|
||||||
message(FATAL_ERROR "At least ROCM/HIP V5.5 is required")
|
message(FATAL_ERROR "At least ROCM/HIP V6.1 is required")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
message(STATUS "HIP and hipBLAS found")
|
message(STATUS "HIP and hipBLAS found")
|
||||||
|
Reference in New Issue
Block a user