Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-11 11:05:39 -04:00)
HIP: add GGML_CUDA_CC_IS_* for AMD families, since increasing compute-capability values for AMD GPUs are not supersets of each other (#11601)
This fixes a bug where RDNA1 GPUs other than gfx1010 were not handled correctly.
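For context, the GGML_CUDA_CC_IS_* helpers introduced here turn exact compute-capability comparisons into per-family range checks. Below is a minimal sketch of how such macros can be written; the boundary constants (GGML_CUDA_CC_CDNA, GGML_CUDA_CC_RDNA1, GGML_CUDA_CC_RDNA2) and their ordering are assumptions for illustration, not the exact definitions in the CUDA/HIP common header.

// Sketch only: each family check is a half-open range over the compute-capability
// constants, assuming AMD families are assigned increasing numeric values
// (GCN < CDNA < RDNA1 < RDNA2 < ...). The real macro bodies in the tree may differ.
#define GGML_CUDA_CC_IS_CDNA(cc)  ((cc) >= GGML_CUDA_CC_CDNA  && (cc) < GGML_CUDA_CC_RDNA1)
#define GGML_CUDA_CC_IS_RDNA1(cc) ((cc) >= GGML_CUDA_CC_RDNA1 && (cc) < GGML_CUDA_CC_RDNA2)

A range check matches every chip in a family (e.g. gfx1010 and gfx1012 alike), whereas the equality tests replaced in the diff below only matched the single base value.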
@@ -1205,7 +1205,7 @@ static void ggml_cuda_op_mul_mat_cublas(
 
         CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
 
-        if (compute_capability == GGML_CUDA_CC_CDNA) {
+        if (GGML_CUDA_CC_IS_CDNA(compute_capability)) {
             const float alpha = 1.0f;
             const float beta = 0.0f;
             CUBLAS_CHECK(
@@ -1750,7 +1750,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
         beta = &beta_f32;
     }
 
-    if (ggml_cuda_info().devices[ctx.device].cc == GGML_CUDA_CC_CDNA) {
+    if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) {
         cu_compute_type = CUBLAS_COMPUTE_32F;
         alpha = &alpha_f32;
         beta = &beta_f32;
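To illustrate the failure mode the commit message describes, here is a hedged example in the spirit of the equality checks this diff replaces, assuming a hypothetical RDNA1 device whose compute capability sits two above the family's base constant (roughly a gfx1012 part):

// Hypothetical values for illustration only.
const int cc = GGML_CUDA_CC_RDNA1 + 2;             // an RDNA1 chip other than gfx1010
const bool old_check = (cc == GGML_CUDA_CC_RDNA1); // false: this device misses the RDNA1 path
const bool new_check = GGML_CUDA_CC_IS_RDNA1(cc);  // true: any chip in the family matches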