HIP: add GGML_CUDA_CC_IS_* for amd familys as increasing cc archtectures for amd gpus are not supersets of eatch other (#11601)

This fixes a bug where RDNA1 gpus other than gfx1010 where not handled correctly
This commit is contained in:
uvos
2025-02-02 22:08:05 +01:00
committed by GitHub
parent 90f9b88afb
commit 4d0598e144
4 changed files with 11 additions and 4 deletions

View File

@@ -120,7 +120,7 @@ static constexpr __device__ int get_mmq_x_max_device() {
}
static constexpr int get_mmq_y_host(const int cc) {
return cc >= GGML_CUDA_CC_OFFSET_AMD ? (cc == GGML_CUDA_CC_RDNA1 ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64);
return cc >= GGML_CUDA_CC_OFFSET_AMD ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64);
}
static constexpr __device__ int get_mmq_y_device() {