update HIP_UMA #7399 (#7414)

* update HIP_UMA #7399 add use of hipMemAdviseSetCoarseGrain when LLAMA_HIP_UMA is enable. - get x2 on prompte eval and x1.5 on token gen with rocm6.0 on ryzen 7940HX iGPU (780M/gfx1103) * simplify code, more consistent style --------- Co-authored-by: slaren <slarengh@gmail.com>
2025-09-02 05:14:11 -04:00 · 2024-05-28 01:40:47 +02:00
parent 0136966daf
commit 852aafb163
2 changed files with 17 additions and 8 deletions
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -79,13 +79,8 @@
 #define cudaHostRegisterReadOnly hipHostRegisterReadOnly
 #define cudaHostUnregister hipHostUnregister
 #define cudaLaunchHostFunc hipLaunchHostFunc
-#ifdef GGML_HIP_UMA
-#define cudaMalloc hipMallocManaged
-#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
-#else
 #define cudaMalloc hipMalloc
 #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
-#endif
 #define cudaMemcpy hipMemcpy
 #define cudaMemcpyAsync hipMemcpyAsync
 #define cudaMemcpyPeerAsync hipMemcpyPeerAsync