From bd6e55bfd3f3af3e5705cf87a10e5178cef7c3c1 Mon Sep 17 00:00:00 2001 From: R0CKSTAR Date: Thu, 13 Feb 2025 20:28:18 +0800 Subject: [PATCH] musa: bump MUSA SDK version to rc3.1.1 (#11822) * musa: Update MUSA SDK version to rc3.1.1 Signed-off-by: Xiaodong Ye * musa: Remove workaround in PR #10042 Signed-off-by: Xiaodong Ye --------- Signed-off-by: Xiaodong Ye --- .devops/musa.Dockerfile | 2 +- .github/workflows/build.yml | 2 +- docs/docker.md | 2 +- ggml/src/ggml-cuda/ggml-cuda.cu | 5 ----- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index bfd7fc1c1..1e87737ab 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -1,6 +1,6 @@ ARG UBUNTU_VERSION=22.04 # This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 +ARG MUSA_VERSION=rc3.1.1 # Target the MUSA build image ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6841ba589..02f6a1363 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -443,7 +443,7 @@ jobs: ubuntu-22-cmake-musa: runs-on: ubuntu-22.04 - container: mthreads/musa:rc3.1.0-devel-ubuntu22.04 + container: mthreads/musa:rc3.1.1-devel-ubuntu22.04 steps: - name: Clone diff --git a/docs/docker.md b/docs/docker.md index dac9a9ec1..cab5ae957 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -104,7 +104,7 @@ You may want to pass in some different `ARGS`, depending on the MUSA environment The defaults are: -- `MUSA_VERSION` set to `rc3.1.0` +- `MUSA_VERSION` set to `rc3.1.1` The resulting images, are essentially the same as the non-MUSA images: diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 6ea415777..093ad7099 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -1480,12 +1480,7 @@ static void ggml_cuda_op_mul_mat( const size_t nbytes_data = ggml_nbytes(src0); const size_t nbytes_padding = ggml_row_size(src0->type, MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING); dev[id].src0_dd = dev[id].src0_dd_alloc.alloc(ctx.pool(id), nbytes_data + nbytes_padding); - // TODO: remove this for MUSA once the Guilty Lockup issue is resolved -#ifndef GGML_USE_MUSA CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd, 0, nbytes_data + nbytes_padding, stream)); -#else // GGML_USE_MUSA - CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd + nbytes_data, 0, nbytes_padding, stream)); -#endif // !GGML_USE_MUSA } // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared: