From b83e149ec6264d078e6a47412e7347bf5c2bfcc9 Mon Sep 17 00:00:00 2001
From: Andrew Godfrey
Date: Fri, 17 Nov 2023 00:01:15 -0800
Subject: [PATCH] cuda : get_row_rounding F32 (#4095)

* Fix #4017

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel

---------

Co-authored-by: Jared Van Bortel
---
 ggml-cuda.cu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 9aa61fe4d..874ad9ac4 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6356,6 +6356,7 @@ static int64_t get_row_rounding(ggml_type type) {
         case GGML_TYPE_Q8_0:
             return max_compute_capability >= CC_RDNA2 ? 128 : 64;
         case GGML_TYPE_F16:
+        case GGML_TYPE_F32:
             return 1;
         case GGML_TYPE_Q2_K:
             return max_compute_capability >= CC_RDNA2 ? 128 : 32;
@@ -6378,6 +6379,7 @@ static int64_t get_row_rounding(ggml_type type) {
         case GGML_TYPE_Q8_0:
             return 64;
         case GGML_TYPE_F16:
+        case GGML_TYPE_F32:
             return 1;
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q3_K: