mirror of https://github.com/ggml-org/llama.cpp.git
CUDA: use mul_mat_q kernels by default (#2683)
@@ -287,7 +287,7 @@ static int g_device_count = -1;
 static int g_main_device = 0;
 static int g_compute_capabilities[GGML_CUDA_MAX_DEVICES];
 static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0};
-static bool g_mul_mat_q = false;
+static bool g_mul_mat_q = true;
 
 static void * g_scratch_buffer = nullptr;
 static size_t g_scratch_size = 1024*1024*1024; // 1 GB by default
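The entire change is a one-line default flip: g_mul_mat_q now starts out true, so the custom quantized matrix-multiplication (mul_mat_q) kernels are used for quantized weights unless the user disables them. The dispatch site is not part of this hunk; the self-contained C++ sketch below shows, under assumed names, how such a global toggle typically selects between the mul_mat_q path and a dequantize-then-cuBLAS fallback. cuda_mul_mat, mul_mat_q_path, and cublas_path are illustrative stand-ins, not the actual llama.cpp functions.

#include <cstdio>

// Sketch only: the real dispatch lives in ggml-cuda.cu and operates on
// ggml tensors. The function names here are illustrative assumptions.
static bool g_mul_mat_q = true; // this commit flips the default from false to true

static void mul_mat_q_path() { std::puts("mul_mat_q: integer kernels on quantized blocks"); }
static void cublas_path()    { std::puts("cuBLAS: dequantize, then floating-point GEMM"); }

static void cuda_mul_mat(bool src0_is_quantized) {
    if (g_mul_mat_q && src0_is_quantized) {
        mul_mat_q_path(); // default path for quantized weights after this commit
    } else {
        cublas_path();    // taken when mul_mat_q is disabled or weights are not quantized
    }
}

int main() {
    cuda_mul_mat(true);  // new default: mul_mat_q kernels
    g_mul_mat_q = false; // restore the old default
    cuda_mul_mat(true);  // old behavior: cuBLAS fallback
    return 0;
}

Before this change, using the mul_mat_q kernels required opting in; afterwards they are the default for quantized weights and only an explicit opt-out routes matrix multiplication through cuBLAS.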