mirror of https://github.com/ggml-org/llama.cpp.git
CUDA: use mul_mat_q kernels by default (#2683)
@@ -287,7 +287,7 @@ static int g_device_count = -1;
 static int g_main_device = 0;
 static int g_compute_capabilities[GGML_CUDA_MAX_DEVICES];
 static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0};
-static bool g_mul_mat_q = false;
+static bool g_mul_mat_q = true;
 
 static void * g_scratch_buffer = nullptr;
 static size_t g_scratch_size = 1024*1024*1024; // 1 GB by default
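The entire change is a one-line default flip: g_mul_mat_q now starts out true, so the custom quantized matrix-multiplication (mul_mat_q) kernels are used for quantized weights unless the user disables them. The dispatch site is not part of this hunk; the self-contained C++ sketch below shows, under assumed names, how such a global toggle typically selects between the mul_mat_q path and a dequantize-then-cuBLAS fallback. cuda_mul_mat, mul_mat_q_path, and cublas_path are illustrative stand-ins, not the actual llama.cpp functions.

#include <cstdio>

// Sketch only: the real dispatch lives in ggml-cuda.cu and operates on
// ggml tensors. The function names here are illustrative assumptions.
static bool g_mul_mat_q = true; // this commit flips the default from false to true

static void mul_mat_q_path() { std::puts("mul_mat_q: integer kernels on quantized blocks"); }
static void cublas_path()    { std::puts("cuBLAS: dequantize, then floating-point GEMM"); }

static void cuda_mul_mat(bool src0_is_quantized) {
    if (g_mul_mat_q && src0_is_quantized) {
        mul_mat_q_path(); // default path for quantized weights after this commit
    } else {
        cublas_path();    // taken when mul_mat_q is disabled or weights are not quantized
    }
}

int main() {
    cuda_mul_mat(true);  // new default: mul_mat_q kernels
    g_mul_mat_q = false; // restore the old default
    cuda_mul_mat(true);  // old behavior: cuBLAS fallback
    return 0;
}

Before this change, using the mul_mat_q kernels required opting in; afterwards they are the default for quantized weights and only an explicit opt-out routes matrix multiplication through cuBLAS.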