CUDA: mmq CLI option, fixed mmq build issues (#2453)
@@ -74,6 +74,7 @@ struct gpt_params {
     size_t hellaswag_tasks = 400;   // number of tasks to use when computing the HellaSwag score
 
     bool low_vram          = false; // if true, reduce VRAM usage at the cost of performance
+    bool mul_mat_q         = false; // if true, use experimental mul_mat_q kernels
     bool memory_f16        = true;  // use f16 instead of f32 for memory kv
     bool random_prompt     = false; // do not randomize prompt if none provided
     bool use_color         = false; // use color to distinguish generations and inputs
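For context, a minimal sketch of how a field like this can be wired up: a command-line flag sets the new boolean in gpt_params, and the value is then forwarded to the CUDA backend. The flag spelling (-mmq / --mul-mat-q) and the setter name ggml_cuda_set_mul_mat_q are assumptions for illustration here, not a confirmed copy of this commit's code.

    #include <string>

    // Illustrative stand-in for the CUDA backend setter; in the real project the
    // CUDA backend exposes a call with a similar purpose (name assumed here).
    static void ggml_cuda_set_mul_mat_q(bool mul_mat_q) {
        (void) mul_mat_q; // in the real backend this would toggle the mmq kernels
    }

    struct gpt_params {
        bool mul_mat_q = false; // if true, use experimental mul_mat_q kernels
    };

    // Parse a hypothetical -mmq/--mul-mat-q flag and forward the choice to the backend.
    static void apply_mmq_option(int argc, char ** argv, gpt_params & params) {
        for (int i = 1; i < argc; i++) {
            const std::string arg = argv[i];
            if (arg == "-mmq" || arg == "--mul-mat-q") {
                params.mul_mat_q = true;
            }
        }
        ggml_cuda_set_mul_mat_q(params.mul_mat_q);
    }

    int main(int argc, char ** argv) {
        gpt_params params;
        apply_mmq_option(argc, argv, params);
        return params.mul_mat_q ? 0 : 1;
    }

Keeping the option off by default, as the diff does, means existing users see no behavior change unless they opt in to the experimental kernels.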