CUDA: compress mode option and default to size (#12029)

CUDA 12.8 added an option to select stronger compression for binaries, so the build now defaults to "size".
2025-06-26 19:55:04 +00:00 · 2025-03-01 12:57:22 +01:00
parent 2cc4a5e44a
commit 80c41ddd8f
2 changed files with 12 additions and 0 deletions
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"
 option(GGML_CUDA_FA                         "ggml: compile ggml FlashAttention CUDA kernels"  ON)
 option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF)
 option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT})
+# Binary compression mode for the CUDA build (needs CUDA 12.8+). STRINGS is only a GUI selection hint; CMake does not validate the value against it.
+set(GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING "ggml: cuda link binary compression mode; requires cuda 12.8+")
+set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS none speed balance size)

 option(GGML_HIP                             "ggml: use HIP"                                   OFF)
 option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF)