llama : accept a list of devices to use to offload a model (#10497)

* llama : accept a list of devices to use to offload a model
* accept `--dev none` to completely disable offloading
* fix dev list with dl backends
* rename env parameter to LLAMA_ARG_DEVICE for consistency
2025-08-14 20:29:41 -04:00 · 2024-11-25 19:30:06 +01:00
parent 1f922254f0
commit 10bce0450f
9 changed files with 104 additions and 27 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -272,6 +272,9 @@ extern "C" {
    };

    struct llama_model_params {
+        // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
+        ggml_backend_dev_t * devices;
+
        int32_t n_gpu_layers; // number of layers to store in VRAM
        enum llama_split_mode split_mode; // how to split the model across multiple GPUs