mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
llama : accept a list of devices to use to offload a model (#10497)
* llama : accept a list of devices to use to offload a model * accept `--dev none` to completely disable offloading * fix dev list with dl backends * rename env parameter to LLAMA_ARG_DEVICE for consistency
This commit is contained in:
@ -46,6 +46,7 @@ int main(int argc, char ** argv) {
|
||||
ctx_tgt = llama_init_tgt.context;
|
||||
|
||||
// load the draft model
|
||||
params.devices = params.speculative.devices;
|
||||
params.model = params.speculative.model;
|
||||
params.n_ctx = params.speculative.n_ctx;
|
||||
params.n_batch = params.speculative.n_ctx > 0 ? params.speculative.n_ctx : params.n_batch;
|
||||
|
Reference in New Issue
Block a user