mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 20:45:04 +00:00
llama : add option to override model tensor buffers (#11397)
* llama : add option to override tensor buffers * ggml : fix possible underflow in ggml_nbytes
This commit is contained in:
@ -255,7 +255,8 @@ llama_context::llama_context(
|
||||
model.n_devices() > 1 &&
|
||||
model.params.n_gpu_layers > (int) model.hparams.n_layer &&
|
||||
model.params.split_mode == LLAMA_SPLIT_MODE_LAYER &&
|
||||
cparams.offload_kqv;
|
||||
cparams.offload_kqv &&
|
||||
!model.has_tensor_overrides();
|
||||
|
||||
// pipeline parallelism requires support for async compute and events in all devices
|
||||
if (pipeline_parallel) {
|
||||
|
Reference in New Issue
Block a user