mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 04:35:05 +00:00
@ -271,7 +271,17 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
|
||||
}
|
||||
}
|
||||
|
||||
// add extra buffer types
|
||||
bool has_gpu_device = false;
|
||||
for (auto * dev : devices) {
|
||||
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
|
||||
has_gpu_device = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add extra buffer types, only if no GPU device is present
|
||||
// ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
|
||||
if (!has_gpu_device) {
|
||||
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||
auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
|
||||
auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
|
||||
@ -283,6 +293,9 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
|
||||
++extra_bufts;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LLAMA_LOG_WARN("%s: disabling extra buffer types (i.e. repacking) since a GPU device is available\n", __func__);
|
||||
}
|
||||
|
||||
// add a host buffer type
|
||||
// storing the tensors in a host buffer is useful when the processing of large batches
|
||||
|
Reference in New Issue
Block a user