mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-01 05:05:10 +00:00)
kompute : llama-bench support and ggml_cpu_has_kompute() (#5226)
@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
         n_threads = std::min(4, n_threads);
     }
 
-    const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
-        n_threads = 1;
-    }
-
 #ifdef GGML_USE_MPI
     const int64_t n_layer = hparams.n_layer;
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
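For context, ggml exposes compile-time backend checks such as ggml_cpu_has_cublas() and ggml_cpu_has_vulkan(), used in the removed lines above. The ggml_cpu_has_kompute() named in the commit title presumably follows the same pattern; below is a minimal sketch of that pattern under that assumption, not the exact body added by this commit.

// Sketch (assumed shape) of ggml's compile-time backend detection pattern;
// the actual ggml_cpu_has_kompute() introduced by this commit may differ.
int ggml_cpu_has_kompute(void) {
#if defined(GGML_USE_KOMPUTE)
    return 1;   // binary was built with the Kompute (Vulkan compute) backend
#else
    return 0;
#endif
}

Tools like llama-bench can probe such flags at runtime to report which backends a given binary was compiled with.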