Add --no-op-offload to improve -ot pp perf in MoE models like llama4 400B (#13386)
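For context, an illustrative invocation of the new flag (the binary name, model path, and -ot pattern below are assumptions; only the -ot and --no-op-offload flags themselves come from the commit title). The idea: when MoE expert tensors are pinned to system RAM with -ot, disabling per-op offload keeps prompt-processing work next to the weights instead of bouncing them through the GPU.

    # keep expert tensors on CPU, and skip op offload during prompt processing
    ./llama-cli -m llama4-400b.gguf \
        -ot "exps=CPU" \
        --no-op-offload \
        -p "Hello"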
@@ -853,7 +853,7 @@ int main(void) {
         backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());

         ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
-            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
+            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);

        printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
        printf("  Device description: %s\n", ggml_backend_dev_description(devs[i]));
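The only functional change in this hunk is the extra trailing bool passed to ggml_backend_sched_new. A minimal sketch of a caller updated for the new six-argument signature (the parameter name op_offload is an assumption inferred from the flag name; the diff itself only shows the added true argument):

    #include "ggml-backend.h"

    #include <vector>

    // Create a scheduler over a set of backends. The sixth argument is the
    // boolean added by this commit; passing false here would presumably
    // correspond to the behavior selected by --no-op-offload.
    static ggml_backend_sched_t make_sched(std::vector<ggml_backend_t> & backends, bool op_offload) {
        return ggml_backend_sched_new(
            backends.data(), /*bufts=*/nullptr, (int) backends.size(),
            GGML_DEFAULT_GRAPH_SIZE, /*parallel=*/false, op_offload);
    }

Passing true, as the test above does, preserves the previous default behavior.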