mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-17 21:51:27 -04:00
Add --no-op-offload
to improve -ot
pp perf in MoE models like llama4 400B (#13386)
This commit is contained in:
@@ -2437,6 +2437,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
}
|
||||
}
|
||||
));
|
||||
add_opt(common_arg(
|
||||
{"--no-op-offload"},
|
||||
string_format("disable offloading host tensor operations to device (default: %s)", params.no_op_offload ? "true" : "false"),
|
||||
[](common_params & params) {
|
||||
params.no_op_offload = true;
|
||||
}
|
||||
));
|
||||
add_opt(common_arg(
|
||||
{"--lora"}, "FNAME",
|
||||
"path to LoRA adapter (can be repeated to use multiple adapters)",
|
||||
|
Reference in New Issue
Block a user