mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 03:55:20 +00:00
llama : llama_perf + option to disable timings during decode (#9355)
* llama : llama_perf + option to disable timings during decode ggml-ci * common : add llama_arg * Update src/llama.cpp Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com> * perf : separate functions in the API ggml-ci * perf : safer pointer handling + naming update ggml-ci * minor : better local var name * perf : abort on invalid sampler pointer ggml-ci --------- Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
@ -720,6 +720,14 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||
params.prompt = value;
|
||||
}
|
||||
));
|
||||
add_opt(llama_arg(
|
||||
{"--no-perf"},
|
||||
format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
|
||||
[](gpt_params & params) {
|
||||
params.no_perf = true;
|
||||
params.sparams.no_perf = true;
|
||||
}
|
||||
).set_env("LLAMA_ARG_NO_PERF"));
|
||||
add_opt(llama_arg(
|
||||
{"-f", "--file"}, "FNAME",
|
||||
"a file containing the prompt (default: none)",
|
||||
|
Reference in New Issue
Block a user