Merge branch 'master' into xsn/private_batch_api

Xuan Son Nguyen
2025-03-18 15:45:22 +01:00
76 changed files with 3990 additions and 902 deletions


@@ -853,6 +853,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_excludes({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"-sysf", "--system-prompt-file"}, "FNAME",
+        "a file containing the system prompt (default: none)",
+        [](common_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.system_prompt));
+            if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
+                params.system_prompt.pop_back();
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -1875,7 +1889,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.out_file = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA}));
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS}));
     add_opt(common_arg(
         {"-ofreq", "--output-frequency"}, "N",
         string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),


@@ -998,6 +998,8 @@ struct common_init_result common_init_from_params(common_params & params) {
     if (params.warmup) {
         LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
+        llama_set_warmup(lctx, true);
+
         std::vector<llama_token> tmp;
         llama_token bos = llama_vocab_bos(vocab);
         llama_token eos = llama_vocab_eos(vocab);
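llama_set_warmup(lctx, true) switches the context into warmup mode before the dummy decode; per the llama.h description, warmup mode activates all model tensors during llama_decode so their weights are loaded and cached without producing real output.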
@@ -1030,6 +1032,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         llama_kv_self_clear(lctx);
         llama_synchronize(lctx);
         llama_perf_context_reset(lctx);
+        llama_set_warmup(lctx, false);
     }
 
     iparams.model.reset(model);
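Together the two hunks bracket the warmup decode: warmup mode on, dummy decode, state cleared, warmup mode off. A condensed sketch of the resulting sequence, assuming a loaded lctx and vocab and omitting the batch splitting and error handling of the real code:

    // enable warmup so the dummy decode touches and caches all weights
    llama_set_warmup(lctx, true);

    // build a minimal dummy batch (BOS/EOS if the vocab defines them)
    std::vector<llama_token> tmp;
    llama_token bos = llama_vocab_bos(vocab);
    llama_token eos = llama_vocab_eos(vocab);
    if (bos != LLAMA_TOKEN_NULL) { tmp.push_back(bos); }
    if (eos != LLAMA_TOKEN_NULL) { tmp.push_back(eos); }
    if (tmp.empty())             { tmp.push_back(0);   }
    llama_decode(lctx, llama_batch_get_one(tmp.data(), (int32_t) tmp.size()));

    // discard the warmup state and return to normal decoding
    llama_kv_self_clear(lctx);
    llama_synchronize(lctx);
    llama_perf_context_reset(lctx);
    llama_set_warmup(lctx, false);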