mirror of https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 20:05:20 +00:00
imatrix : Add --parse-special for enabling parsing of special tokens in imatrix calculation (#13389)
* Add --parse-special for enabling parsing of special tokens in imatrix calculation

* whitespace
common/arg.cpp
@@ -2627,6 +2627,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.i_chunk = value;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--parse-special"},
+        string_format("parse special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
+        [](common_params & params) {
+            params.parse_special = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"-pps"},
         string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),
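
As a quick illustration of what this registration does, here is a minimal sketch (an assumption for illustration, not code from this commit) of driving the common argument parser the way llama-imatrix does, using common_params_parse from common/arg.h:

    // sketch: parsing a fake argv exercises the --parse-special handler above,
    // which sets params.parse_special = true
    #include "arg.h"
    #include "common.h"

    int main() {
        common_params params;
        const char * argv_demo[] = {"llama-imatrix", "-m", "model.gguf", "--parse-special"};
        if (!common_params_parse(4, const_cast<char **>(argv_demo), params, LLAMA_EXAMPLE_IMATRIX)) {
            return 1;
        }
        // params.parse_special is now true
        return params.parse_special ? 0 : 1;
    }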

common/common.h
@@ -409,6 +409,7 @@ struct common_params {
 
     bool process_output = false; // collect data for the output tensor
     bool compute_ppl = true;     // whether to compute perplexity
+    bool parse_special = false;  // whether to parse special tokens during imatrix tokenization
 
     // cvector-generator params
     int n_pca_batch = 100;

tools/imatrix/imatrix.cpp
@@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) {
     LOG("\n %s \\\n"
         " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
         " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
-        " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
+        " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
+        " [--parse-special]\n" , argv[0]);
     LOG("\n");
 }
 
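
A typical invocation using the new flag might look like this (a sketch; the binary, model, and data file names are placeholders):

    ./llama-imatrix -m model.gguf -f calibration-data.txt -o imatrix.dat --parse-special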

@@ -439,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);
 
-    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
 
     auto tim2 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
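
The behavioral difference is in how common_tokenize treats special-token strings that appear literally in the calibration text. A minimal sketch, assuming an already-initialized llama_context and a model whose vocabulary defines <|im_start|> as a special token:

    #include "common.h"
    #include <cstdio>

    // sketch only: "ctx" is assumed to be a loaded llama_context
    static void demo_parse_special(llama_context * ctx) {
        const std::string text = "<|im_start|>user\nhello";
        // parse_special = false: the marker is tokenized as ordinary text pieces
        std::vector<llama_token> plain   = common_tokenize(ctx, text, /*add_special=*/true, /*parse_special=*/false);
        // parse_special = true: the marker maps to its single special token id
        std::vector<llama_token> special = common_tokenize(ctx, text, /*add_special=*/true, /*parse_special=*/true);
        // the second sequence is typically shorter, since the marker collapses to one token
        printf("plain: %zu tokens, special: %zu tokens\n", plain.size(), special.size());
    }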