finetune : add training data file to log message (#4979)

This commit adds the name of the training data file to the log message
printed when the training data is tokenized.

The motivation for this change is that it can be useful to show which
file is being tokenized when running the finetune example.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
This commit is contained in:
Daniel Bevenius
2024-01-16 18:54:24 +01:00
committed by GitHub
parent 334a835a1c
commit cec8a48470

View File

@@ -1799,7 +1799,7 @@ int main(int argc, char ** argv) {
std::vector<llama_token> train_tokens; std::vector<llama_token> train_tokens;
std::vector<size_t> train_samples_begin; std::vector<size_t> train_samples_begin;
std::vector<size_t> train_samples_size; std::vector<size_t> train_samples_size;
printf("%s: tokenize training data\n", __func__); printf("%s: tokenize training data from %s\n", __func__, params.common.fn_train_data);
tokenize_file(lctx, tokenize_file(lctx,
params.common.fn_train_data, params.common.fn_train_data,
params.common.sample_start, params.common.sample_start,