mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
vocab : fix ugm tokenizer precision (#13743)
This commit is contained in:
@ -835,7 +835,7 @@ struct llm_tokenizer_ugm_session {
|
||||
}
|
||||
|
||||
// initialize score_sum to the most negative finite value (-DBL_MAX; previously -FLT_MAX) so that it is always lower than any sum of token scores
|
||||
std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
|
||||
std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
|
||||
// at the beginning, the tokenization score is zero
|
||||
tokenization_results[0] = { vocab.token_unk(), 0, 0 };
|
||||
|
||||
@ -867,7 +867,7 @@ struct llm_tokenizer_ugm_session {
|
||||
const double challenger_score = current_best.score_sum + token_score;
|
||||
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
|
||||
if (challenger_score > current_champ.score_sum) {
|
||||
struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
|
||||
struct best_tokenization challenger = { token_id, input_offset, challenger_score };
|
||||
current_champ = challenger;
|
||||
}
|
||||
}
|
||||
@ -881,7 +881,7 @@ struct llm_tokenizer_ugm_session {
|
||||
prefix_offset = input_offset + n_utf8_code_units;
|
||||
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
|
||||
if (challenger_score > current_champ.score_sum) {
|
||||
struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
|
||||
struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
|
||||
current_champ = challenger;
|
||||
}
|
||||
}
|
||||
@ -1007,7 +1007,7 @@ private:
|
||||
struct best_tokenization {
|
||||
llama_token token_id;
|
||||
size_t input_offset;
|
||||
float score_sum;
|
||||
double score_sum;
|
||||
};
|
||||
|
||||
struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
|
||||
|
Reference in New Issue
Block a user