vocab : prevent tokenizer overflow (#14301)

* vocab : prevent stack overflow in tokenize

* vocab : return error instead of aborting on oversized token count

* vocab : INT32_MIN from llama_tokenize on overflow
This commit is contained in:
Ruikai Peng
2025-06-20 22:13:06 +08:00
committed by GitHub
parent 8308f98c7f
commit dd6e6d0b6a
3 changed files with 9 additions and 0 deletions

View File

@ -1290,6 +1290,9 @@ std::vector<llama_token> common_tokenize(
int n_tokens = text.length() + 2 * add_special;
std::vector<llama_token> result(n_tokens);
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
if (n_tokens == std::numeric_limits<int32_t>::min()) {
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
}
if (n_tokens < 0) {
result.resize(-n_tokens);
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);