vocab : prevent tokenizer overflow (#14301)
* vocab : prevent stack overflow in tokenize
* vocab : return error instead of aborting on oversized token count
* vocab : INT32_MIN from llama_tokenize on overflow
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1290,6 +1290,9 @@ std::vector<llama_token> common_tokenize(
     int n_tokens = text.length() + 2 * add_special;
     std::vector<llama_token> result(n_tokens);
     n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+    }
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
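With this change, common_tokenize() converts the INT32_MIN sentinel into a C++ exception instead of letting the library abort. A minimal caller-side sketch (hedged: `vocab` and `huge_text` are placeholder names, and this assumes the common_tokenize overload that takes a llama_vocab pointer):

    try {
        // On oversized input, common_tokenize() now throws instead of
        // overflowing the stack or aborting inside the library.
        std::vector<llama_token> toks = common_tokenize(vocab, huge_text,
                                                        /*add_special=*/true,
                                                        /*parse_special=*/false);
    } catch (const std::runtime_error & e) {
        fprintf(stderr, "tokenization failed: %s\n", e.what());
    }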
--- a/include/llama.h
+++ b/include/llama.h
@@ -1088,6 +1088,7 @@ extern "C" {
     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
     /// @return Returns the number of tokens on success, no more than n_tokens_max
     /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
     ///                      as plaintext. Does not insert a leading space.
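The updated comments describe a three-way return convention: a non-negative token count on success, a negated required count when the buffer is too small, and now INT32_MIN on overflow. A hedged caller sketch (`vocab`, `text`, and `buf` are placeholders; note the INT32_MIN test must come before the generic negative test, since INT32_MIN is itself negative and negating it would overflow):

    int32_t n = llama_tokenize(vocab, text, text_len,
                               buf.data(), (int32_t) buf.size(),
                               /*add_special=*/true, /*parse_special=*/false);
    if (n == INT32_MIN) {
        // overflow: the result cannot be represented in int32_t at all, give up
    } else if (n < 0) {
        // buffer too small: -n is the required token count, resize and retry
        buf.resize((size_t) -n);
        n = llama_tokenize(vocab, text, text_len,
                           buf.data(), (int32_t) buf.size(), true, false);
    }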
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3074,6 +3074,11 @@ int32_t llama_vocab::tokenize(
                         bool   add_special,
                         bool   parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return std::numeric_limits<int32_t>::min();
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
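The check uses >= so a count of exactly INT32_MAX is also treated as overflow, keeping both the success value and the negated buffer-too-small value strictly within int32_t range and distinct from the INT32_MIN sentinel. A self-contained illustration of the resulting encoding (illustrative only; encode_result is not library code):

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Re-statement of the int32_t return convention used above: success -> token
    // count, buffer too small -> negated count, overflow -> INT32_MIN sentinel.
    static int32_t encode_result(size_t n_res, int32_t n_tokens_max) {
        if (n_res >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
            return std::numeric_limits<int32_t>::min(); // overflow sentinel
        }
        if (n_tokens_max < (int32_t) n_res) {
            return -((int32_t) n_res); // caller must grow its buffer to n_res
        }
        return (int32_t) n_res; // success
    }

    int main() {
        printf("%d\n", encode_result(10, 8));  // -10: buffer too small
        printf("%d\n", encode_result(10, 16)); //  10: success
        return 0;
    }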