Mirror of https://github.com/ggml-org/llama.cpp.git
vocab : prevent tokenizer overflow (#14301)
* vocab : prevent stack overflow in tokenize
* vocab : return error instead of aborting on oversized token count
* vocab : INT32_MIN from llama_tokenize on overflow
@@ -3074,6 +3074,11 @@ int32_t llama_vocab::tokenize(
                         bool   add_special,
                         bool   parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return std::numeric_limits<int32_t>::min();
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
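
For context, here is a caller-side sketch of how the new sentinel can be handled. This is an illustration, not part of the commit: the helper name tokenize_checked is hypothetical, and it assumes the recent llama.cpp C API in which llama_tokenize() takes a const llama_vocab * (older headers pass a model or context instead).

#include <algorithm>
#include <climits>
#include <stdexcept>
#include <string>
#include <vector>

#include "llama.h"

// Hypothetical helper: tokenize with the usual grow-and-retry pattern,
// treating INT32_MIN (introduced by this commit) as a hard error rather
// than as a "buffer too small" hint.
static std::vector<llama_token> tokenize_checked(
        const llama_vocab * vocab,
        const std::string & text,
        bool add_special,
        bool parse_special) {
    std::vector<llama_token> tokens(text.size() + 2); // heuristic initial size

    int32_t n = llama_tokenize(vocab, text.c_str(), (int32_t) text.size(),
                               tokens.data(), (int32_t) tokens.size(),
                               add_special, parse_special);
    if (n == INT32_MIN) {
        // New error path: the token count does not fit in int32_t.
        throw std::runtime_error("tokenization overflow");
    }
    if (n < 0) {
        // Existing convention: the buffer was too small and -n is the
        // required token count, so retry once with the exact size.
        tokens.resize((size_t) -n);
        n = llama_tokenize(vocab, text.c_str(), (int32_t) text.size(),
                           tokens.data(), (int32_t) tokens.size(),
                           add_special, parse_special);
    }
    tokens.resize((size_t) std::max<int32_t>(n, 0));
    return tokens;
}

A note on the design: a valid "too many tokens" result is -(token count), which after this change always lies in (INT32_MIN, 0], so INT32_MIN can never collide with it; without the guard, a caller negating an overflowed return value could hit signed-overflow undefined behavior at -INT32_MIN.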