From 3cfbbdb44e08fd19429fed6cc85b982a91f0efd5 Mon Sep 17 00:00:00 2001
From: Guy Goldenberg <guy110698@gmail.com>
Date: Fri, 13 Jun 2025 19:20:25 +0300
Subject: [PATCH] Merge commit from fork

* vocab : prevent integer overflow during load

* Add static cast and GGML_ABORT

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 src/llama-vocab.cpp | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index d8c9d9730..07e692208 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <unordered_map>
 #include <cctype>
+#include <cinttypes>
 
 //
 // helpers
@@ -2572,6 +2573,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
     // copy piece chars to output text buffer
     // skip up to 'lstrip' leading spaces before copying
     auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+            GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
+        }
+
         for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
             token++;
             size--;