diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index d8c9d9730..07e692208 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <unordered_map>
 #include <cctype>
+#include <cinttypes>
 
 //
 // helpers
@@ -2572,6 +2573,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
     // copy piece chars to output text buffer
     // skip up to 'lstrip' leading spaces before copying
     auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+            GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
+        }
+
         for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
             token++;
             size--;