Mirror of https://github.com/ggml-org/llama.cpp.git
Fix n^2 loop in tokenization (#254)
This causes long prompts to parse very slowly.
@@ -302,7 +302,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
     // Forward pass
     for (int i = 0; i < len; i++) {
         int max_len = std::min(len - i, MAX_TOKEN_LEN);
-        for (int sub_len = 1; sub_len <= len - i; sub_len++) {
+        for (int sub_len = 1; sub_len <= max_len; sub_len++) {
             auto sub = text.substr(i, sub_len);
             auto token = vocab.token_to_id.find(sub);
             if (token != vocab.token_to_id.end()) {
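Below is a minimal, self-contained sketch of how a forward-pass tokenizer like this fits together and why the bound matters. Only the two loop heads, the substr() probe, and the token_to_id lookup come from the diff above; the tokenize() signature, the squared-length scoring, the backtracking pass, the example vocabulary, and the MAX_TOKEN_LEN value are assumptions added for illustration. The fix is visible in the inner loop: capping sub_len at max_len means each start position probes at most MAX_TOKEN_LEN substrings instead of len - i of them, so an n-character prompt costs O(n * MAX_TOKEN_LEN) lookups rather than O(n^2).

// Sketch of a dynamic-programming forward-pass tokenizer with the bounded
// inner loop from the diff. The scoring rule and backtracking scaffolding
// are assumptions for illustration, not the exact llama.cpp implementation.
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

static const int MAX_TOKEN_LEN = 18; // assumed cap on vocab token length

std::vector<int> tokenize(const std::unordered_map<std::string, int> & token_to_id,
                          const std::string & text) {
    const int len = (int) text.size();
    std::vector<int> score(len + 1, 0);    // best score for prefix text[0..i)
    std::vector<int> prev_len(len + 1, 0); // length of last token in best split
    std::vector<int> prev_id(len + 1, -1); // id of that token

    // Forward pass: from each position, probe substrings of up to
    // MAX_TOKEN_LEN characters. Before the fix the bound was len - i,
    // making this loop quadratic in probes (and worse in copied bytes).
    for (int i = 0; i < len; i++) {
        int max_len = std::min(len - i, MAX_TOKEN_LEN);
        for (int sub_len = 1; sub_len <= max_len; sub_len++) {
            auto sub = text.substr(i, sub_len);
            auto token = token_to_id.find(sub);
            if (token != token_to_id.end()) {
                // favor splits built from longer tokens (hypothetical rule)
                int local = score[i] + sub_len * sub_len;
                if (local > score[i + sub_len]) {
                    score[i + sub_len]    = local;
                    prev_len[i + sub_len] = sub_len;
                    prev_id[i + sub_len]  = token->second;
                }
            }
        }
    }

    // Backward pass: walk the prev_len chain to recover the token ids.
    std::vector<int> out;
    for (int i = len; i > 0; ) {
        if (prev_len[i] == 0) { i--; continue; } // unmatched byte: skip it
        out.push_back(prev_id[i]);
        i -= prev_len[i];
    }
    std::reverse(out.begin(), out.end());
    return out;
}

int main() {
    std::unordered_map<std::string, int> vocab = {
        {"he", 1}, {"hello", 2}, {" ", 3}, {"wor", 4}, {"world", 5},
    };
    for (int id : tokenize(vocab, "hello world")) std::cout << id << ' ';
    std::cout << '\n'; // expected: 2 3 5
}

With the unbounded loop, every probe longer than MAX_TOKEN_LEN characters is wasted work: no vocabulary entry that long can exist, yet substr() still copies the bytes and the hash lookup still runs. That is why very long prompts slowed down quadratically before this change.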