llama : the WPM vocabs use the CLS token as BOS (#10930)

* llama : the WPM vocabs use the CLS token as BOS

  ggml-ci

* llama : add comment
2025-08-10 18:54:09 -04:00 · 2024-12-24 09:44:20 +02:00
parent 60cfa728e2
commit 30caac3a68
2 changed files with 2 additions and 2 deletions
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,7 +45,7 @@ struct llama_vocab {
    id special_unk_id  = 0;
    id special_sep_id  = LLAMA_TOKEN_NULL;
    id special_pad_id  = LLAMA_TOKEN_NULL;
-    id special_cls_id  = LLAMA_TOKEN_NULL;
+    id special_cls_id  = LLAMA_TOKEN_NULL; // TODO: revisit if this is really needed https://github.com/ggerganov/llama.cpp/pull/10930
    id special_mask_id = LLAMA_TOKEN_NULL;

    id linefeed_id = 13;