llama : add pre-tokenizer regexes for BLOOM and gpt3-finnish (#8850)

2025-06-26 19:55:04 +00:00 · 2024-08-15 10:17:12 +03:00
parent d5492f0525
commit 6bda7ce6c3
5 changed files with 19 additions and 1 deletion
--- a/include/llama.h
+++ b/include/llama.h
@@ -93,6 +93,8 @@ extern "C" {
        LLAMA_VOCAB_PRE_TYPE_TEKKEN         = 20,
        LLAMA_VOCAB_PRE_TYPE_SMOLLM         = 21,
        LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
+        LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,
+        LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,
    };

    enum llama_rope_type {