mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-30 20:58:45 +00:00
llama : add pre-tokenizer regexes for BLOOM and gpt3-finnish (#8850)
This commit is contained in:
@ -410,6 +410,8 @@ struct llm_tokenizer_bpe {
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_PORO:
|
||||
case LLAMA_VOCAB_PRE_TYPE_BLOOM:
|
||||
case LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH:
|
||||
regex_exprs = {
|
||||
" ?[^(\\s|.,!?…。,、।۔،)]+",
|
||||
};
|
||||
|
Reference in New Issue
Block a user