mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-11 22:04:31 +00:00
llama : add Trillion 7B model support (#12556)
* Support Trillion 7B
* Update llama.h
* Update llama.h
* Update llama-vocab.cpp for Trillion
* Update llama-vocab.cpp
This commit is contained in:
@@ -342,6 +342,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
||||
case LLAMA_VOCAB_PRE_TYPE_MPT:
|
||||
case LLAMA_VOCAB_PRE_TYPE_OLMO:
|
||||
case LLAMA_VOCAB_PRE_TYPE_JAIS:
|
||||
case LLAMA_VOCAB_PRE_TYPE_TRILLION:
|
||||
regex_exprs = {
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
};
|
||||
@@ -1614,6 +1615,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
||||
tokenizer_pre == "superbpe") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_SUPERBPE;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "trillion") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
|
||||
clean_spaces = false;
|
||||
} else {
|
||||
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
||||
}
|
||||
|
Reference in New Issue
Block a user