diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 5f15c8257..9303a0476 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -852,6 +852,9 @@ class TextModel(ModelBase):
         if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
             res = "exaone4"
+        if chkhsh == "a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756":
+            # ref: https://huggingface.co/JetBrains/Mellum-4b-base
+            res = "mellum"
 
         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 211b81ff3..226805f1e 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -138,6 +138,7 @@ models = [
     {"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
     {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
     {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
+    {"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 7b7a93566..959c86a14 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1856,7 +1856,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "gigachat"   ||
                 tokenizer_pre == "jina-v2-es" ||
                 tokenizer_pre == "jina-v2-de" ||
-                tokenizer_pre == "a.x-4.0") {
+                tokenizer_pre == "a.x-4.0"    ||
+                tokenizer_pre == "mellum") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
             } else if (
                 tokenizer_pre == "jina-v1-en" ||
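For context, a minimal sketch of how `convert_hf_to_gguf.py` derives the `chkhsh` fingerprint matched in the first hunk: it tokenizes a fixed probe string and hashes the resulting token IDs. The probe string (`chktxt`) is elided here as a placeholder; the real one is the long multilingual string defined in `get_vocab_base_pre()`.

```python
from hashlib import sha256
from transformers import AutoTokenizer

# Load the tokenizer being registered in this diff.
tokenizer = AutoTokenizer.from_pretrained("JetBrains/Mellum-4b-base")

# Placeholder: substitute the multilingual probe string defined in
# convert_hf_to_gguf.py; the hash is only meaningful with that exact input.
chktxt = "..."

# Same hashing scheme as get_vocab_base_pre(): encode the probe text,
# then SHA-256 the string form of the token-ID list.
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()

# With the real probe string, this should print:
# a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756
print(chkhsh)
```

In practice the first hunk is generated rather than written by hand: running `convert_hf_to_gguf_update.py` (with a Hugging Face token) downloads every tokenizer registered in its `models` list and regenerates the `chkhsh` branches in `convert_hf_to_gguf.py`, which is why the new entry in the second hunk drives the change in the first.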