vocab : add midm-2.0 model pre-tokenizer (#14626)

This commit is contained in:
Dowon
2025-07-11 16:36:04 +09:00
committed by GitHub
parent 0aedae00e6
commit 576c82eda2
3 changed files with 6 additions and 1 deletion

View File

@@ -129,6 +129,7 @@ models = [
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
]
# some models are known to be broken upstream, so we will skip them as exceptions