From f2d876ad5ab5013d4df5848038e18e2f8b51760a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Fri, 23 May 2025 09:22:45 +0200 Subject: [PATCH] additional unk_token_id fallback just in case [no ci] --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index bf7915a93..0f2c41ecc 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3689,7 +3689,7 @@ class BertModel(TextModel): else: added_vocab = tokenizer.get_added_vocab() unk_token = tokenizer_config_json.get("unk_token") - unk_token_id = added_vocab.get(unk_token, 3) + unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3)) for token_id in range(vocab_size): piece = tokenizer._convert_id_to_token(token_id)