additional unk_token_id fallback just in case [no ci]

This commit is contained in:
Sigbjørn Skjæret
2025-05-23 09:22:45 +02:00
committed by GitHub
parent 65a37fa8e5
commit f2d876ad5a

View File

@@ -3689,7 +3689,7 @@ class BertModel(TextModel):
else:
added_vocab = tokenizer.get_added_vocab()
unk_token = tokenizer_config_json.get("unk_token")
unk_token_id = added_vocab.get(unk_token, 3)
unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
for token_id in range(vocab_size):
piece = tokenizer._convert_id_to_token(token_id)