mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-02 05:15:47 +00:00
additional unk_token_id fallback just in case [no ci]
This commit is contained in:
@@ -3689,7 +3689,7 @@ class BertModel(TextModel):
|
||||
else:
|
||||
added_vocab = tokenizer.get_added_vocab()
|
||||
unk_token = tokenizer_config_json.get("unk_token")
|
||||
-unk_token_id = added_vocab.get(unk_token, 3)
|
||||
+unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
|
||||
|
||||
for token_id in range(vocab_size):
|
||||
piece = tokenizer._convert_id_to_token(token_id)
|
||||
|
Reference in New Issue
Block a user