revert vocab_size() change [no ci]

This commit is contained in:
Sigbjørn Skjæret
2025-05-26 08:40:46 +02:00
committed by GitHub
parent f2d876ad5a
commit b17e9811f4

View File

@@ -3668,7 +3668,7 @@ class BertModel(TextModel):
toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
if isinstance(tokenizer, SentencePieceProcessor):
-for token_id in range(vocab_size):
+for token_id in range(tokenizer.vocab_size()):
piece = tokenizer.IdToPiece(token_id)
text = piece.encode("utf-8")
score = tokenizer.GetScore(token_id)