mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
fix(gguf-py): special tokens are no longer skipped when add_<token>_token is set to false (#5487)
* fix(gguf-py): special tokens are no longer skipped when add_<token>_token is set to false * fix(gguf-py): added missing cls and mask token ids to the gguf metadata
This commit is contained in:
@ -29,7 +29,7 @@ class SpecialVocab:
|
||||
if special_token_types is not None:
|
||||
self.special_token_types = special_token_types
|
||||
else:
|
||||
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
|
||||
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
|
||||
self._load(Path(path))
|
||||
|
||||
def __repr__(self) -> str:
|
||||
@ -152,10 +152,6 @@ class SpecialVocab:
|
||||
add_entry = tokenizer_config.get(f'add_{typ}_token')
|
||||
if isinstance(add_entry, bool):
|
||||
self.add_special_token[typ] = add_entry
|
||||
if not added_tokens:
|
||||
# We will need this to get the content for the token, so if it's empty
|
||||
# may as well just give up.
|
||||
continue
|
||||
entry = tokenizer_config.get(f'{typ}_token')
|
||||
if isinstance(entry, str):
|
||||
tc_content = entry
|
||||
|
Reference in New Issue
Block a user