mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-02 15:15:43 +00:00
Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040)
* gguf-py: gguf-dump: Respect --no-tensor flag in JSON mode. * Respect add_bos_token GGUF metadata value * gguf-py: Try to fix SpecialVocab giving up too easily for the Nth time
This commit is contained in:
@ -86,13 +86,14 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
|
||||
curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
|
||||
else:
|
||||
curr["value"] = field.parts[-1].tolist()[0]
|
||||
for idx, tensor in enumerate(reader.tensors):
|
||||
tensors[tensor.name] = {
|
||||
"index": idx,
|
||||
"shape": tensor.shape.tolist(),
|
||||
"type": tensor.tensor_type.name,
|
||||
"offset": tensor.field.offset,
|
||||
}
|
||||
if not args.no_tensors:
|
||||
for idx, tensor in enumerate(reader.tensors):
|
||||
tensors[tensor.name] = {
|
||||
"index": idx,
|
||||
"shape": tensor.shape.tolist(),
|
||||
"type": tensor.tensor_type.name,
|
||||
"offset": tensor.field.offset,
|
||||
}
|
||||
json.dump(result, sys.stdout)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user