mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
convert.py : add python logging instead of print() (#6511)
* convert.py: add python logging instead of print() * convert.py: verbose flag takes priority over dump flag log suppression * convert.py: named instance logging * convert.py: use explicit logger id string * convert.py: convert extra print() to named logger * convert.py: sys.stderr.write --> logger.error * *.py: Convert all python scripts to use logging module * requirements.txt: remove extra line * flake8: update flake8 ignore and exclude to match ci settings * gh-actions: add flake8-no-print to flake8 lint step * pre-commit: add flake8-no-print to flake8 and also update pre-commit version * convert-hf-to-gguf.py: print() to logger conversion * *.py: logging basiconfig refactor to use conditional expression * *.py: removed commented out logging * fixup! *.py: logging basiconfig refactor to use conditional expression * constant.py: logger.error then exit should be a raise exception instead * *.py: Convert logger error and sys.exit() into a raise exception (for atypical error) * gguf-convert-endian.py: refactor convert_byteorder() to use tqdm progressbar * verify-checksum-model.py: This is the result of the program, it should be printed to stdout. * compare-llama-bench.py: add blank line for readability during missing repo response * reader.py: read_gguf_file() use print() over logging * convert.py: warning goes to stderr and won't hurt the dump output * gguf-dump.py: dump_metadata() should print to stdout * convert-hf-to-gguf.py: print --> logger.debug or ValueError() * verify-checksum-models.py: use print() for printing table * *.py: refactor logging.basicConfig() * gguf-py/gguf/*.py: use __name__ as logger name Since they will be imported and not run directly. * python-lint.yml: use .flake8 file instead * constants.py: logger no longer required * convert-hf-to-gguf.py: add additional logging * convert-hf-to-gguf.py: print() --> logger * *.py: fix flake8 warnings * revert changes to convert-hf-to-gguf.py for get_name() * convert-hf-to-gguf-update.py: use triple quoted f-string instead * *.py: accidentally corrected the wrong line * *.py: add compilade warning suggestions and style fixes
This commit is contained in:
@ -1,13 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from .gguf_writer import GGUFWriter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpecialVocab:
|
||||
merges: list[str]
|
||||
@ -40,38 +42,29 @@ class SpecialVocab:
|
||||
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
|
||||
if self.merges:
|
||||
if not quiet:
|
||||
print(f'gguf: Adding {len(self.merges)} merge(s).')
|
||||
logger.info(f'Adding {len(self.merges)} merge(s).')
|
||||
gw.add_token_merges(self.merges)
|
||||
elif self.load_merges:
|
||||
print(
|
||||
'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
|
||||
file = sys.stderr,
|
||||
)
|
||||
logger.warning('Adding merges requested but no merges found, output may be non-functional.')
|
||||
for typ, tokid in self.special_token_ids.items():
|
||||
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
|
||||
if id_handler is None:
|
||||
print(
|
||||
f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
|
||||
file = sys.stderr,
|
||||
)
|
||||
logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
|
||||
continue
|
||||
if not quiet:
|
||||
print(f'gguf: Setting special token type {typ} to {tokid}')
|
||||
logger.info(f'Setting special token type {typ} to {tokid}')
|
||||
id_handler(tokid)
|
||||
for typ, value in self.add_special_token.items():
|
||||
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
|
||||
if add_handler is None:
|
||||
print(
|
||||
f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
|
||||
file = sys.stderr,
|
||||
)
|
||||
logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
|
||||
continue
|
||||
if not quiet:
|
||||
print(f'gguf: Setting add_{typ}_token to {value}')
|
||||
logger.info(f'Setting add_{typ}_token to {value}')
|
||||
add_handler(value)
|
||||
if self.chat_template is not None:
|
||||
if not quiet:
|
||||
print(f'gguf: Setting chat_template to {self.chat_template}')
|
||||
logger.info(f'Setting chat_template to {self.chat_template}')
|
||||
gw.add_chat_template(self.chat_template)
|
||||
|
||||
def _load(self, path: Path) -> None:
|
||||
@ -99,10 +92,7 @@ class SpecialVocab:
|
||||
continue
|
||||
parts = line.split(None, 3)
|
||||
if len(parts) != 2:
|
||||
print(
|
||||
f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
|
||||
file = sys.stderr,
|
||||
)
|
||||
logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
|
||||
continue
|
||||
merges.append(f'{parts[0]} {parts[1]}')
|
||||
self.merges = merges
|
||||
@ -118,10 +108,7 @@ class SpecialVocab:
|
||||
return
|
||||
self.special_token_ids[typ] = tid
|
||||
return
|
||||
print(
|
||||
f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
|
||||
file = sys.stderr,
|
||||
)
|
||||
logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
|
||||
|
||||
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
|
||||
tokenizer_file = path / 'tokenizer.json'
|
||||
@ -144,10 +131,7 @@ class SpecialVocab:
|
||||
if chat_template is None or isinstance(chat_template, (str, list)):
|
||||
self.chat_template = chat_template
|
||||
else:
|
||||
print(
|
||||
f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
|
||||
file = sys.stderr
|
||||
)
|
||||
logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
|
||||
for typ in self.special_token_types:
|
||||
add_entry = tokenizer_config.get(f'add_{typ}_token')
|
||||
if isinstance(add_entry, bool):
|
||||
|
Reference in New Issue
Block a user