mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
gguf-py: Refactor and allow reading/modifying existing GGUF files (#3981)
* gguf-py: Refactor and add file reading support * Replay changes from #3871 Credit to @cebtenzzre for that pull * Various type annotation fixes. * sort imports with isort (again) * Fix missing return statement in add_tensor * style cleanup with flake8 * fix NamedTuple and Enum usage * Fix an issue with state init in GGUFReader Move examples to an examples/ directory Clean up examples Add an example of modifying keys in a GGUF file Update documentation with info on examples Try to support people importing gguf/gguf.py directly * Damagage is not a word. * Clean up gguf-py/examples/modify_gguf.py whitespace Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Update gguf-py/examples/modify_gguf.py formatting Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Update gguf-py/gguf/gguf_reader.py type hint Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Make examples executable, formatting changes * Add more information to GGUFReader and examples comments * Include a gguf Python package version bump * Add convert-gguf-endian.py script * cleanup * gguf-py : bump minor version * Reorganize scripts * Make GGUFReader endian detection less arbitrary * Add JSON dumping support to gguf-dump.py Which I kind of regret now * A few for gguf-dump.py cleanups * Murder accidental tuple in gguf-py/scripts/gguf-dump.py Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * cleanup * constants : remove unneeded type annotations * fix python 3.8 compat * Set up gguf- scripts in pyproject.toml * And include scripts/__init__.py, derp * convert.py: We can't currently support Q8_0 on big endian. * gguf-py: SpecialVocab: Always try available sources for special token ids gguf-py: SpecialVocab: Try to load merges from merges.txt if not in tokenizer.json gguf-py: SpecialVocab: Add 'add_bos_token' type bools to GGUF metadata u * cleanup * Promote add_X_token to GGUF metadata for BOS and EOS --------- Co-authored-by: Jared Van Bortel <jared@nomic.ai> Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
This commit is contained in:
16
convert.py
16
convert.py
@ -3,11 +3,9 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import copy
|
||||
import enum
|
||||
import faulthandler
|
||||
import functools
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
@ -23,14 +21,14 @@ from abc import ABCMeta, abstractmethod
|
||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
|
||||
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
|
||||
|
||||
import numpy as np
|
||||
from sentencepiece import SentencePieceProcessor
|
||||
|
||||
import os
|
||||
if 'NO_LOCAL_GGUF' not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
|
||||
import gguf
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -851,7 +849,7 @@ class OutputFile:
|
||||
elif isinstance(vocab, BpeVocab):
|
||||
self.gguf.add_tokenizer_model("gpt2")
|
||||
else:
|
||||
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
|
||||
raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
|
||||
self.gguf.add_token_list(tokens)
|
||||
self.gguf.add_token_scores(scores)
|
||||
self.gguf.add_token_types(toktypes)
|
||||
@ -905,7 +903,7 @@ class OutputFile:
|
||||
return dt.quantize(arr)
|
||||
|
||||
@staticmethod
|
||||
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
|
||||
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
|
||||
check_vocab_size(params, vocab)
|
||||
|
||||
of = OutputFile(fname_out, endianess=endianess)
|
||||
@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None:
|
||||
|
||||
|
||||
def main(args_in: list[str] | None = None) -> None:
|
||||
output_choices = ["f32", "f16"]
|
||||
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
|
||||
# We currently only support Q8_0 output on little endian systems.
|
||||
output_choices.append("q8_0")
|
||||
parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
|
||||
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
|
||||
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
|
||||
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
|
||||
parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
|
||||
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
|
||||
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
|
||||
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
|
||||
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
|
||||
|
Reference in New Issue
Block a user