mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-09-04 06:15:41 -04:00
gguf-py : simplify support for quant types (#8838)
* gguf-py : use classes for quants * convert_hf : simplify internal quantization type selection * gguf-py : fix flake8 lint * gguf-py : fix BF16 numpy view type * gguf-py : remove LlamaFileTypeMap Too specific to 'llama.cpp', and would be a maintenance burden to keep up to date. * gguf-py : add generic quantize and dequantize functions The quant classes no longer need to be known, only the target or the source type, for 'quantize' and 'dequantize', respectively.
This commit is contained in:
@@ -191,6 +191,8 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
||||
class LazyNumpyTensor(LazyBase):
|
||||
_tensor_type = np.ndarray
|
||||
|
||||
shape: tuple[int, ...] # Makes the type checker happy in quants.py
|
||||
|
||||
@classmethod
|
||||
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
|
||||
# The initial idea was to use np.nan as the fill value,
|
||||
|
Reference in New Issue
Block a user