Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-26 19:23:37 -04:00)
ggml : drop support for QK_K=64 (#7473)
* ggml : drop support for QK_K=64

ggml-ci

* opencl : restore QK_K=256 define
@@ -905,9 +905,8 @@ class GGUFValueType(IntEnum):
         raise ValueError(f"Unknown type: {type(val)}")


-# Note: Does not support GGML_QKK_64
-QK_K = 256
 # Items here are (block size, type size)
+QK_K = 256
 GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F32: (1, 4),
     GGMLQuantizationType.F16: (1, 2),
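For context, QK_K is the number of elements in a k-quant super-block, and each GGML_QUANT_SIZES entry maps a quantization type to a (block size, type size) pair: a block of `block size` elements is stored in `type size` bytes. The sketch below shows how such a table can be used to compute the storage size of a quantized row. It is a minimal illustration, not code from this commit: only the two table entries visible in the hunk are included, and the quant_row_size helper is a hypothetical name introduced here for demonstration.

from enum import IntEnum


class GGMLQuantizationType(IntEnum):
    # Values match the ggml type ids for F32 and F16.
    F32 = 0
    F16 = 1


# Number of elements in a k-quant super-block (QK_K=64 support was dropped).
QK_K = 256

# Items here are (block size, type size): a block of `block size` elements
# occupies `type size` bytes on disk / in memory.
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.F32: (1, 4),
    GGMLQuantizationType.F16: (1, 2),
}


def quant_row_size(qtype: GGMLQuantizationType, n_elements: int) -> int:
    """Illustrative helper: bytes needed to store n_elements of the given type.

    n_elements must be a multiple of the type's block size.
    """
    block_size, type_size = GGML_QUANT_SIZES[qtype]
    assert n_elements % block_size == 0, "row length must align to the block size"
    return (n_elements // block_size) * type_size


if __name__ == "__main__":
    # 4096 F16 values -> 4096 blocks of 2 bytes = 8192 bytes
    print(quant_row_size(GGMLQuantizationType.F16, 4096))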