gguf-py : add IQ1_M to GGML_QUANT_SIZES (#6761)
@@ -872,6 +872,7 @@ GGML_QUANT_SIZES = {
     GGMLQuantizationType.I32: (1, 4),
     GGMLQuantizationType.I64: (1, 8),
     GGMLQuantizationType.F64: (1, 8),
+    GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
 }
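For reference, each GGML_QUANT_SIZES entry is a (block_size, type_size) pair. With QK_K = 256, the new IQ1_M entry works out to 256 // 8 + 256 // 16 + 256 // 32 = 32 + 16 + 8 = 56 bytes per 256-element block. Below is a minimal, illustrative sketch of how such a pair can be used to compute the on-disk size of a quantized tensor; the names QUANT_SIZES and tensor_nbytes are local to this example and are not part of gguf-py.

# Illustrative sketch: compute the byte size of a tensor from
# (block_size, type_size) pairs like those in GGML_QUANT_SIZES.
QK_K = 256

QUANT_SIZES = {
    "I32":   (1, 4),
    "I64":   (1, 8),
    "F64":   (1, 8),
    "IQ1_M": (QK_K, QK_K // 8 + QK_K // 16 + QK_K // 32),  # (256, 56)
}

def tensor_nbytes(n_elements: int, quant: str) -> int:
    """Bytes needed to store n_elements in the given quantization format."""
    block_size, type_size = QUANT_SIZES[quant]
    if n_elements % block_size != 0:
        raise ValueError(f"{n_elements} is not a multiple of block size {block_size}")
    return (n_elements // block_size) * type_size

# Example: a 4096 x 4096 weight matrix stored as IQ1_M
print(tensor_nbytes(4096 * 4096, "IQ1_M"))  # 3670016 bytes (exactly 3.5 MiB)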