ggml-quants : 1.625 bpw ternary packing for BitNet 1.58b

Francis Couture-Harpin
2024-06-19 12:21:08 -04:00
parent ac146628e4
commit bd807499f7
11 changed files with 594 additions and 4 deletions

@@ -26,6 +26,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
     { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
     { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
+    { "Q1_3", LLAMA_FTYPE_MOSTLY_Q1_3, " 1.63 bpw for BitNet 1.58b", },
+    { "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.00 bpw for BitNet 1.58b", },
     { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B", },
     { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
     { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },