ggml : add TQ1_0 and TQ2_0 ternary quantization types

2025-07-02 13:45:43 +00:00 · 2024-07-30 17:55:54 -04:00
parent 79a278e922
commit 77b8f84ae7
10 changed files with 563 additions and 16 deletions
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@ -241,6 +241,25 @@ typedef struct {
 } block_q8_0x8;
 static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");

+//
+// Ternary quantization
+//
+
+// 1.6875 bpw
+typedef struct {
+    uint8_t q[(QK_K - 4 * QK_K / 64) / 5]; // 5 elements per byte (3^5 = 243 < 256)
+    uint8_t qs[QK_K/64]; // 4 elements per byte
+    ggml_half d;
+} block_tq1_0;
+static_assert(sizeof(block_tq1_0) == sizeof(ggml_half) + QK_K / 64 + (QK_K - 4 * QK_K / 64) / 5, "wrong tq1_0 block size/padding");
+
+// 2.0625 bpw
+typedef struct {
+    uint8_t q[QK_K/4]; // 2 bits per element
+    ggml_half d;
+} block_tq2_0;
+static_assert(sizeof(block_tq2_0) == sizeof(ggml_half) + QK_K / 4, "wrong tq2_0 block size/padding");
+
 //
 // Super-block quantization structures
 //