ggml-quants : rename fields of TQ1_0 and TQ2_0 structs for consistency
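In short: in block_tq1_0 the old q field (five ternary elements per byte) becomes qs and the old qs field (four ternary elements per byte) becomes qh, while in block_tq2_0 the old q becomes qs. This matches the naming convention of the other block_* structs in ggml (qs for the main quantized data, qh for the smaller leftover buffer, as in block_q5_0); the layout, sizes, and semantics of the fields are unchanged.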
@@ -233,15 +233,15 @@ static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong
 // 1.6875 bpw
 typedef struct {
-    uint8_t q[(QK_K - 4 * QK_K / 64) / 5]; // 5 elements per byte (3^5 = 243 < 256)
-    uint8_t qs[QK_K/64]; // 4 elements per byte
+    uint8_t qs[(QK_K - 4 * QK_K / 64) / 5]; // 5 elements per byte (3^5 = 243 < 256)
+    uint8_t qh[QK_K/64]; // 4 elements per byte
     ggml_half d;
 } block_tq1_0;
 static_assert(sizeof(block_tq1_0) == sizeof(ggml_half) + QK_K / 64 + (QK_K - 4 * QK_K / 64) / 5, "wrong tq1_0 block size/padding");

 // 2.0625 bpw
 typedef struct {
-    uint8_t q[QK_K/4]; // 2 bits per element
+    uint8_t qs[QK_K/4]; // 2 bits per element
     ggml_half d;
 } block_tq2_0;
 static_assert(sizeof(block_tq2_0) == sizeof(ggml_half) + QK_K / 4, "wrong tq2_0 block size/padding");

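As a sanity check on the bpw figures in the comments above, the arithmetic can be reproduced standalone. A minimal sketch, assuming the usual QK_K == 256 super-block size and a 2-byte ggml_half (this is an illustration, not ggml code):

    #include <stdio.h>

    #define QK_K 256 // super-block size; 256 in ggml's k-quant configuration

    int main(void) {
        // block_tq1_0: qs packs 5 ternary elements per byte (3^5 = 243 < 256),
        // qh packs the remaining 4-per-byte elements, d is a 2-byte scale
        int tq1_qs    = (QK_K - 4 * QK_K / 64) / 5; // 240 elements -> 48 bytes
        int tq1_qh    = QK_K / 64;                  //  16 elements ->  4 bytes
        int tq1_bytes = tq1_qs + tq1_qh + 2;        //  54 bytes per 256 elements
        printf("tq1_0: %d bytes, %.4f bpw\n", tq1_bytes, tq1_bytes * 8.0 / QK_K); // 1.6875

        // block_tq2_0: qs packs 4 ternary elements per byte at 2 bits each
        int tq2_bytes = QK_K / 4 + 2;               //  66 bytes per 256 elements
        printf("tq2_0: %d bytes, %.4f bpw\n", tq2_bytes, tq2_bytes * 8.0 / QK_K); // 2.0625
        return 0;
    }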
@@ -3326,7 +3326,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
         y[i].d = GGML_FP32_TO_FP16(d);

         // 5 elements per byte, along 32 bytes
-        for (size_t j = 0; j < sizeof(y->q) - sizeof(y->q) % 32; j += 32) {
+        for (size_t j = 0; j < sizeof(y->qs) - sizeof(y->qs) % 32; j += 32) {
             for (size_t m = 0; m < 32; ++m) {
                 uint8_t q = 0;
                 for (size_t n = 0; n < 5; ++n) {
@@ -3336,12 +3336,12 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
                 }
                 // ceiling division (243 == pow(3, 5))
                 q = ((uint16_t)q * 256 + (243 - 1)) / 243;
-                y[i].q[j + m] = q;
+                y[i].qs[j + m] = q;
             }
             x += 5*32;
         }
         // along 16 bytes
-        for (size_t j = sizeof(y->q) - sizeof(y->q) % 32; j < sizeof(y->q); j += 16) {
+        for (size_t j = sizeof(y->qs) - sizeof(y->qs) % 32; j < sizeof(y->qs); j += 16) {
             for (size_t m = 0; m < 16; ++m) {
                 uint8_t q = 0;
                 for (size_t n = 0; n < 5; ++n) {
@@ -3351,16 +3351,16 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
                 }
                 // ceiling division (243 == pow(3, 5))
                 q = ((uint16_t)q * 256 + (243 - 1)) / 243;
-                y[i].q[j + m] = q;
+                y[i].qs[j + m] = q;
             }
             x += 5*16;
         }
         // 4 elements per byte
-        for (size_t j = 0; j < sizeof(y->qs); ++j) {
+        for (size_t j = 0; j < sizeof(y->qh); ++j) {
             uint8_t q = 0;
             for (size_t m = 0; m < 4; ++m) {
                 // -1, 0, 1 -> 0, 1, 2
-                int xi = nearest_int(x[j + m*sizeof(y->qs)] * id) + 1;
+                int xi = nearest_int(x[j + m*sizeof(y->qh)] * id) + 1;
                 q *= 3;
                 q += xi;
             }
@@ -3368,9 +3368,9 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
             q *= 3;
             // ceiling division (243 == pow(3, 5))
             q = ((uint16_t)q * 256 + (243 - 1)) / 243;
-            y[i].qs[j] = q;
+            y[i].qh[j] = q;
         }
-        x += 4*sizeof(y->qs);
+        x += 4*sizeof(y->qh);
     }
 }

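For readers following the rename: each qs byte holds five base-3 digits. Values in {-1, 0, 1} are shifted to {0, 1, 2}, accumulated into a number in [0, 242], then scaled up to the full byte range by the ceiling division above, which is what lets the dequantization further down recover each digit with a byte multiply followed by a fixed-point multiply by 3. A standalone round-trip sketch (the helper names pack5/unpack5 are mine, not ggml's):

    #include <stdint.h>
    #include <stdio.h>

    // pack five base-3 digits (each 0, 1, or 2) into one byte,
    // mirroring quantize_row_tq1_0_ref
    static uint8_t pack5(const int digit[5]) {
        uint8_t q = 0;
        for (int n = 0; n < 5; ++n) {
            q = q*3 + digit[n]; // the first digit ends up most significant
        }
        // ceiling division: scale [0, 242] up to [0, 255] (243 == pow(3, 5))
        return ((uint16_t)q * 256 + (243 - 1)) / 243;
    }

    // recover digit n (n == 0 is the most significant), mirroring dequantize_row_tq1_0
    static int unpack5(uint8_t q, int n) {
        static const uint8_t pow3[5] = {1, 3, 9, 27, 81};
        uint8_t shifted = q * pow3[n];       // wraps mod 256, moving digit n to the top
        return ((uint16_t)shifted * 3) >> 8; // fixed-point multiply extracts it
    }

    int main(void) {
        const int digit[5] = {2, 0, 1, 1, 2}; // ternary weights {1, -1, 0, 0, 1}, shifted by +1
        uint8_t q = pack5(digit);             // packs to 176, scales to 186
        for (int n = 0; n < 5; ++n) {
            printf("%d ", unpack5(q, n));     // prints: 2 0 1 1 2
        }
        printf("\n");
        return 0;
    }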
@@ -3392,7 +3392,7 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
         y[i].d = GGML_FP32_TO_FP16(d);

         // TODO: should it be along 64 bytes instead for AVX512?
-        for (size_t j = 0; j < sizeof(y->q); j += 32) {
+        for (size_t j = 0; j < sizeof(y->qs); j += 32) {
             for (size_t m = 0; m < 32; ++m) {
                 uint8_t q = 0;
                 for (size_t n = 0; n < 4; ++n) {
@@ -3400,7 +3400,7 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
                     int xi = nearest_int(x[m + n*32] * id) + 1;
                     q += (xi & 3) << (2*n);
                 }
-                y[i].q[j + m] = q;
+                y[i].qs[j + m] = q;
             }
             x += 4*32;
         }
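The tq2_0 layout is plainer: each qs byte holds four 2-bit fields, one per ternary value. A standalone sketch of the same pack/unpack steps (pack4/unpack4 are illustrative names, not ggml functions):

    #include <stdint.h>
    #include <stdio.h>

    // pack four ternary weights (-1, 0, 1) into one byte, 2 bits each,
    // mirroring quantize_row_tq2_0_ref
    static uint8_t pack4(const int w[4]) {
        uint8_t q = 0;
        for (int n = 0; n < 4; ++n) {
            int xi = w[n] + 1;                 // -1, 0, 1 -> 0, 1, 2
            q += (uint8_t)((xi & 3) << (2*n)); // value n lives in bits 2n..2n+1
        }
        return q;
    }

    // recover value l, mirroring dequantize_row_tq2_0
    static int unpack4(uint8_t q, int l) {
        return ((q >> (l*2)) & 3) - 1;
    }

    int main(void) {
        const int w[4] = {1, -1, 0, 1};
        uint8_t q = pack4(w);             // 0x92: fields 2, 0, 1, 2 from low to high
        for (int l = 0; l < 4; ++l) {
            printf("%d ", unpack4(q, l)); // prints: 1 -1 0 1
        }
        printf("\n");
        return 0;
    }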
@@ -3444,19 +3444,19 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in

         const float d = GGML_FP16_TO_FP32(x[i].d);

-        for (size_t j = 0; j < sizeof(x->q) - sizeof(x->q) % 32; j += 32) {
+        for (size_t j = 0; j < sizeof(x->qs) - sizeof(x->qs) % 32; j += 32) {
             for (size_t n = 0; n < 5; ++n) {
                 for (size_t m = 0; m < 32; ++m) {
-                    uint8_t q = x[i].q[j + m] * pow3[n];
+                    uint8_t q = x[i].qs[j + m] * pow3[n];
                     int16_t xi = ((uint16_t) q * 3) >> 8;
                     *y++ = (float) (xi - 1) * d;
                 }
             }
         }
-        for (size_t j = sizeof(x->q) - sizeof(x->q) % 32; j < sizeof(x->q); j += 16) {
+        for (size_t j = sizeof(x->qs) - sizeof(x->qs) % 32; j < sizeof(x->qs); j += 16) {
             for (size_t n = 0; n < 5; ++n) {
                 for (size_t m = 0; m < 16; ++m) {
-                    uint8_t q = x[i].q[j + m] * pow3[n];
+                    uint8_t q = x[i].qs[j + m] * pow3[n];
                     int16_t xi = ((uint16_t) q * 3) >> 8;
                     *y++ = (float) (xi - 1) * d;
                 }
@@ -3464,8 +3464,8 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in
         }

         for (size_t n = 0; n < 4; ++n) {
-            for (size_t j = 0; j < sizeof(x->qs); ++j) {
-                uint8_t q = x[i].qs[j] * pow3[n];
+            for (size_t j = 0; j < sizeof(x->qh); ++j) {
+                uint8_t q = x[i].qh[j] * pow3[n];
                 int16_t xi = ((uint16_t) q * 3) >> 8;
                 *y++ = (float) (xi - 1) * d;
             }
@@ -3481,10 +3481,10 @@ void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, in

         const float d = GGML_FP16_TO_FP32(x[i].d);

-        for (size_t j = 0; j < sizeof(x->q); j += 32) {
+        for (size_t j = 0; j < sizeof(x->qs); j += 32) {
             for (size_t l = 0; l < 4; ++l) {
                 for (size_t m = 0; m < 32; ++m) {
-                    int8_t q = (x[i].q[j + m] >> (l*2)) & 3;
+                    int8_t q = (x[i].qs[j + m] >> (l*2)) & 3;
                     *y++ = (float) (q - 1) * d;
                 }
             }
@@ -5681,8 +5681,8 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *

         // first 32 bytes of 5 elements
         {
-            uint8x16_t qx0 = vld1q_u8(x[i].q + 0);
-            uint8x16_t qx1 = vld1q_u8(x[i].q + 16);
+            uint8x16_t qx0 = vld1q_u8(x[i].qs + 0);
+            uint8x16_t qx1 = vld1q_u8(x[i].qs + 16);
             uint8x16_t qx2 = vmulq_u8(qx0, vdupq_n_u8(3));
             uint8x16_t qx3 = vmulq_u8(qx1, vdupq_n_u8(3));
             uint8x16_t qx4 = vmulq_u8(qx0, vdupq_n_u8(9));
@@ -5739,14 +5739,14 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *

         // last 16 bytes of 5-element, along with the 4 bytes of 4 elements
         {
-            uint8x16_t qx0 = vld1q_u8(x[i].q + 32);
+            uint8x16_t qx0 = vld1q_u8(x[i].qs + 32);
             uint8x16_t qx1 = vmulq_u8(qx0, vdupq_n_u8(3));
             uint8x16_t qx2 = vmulq_u8(qx0, vdupq_n_u8(9));
             uint8x16_t qx3 = vmulq_u8(qx0, vdupq_n_u8(27));
             uint8x16_t qx4 = vmulq_u8(qx0, vdupq_n_u8(81));
-            uint32_t qs;
-            memcpy(&qs, x[i].qs, sizeof(qs)); // potentially unaligned
-            uint8x16_t qx5 = vreinterpretq_u8_u32(vdupq_n_u32(qs));
+            uint32_t qh;
+            memcpy(&qh, x[i].qh, sizeof(qh)); // potentially unaligned
+            uint8x16_t qx5 = vreinterpretq_u8_u32(vdupq_n_u32(qh));
             qx5 = vmulq_u8(qx5, shift);

             // multiply by 3 and keep the 2 bits above 8 bits
@@ -5802,7 +5802,7 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *

         // first 32 bytes of 5 elements
         {
-            __m256i qx0 = _mm256_loadu_si256((const __m256i *) (x[i].q));
+            __m256i qx0 = _mm256_loadu_si256((const __m256i *) (x[i].qs));
             // 8-bit multiplies with shifts, masks and adds
             __m256i qx1 = _mm256_add_epi8(qx0, _mm256_add_epi8(qx0, qx0)); // 1 * 3
             __m256i qx2 = _mm256_add_epi8(_mm256_and_si256(_mm256_slli_epi16(qx0, 3), _mm256_set1_epi8(-8)), qx0); // 1 * 9
@@ -5848,10 +5848,10 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *

         // last 16 bytes of 5-element, along with the 4 bytes of 4 elements
         {
-            __m128i qx0 = _mm_loadu_si128((const __m128i *) (x[i].q + 32));
-            uint32_t qs;
-            memcpy(&qs, x[i].qs, sizeof(qs)); // potentially unaligned
-            __m256i qx5_l = _mm256_cvtepu8_epi16(_mm_set1_epi32(qs));
+            __m128i qx0 = _mm_loadu_si128((const __m128i *) (x[i].qs + 32));
+            uint32_t qh;
+            memcpy(&qh, x[i].qh, sizeof(qh)); // potentially unaligned
+            __m256i qx5_l = _mm256_cvtepu8_epi16(_mm_set1_epi32(qh));
             __m128i qx1 = _mm_add_epi8(qx0, _mm_add_epi8(qx0, qx0)); // 1 * 3
             __m128i qx2 = _mm_add_epi8(_mm_and_si128(_mm_slli_epi16(qx0, 3), _mm_set1_epi8(-8)), qx0); // 1 * 9
             __m128i qx3 = _mm_add_epi8(_mm_and_si128(_mm_slli_epi16(qx1, 3), _mm_set1_epi8(-8)), qx1); // 3 * 9
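Note that both the NEON and AVX2 paths read the 4-byte qh tail through memcpy into a scalar before broadcasting it, since nothing guarantees alignment at that offset inside a block (hence the "potentially unaligned" comments). With the rename, the temporary is now called qh as well, so it no longer reuses the qs name that refers to the q8_K data elsewhere in these dot products.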
@@ -5911,19 +5911,19 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *
     for (int i = 0; i < nb; ++i) {
         int sum = 0;

-        for (size_t j = 0; j < sizeof(x->q) - sizeof(x->q) % 32; j += 32) {
+        for (size_t j = 0; j < sizeof(x->qs) - sizeof(x->qs) % 32; j += 32) {
             for (size_t l = 0; l < 5; ++l) {
                 for (size_t m = 0; m < 32; ++m) {
-                    uint8_t q = x[i].q[j + m] * pow3[l];
+                    uint8_t q = x[i].qs[j + m] * pow3[l];
                     uint16_t xi = ((uint16_t) q * 3) >> 8;
                     sum += (xi - 1) * y[i].qs[j*5 + l*32 + m];
                 }
             }
         }
-        for (size_t j = sizeof(x->q) - sizeof(x->q) % 32; j < sizeof(x->q); j += 16) {
+        for (size_t j = sizeof(x->qs) - sizeof(x->qs) % 32; j < sizeof(x->qs); j += 16) {
             for (size_t l = 0; l < 5; ++l) {
                 for (size_t m = 0; m < 16; ++m) {
-                    uint8_t q = x[i].q[j + m] * pow3[l];
+                    uint8_t q = x[i].qs[j + m] * pow3[l];
                     uint16_t xi = ((uint16_t) q * 3) >> 8;
                     sum += (xi - 1) * y[i].qs[j*5 + l*16 + m];
                 }
@@ -5931,10 +5931,10 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * restrict s, size_t bs, const void *
         }

         for (size_t l = 0; l < 4; ++l) {
-            for (size_t j = 0; j < sizeof(x->qs); ++j) {
-                uint8_t q = x[i].qs[j] * pow3[l];
+            for (size_t j = 0; j < sizeof(x->qh); ++j) {
+                uint8_t q = x[i].qh[j] * pow3[l];
                 uint16_t xi = ((uint16_t) q * 3) >> 8;
-                sum += (xi - 1) * y[i].qs[sizeof(x->q)*5 + l*sizeof(x->qs) + j];
+                sum += (xi - 1) * y[i].qs[sizeof(x->qs)*5 + l*sizeof(x->qh) + j];
             }
         }

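The index arithmetic in this scalar fallback also documents the element order within a tq1_0 block: the q8_K values in y[i].qs are consumed as five planes over the qs bytes (index j*5 + l*32 + m), then as four planes over the qh bytes starting at offset sizeof(x->qs)*5, i.e. all of the 5-per-byte elements come before the 4-per-byte tail.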
@@ -5966,9 +5966,9 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * restrict s, size_t bs, const void *
         int16x8_t sumi0 = vdupq_n_s16(0);
         int16x8_t sumi1 = vdupq_n_s16(0);

-        for (size_t j = 0; j < sizeof(x->q); j += 32) {
-            uint8x16_t qx0 = vld1q_u8(x[i].q + j);
-            uint8x16_t qx1 = vld1q_u8(x[i].q + j + 16);
+        for (size_t j = 0; j < sizeof(x->qs); j += 32) {
+            uint8x16_t qx0 = vld1q_u8(x[i].qs + j);
+            uint8x16_t qx1 = vld1q_u8(x[i].qs + j + 16);
             uint8x16_t qx2 = vshrq_n_u8(qx0, 2);
             uint8x16_t qx3 = vshrq_n_u8(qx1, 2);
             uint8x16_t qx4 = vshrq_n_u8(qx0, 4);
@@ -6033,8 +6033,8 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * restrict s, size_t bs, const void *
         __m256i sumi0 = _mm256_setzero_si256();
         __m256i sumi1 = _mm256_setzero_si256();

-        for (size_t j = 0; j < sizeof(x->q); j += 32) {
-            __m256i qx0 = _mm256_loadu_si256((const __m256i *) (x[i].q + j));
+        for (size_t j = 0; j < sizeof(x->qs); j += 32) {
+            __m256i qx0 = _mm256_loadu_si256((const __m256i *) (x[i].qs + j));
             __m256i qx1 = _mm256_srli_epi16(qx0, 2);
             __m256i qx2 = _mm256_srli_epi16(qx0, 4);
             __m256i qx3 = _mm256_srli_epi16(qx0, 6);
@@ -6077,10 +6077,10 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * restrict s, size_t bs, const void *
     for (int i = 0; i < nb; ++i) {
         int32_t sumi = 0;

-        for (size_t j = 0; j < sizeof(x->q); j += 32) {
+        for (size_t j = 0; j < sizeof(x->qs); j += 32) {
             for (size_t l = 0; l < 4; ++l) {
                 for (size_t k = 0; k < 32; ++k) {
-                    sumi += y[i].qs[j*4 + l*32 + k] * (((x[i].q[j + k] >> (l*2)) & 3) - 1);
+                    sumi += y[i].qs[j*4 + l*32 + k] * (((x[i].qs[j + k] >> (l*2)) & 3) - 1);
                 }
             }
         }