mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-18 05:56:00 -04:00
ggml : add Q5_0 and Q5_1 quantization (#1187)
* ggml : add Q5_0 quantization (cuBLAS only) * ggml : fix Q5_0 qh -> uint32_t * ggml : fix q5_0 histogram stats * ggml : q5_0 scalar dot product * ggml : q5_0 ARM NEON dot * ggml : q5_0 more efficient ARM NEON using uint64_t masks * ggml : rename Q5_0 -> Q5_1 * ggml : adding Q5_0 mode * quantize : add Q5_0 and Q5_1 to map * ggml : AVX2 optimizations for Q5_0, Q5_1 (#1195) --------- Co-authored-by: Stephan Walter <stephan@walter.name>
This commit is contained in:
@@ -35,6 +35,8 @@ void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t st
|
||||
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
void dequantize_row_q5_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
void dequantize_row_q5_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
void dequantize_row_q8_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
Reference in New Issue
Block a user