Improve cuBLAS performance by dequantizing on the GPU (#1065)

2025-08-18 05:56:00 -04:00 · 2023-04-20 03:14:14 +02:00
parent 834695fe3a
commit 02d6988121
5 changed files with 221 additions and 41 deletions
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -0,0 +1,11 @@
+#ifdef  __cplusplus
+extern "C" {
+#endif
+// NOTE(review): cudaStream_t is not declared in this header, so the includer must bring in the CUDA runtime header first. Presumably vx = quantized input, y = float output, k = element count -- TODO confirm against the implementation.
+void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+// The guard below closes the extern "C" block for C++ translation units; plain C sees only the bare declarations.
+#ifdef  __cplusplus
+}
+#endif