ggml : use F16 instead of F32 in Q4_0, Q4_1, Q8_0 (#1508)

* ggml : use F16 instead of F32 in Q4_0, Q4_1 and Q8_0
* llama : bump LLAMA_FILE_VERSION to 3
* cuda : update Q4 and Q8 dequantize kernels
* ggml : fix AVX dot products
* readme : update performance table + hot topics
2025-08-19 06:25:15 -04:00 · 2023-05-19 22:17:18 +03:00
parent 6986c7835a
commit 2d5db48371
6 changed files with 109 additions and 102 deletions
--- a/llama.h
+++ b/llama.h
@@ -19,7 +19,7 @@
 #    define LLAMA_API
 #endif

-#define LLAMA_FILE_VERSION           2
+#define LLAMA_FILE_VERSION           3
 #define LLAMA_FILE_MAGIC             'ggjt'
 #define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
 #define LLAMA_SESSION_MAGIC          'ggsn'