mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-01 15:09:32 -04:00
gguf-py : Numpy (de)quantization for TQ1_0 and TQ2_0
* ggml-quants : use roundf instead of nearest_int for TQ1_0 and TQ2_0 This does not change anything for ternary models, since their values should never end up being in halfway cases anyway.
This commit is contained in:
@@ -66,6 +66,7 @@ class GGMLQuants:
|
||||
for t in (
|
||||
"q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
|
||||
"q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
|
||||
"tq1_0", "tq2_0",
|
||||
"iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s", "iq1_s", "iq1_m",
|
||||
"iq4_nl", "iq4_xs",
|
||||
):
|
||||
|
Reference in New Issue
Block a user