mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-05 08:28:37 -04:00
ggml : fix quant dot product with odd number of blocks (#8549)
* ggml : fix iq4_nl dot product with odd number of blocks * ggml : fix odd blocks for ARM_NEON (#8556) * ggml : fix iq4_nl dot product with odd number of blocks * ggml : fix q4_1 * ggml : fix q5_0 * ggml : fix q5_1 * ggml : fix iq4_nl metal ggml-ci * ggml : fix q4_0 * ggml : fix q8_0 ggml-ci * ggml : remove special Q4_0 code for first 2 blocks * ggml : fix sumf redefinition --------- Co-authored-by: slaren <slarengh@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -1786,10 +1786,6 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||
}
|
||||
};
|
||||
|
||||
if (ggml_is_quantized(src0t)) {
|
||||
GGML_ASSERT(ne00 >= nth0*nth1);
|
||||
}
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
|
||||
|
Reference in New Issue
Block a user