llama : fix integer overflow during quantization (#6063)

2025-07-01 05:05:10 +00:00 · 2024-03-14 22:58:41 +02:00
parent 6e0438da3c
commit 4755afd1cb
1 changed file with 1 addition and 1 deletion
--- a/llama.cpp
+++ b/llama.cpp
@@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
    return new_type;
 }

-static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
    std::mutex mutex;
    int counter = 0;
    size_t new_size = 0;