mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-16 21:22:37 -04:00
llama : fix F16/F32 downcast + improve names (#5980)
This commit is contained in:
2
llama.h
2
llama.h
@@ -278,7 +278,7 @@ extern "C" {
|
||||
bool allow_requantize; // allow quantizing non-f32/f16 tensors
|
||||
bool quantize_output_tensor; // quantize output.weight
|
||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||
- bool pure; // disable k-quant mixtures and quantize all tensors to the same type
|
||||
+ bool pure; // quantize all tensors to the default type
|
||||
void * imatrix; // pointer to importance matrix data
|
||||
} llama_model_quantize_params;
|
||||
|
||||
|
Reference in New Issue
Block a user