mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-29 11:39:14 -04:00
ggml : introduce structs for the q4 data blocks (#356)
* Introduce structs for the q4 data blocks
* ggml : rename quant struct variables + fix ARM_NEON

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
3
llama.h
3
llama.h
@@ -81,8 +81,7 @@ extern "C" {
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
             const char * fname_out,
-            int itype,
-            int qk);
+            int itype);
 
     // Run the llama inference to obtain the logits and probabilities for the next token.
     // tokens + n_tokens is the provided batch of new tokens to process
|
Reference in New Issue
Block a user