mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-01 21:15:06 +00:00
* Use F16 for memory_k and memory_v * add command line switch to use f16 instead of f32 for memory k+v --------- Co-authored-by: Ty Everett <ty@tyweb.us>
This commit is contained in:
1
utils.h
1
utils.h
@@ -18,6 +18,7 @@ struct gpt_params {
|
||||
int32_t n_predict = 128; // new tokens to predict
|
||||
int32_t repeat_last_n = 64; // last n tokens to penalize
|
||||
int32_t n_ctx = 512; //context size
|
||||
bool memory_f16 = false; // use f16 instead of f32 for memory kv
|
||||
|
||||
// sampling parameters
|
||||
int32_t top_k = 40;
|
||||
|
Reference in New Issue
Block a user