mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-17 21:51:27 -04:00
main : option to disable context shift (#9484)
* added cli arg to disable context shift * reverted precommit * updated README.md for main * white space * allow disabling context shift in the server * Update common/arg.cpp no-context-shift only works for main example Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * added server example to --no-context-shift args * removed server changes * white space --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
committed by
GitHub
parent
c4965a64f7
commit
441b72b91f
@@ -246,6 +246,7 @@ struct gpt_params {
|
||||
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
||||
bool flash_attn = false; // flash attention
|
||||
bool no_perf = false; // disable performance metrics
|
||||
bool ctx_shift = true; // context shift on inifinite text generation
|
||||
|
||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||
bool logits_all = false; // return logits for all tokens in the batch
|
||||
|
Reference in New Issue
Block a user