server : enable continuous batching by default (#6231)
@@ -139,7 +139,7 @@ struct gpt_params {
     bool interactive_first = false; // wait for user input immediately
     bool multiline_input   = false; // reverse the usage of `\`
     bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
-    bool cont_batching     = false; // insert new sequences for decoding on-the-fly
+    bool cont_batching     = true;  // insert new sequences for decoding on-the-fly
 
     bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
     bool ignore_eos        = false; // ignore generated EOS tokens
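
For context, continuous batching means the server may insert new sequences into an already-running decode batch instead of waiting for the current batch to finish. The sketch below is a toy scheduler, not llama.cpp's actual server loop, written only to illustrate the behavior the cont_batching flag selects; every name in it (request, run, the step counts) is hypothetical.

// Toy scheduler illustrating the cont_batching flag (not llama.cpp code).
// With the flag on, requests that arrive while a batch is decoding join it
// on the next step; with it off, they wait until the batch fully drains.
#include <cstdio>
#include <deque>
#include <vector>

struct request {
    int id;
    int arrives_at;   // step at which the request shows up
    int tokens_left;  // decode steps still needed
};

static void run(bool cont_batching) {
    std::deque<request> waiting = {{1, 0, 4}, {2, 1, 2}};
    std::vector<request> batch;

    printf("cont_batching = %s\n", cont_batching ? "true" : "false");
    for (int step = 0; !waiting.empty() || !batch.empty(); ++step) {
        // admit new sequences on-the-fly, or only once the batch is empty
        while (!waiting.empty() && waiting.front().arrives_at <= step &&
               (cont_batching || batch.empty())) {
            batch.push_back(waiting.front());
            waiting.pop_front();
        }
        printf("  step %d: %zu sequence(s) in batch\n", step, batch.size());
        // one decode step: every sequence in the batch emits a token
        for (auto it = batch.begin(); it != batch.end();) {
            it = (--it->tokens_left == 0) ? batch.erase(it) : it + 1;
        }
    }
}

int main() {
    run(true);   // request 2 joins at step 1, alongside request 1
    run(false);  // request 2 idles until request 1's batch drains
}

Running the sketch shows the difference the new default makes for concurrent clients: with cont_batching = true, request 2 starts decoding one step after it arrives, whereas with the old false default it sits idle until request 1 completes.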