mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-09-01 21:04:58 -04:00)
llama : adjust default context size + print warnings (#10136)
* llama : adjust default context size + print warnings

ggml-ci

* ggml-ci : add missing gpu-layers + adjust context sizes
@@ -155,7 +155,7 @@ struct common_sampler_params {
 
 struct common_params {
     int32_t n_predict = -1;   // new tokens to predict
-    int32_t n_ctx     = 0;    // context size
+    int32_t n_ctx     = 4096; // context size
     int32_t n_batch   = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_ubatch  = 512;  // physical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;    // number of tokens to keep from initial prompt
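For context: the old default of n_ctx = 0 told llama.cpp to fall back to the model's training context, while the new default pins it at 4096. Below is a minimal sketch of the kind of check the commit title alludes to ("print warnings"), not the actual patch. It assumes the llama.cpp C API function llama_n_ctx_train(); the helper name resolve_n_ctx is hypothetical.

// Hypothetical helper, not the actual patch: reconcile the requested context
// size with the model's training context and warn on a mismatch.
#include <cstdio>
#include "llama.h"

static int32_t resolve_n_ctx(const struct llama_model * model, int32_t n_ctx_requested) {
    const int32_t n_ctx_train = llama_n_ctx_train(model);
    if (n_ctx_requested == 0) {
        // the old default of 0 meant "use the model's training context"
        return n_ctx_train;
    }
    if (n_ctx_requested > n_ctx_train) {
        // requesting more context than the model was trained on degrades quality,
        // so print a warning instead of failing
        fprintf(stderr, "warning: requested context size (%d) is larger than the "
                        "context the model was trained on (%d)\n",
                n_ctx_requested, n_ctx_train);
    }
    return n_ctx_requested;
}

With the new fixed default of 4096, a check like this lets frontends still warn users whose models were trained on a shorter context.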