mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-09-01 21:04:58 -04:00)
llama : adjust default context size + print warnings (#10136)
* llama : adjust default context size + print warnings

ggml-ci

* ggml-ci : add missing gpu-layers + adjust context sizes
@@ -155,7 +155,7 @@ struct common_sampler_params {
 
 struct common_params {
     int32_t n_predict = -1;   // new tokens to predict
-    int32_t n_ctx     = 0;    // context size
+    int32_t n_ctx     = 4096; // context size
     int32_t n_batch   = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_ubatch  = 512;  // physical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;    // number of tokens to keep from initial prompt
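For context: the old default of n_ctx = 0 told llama.cpp to fall back to the model's training context, while the new default pins it at 4096. Below is a minimal sketch of the kind of check the commit title alludes to ("print warnings"), not the actual patch. It assumes the llama.cpp C API function llama_n_ctx_train(); the helper name resolve_n_ctx is hypothetical.

// Hypothetical helper, not the actual patch: reconcile the requested context
// size with the model's training context and warn on a mismatch.
#include <cstdio>
#include "llama.h"

static int32_t resolve_n_ctx(const struct llama_model * model, int32_t n_ctx_requested) {
    const int32_t n_ctx_train = llama_n_ctx_train(model);
    if (n_ctx_requested == 0) {
        // the old default of 0 meant "use the model's training context"
        return n_ctx_train;
    }
    if (n_ctx_requested > n_ctx_train) {
        // requesting more context than the model was trained on degrades quality,
        // so print a warning instead of failing
        fprintf(stderr, "warning: requested context size (%d) is larger than the "
                        "context the model was trained on (%d)\n",
                n_ctx_requested, n_ctx_train);
    }
    return n_ctx_requested;
}

With the new fixed default of 4096, a check like this lets frontends still warn users whose models were trained on a shorter context.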