mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-18 05:56:00 -04:00
Infinite generation via context swapping (#71)
This commit is contained in:
@@ -21,6 +21,7 @@ struct gpt_params {
|
||||
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
||||
int32_t n_ctx = 512; // context size
|
||||
int32_t n_batch = 8; // batch size for prompt processing
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
|
||||
// sampling parameters
|
||||
int32_t top_k = 40;
|
||||
|
Reference in New Issue
Block a user