mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-30 04:45:17 +00:00
kv-cache : rework kv_cell (#13706)
* kv-cache : rework kv_cell ggml-ci
* kv-cells : use "shift" instead of "delta" consistently ggml-ci
* llama : add llama_max_parallel_sequences() ggml-ci
* kv-cells : update comments [no ci]
* context : fail upon construction if sequences exceed max value ggml-ci
* kv-cells : get_pos() -> pos_get() + comments ggml-ci
* kv-cells : fix tracking of "used" cells ggml-ci
This commit is contained in:
@@ -25,7 +25,11 @@ llama_context::llama_context(
|
||||
|
||||
const auto & hparams = model.hparams;
|
||||
|
||||
cparams.n_seq_max = std::max(1u, params.n_seq_max);
|
||||
cparams.n_seq_max = std::max(1u, params.n_seq_max);
|
||||
if (cparams.n_seq_max > LLAMA_MAX_PARALLEL_SEQUENCES) {
|
||||
throw std::runtime_error("n_seq_max must be <= " + std::to_string(LLAMA_MAX_PARALLEL_SEQUENCES));
|
||||
}
|
||||
|
||||
cparams.n_threads = params.n_threads;
|
||||
cparams.n_threads_batch = params.n_threads_batch;
|
||||
cparams.yarn_ext_factor = params.yarn_ext_factor;
|
||||
|
Reference in New Issue
Block a user