Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-08 19:39:50 +00:00)
simple-chat : fix context-exceeded condition (#14494)
* simple-chat : fix context-exceeded condition (ggml-ci)
* cont : fix n_ctx_used computation (ggml-ci)
@@ -113,15 +113,16 @@ int main(int argc, char ** argv) {
     while (true) {
         // check if we have enough space in the context to evaluate this batch
         int n_ctx = llama_n_ctx(ctx);
-        int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0);
+        int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1;
         if (n_ctx_used + batch.n_tokens > n_ctx) {
             printf("\033[0m\n");
             fprintf(stderr, "context size exceeded\n");
             exit(0);
         }

-        if (llama_decode(ctx, batch)) {
-            GGML_ABORT("failed to decode\n");
+        int ret = llama_decode(ctx, batch);
+        if (ret != 0) {
+            GGML_ABORT("failed to decode, ret = %d\n", ret);
         }

         // sample the next token
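The `+ 1` matters because `llama_memory_seq_pos_max` returns the highest occupied position in the sequence, and positions are 0-based, so the number of tokens already stored is that position plus one. Below is a minimal standalone sketch of the off-by-one the commit fixes, using hypothetical numbers (n_ctx = 8, a full context, a 1-token batch) rather than a live llama_context:

#include <stdio.h>

int main(void) {
    const int n_ctx    = 8; // hypothetical context size
    const int pos_max  = 7; // highest occupied position (0-based) -> positions 0..7, context full
    const int n_tokens = 1; // size of the next batch

    int used_old = pos_max;     // before the fix: under-counts by one
    int used_new = pos_max + 1; // after the fix: actual token count

    // old check: 7 + 1 > 8 is false, so a batch is decoded into a full context (the bug)
    printf("old check rejects: %s\n", used_old + n_tokens > n_ctx ? "yes" : "no");
    // new check: 8 + 1 > 8 is true, so the batch is correctly rejected
    printf("new check rejects: %s\n", used_new + n_tokens > n_ctx ? "yes" : "no");
    return 0;
}

The same counting also holds for an empty sequence, where the function reports position -1 and the fixed computation yields 0 tokens used.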