apply to the rest
@@ -108,19 +108,20 @@ int main(int argc, char ** argv) {
         }
 
         // prepare a batch for the prompt
-        llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());
+        llama_batch_ext * batch = llama_batch_ext_init_from_text(prompt_tokens.data(), prompt_tokens.size(), 0, 0);
+        llama_batch_ext_set_output_last(batch);
         llama_token new_token_id;
         while (true) {
             // check if we have enough space in the context to evaluate this batch
             int n_ctx = llama_n_ctx(ctx);
             int n_ctx_used = llama_kv_self_used_cells(ctx);
-            if (n_ctx_used + batch.n_tokens > n_ctx) {
+            if (n_ctx_used + llama_batch_ext_get_n_tokens(batch) > n_ctx) {
                 printf("\033[0m\n");
                 fprintf(stderr, "context size exceeded\n");
                 exit(0);
             }
 
-            if (llama_decode(ctx, batch)) {
+            if (llama_decode_ext(ctx, batch)) {
                 GGML_ABORT("failed to decode\n");
             }
 
@@ -144,9 +145,13 @@ int main(int argc, char ** argv) {
             response += piece;
 
             // prepare the next batch with the sampled token
-            batch = llama_batch_get_one(&new_token_id, 1);
+            llama_batch_ext_clear(batch);
+            llama_seq_id seq_id = 0;
+            llama_batch_ext_add_text(batch, new_token_id, 0, &seq_id, 1, true);
         }
 
+        llama_batch_ext_free(batch);
+
         return response;
     };
 
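For orientation, below is a minimal sketch of what the generation loop looks like once both hunks are applied, collapsed into a single free-standing function. The llama_batch_ext_* calls and llama_decode_ext use exactly the signatures that appear in this diff; the generate_with_batch_ext wrapper, llama_sampler_sample, and llama_vocab_is_eog are assumptions about the surrounding example code (sampling, end-of-generation handling, and detokenization are not touched by this commit), and error handling is reduced to early breaks instead of exit()/GGML_ABORT().

// Minimal sketch, not part of the commit: the two hunks above combined into one
// function. The llama_batch_ext_* and llama_decode_ext signatures follow this
// diff; llama_sampler_sample, llama_vocab_is_eog and the wrapper itself are
// assumptions about the surrounding example code.
#include "llama.h"

#include <cstdio>
#include <string>
#include <vector>

static std::string generate_with_batch_ext(llama_context * ctx,
                                           const llama_vocab * vocab,
                                           llama_sampler * smpl,
                                           std::vector<llama_token> prompt_tokens) {
    std::string response;

    // one heap-allocated batch, initialized with the whole prompt and reused afterwards
    llama_batch_ext * batch = llama_batch_ext_init_from_text(prompt_tokens.data(), prompt_tokens.size(), 0, 0);
    llama_batch_ext_set_output_last(batch); // request logits for the last prompt token

    while (true) {
        // stop before the batch would overflow the context window
        const int n_ctx      = llama_n_ctx(ctx);
        const int n_ctx_used = llama_kv_self_used_cells(ctx);
        if (n_ctx_used + llama_batch_ext_get_n_tokens(batch) > n_ctx) {
            fprintf(stderr, "context size exceeded\n");
            break;
        }

        if (llama_decode_ext(ctx, batch)) {
            fprintf(stderr, "failed to decode\n");
            break;
        }

        // sample the next token from the freshly computed logits
        const llama_token new_token_id = llama_sampler_sample(smpl, ctx, -1);
        if (llama_vocab_is_eog(vocab, new_token_id)) {
            break;
        }

        // detokenize new_token_id and append the piece to response
        // (elided here; that part of the example is unchanged by this commit)

        // refill the same batch with the single sampled token
        llama_batch_ext_clear(batch);
        llama_seq_id seq_id = 0;
        llama_batch_ext_add_text(batch, new_token_id, 0, &seq_id, 1, true);
    }

    llama_batch_ext_free(batch);

    return response;
}

The visible shift is from the value-type llama_batch returned by llama_batch_get_one, which was rebuilt around the caller's token storage on every iteration, to an opaque llama_batch_ext handle that is created once, cleared and refilled with llama_batch_ext_add_text for each sampled token, and released with llama_batch_ext_free when generation ends.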