compile ok

2025-07-26 03:03:25 -04:00 · 2025-03-13 22:56:35 +01:00
parent 9fb2d81eab
commit 65f0184517
9 changed files with 46 additions and 29 deletions
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@ -20,7 +20,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+        llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(&tokens[i], n_eval, *n_past, 0));
+        if (llama_decode_ext(ctx_llama, batch.get())) {
            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
            return false;
        }
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@ -101,7 +101,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+        llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(&tokens[i], n_eval, *n_past, 0));
+        if (llama_decode_ext(ctx_llama, batch.get())) {
            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
            return false;
        }
--- a/examples/llava/qwen2vl-cli.cpp
+++ b/examples/llava/qwen2vl-cli.cpp
@ -96,16 +96,24 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
-        auto batch = llama_batch_get_one(&tokens[i], n_eval);
-        // TODO: add mrope pos ids somewhere else
-        pos.resize(batch.n_tokens * 4);
-        std::fill(pos.begin(), pos.end(), 0);
-        for (int j = 0; j < batch.n_tokens * 3; j ++) {
-            pos[j] = *st_pos_id + (j % batch.n_tokens);
-        }
-        batch.pos = pos.data();

-        if (llama_decode(ctx_llama, batch)) {
+        // TODO: add mrope pos ids somewhere else
+        int n_tokens = n_eval;
+        pos.resize(n_tokens * 4);
+        std::fill(pos.begin(), pos.end(), 0);
+        for (int j = 0; j < n_tokens * 3; j ++) {
+            pos[j] = *st_pos_id + (j % n_tokens);
+        }
+
+        llama_batch_ext_ptr batch(llama_batch_ext_init(n_eval, 1));
+        for (int j = 0; j < n_eval; j++) {
+            llama_token token = tokens[i + j];
+            llama_seq_id seq_id = 0;
+            llama_batch_ext_add_text(batch.get(), token, pos[j], &seq_id, 1, false);
+        }
+        llama_batch_ext_set_output_last(batch.get());
+
+        if (llama_decode_ext(ctx_llama, batch.get())) {
            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
            return false;
        }