Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-28 21:23:55 -04:00)
context : perform output reorder lazily upon access after sync (#14853)
* context : perform output reorder lazily upon access after sync

ggml-ci

* cont : add TODO
@@ -181,6 +181,8 @@ private:
     // Returns max number of outputs for which space was reserved.
     uint32_t output_reserve(int32_t n_outputs);
 
+    void output_reorder();
+
     //
     // graph
     //
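The newly declared output_reorder() lets the context defer reordering of the output buffers until they are actually read after synchronization, instead of doing it eagerly at the end of decode. The sketch below illustrates that lazy-apply pattern in isolation; it is not the llama-context.cpp implementation, and the names apply_output_swaps, get_logits_row, n_cols, and n_vocab are hypothetical stand-ins.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the pending pairwise swaps recorded at decode time.
struct swap_info {
    uint32_t i0;
    uint32_t i1;
};

// Apply the deferred row swaps to a row-major buffer (rows of n_cols floats),
// then clear the list so that repeated accesses become no-ops.
static void apply_output_swaps(std::vector<float> & buf, uint64_t n_cols,
                               std::vector<swap_info> & swaps) {
    for (const auto & s : swaps) {
        for (uint64_t k = 0; k < n_cols; ++k) {
            std::swap(buf[s.i0*n_cols + k], buf[s.i1*n_cols + k]);
        }
    }
    swaps.clear();
}

// Lazy access: the reorder happens only when an output row is first read.
static float * get_logits_row(std::vector<float> & logits, uint64_t n_vocab,
                              std::vector<swap_info> & swaps, uint32_t i) {
    if (!swaps.empty()) {
        apply_output_swaps(logits, n_vocab, swaps);
    }
    return logits.data() + (uint64_t) i*n_vocab;
}
```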
@@ -250,6 +252,13 @@ private:
     std::vector<int32_t> output_ids; // map batch token positions to ids of the logits and embd buffers
 
+    struct swap_info {
+        uint32_t i0;
+        uint32_t i1;
+    };
+
+    std::vector<swap_info> output_swaps;
+
     ggml_backend_sched_ptr sched;
 
     ggml_backend_t backend_cpu = nullptr;
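output_swaps holds the pairwise swaps that still need to be applied to the output buffers. One way such a list can be produced is to record the swaps while sorting the small output-id array, leaving the large logits/embd buffers untouched until they are accessed. The following is only a sketch under that assumption; record_swaps and the local out_ids vector are hypothetical names, not code from the repository.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct swap_info {
    uint32_t i0;
    uint32_t i1;
};

// Record the swaps a selection sort performs while putting out_ids into
// ascending order. Replaying these swaps later on the logits/embd buffers
// reproduces the same reordering, so the heavy copies can be deferred.
static std::vector<swap_info> record_swaps(std::vector<int32_t> & out_ids) {
    std::vector<swap_info> swaps;
    const uint32_t n = (uint32_t) out_ids.size();

    for (uint32_t i = 0; i < n; ++i) {
        uint32_t j_min = i;
        for (uint32_t j = i + 1; j < n; ++j) {
            if (out_ids[j] < out_ids[j_min]) {
                j_min = j;
            }
        }
        if (j_min != i) {
            std::swap(out_ids[i], out_ids[j_min]);
            swaps.push_back({i, j_min});
        }
    }

    return swaps;
}
```

Recording the permutation as swaps keeps decode cheap: the actual data movement is paid for only when the caller asks for logits or embeddings after the sync.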