context : perform output reorder lazily upon access after sync (#14853)

* context : perform output reorder lazily upon access after sync

ggml-ci

* cont : add TODO
This commit is contained in:
Georgi Gerganov
2025-07-24 16:31:48 +03:00
committed by GitHub
parent 820de57d4f
commit e4868d16d2
3 changed files with 47 additions and 13 deletions

View File

@@ -181,6 +181,8 @@ private:
// Returns max number of outputs for which space was reserved.
uint32_t output_reserve(int32_t n_outputs);
// Reorders the outputs. Per the commit description, the reorder is performed
// lazily upon access after sync.
// NOTE(review): presumably applies the swaps recorded in output_swaps - confirm against the definition.
void output_reorder();
//
// graph
//
@@ -250,6 +252,13 @@ private:
std::vector<int32_t> output_ids; // map batch token positions to ids of the logits and embd buffers
// A pair of unsigned 32-bit indices (i0, i1).
// NOTE(review): presumably the two output positions to exchange in output_reorder() - confirm.
struct swap_info {
uint32_t i0;
uint32_t i1;
};
// Sequence of swap_info entries.
// NOTE(review): presumably the pending swaps recorded for the lazy output reorder - confirm against the implementation.
std::vector<swap_info> output_swaps;
// NOTE(review): presumably the backend scheduler used for graph execution; ownership
// semantics depend on ggml_backend_sched_ptr, which is declared elsewhere - confirm.
ggml_backend_sched_ptr sched;
// CPU backend handle; starts out as nullptr.
ggml_backend_t backend_cpu = nullptr;