Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-28 21:23:55 -04:00)
context : perform output reorder lazily upon access after sync (#14853)
* context : perform output reorder lazily upon access after sync

ggml-ci

* cont : add TODO
@@ -181,6 +181,8 @@ private:
     // Returns max number of outputs for which space was reserved.
     uint32_t output_reserve(int32_t n_outputs);
 
+    void output_reorder();
+
     //
     // graph
     //
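The newly declared output_reorder() lets the context defer reordering of the output buffers until they are actually read after synchronization, instead of doing it eagerly at the end of decode. The sketch below illustrates that lazy-apply pattern in isolation; it is not the llama-context.cpp implementation, and the names apply_output_swaps, get_logits_row, n_cols, and n_vocab are hypothetical stand-ins.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the pending pairwise swaps recorded at decode time.
struct swap_info {
    uint32_t i0;
    uint32_t i1;
};

// Apply the deferred row swaps to a row-major buffer (rows of n_cols floats),
// then clear the list so that repeated accesses become no-ops.
static void apply_output_swaps(std::vector<float> & buf, uint64_t n_cols,
                               std::vector<swap_info> & swaps) {
    for (const auto & s : swaps) {
        for (uint64_t k = 0; k < n_cols; ++k) {
            std::swap(buf[s.i0*n_cols + k], buf[s.i1*n_cols + k]);
        }
    }
    swaps.clear();
}

// Lazy access: the reorder happens only when an output row is first read.
static float * get_logits_row(std::vector<float> & logits, uint64_t n_vocab,
                              std::vector<swap_info> & swaps, uint32_t i) {
    if (!swaps.empty()) {
        apply_output_swaps(logits, n_vocab, swaps);
    }
    return logits.data() + (uint64_t) i*n_vocab;
}
```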
@@ -250,6 +252,13 @@ private:
     std::vector<int32_t> output_ids; // map batch token positions to ids of the logits and embd buffers
 
+    struct swap_info {
+        uint32_t i0;
+        uint32_t i1;
+    };
+
+    std::vector<swap_info> output_swaps;
+
     ggml_backend_sched_ptr sched;
 
     ggml_backend_t backend_cpu = nullptr;
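output_swaps holds the pairwise swaps that still need to be applied to the output buffers. One way such a list can be produced is to record the swaps while sorting the small output-id array, leaving the large logits/embd buffers untouched until they are accessed. The following is only a sketch under that assumption; record_swaps and the local out_ids vector are hypothetical names, not code from the repository.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct swap_info {
    uint32_t i0;
    uint32_t i1;
};

// Record the swaps a selection sort performs while putting out_ids into
// ascending order. Replaying these swaps later on the logits/embd buffers
// reproduces the same reordering, so the heavy copies can be deferred.
static std::vector<swap_info> record_swaps(std::vector<int32_t> & out_ids) {
    std::vector<swap_info> swaps;
    const uint32_t n = (uint32_t) out_ids.size();

    for (uint32_t i = 0; i < n; ++i) {
        uint32_t j_min = i;
        for (uint32_t j = i + 1; j < n; ++j) {
            if (out_ids[j] < out_ids[j_min]) {
                j_min = j;
            }
        }
        if (j_min != i) {
            std::swap(out_ids[i], out_ids[j_min]);
            swaps.push_back({i, j_min});
        }
    }

    return swaps;
}
```

Recording the permutation as swaps keeps decode cheap: the actual data movement is paid for only when the caller asks for logits or embeddings after the sync.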