ubatch : new splitting logic (#14217)

ggml-ci
2025-07-28 21:23:55 -04:00 · 2025-06-20 10:14:14 +03:00
parent 9eaa51e7f0
commit 4c9fdfbe15
19 changed files with 992 additions and 915 deletions
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -247,7 +247,7 @@ private:
    std::map<llama_seq_id, std::vector<float>> embd_seq;

    // reuse the batch_allocr to avoid unnecessary memory allocations
-    std::unique_ptr<llama_batch_allocr> batch_allocr;
+    std::unique_ptr<llama_batch_allocr> balloc;

    uint32_t n_outputs = 0; // number of actually-used outputs in the current ubatch or last logical batch