rework, targeting llama-server

2025-08-15 12:42:40 -04:00 · 2025-02-14 18:16:49 +01:00
parent 4ed4fe75ed
commit f2e59a8eb9
10 changed files with 191 additions and 136 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -580,6 +580,7 @@ std::string string_from(const struct llama_context * ctx, const std::vector<llam
    return buf.str();
 }

+/*
 std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
    std::stringstream buf;

@@ -614,6 +615,7 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat

    return buf.str();
 }
+*/

 void string_process_escapes(std::string & input) {
    std::size_t input_len = input.length();
@@ -1608,27 +1610,20 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string &, cons
 // Batch utils
 //

-void common_batch_clear(struct llama_batch & batch) {
-    batch.n_tokens = 0;
+void common_batch_clear(struct llama_batch * batch) {
+    llama_batch_clear(batch);
 }

 void common_batch_add(
-                 struct llama_batch & batch,
+                 struct llama_batch * batch,
                        llama_token   id,
                          llama_pos   pos,
    const std::vector<llama_seq_id> & seq_ids,
                               bool   logits) {
-    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
-
-    batch.token   [batch.n_tokens] = id;
-    batch.pos     [batch.n_tokens] = pos;
-    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
-    for (size_t i = 0; i < seq_ids.size(); ++i) {
-        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
+    int32_t res = llama_batch_add_text_token(batch, id, pos, seq_ids.data(), seq_ids.size(), logits);
+    if (res == -1) {
+        LOG_ERR("%s: llama_batch size exceeded\n", __func__);
    }
-    batch.logits  [batch.n_tokens] = logits;
-
-    batch.n_tokens++;
 }

 //