From 4bf7ca3943dfa6f34f3ab63deb58cfdec59d2fa6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 24 Feb 2025 17:01:20 +0100 Subject: [PATCH] llama_decode_ext --- examples/server/server.cpp | 4 ++-- include/llama.h | 4 ++-- src/llama.cpp | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 029bd9777..89d79f73e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3149,7 +3149,7 @@ struct server_context { llama_batch_ext_ptr batch_view(llama_batch_ext_get_view(batch.get(), i, n_tokens)); - const int ret = llama_text_decode(ctx, batch_view.get()); + const int ret = llama_decode_ext(ctx, batch_view.get()); metrics.on_decoded(slots); if (ret != 0) { @@ -3294,7 +3294,7 @@ struct server_context { SLT_DBG(slot, "decoding speculative batch, size = %d\n", llama_batch_ext_get_n_tokens(slot.batch_spec.get())); - llama_text_decode(ctx, slot.batch_spec.get()); + llama_decode_ext(ctx, slot.batch_spec.get()); // the accepted tokens from the speculation const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft); diff --git a/include/llama.h b/include/llama.h index 32b4cdbe1..c0a3533de 100644 --- a/include/llama.h +++ b/include/llama.h @@ -944,7 +944,7 @@ extern "C" { DEPRECATED(LLAMA_API int32_t llama_encode( struct llama_context * ctx, struct llama_batch batch), "use llama_batch_ext API instead"); - LLAMA_API int32_t llama_text_encode( + LLAMA_API int32_t llama_encode_ext( struct llama_context * ctx, struct llama_batch_ext * batch); @@ -955,7 +955,7 @@ extern "C" { DEPRECATED(LLAMA_API int32_t llama_decode( struct llama_context * ctx, struct llama_batch batch), "use llama_batch_ext API instead"); - LLAMA_API int32_t llama_text_decode( + LLAMA_API int32_t llama_decode_ext( struct llama_context * ctx, struct llama_batch_ext * batch); diff --git a/src/llama.cpp b/src/llama.cpp index a3dc7824a..fb0e88c5b 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -9977,7 +9977,7 @@ int32_t llama_encode( // also convert llama_batch to llama_batch_ext llama_batch_allocr batch_allocr(batch, batch.pos ? -1 : ctx->kv_self.max_pos() + 1); llama_batch_ext * batch_ext = batch_allocr.batch; - return llama_text_encode(ctx, batch_ext); + return llama_encode_ext(ctx, batch_ext); } // DEPRECATED @@ -9988,10 +9988,10 @@ int32_t llama_decode( // also convert llama_batch to llama_batch_ext llama_batch_allocr batch_allocr(batch, batch.pos ? -1 : ctx->kv_self.max_pos() + 1); llama_batch_ext * batch_ext = batch_allocr.batch; - return llama_text_decode(ctx, batch_ext); + return llama_decode_ext(ctx, batch_ext); } -int32_t llama_text_encode( +int32_t llama_encode_ext( struct llama_context * ctx, struct llama_batch_ext * batch) { const int ret = llama_encode_impl(*ctx, *batch); @@ -10002,7 +10002,7 @@ int32_t llama_text_encode( return ret; } -int32_t llama_text_decode( +int32_t llama_decode_ext( struct llama_context * ctx, struct llama_batch_ext * batch) { const int ret = llama_decode_impl(*ctx, *batch);