mirror of https://github.com/ggml-org/llama.cpp.git
synced 2025-07-22 10:48:12 +00:00
llama : reorder encode/decode in sources
@@ -116,6 +116,17 @@ struct llama_context : public llama_graph_i {
     // TODO: maybe remove this
     virtual void output_reorder();
 
+    // encode a batch of tokens by evaluating the encoder part of the transformer
+    //
+    //   - lctx:      llama context
+    //   - batch:     batch to evaluate
+    //
+    // return 0 on success
+    // return positive int on warning
+    // return negative int on error
+    //
+    virtual int encode(llama_batch & inp_batch) = 0;
+
     // decode a batch of tokens by evaluating the transformer
     // in case of unsuccessful decoding (error or warning),
     // the kv_cache state will be returned to its original state
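The block added above documents the return-code contract shared by encode() and decode(). A minimal caller-side sketch of that contract, assuming the public C API wrappers llama_encode()/llama_decode() from llama.h; run_batch() is a hypothetical helper, not part of this commit:

```cpp
#include <cstdio>

#include "llama.h"

// sketch only: exercises the documented return convention of encode()/decode()
// through the public C API entry points, which forward to the virtuals above
static int run_batch(llama_context * ctx, llama_batch batch, bool is_encoder) {
    const int ret = is_encoder ? llama_encode(ctx, batch)
                               : llama_decode(ctx, batch);
    if (ret < 0) {
        // negative int: error - per the decode() comment above, the kv_cache
        // state is returned to its original state, so the context stays usable
        fprintf(stderr, "batch evaluation failed with error %d\n", ret);
    } else if (ret > 0) {
        // positive int: warning - not fatal, the caller may e.g. retry with a
        // smaller batch
        fprintf(stderr, "batch evaluation returned warning %d\n", ret);
    }
    return ret; // 0 on success
}
```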
@@ -130,17 +141,6 @@ struct llama_context : public llama_graph_i {
     //
     virtual int decode(llama_batch & inp_batch) = 0;
 
-    // encode a batch of tokens by evaluating the encoder part of the transformer
-    //
-    //   - lctx:      llama context
-    //   - batch:     batch to evaluate
-    //
-    // return 0 on success
-    // return positive int on warning
-    // return negative int on error
-    //
-    virtual int encode(llama_batch & inp_batch) = 0;
-
     //
     // graph build API (generic)
     //
@@ -336,8 +336,8 @@ public:
 
     virtual void input_set(const llama_ubatch & ubatch) override;
 
-    virtual int decode(llama_batch & inp_batch) override;
     virtual int encode(llama_batch & inp_batch) override;
+    virtual int decode(llama_batch & inp_batch) override;
 
     // max token position across all sequences in the current context
     llama_pos pos_max() const;
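The last hunk mirrors the same encode-before-decode order in the overrides of the derived context class. A minimal sketch of how the two declaration sites relate, with hypothetical class names; only the encode/decode signatures come from the diff:

```cpp
// sketch only: shows why the reorder appears twice in this commit -
// once in the abstract base and once in a concrete derived class
struct llama_batch; // defined in llama.h

struct llama_context_base {
    virtual ~llama_context_base() = default;

    // pure virtuals, now declared encode-first (first two hunks)
    virtual int encode(llama_batch & inp_batch) = 0;
    virtual int decode(llama_batch & inp_batch) = 0;
};

// a concrete context keeps its overrides in the same order (last hunk)
struct llama_context_impl : public llama_context_base {
    int encode(llama_batch &) override { return 0; } // would run the encoder
    int decode(llama_batch &) override { return 0; } // would run the decoder
};
```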