llama : reorder encode/decode in sources

Georgi Gerganov
2025-02-18 14:47:53 +02:00
parent bc6f187e9c
commit befe14f06f
2 changed files with 174 additions and 174 deletions


@@ -116,6 +116,17 @@ struct llama_context : public llama_graph_i {
     // TODO: maybe remove this
     virtual void output_reorder();
 
+    // encode a batch of tokens by evaluating the encoder part of the transformer
+    //
+    // - lctx: llama context
+    // - batch: batch to evaluate
+    //
+    // return 0 on success
+    // return positive int on warning
+    // return negative int on error
+    //
+    virtual int encode(llama_batch & inp_batch) = 0;
+
     // decode a batch of tokens by evaluating the transformer
     // in case of unsuccessful decoding (error or warning),
     // the kv_cache state will be returned to its original state
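
The block added above documents the return convention that encode() shares with decode(). As a caller-side illustration, a minimal sketch against the public C API from llama.h (llama_encode() is the public entry point over this virtual method; the warning/error policy below is this sketch's choice, not part of the commit):

// Sketch: acting on the documented return convention of encode().
// Assumes the public C API entry point llama_encode(); how a caller
// reacts to warnings or errors is an application-level decision.
#include "llama.h"

#include <cstdio>

static bool try_encode(llama_context * ctx, llama_batch batch) {
    const int32_t ret = llama_encode(ctx, batch);
    if (ret < 0) {
        fprintf(stderr, "encode error: %d\n", ret);   // negative int on error
        return false;
    }
    if (ret > 0) {
        fprintf(stderr, "encode warning: %d\n", ret); // positive int on warning
    }
    return true;                                      // 0 on success
}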
@@ -130,17 +141,6 @@
     //
     virtual int decode(llama_batch & inp_batch) = 0;
 
-    // encode a batch of tokens by evaluating the encoder part of the transformer
-    //
-    // - lctx: llama context
-    // - batch: batch to evaluate
-    //
-    // return 0 on success
-    // return positive int on warning
-    // return negative int on error
-    //
-    virtual int encode(llama_batch & inp_batch) = 0;
-
     //
     // graph build API (generic)
     //
@@ -336,8 +336,8 @@
     virtual void input_set(const llama_ubatch & ubatch) override;
 
-    virtual int decode(llama_batch & inp_batch) override;
     virtual int encode(llama_batch & inp_batch) override;
+    virtual int decode(llama_batch & inp_batch) override;
 
     // max token position across all sequences in the current context
     llama_pos pos_max() const;
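
At the call site, the same encode-before-decode order applies for encoder-decoder models. A hedged usage sketch, again assuming the public C API from llama.h and a batch prepared elsewhere:

// Sketch: run the encoder pass before the decoder, mirroring the new ordering.
// Per the comment above, a failed decode() returns the kv_cache to its
// original state, so the caller may retry with a smaller batch. Treating
// encoder warnings as failures is this sketch's choice, not the API's.
static int32_t encode_then_decode(llama_context * ctx, llama_batch batch) {
    const int32_t enc = llama_encode(ctx, batch);
    if (enc != 0) {
        return enc; // propagate warning/error from the encoder
    }
    return llama_decode(ctx, batch); // 0 on success, >0 warning, <0 error
}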