mirror of https://github.com/ggml-org/llama.cpp.git
synced 2025-07-22 10:48:12 +00:00
llama : reorder encode/decode in sources
@@ -116,6 +116,17 @@ struct llama_context : public llama_graph_i {
     // TODO: maybe remove this
     virtual void output_reorder();
 
+    // encode a batch of tokens by evaluating the encoder part of the transformer
+    //
+    //   - lctx:      llama context
+    //   - batch:     batch to evaluate
+    //
+    // return 0 on success
+    // return positive int on warning
+    // return negative int on error
+    //
+    virtual int encode(llama_batch & inp_batch) = 0;
+
     // decode a batch of tokens by evaluating the transformer
     // in case of unsuccessful decoding (error or warning),
     // the kv_cache state will be returned to its original state
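The block added above documents the return-code contract shared by encode() and decode(). A minimal caller-side sketch of that contract, assuming the public C API wrappers llama_encode()/llama_decode() from llama.h; run_batch() is a hypothetical helper, not part of this commit:

```cpp
#include <cstdio>

#include "llama.h"

// sketch only: exercises the documented return convention of encode()/decode()
// through the public C API entry points, which forward to the virtuals above
static int run_batch(llama_context * ctx, llama_batch batch, bool is_encoder) {
    const int ret = is_encoder ? llama_encode(ctx, batch)
                               : llama_decode(ctx, batch);
    if (ret < 0) {
        // negative int: error - per the decode() comment above, the kv_cache
        // state is returned to its original state, so the context stays usable
        fprintf(stderr, "batch evaluation failed with error %d\n", ret);
    } else if (ret > 0) {
        // positive int: warning - not fatal, the caller may e.g. retry with a
        // smaller batch
        fprintf(stderr, "batch evaluation returned warning %d\n", ret);
    }
    return ret; // 0 on success
}
```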
@@ -130,17 +141,6 @@ struct llama_context : public llama_graph_i {
     //
     virtual int decode(llama_batch & inp_batch) = 0;
 
-    // encode a batch of tokens by evaluating the encoder part of the transformer
-    //
-    //   - lctx:      llama context
-    //   - batch:     batch to evaluate
-    //
-    // return 0 on success
-    // return positive int on warning
-    // return negative int on error
-    //
-    virtual int encode(llama_batch & inp_batch) = 0;
-
     //
     // graph build API (generic)
     //
@@ -336,8 +336,8 @@ public:
 
     virtual void input_set(const llama_ubatch & ubatch) override;
 
-    virtual int decode(llama_batch & inp_batch) override;
     virtual int encode(llama_batch & inp_batch) override;
+    virtual int decode(llama_batch & inp_batch) override;
 
     // max token position across all sequences in the current context
     llama_pos pos_max() const;
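The last hunk mirrors the same encode-before-decode order in the overrides of the derived context class. A minimal sketch of how the two declaration sites relate, with hypothetical class names; only the encode/decode signatures come from the diff:

```cpp
// sketch only: shows why the reorder appears twice in this commit -
// once in the abstract base and once in a concrete derived class
struct llama_batch; // defined in llama.h

struct llama_context_base {
    virtual ~llama_context_base() = default;

    // pure virtuals, now declared encode-first (first two hunks)
    virtual int encode(llama_batch & inp_batch) = 0;
    virtual int decode(llama_batch & inp_batch) = 0;
};

// a concrete context keeps its overrides in the same order (last hunk)
struct llama_context_impl : public llama_context_base {
    int encode(llama_batch &) override { return 0; } // would run the encoder
    int decode(llama_batch &) override { return 0; } // would run the decoder
};
```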