mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-27 03:33:46 -04:00
context : perform output reorder lazily upon access after sync (#14853)
* context : perform output reorder lazily upon access after sync ggml-ci * cont : add TODO
This commit is contained in:
@ -956,6 +956,7 @@ extern "C" {
|
||||
// in the order they have appeared in the batch.
|
||||
// Rows: number of tokens for which llama_batch.logits[i] != 0
|
||||
// Cols: n_vocab
|
||||
// TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
||||
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
|
||||
|
||||
// Logits for the ith token. For positive indices, equivalent to:
|
||||
@ -970,6 +971,7 @@ extern "C" {
|
||||
// in the order they have appeared in the batch.
|
||||
// shape: [n_outputs*n_embd]
|
||||
// Otherwise, returns NULL.
|
||||
// TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
|
||||
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
||||
|
||||
// Get the embeddings for the ith token. For positive indices, equivalent to:
|
||||
|
Reference in New Issue
Block a user