cont : move kv_self update to llama_context
ggml-ci
@@ -79,6 +79,13 @@ struct llama_context {
     ggml_abort_callback abort_callback = nullptr;
     void * abort_callback_data = nullptr;
 
+    // returns the result of ggml_backend_sched_graph_compute_async execution
+    enum ggml_status compute_graph(
+            ggml_cgraph * graph,
+            bool batched);
+
+    llama_pos pos_max() const;
+
     void reset();
 
     void prepare_k_shift();
@@ -129,6 +136,9 @@ struct llama_context {
     struct ggml_tensor * inp_KQ_mask_cross; // F32 [n_outputs_enc, n_batch]
     struct ggml_tensor * inp_K_shift;       // I32 [kv_size]
 
+    // return true if need to reserve new worst-case graph
+    bool kv_self_update();
+
     void build_attn_inp(
             ggml_context * ctx0,
             int32_t n_tokens,
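For context, here is a minimal standalone sketch of how a decode step might drive the two new entry points. Every type below is a local stub, and the control flow (apply pending KV-cache updates first, re-reserve the worst-case graph when kv_self_update() reports it is needed, then compute the batch graph) is an assumption pieced together from the commit message and the in-diff comments, not the actual llama.cpp implementation.

    // decode_step_sketch.cpp -- self-contained; the ggml types below are
    // local stubs standing in for the real headers
    #include <cstdio>

    enum ggml_status { GGML_STATUS_SUCCESS = 0, GGML_STATUS_FAILED = 1 }; // stub
    struct ggml_cgraph {};                                                // stub

    struct llama_context_sketch {
        bool have_pending_k_shift = true; // pretend a K-shift is queued

        // stand-in for llama_context::kv_self_update(): applies pending
        // KV-cache operations and, per the diff comment, returns true if a
        // new worst-case graph must be reserved
        bool kv_self_update() {
            const bool need_reserve = have_pending_k_shift;
            have_pending_k_shift = false;
            return need_reserve;
        }

        // stand-in for llama_context::compute_graph(): the real one returns
        // the result of ggml_backend_sched_graph_compute_async execution
        ggml_status compute_graph(ggml_cgraph * /*graph*/, bool /*batched*/) {
            return GGML_STATUS_SUCCESS;
        }
    };

    int main() {
        llama_context_sketch ctx;
        ggml_cgraph graph;

        // hypothetical per-batch flow: KV-cache update first, graph compute second
        if (ctx.kv_self_update()) {
            std::printf("reserving new worst-case graph\n");
        }
        if (ctx.compute_graph(&graph, /*batched =*/ true) != GGML_STATUS_SUCCESS) {
            std::printf("graph compute failed\n");
            return 1;
        }
        std::printf("decode step done\n");
    }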