kv-cache : use ggml_set_rows

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-19 19:26:47 +03:00
parent 1f647b5992
commit 79dac3c861
4 changed files with 89 additions and 18 deletions

View File

@@ -248,8 +248,12 @@ public:
// Overrides a virtual set_input declared in a base class; takes a read-only
// pointer to a llama_ubatch. The definition is not part of this header.
// NOTE(review): presumably fills the input tensors declared below from the
// batch — confirm against the implementation.
void set_input(const llama_ubatch * ubatch) override;
// Returns the stored self_kv_idxs pointer unchanged.
ggml_tensor * get_kv_idxs() const { return self_kv_idxs; }
// Returns self_kq_mask_cnv — not self_kq_mask.
// NOTE(review): the "_cnv" suffix presumably marks a type-converted view of
// the F32 mask — confirm where the tensor is built.
ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
// Tensor pointers exposed through the accessors above; each one is nullptr
// until something assigns it.
// TODO: should this be I64?
ggml_tensor * self_kv_idxs = nullptr; // I32 [n_batch]
ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch]
// Same shape as self_kq_mask; the element type is not stated here.
ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch]
@@ -273,9 +277,14 @@ public:
// Overrides a virtual set_input declared in a base class; takes a read-only
// pointer to a llama_ubatch. The definition is not part of this header.
// NOTE(review): presumably fills both the regular and the "_swa" input
// tensors declared below — confirm against the implementation.
void set_input(const llama_ubatch * ubatch) override;
// Returns the stored self_kv_idxs pointer unchanged.
ggml_tensor * get_kv_idxs() const { return self_kv_idxs; }
// Returns the stored self_kv_idxs_swa pointer unchanged.
// NOTE(review): "swa" presumably stands for sliding-window attention — confirm.
ggml_tensor * get_kv_idxs_swa() const { return self_kv_idxs_swa; }
// Returns self_kq_mask_cnv — not self_kq_mask.
// NOTE(review): the "_cnv" suffix presumably marks a type-converted view of
// the F32 mask — confirm where the tensor is built.
ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
// Returns self_kq_mask_swa_cnv — not self_kq_mask_swa. The returned member's
// declaration is outside the lines shown in this hunk.
ggml_tensor * get_kq_mask_swa() const { return self_kq_mask_swa_cnv; }
// Tensor pointers exposed through the accessors above; each one is nullptr
// until something assigns it. The "_swa" members mirror the regular ones.
ggml_tensor * self_kv_idxs = nullptr; // I32 [n_batch]
ggml_tensor * self_kv_idxs_swa = nullptr; // I32 [n_batch]
ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch]
// Same shape as self_kq_mask; the element type is not stated here.
ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch]
ggml_tensor * self_kq_mask_swa = nullptr; // F32 [n_kv, n_batch]