mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-05 08:28:37 -04:00
kv-cache : use ggml_set_rows (#14285)
* kv-cache : use ggml_set_rows ggml-ci * graph : separate k and v indices ggml-ci * cont : remove redundant ifs ggml-ci * kv-cache : improve find_slot impl * kv-cache : bounds-check when accessing slot_info indices * kv-cache : add comments ggml-ci * ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends ggml-ci
This commit is contained in:
@@ -105,10 +105,30 @@ public:
|
||||
res.resize(n);
|
||||
|
||||
for (uint32_t j = 0; j < n; ++j) {
|
||||
res.pos[j] = pos[i + j];
|
||||
res.seq[j] = seq[i + j];
|
||||
const auto idx = i + j;
|
||||
|
||||
assert(shift[i + j] == 0);
|
||||
res.pos[j] = pos[idx];
|
||||
res.seq[j] = seq[idx];
|
||||
|
||||
assert(shift[idx] == 0);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// copy the state of cells [idxs[0], idxs[1], ..., idxs[idxs.size() - 1]]
|
||||
llama_kv_cells_unified cp(const std::vector<uint32_t> & idxs) const {
|
||||
llama_kv_cells_unified res;
|
||||
|
||||
res.resize(idxs.size());
|
||||
|
||||
for (uint32_t j = 0; j < idxs.size(); ++j) {
|
||||
const auto idx = idxs[j];
|
||||
|
||||
res.pos[j] = pos[idx];
|
||||
res.seq[j] = seq[idx];
|
||||
|
||||
assert(shift[idx] == 0);
|
||||
}
|
||||
|
||||
return res;
|
||||
@@ -119,26 +139,58 @@ public:
|
||||
assert(i + other.pos.size() <= pos.size());
|
||||
|
||||
for (uint32_t j = 0; j < other.pos.size(); ++j) {
|
||||
if (pos[i + j] == -1 && other.pos[j] != -1) {
|
||||
const auto idx = i + j;
|
||||
|
||||
if (pos[idx] == -1 && other.pos[j] != -1) {
|
||||
used.insert(i + j);
|
||||
}
|
||||
|
||||
if (pos[i + j] != -1 && other.pos[j] == -1) {
|
||||
if (pos[idx] != -1 && other.pos[j] == -1) {
|
||||
used.erase(i + j);
|
||||
}
|
||||
|
||||
if (pos[i + j] != -1) {
|
||||
if (pos[idx] != -1) {
|
||||
seq_pos_rm(i + j);
|
||||
}
|
||||
|
||||
pos[i + j] = other.pos[j];
|
||||
seq[i + j] = other.seq[j];
|
||||
pos[idx] = other.pos[j];
|
||||
seq[idx] = other.seq[j];
|
||||
|
||||
if (pos[i + j] != -1) {
|
||||
if (pos[idx] != -1) {
|
||||
seq_pos_add(i + j);
|
||||
}
|
||||
|
||||
assert(shift[i + j] == 0);
|
||||
assert(shift[idx] == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// set the state of cells [idxs[0], idxs[1], ..., idxs[idxs.size() - 1]]
|
||||
void set(const std::vector<uint32_t> & idxs, const llama_kv_cells_unified & other) {
|
||||
assert(idxs.size() == other.pos.size());
|
||||
|
||||
for (uint32_t j = 0; j < other.pos.size(); ++j) {
|
||||
const auto idx = idxs[j];
|
||||
|
||||
if (pos[idx] == -1 && other.pos[j] != -1) {
|
||||
used.insert(idx);
|
||||
}
|
||||
|
||||
if (pos[idx] != -1 && other.pos[j] == -1) {
|
||||
used.erase(idx);
|
||||
}
|
||||
|
||||
if (pos[idx] != -1) {
|
||||
seq_pos_rm(idx);
|
||||
}
|
||||
|
||||
pos[idx] = other.pos[j];
|
||||
seq[idx] = other.seq[j];
|
||||
|
||||
if (pos[idx] != -1) {
|
||||
seq_pos_add(idx);
|
||||
}
|
||||
|
||||
assert(shift[idx] == 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user