fix: Fix resize vs reserve and skip null tensors in size computation
https://github.com/ggml-org/llama.cpp/pull/13979/files#r2149469788

Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
Co-Authored-By: @younesbelkada
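As a quick illustration of why the first change matters (a minimal standalone sketch, not llama.cpp code; `int *` stands in for the per-layer tensor pointers): `std::vector::reserve` only allocates capacity and leaves `size()` at zero, so indexing the vector afterwards is undefined behavior, while `resize` value-initializes `n_layer` elements (null pointers here) so the per-layer loop can assign into them safely.

```cpp
#include <cassert>
#include <vector>

int main() {
    const int n_layer = 4;

    std::vector<int *> reserved;
    reserved.reserve(n_layer);           // capacity grows, but size() stays 0
    assert(reserved.size() == 0);        // reserved[i] = ... would be undefined behavior

    std::vector<int *> resized;
    resized.resize(n_layer);             // size() == n_layer, elements value-initialized to nullptr
    assert(resized.size() == n_layer);
    assert(resized[0] == nullptr);       // safe to index and fill per-layer entries
}
```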
@@ -60,8 +60,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
         return it->second;
     };
 
-    k_l.reserve(n_layer);
-    v_l.reserve(n_layer);
+    k_l.resize(n_layer);
+    v_l.resize(n_layer);
 
     for (int i = 0; i < n_layer; i++) {
         if (filter && !filter(i)) {
@@ -647,7 +647,9 @@ size_t llama_kv_cache_recurrent::size_k_bytes() const {
     size_t size_k_bytes = 0;
 
     for (const auto & k : k_l) {
-        size_k_bytes += ggml_nbytes(k);
+        if (k != nullptr) {
+            size_k_bytes += ggml_nbytes(k);
+        }
     }
 
     return size_k_bytes;
@@ -657,7 +659,9 @@ size_t llama_kv_cache_recurrent::size_v_bytes() const {
     size_t size_v_bytes = 0;
 
    for (const auto & v : v_l) {
-        size_v_bytes += ggml_nbytes(v);
+        if (v != nullptr) {
+            size_v_bytes += ggml_nbytes(v);
+        }
     }
 
     return size_v_bytes;
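The second change applies the same guard in both size accumulators: layers excluded by the filter keep a null slot in `k_l`/`v_l` after the resize, so the byte totals must skip those slots instead of passing a null tensor to `ggml_nbytes`. A minimal sketch of the pattern, using a hypothetical `tensor` stand-in rather than the real ggml types:

```cpp
#include <cstddef>
#include <vector>

struct tensor { size_t nbytes; };   // stand-in for ggml_tensor; the real code calls ggml_nbytes()

// Sum per-layer sizes while skipping slots left null by the layer filter.
size_t total_bytes(const std::vector<tensor *> & layers) {
    size_t total = 0;
    for (const tensor * t : layers) {
        if (t != nullptr) {
            total += t->nbytes;
        }
    }
    return total;
}
```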