kv-cache : log (debug) all streams in find_slot (#15176)

This commit updates `llama_kv_cache_unified::find_slot` to log
information for all streams when debug is enabled.

The motivation for this change is that currently, if a non-unified
kv-cache is used, only one stream will be logged because the
code uses `seq_to_stream[1]`.
This commit is contained in:
Daniel Bevenius
2025-08-11 11:21:19 +02:00
committed by GitHub
parent 50e81bdf5d
commit cd3069dfcb

View File

@@ -738,13 +738,16 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
} }
llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const { llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const {
if (debug > 0) { if (debug > 0) {
const auto & cells = v_cells[seq_to_stream[1]]; for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) {
const auto seq_id = ubatch.seq_id_unq[s];
const auto stream_id = seq_to_stream[seq_id];
const auto & cells = v_cells[stream_id];
const uint32_t head_cur = v_heads[stream_id];
const uint32_t head_cur = v_heads[1]; LLAMA_LOG_DEBUG("%s: stream[%d], n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
__func__, stream_id, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n",
__func__, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa);
if ((debug == 2 && n_swa > 0) || debug > 2) { if ((debug == 2 && n_swa > 0) || debug > 2) {
std::string ss; std::string ss;
@@ -797,7 +800,8 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
continue; continue;
} }
LLAMA_LOG_DEBUG("%s: min[%d] = %5d, max[%d] = %5d\n", __func__, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s)); LLAMA_LOG_DEBUG("%s: stream[%d] min[%d] = %5d, max[%d] = %5d\n", __func__, stream_id, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
}
} }
} }