From a4569c41fd2253c89ef52fc2378687bdbf42f61a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sat, 2 Aug 2025 17:14:21 +0300
Subject: [PATCH] llama : enable LLAMA_SET_ROWS=1 by default (#14959)

ggml-ci
---
 src/llama-context.cpp          | 2 +-
 src/llama-context.h            | 2 +-
 src/llama-kv-cache-unified.cpp | 2 +-
 src/llama-kv-cache-unified.h   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index bd637f3df..958bcc047 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -105,7 +105,7 @@ llama_context::llama_context(
 
     {
         const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS");
-        supports_set_rows = LLAMA_SET_ROWS ? (atoi(LLAMA_SET_ROWS) != 0) : false;
+        supports_set_rows = LLAMA_SET_ROWS ? (atoi(LLAMA_SET_ROWS) != 0) : supports_set_rows;
 
         if (!supports_set_rows && !cparams.kv_unified) {
             LLAMA_LOG_WARN("%s: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache\n", __func__);
diff --git a/src/llama-context.h b/src/llama-context.h
index 7cfdc6a51..25c143d56 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -289,7 +289,7 @@ private:
 
     // env: LLAMA_SET_ROWS (temporary)
     // ref: https://github.com/ggml-org/llama.cpp/pull/14285
-    bool supports_set_rows = false;
+    bool supports_set_rows = true;
 
     // env: LLAMA_GRAPH_REUSE_DISABLE
     bool graph_reuse_disable = false;
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index 321dc79fc..c741014cf 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -193,7 +193,7 @@ llama_kv_cache_unified::llama_kv_cache_unified(
     debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;
 
     const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS");
-    supports_set_rows = LLAMA_SET_ROWS ? atoi(LLAMA_SET_ROWS) != 0 : 0;
+    supports_set_rows = LLAMA_SET_ROWS ? atoi(LLAMA_SET_ROWS) != 0 : supports_set_rows;
 
     if (!supports_set_rows) {
         // ref: https://github.com/ggml-org/llama.cpp/pull/14363
diff --git a/src/llama-kv-cache-unified.h b/src/llama-kv-cache-unified.h
index 3e28e346c..342a67596 100644
--- a/src/llama-kv-cache-unified.h
+++ b/src/llama-kv-cache-unified.h
@@ -230,7 +230,7 @@ private:
 
     // env: LLAMA_SET_ROWS (temporary)
     // ref: https://github.com/ggml-org/llama.cpp/pull/14285
-    bool supports_set_rows = false;
+    bool supports_set_rows = true;
 
     const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;