diff --git a/include/llama.h b/include/llama.h index a13350e15..06c56395c 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1232,6 +1232,7 @@ extern "C" { "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)"); /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + /// Setting k <= 0 makes this a noop LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k); /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index d14979850..c0a5f9340 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k) // } if (k <= 0) { - k = cur_p->size; + return; } k = std::min(k, (int) cur_p->size); @@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k) } cur_p->sorted = true; } + cur_p->size = k; }