sampling : min-p should always return at least one token

ggml-ci
2025-08-04 16:23:49 -04:00 · 2025-05-27 11:24:40 +03:00
parent 4f81b33e32
commit fe12a5d47e
2 changed files with 3 additions and 2 deletions
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -798,7 +798,7 @@ static void llama_sampler_min_p_apply(struct llama_sampler * smpl, llama_token_d
        }

        // if we have enough values the operation was a success
-        if (filtered_tokens.size() >= ctx->min_keep) {
+        if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
            memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
            cur_p->size = filtered_tokens.size();
            min_p_applied = true;
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -109,7 +109,7 @@ static void test_min_p(const std::vector<float> & probs, const std::vector<float
    sampler_tester tester(probs, probs_expected);

    DUMP(&tester.cur_p);
-    tester.apply(llama_sampler_init_min_p(p, 1));
+    tester.apply(llama_sampler_init_min_p(p, 0));
    tester.apply(llama_sampler_init_dist (0));
    DUMP(&tester.cur_p);

@@ -332,6 +332,7 @@ int main(void) {
    test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.7f, 0.3f/0.7f},                       0.74f);
    test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.4f},                                  0.76f);
    test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.4f},                                  1.00f);
+    test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.4f},                                  1.05f);

    printf("XTC should:\n");
    test_xtc({0.4f, 0.3f, 0.2f, 0.1f},   {0.1f},                                0.99f, 0.09f);