llama : default sampling changes + greedy update (#9897)

* llama : deprecate softmax sampler + fix dist sampler ggml-ci * tests : replace macros with functions ggml-ci * sampling : change temperature sampler logic For t <= 0.0f, keep the max logit intact and set the rest to -inf * cont : no need for special "greedy" logic top-k == 1 is the same * tests : init prob correctly * llama : handle temp <= 0.0 in the temp_ext sampler too ggml-ci * cont : avoid extra loop in temperature sampler for sub-zero temp ggml-ci
2025-08-13 11:57:43 -04:00 · 2024-10-21 09:46:40 +03:00
parent bc21975084
commit 55e47786e3
7 changed files with 202 additions and 218 deletions
--- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -46,7 +46,6 @@ actor LlamaContext {
        let sparams = llama_sampler_chain_default_params()
        self.sampling = llama_sampler_chain_init(sparams)
        llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4))
-        llama_sampler_chain_add(self.sampling, llama_sampler_init_softmax())
        llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234))
    }