mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-01 21:15:06 +00:00
sampling : deduplicated code for probability distribution access (#6240)
* sampling: remove duplicated code for probability distribution access * free original_logits * fix original_logits allocation * fixes based on review @cebtenzzre * change function name to `llama_sampling_prepare`
This commit is contained in:
@ -131,12 +131,14 @@ llama_token llama_sampling_sample(
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = 0);
|
||||
|
||||
// returns the probability that token of given id will be sampled
|
||||
llama_token_data_array llama_sampling_probability_distribution(
|
||||
// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
|
||||
llama_token_data_array llama_sampling_prepare(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = 0);
|
||||
int idx = 0,
|
||||
bool apply_grammar = true,
|
||||
std::vector<float> * original_logits = nullptr);
|
||||
|
||||
void llama_sampling_accept(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
|
Reference in New Issue
Block a user