mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-13 11:57:43 -04:00
Add back top_k (#56)
* Add back top_k
* Update utils.cpp
* Update utils.h

---------

Co-authored-by: Bill Hamilton <bill.hamilton@shopify.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
3
main.cpp
3
main.cpp
@@ -825,6 +825,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
if (i >= embd_inp.size()) {
|
||||
// sample next token
|
||||
+ const float top_k = params.top_k;
|
||||
const float top_p = params.top_p;
|
||||
const float temp = params.temp;
|
||||
const float repeat_penalty = params.repeat_penalty;
|
||||
@@ -836,7 +837,7 @@ int main(int argc, char ** argv) {
|
||||
{
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
- id = llama_sample_top_p(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_p, temp, rng);
|
||||
+ id = llama_sample_top_p_top_k(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_k, top_p, temp, rng);
|
||||
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(id);
|
||||
|
Reference in New Issue
Block a user