mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 03:55:20 +00:00
speculative : update default params (#11954)
* speculative : update default params * speculative : do not discard the last drafted token
This commit is contained in:
@ -252,11 +252,6 @@ llama_tokens common_speculative_gen_draft(
|
||||
// add drafted token for each sequence
|
||||
const llama_token id = cur_p->data[0].id;
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_sampler_accept(smpl, id, true);
|
||||
|
||||
result.push_back(id);
|
||||
@ -265,6 +260,11 @@ llama_tokens common_speculative_gen_draft(
|
||||
break;
|
||||
}
|
||||
|
||||
// only collect very high-confidence draft tokens
|
||||
if (cur_p->data[0].p < params.p_min) {
|
||||
break;
|
||||
}
|
||||
|
||||
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);
|
||||
|
||||
// evaluate the drafted tokens on the draft model
|
||||
|
Reference in New Issue
Block a user