speculative : update default params (#11954)

* speculative : update default params

* speculative : do not discard the last drafted token
This commit is contained in:
Georgi Gerganov
2025-02-19 13:29:42 +02:00
committed by GitHub
parent 9626d9351a
commit abd4d0bc4f
4 changed files with 9 additions and 9 deletions

View File

@@ -252,11 +252,6 @@ llama_tokens common_speculative_gen_draft(
// add drafted token for each sequence
const llama_token id = cur_p->data[0].id;
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < params.p_min) {
break;
}
common_sampler_accept(smpl, id, true);
result.push_back(id);
@@ -265,6 +260,11 @@ llama_tokens common_speculative_gen_draft(
break;
}
// only collect very high-confidence draft tokens
if (cur_p->data[0].p < params.p_min) {
break;
}
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);
// evaluate the drafted tokens on the draft model