mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
llama : minor sampling refactor (2) (#9386)
This commit is contained in:
@ -140,8 +140,6 @@ while n_cur <= n_len {
|
||||
|
||||
let new_token_id = llama_sampler_sample(smpl, context, i_batch[i])
|
||||
|
||||
llama_sampler_accept(smpl, new_token_id)
|
||||
|
||||
// is it an end of stream? -> mark the stream as finished
|
||||
if llama_token_is_eog(model, new_token_id) || n_cur == n_len {
|
||||
i_batch[i] = -1
|
||||
|
Reference in New Issue
Block a user