@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
         }
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();
 
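The diff hoists the n_outputs = n_outputs_all; assignment (with its comment) from after the output-mapping block up to just after kv_guard.commit(), so the total output count is already set while the mappings are built. Below is a minimal, self-contained sketch of that set-the-count-before-building-the-mapping pattern; it is not llama.cpp's actual API: ctx_t, output_ids, and get_output_row are illustrative stand-ins, with get_output_row playing the role the comment assigns to llama_get_logits_ith (validating an index against n_outputs).

#include <cstdio>
#include <vector>

// Illustrative stand-in for the context object; not llama.cpp's real type.
struct ctx_t {
    int n_outputs = 0;            // total number of outputs in the current batch
    std::vector<int> output_ids;  // batch position -> output row

    // plays the role of llama_get_logits_ith: validate i against n_outputs
    int get_output_row(int i) const {
        if (i < 0 || i >= n_outputs) {
            return -1;  // out of range for this batch
        }
        return output_ids[i];
    }
};

int main() {
    ctx_t ctx;
    const int n_outputs_all = 3;

    // set to total number of outputs in the batch, *before* the mappings are built
    ctx.n_outputs = n_outputs_all;

    // set output mappings (identity here; the real code may sort/reorder)
    ctx.output_ids.resize(n_outputs_all);
    for (int i = 0; i < n_outputs_all; ++i) {
        ctx.output_ids[i] = i;
    }

    std::printf("row for output 2: %d\n", ctx.get_output_row(2));  // prints 2
    return 0;
}

Committing the count first means any index check performed while, or immediately after, the mapping is built sees the final value for this batch rather than a stale count left over from the previous one.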