mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-29 20:45:04 +00:00)
@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
         }
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();
 
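Why the move matters: the output-mapping block reads n_outputs, so assigning it only after that block (as before this change) leaves the mapping code looking at a stale value. Below is a minimal, self-contained C++ sketch of the pattern, not the actual llama.cpp implementation; kv_cache_state, kv_guard_t, ctx_t, decode_tail, and batch_out_ids are hypothetical stand-ins for illustration.

#include <cstdio>
#include <vector>

// Hypothetical, simplified stand-in for the KV-cache guard in llama.cpp:
// an RAII object that restores the saved cache state on scope exit unless
// commit() is called once the batch has been processed successfully.
struct kv_cache_state {
    int head = 0;
};

struct kv_guard_t {
    kv_cache_state & kv;
    kv_cache_state   saved;
    bool             committed = false;

    explicit kv_guard_t(kv_cache_state & kv_) : kv(kv_), saved(kv_) {}

    void commit() { committed = true; }

    ~kv_guard_t() {
        if (!committed) {
            kv = saved; // roll back on error paths
        }
    }
};

// Hypothetical sketch of the tail of decode() after this change: n_outputs
// is assigned from n_outputs_all *before* the output-mapping block, so the
// code inside that block sees the final count rather than a stale value.
struct ctx_t {
    kv_cache_state   kv;
    int              n_outputs = 0;
    std::vector<int> output_ids;
};

static void decode_tail(ctx_t & ctx, const std::vector<int> & batch_out_ids) {
    kv_guard_t kv_guard(ctx.kv);

    // ... batch processing would happen here ...
    ctx.kv.head += (int) batch_out_ids.size();

    // finalize the batch processing
    kv_guard.commit();

    // set to total number of outputs in the batch, for use in llama_get_logits_ith
    const int n_outputs_all = (int) batch_out_ids.size();
    ctx.n_outputs = n_outputs_all;

    // set output mappings
    {
        bool sorted_output = true;

        ctx.output_ids.resize(ctx.n_outputs); // relies on n_outputs being set already
        for (int i = 0; i < ctx.n_outputs; ++i) {
            ctx.output_ids[i] = batch_out_ids[i];
            if (i > 0 && batch_out_ids[i] < batch_out_ids[i - 1]) {
                sorted_output = false;
            }
        }

        printf("n_outputs = %d, sorted = %s\n", ctx.n_outputs, sorted_output ? "yes" : "no");
    }
}

int main() {
    ctx_t ctx;
    decode_tail(ctx, {0, 2, 1});
    return 0;
}

With the input order {0, 2, 1} the sketch prints n_outputs = 3 and sorted = no; the point is simply that output_ids.resize(ctx.n_outputs) only works if the assignment above it has already run, which is exactly the ordering the diff establishes.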