mirror of https://github.com/ggml-org/llama.cpp.git
Merge branch 'master' into xsn/private_batch_api
@@ -4,6 +4,7 @@
 #include "llama.h"
 
 #include <ctime>
+#include <algorithm>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -37,7 +38,7 @@ static void batch_decode(llama_context * ctx, common_batch & batch, float * outp
     const struct llama_model * model = llama_get_model(ctx);
 
     // clear previous kv_cache values (irrelevant for embeddings)
-    llama_kv_cache_clear(ctx);
+    llama_kv_self_clear(ctx);
 
     // run model
     LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, llama_batch_ext_get_n_tokens(batch.get()), n_seq);
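The functional change in the second hunk tracks an upstream rename: master replaced llama_kv_cache_clear() with llama_kv_self_clear(), so the merge updates the branch's call site. Below is a minimal sketch of how the touched lines sit inside the decode helper; the exact signature, the unused-parameter stubs, and the trailing decode step are assumptions for illustration, not taken from the diff (common_batch and llama_batch_ext_get_n_tokens are this branch's private batch-API wrappers, with batch.get() handing back the underlying llama_batch_ext pointer).

#include "llama.h"
#include "common.h" // assumed home of common_batch and LOG_INF on this branch

static void batch_decode(llama_context * ctx, common_batch & batch, float * output, int n_seq, int n_embd) {
    const struct llama_model * model = llama_get_model(ctx);
    (void) model; // used further down in the full function (elided here)

    // clear previous kv_cache values (irrelevant for embeddings);
    // renamed on master from llama_kv_cache_clear() to llama_kv_self_clear()
    llama_kv_self_clear(ctx);

    // run model
    LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__,
            llama_batch_ext_get_n_tokens(batch.get()), n_seq);

    // ... decode the batch and copy embeddings into `output` (elided) ...
    (void) output; (void) n_embd;
}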