apply llama_batch_ext in various places
@@ -91,8 +91,10 @@ int main(int argc, char ** argv){
 
     const auto t_enc_start = ggml_time_us();
 
-    llama_decode(ctx, llama_batch_get_one( inp.data(), n_input - 1));
-    llama_decode(ctx, llama_batch_get_one(&inp.back(), 1));
+    llama_batch_ext_ptr batch0(llama_batch_ext_init_from_text( inp.data(), n_input - 1, 0, 0));
+    llama_batch_ext_ptr batch1(llama_batch_ext_init_from_text(&inp.back(), 1, 0, 0));
+    llama_decode_ext(ctx, batch0.get());
+    llama_decode_ext(ctx, batch1.get());
 
     const auto t_enc_end = ggml_time_us();
 
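Context for this hunk: the prompt was previously fed through two temporary llama_batch values from llama_batch_get_one(); it is now fed through explicit llama_batch_ext objects decoded with llama_decode_ext(). A minimal sketch of the new pattern, using only the calls visible in this diff — the two trailing 0 arguments and the RAII-style llama_batch_ext_ptr wrapper are taken from the hunk above, and their exact semantics are assumed rather than confirmed by a public header:

    // Sketch: encode all but the last prompt token in one batch, then the
    // last token on its own so its logits are available for sampling.
    // 'ctx' is a loaded llama_context; tokenize_prompt() is a hypothetical
    // stand-in for the example's prompt tokenization.
    std::vector<llama_token> inp = tokenize_prompt();
    const int n_input = (int) inp.size();

    // The trailing '0, 0' mirrors the diff; presumably the starting
    // position and sequence id, but that is an inference.
    llama_batch_ext_ptr batch0(llama_batch_ext_init_from_text( inp.data(), n_input - 1, 0, 0));
    llama_batch_ext_ptr batch1(llama_batch_ext_init_from_text(&inp.back(), 1, 0, 0));

    llama_decode_ext(ctx, batch0.get());   // prompt minus final token
    llama_decode_ext(ctx, batch1.get());   // final token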
@@ -108,7 +110,7 @@ int main(int argc, char ** argv){
 
     std::vector<llama_token> draft;
 
-    llama_batch batch_tgt = llama_batch_init(params.n_ctx, 0, 1);
+    llama_batch_ext * batch_tgt = llama_batch_ext_init(params.n_ctx, 1);
 
     // debug
     struct llama_kv_cache_view kvc_view = llama_kv_cache_view_init(ctx, 1);
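Note on this hunk: batch_tgt changes from a llama_batch value created by llama_batch_init(n_tokens, embd, n_seq_max) to an opaque llama_batch_ext * with one fewer parameter, so the ext initializer appears to take just a token capacity and a sequence count. A hedged sketch of the resulting lifecycle, pairing this allocation with the free call from the last hunk of this commit:

    // Sketch: llama_batch_ext is handled through an opaque pointer, so the
    // batch is created once, reused across iterations, and freed explicitly.
    llama_batch_ext * batch_tgt = llama_batch_ext_init(params.n_ctx, 1);

    // ... per-iteration clear/add/decode, as in the hunks below ...

    llama_batch_ext_free(batch_tgt);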
@@ -194,8 +196,9 @@ int main(int argc, char ** argv){
             // clean the cache of draft tokens that weren't accepted
             llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
 
-            common_batch_clear(batch_tgt);
-            common_batch_add(batch_tgt, draft[0], n_past, { 0 }, true);
+            const llama_seq_id seq_id = 0;
+            llama_batch_ext_clear(batch_tgt);
+            llama_batch_ext_add_text(batch_tgt, draft[0], n_past, &seq_id, 1, true);
 
             // Draft already contains a single token sampled from the model:
             GGML_ASSERT(draft.size() == 1);
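The helper change here is the one subtlety of the migration: common_batch_add accepted a braced list of sequence ids ({ 0 }), while llama_batch_ext_add_text takes a pointer plus a count, which is why a named seq_id now exists so its address can be passed. A sketch of the per-iteration reset under those assumed semantics:

    // Sketch: reset the reused batch, then queue the first draft token at
    // position n_past for sequence 0. The final 'true' mirrors the flag of
    // the old common_batch_add call and presumably requests logits.
    const llama_seq_id seq_id = 0;
    llama_batch_ext_clear(batch_tgt);
    llama_batch_ext_add_text(batch_tgt, draft[0], n_past, &seq_id, 1, true);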
@@ -205,13 +208,13 @@ int main(int argc, char ** argv){
             common_ngram_cache_draft(inp, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static);
 
             for (size_t i = 1; i < draft.size(); ++i) {
-                common_batch_add(batch_tgt, draft[i], n_past + i, { 0 }, true);
+                llama_batch_ext_add_text(batch_tgt, draft[i], n_past + i, &seq_id, 1, true);
             }
 
             t_draft_us += ggml_time_us() - t_start_draft_us;
             n_drafted += draft.size() - 1;
 
-            llama_decode(ctx, batch_tgt);
+            llama_decode_ext(ctx, batch_tgt);
             ++n_past;
 
             draft.erase(draft.begin());
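The loop follows the same pattern: each remaining draft token is queued i positions past n_past so the batch holds the whole speculative run, and the batch is then submitted once via llama_decode_ext rather than llama_decode. A condensed sketch, reusing the seq_id from the previous hunk:

    // Sketch: append draft tokens 1..N-1 at consecutive positions, then
    // decode the whole speculative batch in a single call.
    for (size_t i = 1; i < draft.size(); ++i) {
        llama_batch_ext_add_text(batch_tgt, draft[i], n_past + i, &seq_id, 1, true);
    }
    llama_decode_ext(ctx, batch_tgt);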
@@ -243,7 +246,7 @@ int main(int argc, char ** argv){
 
     common_sampler_free(smpl);
 
-    llama_batch_free(batch_tgt);
+    llama_batch_ext_free(batch_tgt);
 
     llama_backend_free();