apply various in places

This commit is contained in:
Xuan Son Nguyen
2025-03-01 20:42:18 +01:00
parent 1d6ba97789
commit 46596caf6d
12 changed files with 142 additions and 133 deletions

View File

@@ -91,8 +91,10 @@ int main(int argc, char ** argv){
const auto t_enc_start = ggml_time_us();
llama_decode(ctx, llama_batch_get_one( inp.data(), n_input - 1));
llama_decode(ctx, llama_batch_get_one(&inp.back(), 1));
llama_batch_ext_ptr batch0(llama_batch_ext_init_from_text( inp.data(), n_input - 1, 0, 0));
llama_batch_ext_ptr batch1(llama_batch_ext_init_from_text(&inp.back(), 1, 0, 0));
llama_decode_ext(ctx, batch0.get());
llama_decode_ext(ctx, batch1.get());
const auto t_enc_end = ggml_time_us();
@@ -108,7 +110,7 @@ int main(int argc, char ** argv){
std::vector<llama_token> draft;
llama_batch batch_tgt = llama_batch_init(params.n_ctx, 0, 1);
llama_batch_ext * batch_tgt = llama_batch_ext_init(params.n_ctx, 1);
// debug
struct llama_kv_cache_view kvc_view = llama_kv_cache_view_init(ctx, 1);
@@ -194,8 +196,9 @@ int main(int argc, char ** argv){
// clean the cache of draft tokens that weren't accepted
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
common_batch_clear(batch_tgt);
common_batch_add(batch_tgt, draft[0], n_past, { 0 }, true);
const llama_seq_id seq_id = 0;
llama_batch_ext_clear(batch_tgt);
llama_batch_ext_add_text(batch_tgt, draft[0], n_past, &seq_id, 1, true);
// Draft already contains a single token sampled from the model:
GGML_ASSERT(draft.size() == 1);
@@ -205,13 +208,13 @@ int main(int argc, char ** argv){
common_ngram_cache_draft(inp, draft, n_draft, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, ngram_cache_context, ngram_cache_dynamic, ngram_cache_static);
for (size_t i = 1; i < draft.size(); ++i) {
common_batch_add(batch_tgt, draft[i], n_past + i, { 0 }, true);
llama_batch_ext_add_text(batch_tgt, draft[i], n_past + i, &seq_id, 1, true);
}
t_draft_us += ggml_time_us() - t_start_draft_us;
n_drafted += draft.size() - 1;
llama_decode(ctx, batch_tgt);
llama_decode_ext(ctx, batch_tgt);
++n_past;
draft.erase(draft.begin());
@@ -243,7 +246,7 @@ int main(int argc, char ** argv){
common_sampler_free(smpl);
llama_batch_free(batch_tgt);
llama_batch_ext_free(batch_tgt);
llama_backend_free();