compile ok

commit 65f0184517
parent 9fb2d81eab
Author: Xuan Son Nguyen
Date:   2025-03-13 22:56:35 +01:00

9 changed files with 46 additions and 29 deletions


@@ -20,7 +20,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+        llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(&tokens[i], n_eval, *n_past, 0));
+        if (llama_decode_ext(ctx_llama, batch.get())) {
             LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
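
The change is mechanical on the caller side: the non-owning batch from llama_batch_get_one() is replaced by an owning llama_batch_ext_ptr, and llama_decode() by llama_decode_ext(). Below is a minimal sketch of the whole eval loop after this hunk, reconstructed from the hunk context; the loop variables i and N and the *n_past += n_eval bookkeeping follow the usual shape of eval_tokens in these examples rather than anything shown in the diff, and treating the fourth argument of llama_batch_ext_init_from_text() as a sequence id is likewise an assumption.

```cpp
// A minimal sketch (not the verbatim file) of eval_tokens after this change,
// relying on llama.h from this branch for the llama_batch_ext_* declarations.
static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens,
                        int n_batch, int * n_past) {
    const int N = (int) tokens.size();
    for (int i = 0; i < N; i += n_batch) {
        int n_eval = N - i;
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
        // One owning batch per chunk; assumed to be freed when `batch` leaves scope.
        // The fourth argument is taken to be the sequence id (assumption).
        llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(&tokens[i], n_eval, *n_past, 0));
        if (llama_decode_ext(ctx_llama, batch.get())) {
            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n",
                    __func__, i, N, n_batch, *n_past);
            return false;
        }
        *n_past += n_eval;
    }
    return true;
}
```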


@@ -101,7 +101,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+        llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(&tokens[i], n_eval, *n_past, 0));
+        if (llama_decode_ext(ctx_llama, batch.get())) {
             LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
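
This hunk is the same substitution in a second example. What makes it a two-line change is the RAII wrapper: llama_batch_get_one() returned a view that needed no cleanup, while llama_batch_ext_init_from_text() evidently allocates, so ownership has to be scoped. Only the llama_batch_ext_ptr type name and its .get() call are visible in the diff; one plausible definition, assuming this branch exposes a llama_batch_ext_free() cleanup function (an assumption), is:

```cpp
#include <memory>

// Hypothetical definition of the wrapper used above; llama_batch_ext_free()
// is an assumed cleanup function, not shown anywhere in this diff.
struct llama_batch_ext_deleter {
    void operator()(llama_batch_ext * batch) const {
        if (batch) {
            llama_batch_ext_free(batch);
        }
    }
};
using llama_batch_ext_ptr = std::unique_ptr<llama_batch_ext, llama_batch_ext_deleter>;
```

With a deleter like this, the early return on the decode-failure path above cannot leak the batch, which is presumably why the call sites can stay this small.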


@@ -96,16 +96,24 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        auto batch = llama_batch_get_one(&tokens[i], n_eval);
-        // TODO: add mrope pos ids somewhere else
-        pos.resize(batch.n_tokens * 4);
-        std::fill(pos.begin(), pos.end(), 0);
-        for (int j = 0; j < batch.n_tokens * 3; j ++) {
-            pos[j] = *st_pos_id + (j % batch.n_tokens);
-        }
-        batch.pos = pos.data();
-        if (llama_decode(ctx_llama, batch)) {
+        // TODO: add mrope pos ids somewhere else
+        int n_tokens = n_eval;
+        pos.resize(n_tokens * 4);
+        std::fill(pos.begin(), pos.end(), 0);
+        for (int j = 0; j < n_tokens * 3; j ++) {
+            pos[j] = *st_pos_id + (j % n_tokens);
+        }
+        llama_batch_ext_ptr batch(llama_batch_ext_init(n_eval, 1));
+        for (int j = 0; j < n_eval; j++) {
+            llama_token token = tokens[i + j];
+            llama_seq_id seq_id = 0;
+            llama_batch_ext_add_text(batch.get(), token, pos[j], &seq_id, 1, false);
+        }
+        llama_batch_ext_set_output_last(batch.get());
+        if (llama_decode_ext(ctx_llama, batch.get())) {
             LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
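
This Qwen2-VL hunk is the one call site that cannot go through llama_batch_ext_init_from_text(), because M-RoPE wants four position sections per token rather than a single linear position. The loop writes the ramp st_pos_id .. st_pos_id + n_tokens - 1 into the first three sections and leaves the fourth at zero; note that llama_batch_ext_add_text() is only handed pos[j], a single position per token, which is what the carried-over TODO refers to. A self-contained sketch of the layout the loop produces, using only the arithmetic from the hunk:

```cpp
#include <cstdio>
#include <vector>

// Reproduces the M-RoPE position layout from the hunk: 4 sections of
// n_tokens entries each; sections 0..2 hold st_pos_id + (j % n_tokens),
// section 3 stays zero.
int main() {
    const int n_tokens  = 4;   // example chunk size
    const int st_pos_id = 10;  // example starting position id

    std::vector<int> pos(n_tokens * 4, 0);
    for (int j = 0; j < n_tokens * 3; j ++) {
        pos[j] = st_pos_id + (j % n_tokens);
    }

    // prints: 10 11 12 13 | 10 11 12 13 | 10 11 12 13 | 0 0 0 0
    for (int s = 0; s < 4; s++) {
        for (int j = 0; j < n_tokens; j++) {
            printf("%d ", pos[s * n_tokens + j]);
        }
        printf(s < 3 ? "| " : "\n");
    }
    return 0;
}
```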