mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-29 04:35:05 +00:00)
apply various in places
@@ -500,7 +500,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
         // clear the KV cache
         llama_kv_cache_clear(ctx);
 
-        llama_batch batch = llama_batch_init(n_batch, 0, 1);
+        llama_batch_ext * batch = llama_batch_ext_init(n_batch, 1);
 
         for (int j = 0; j < num_batches; ++j) {
             const int batch_start = start + j * n_batch;
@@ -514,14 +514,15 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
                 tokens[batch_start] = llama_vocab_bos(vocab);
             }
 
-            common_batch_clear(batch);
+            llama_batch_ext_clear(batch);
             for (int i = 0; i < batch_size; i++) {
-                common_batch_add(batch, tokens[batch_start + i], j*n_batch + i, {0}, true);
+                const llama_seq_id seq_id = 0;
+                llama_batch_ext_add_text(batch, tokens[batch_start + i], j*n_batch + i, &seq_id, 1, true);
             }
 
-            if (llama_decode(ctx, batch)) {
+            if (llama_decode_ext(ctx, batch)) {
                 LOG_ERR("%s : failed to eval\n", __func__);
-                llama_batch_free(batch);
+                llama_batch_ext_free(batch);
                 return false;
             }
 
@@ -534,7 +535,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
             }
         }
 
-        llama_batch_free(batch);
+        llama_batch_ext_free(batch);
 
         const auto t_end = std::chrono::high_resolution_clock::now();
 
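For readers following the migration, below is a minimal, self-contained sketch of what the evaluation loop looks like after this change. It is assembled from the hunks above, not copied from the file: the helper name eval_chunk, the add_bos guard, and the plain fprintf error path (the original uses LOG_ERR from the common logging helpers) are illustrative assumptions; the llama_batch_ext_* calls and their arguments are exactly the ones shown in the diff.

// Sketch of the post-migration evaluation loop (assumptions noted above).
#include <cstdio>
#include <vector>
#include "llama.h"

static bool eval_chunk(llama_context * ctx, const llama_vocab * vocab,
                       std::vector<llama_token> & tokens,
                       int start, int n_batch, int num_batches, bool add_bos) {
    // one reusable batch object for all mini-batches of this chunk (new _ext API)
    llama_batch_ext * batch = llama_batch_ext_init(n_batch, 1);

    for (int j = 0; j < num_batches; ++j) {
        const int batch_start = start + j * n_batch;
        const int batch_size  = n_batch;

        // the diff context restores the BOS token at the start of the chunk;
        // the exact guard used here is an assumption
        if (add_bos && j == 0) {
            tokens[batch_start] = llama_vocab_bos(vocab);
        }

        // refill the batch: one sequence (id 0), logits requested for every token
        llama_batch_ext_clear(batch);
        for (int i = 0; i < batch_size; i++) {
            const llama_seq_id seq_id = 0;
            llama_batch_ext_add_text(batch, tokens[batch_start + i], j*n_batch + i, &seq_id, 1, true);
        }

        if (llama_decode_ext(ctx, batch)) {
            fprintf(stderr, "%s : failed to eval\n", __func__);  // original uses LOG_ERR
            llama_batch_ext_free(batch);
            return false;
        }
    }

    llama_batch_ext_free(batch);
    return true;
}

The main behavioural differences from the old API, as visible in this diff, are that llama_batch_ext is an opaque pointer that must be freed with llama_batch_ext_free on every exit path, and that the {0} sequence-id list taken by common_batch_add is replaced by an explicit llama_seq_id pointer plus count in llama_batch_ext_add_text.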