// llama-cpp.h: C++ RAII helpers (std::unique_ptr deleters and aliases) for the llama.h C API
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#ifndef __cplusplus
|
|
|
|
#error "This header is for C++ only"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
#include "llama.h"
|
|
|
|
|
|
|
|
struct llama_model_deleter {
|
2025-01-06 10:55:18 +02:00
|
|
|
void operator()(llama_model * model) { llama_model_free(model); }
|
2024-11-25 16:56:24 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
struct llama_context_deleter {
|
|
|
|
void operator()(llama_context * context) { llama_free(context); }
|
|
|
|
};
|
|
|
|
|
|
|
|
struct llama_sampler_deleter {
|
|
|
|
void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); }
|
|
|
|
};
|
|
|
|
|
2025-01-12 11:32:42 +02:00
|
|
|
struct llama_adapter_lora_deleter {
|
|
|
|
void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
|
2025-01-03 10:18:53 +02:00
|
|
|
};
|
|
|
|
|
2025-02-16 00:02:53 +01:00
|
|
|
struct llama_batch_ext_deleter {
|
|
|
|
void operator()(llama_batch_ext * batch) { llama_batch_ext_free(batch); }
|
2025-02-14 18:16:49 +01:00
|
|
|
};
|
|
|
|
|
2024-11-25 16:56:24 -05:00
|
|
|
typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
|
|
|
|
typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr;
|
|
|
|
typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
|
2025-01-12 11:32:42 +02:00
|
|
|
typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_adapter_lora_ptr;
|
2025-03-14 17:12:03 +01:00
|
|
|
|
|
|
|
struct llama_batch_ext_ptr : std::unique_ptr<llama_batch_ext, llama_batch_ext_deleter> {
|
2025-03-14 22:30:29 +01:00
|
|
|
llama_batch_ext_ptr() : std::unique_ptr<llama_batch_ext, llama_batch_ext_deleter>() {}
|
2025-03-14 17:12:03 +01:00
|
|
|
llama_batch_ext_ptr(llama_batch_ext * batch) : std::unique_ptr<llama_batch_ext, llama_batch_ext_deleter>(batch) {}
|
|
|
|
|
2025-03-14 22:17:07 +01:00
|
|
|
// convenience function to create a batch from text tokens, without worrying about manually freeing it
|
|
|
|
static llama_batch_ext_ptr init_from_text(llama_token * tokens,
|
2025-03-14 17:12:03 +01:00
|
|
|
int32_t n_tokens,
|
|
|
|
int32_t pos0,
|
|
|
|
int32_t seq_id,
|
|
|
|
bool output_last) {
|
|
|
|
return llama_batch_ext_ptr(llama_batch_ext_init_from_text(tokens, n_tokens, pos0, seq_id, output_last));
|
|
|
|
}
|
|
|
|
|
2025-03-14 22:17:07 +01:00
|
|
|
// convenience function to create a batch from text embeddings, without worrying about manually freeing it
|
|
|
|
static llama_batch_ext_ptr init_from_embd(float * embd,
|
2025-03-14 17:12:03 +01:00
|
|
|
size_t n_tokens,
|
|
|
|
size_t n_embd,
|
|
|
|
int32_t pos0,
|
|
|
|
int32_t seq_id) {
|
|
|
|
return llama_batch_ext_ptr(llama_batch_ext_init_from_embd(embd, n_tokens, n_embd, pos0, seq_id));
|
|
|
|
}
|
|
|
|
};
|