llama : add llama_vocab, functions -> methods, naming (#11110)

* llama : functions -> methods (#11110)

* llama : add struct llama_vocab to the API (#11156)

ggml-ci

* hparams : move vocab params to llama_vocab (#11159)

ggml-ci

* vocab : more pimpl (#11165)

ggml-ci

* vocab : minor tokenization optimizations (#11160)

ggml-ci

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* lora : update API names (#11167)

ggml-ci

* llama : update API names to use correct prefix (#11174)

* llama : update API names to use correct prefix

ggml-ci

* cont

ggml-ci

* cont

ggml-ci

* minor [no ci]

* vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174)

ggml-ci

* vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174)

ggml-ci

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
commit afa8a9ec9b (parent c05e8c9934)
Author: Georgi Gerganov
Date:   2025-01-12 11:32:42 +02:00 (committed by GitHub)

68 changed files with 5855 additions and 5400 deletions

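Taken together, the renames above amount to the following usage pattern. This is a hedged sketch, not code from the commit: the model path is a placeholder and error handling is omitted.

    // load a model, obtain its vocab, and query the renamed vocab getters
    llama_backend_init();

    llama_model   * model = llama_model_load_from_file("model.gguf", llama_model_default_params()); // placeholder path
    llama_context * ctx   = llama_init_from_model(model, llama_context_default_params());           // was llama_new_context_with_model

    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int32_t n_tokens = llama_vocab_n_tokens(vocab);    // was llama_vocab_n_vocab / llama_n_vocab
    const bool    add_bos  = llama_vocab_get_add_bos(vocab); // was llama_vocab_add_bos

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();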
View File

@@ -14,7 +14,7 @@ int main(int argc, char ** argv) {
     std::thread([&model_path]() {
         llama_backend_init();
         auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
-        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        auto * ctx = llama_init_from_model(model, llama_context_default_params());
         llama_free(ctx);
         llama_model_free(model);
         llama_backend_free();

View File

@@ -157,7 +157,7 @@ int main(void) {
     }

     // test invalid chat template
-    res = llama_chat_apply_template(nullptr, "INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
+    res = llama_chat_apply_template("INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
     assert(res < 0);

     for (size_t i = 0; i < templates.size(); i++) {
@@ -165,7 +165,6 @@ int main(void) {
         std::string expected = expected_output[i];
         formatted_chat.resize(1024);
         res = llama_chat_apply_template(
-            nullptr,
             custom_template.c_str(),
             conversation,
             message_count,

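For reference, a minimal sketch of calling the updated llama_chat_apply_template, which no longer takes a model/nullptr argument. The template name and messages below are illustrative, not taken from the test:

    llama_chat_message msgs[] = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };

    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template("chatml", msgs, 2, /*add_ass=*/true, buf.data(), buf.size());
    if (res > (int32_t) buf.size()) {
        // the formatted chat did not fit: grow the buffer and retry
        buf.resize(res);
        res = llama_chat_apply_template("chatml", msgs, 2, /*add_ass=*/true, buf.data(), buf.size());
    }
    std::string formatted(buf.data(), res);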
View File

@@ -161,7 +161,7 @@ int main(int argc, char **argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());

View File

@@ -55,7 +55,7 @@ int main(int argc, char **argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -64,8 +64,10 @@ int main(int argc, char **argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
+    //GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_BPE);
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
         return 99;
     }

@@ -75,7 +77,7 @@ int main(int argc, char **argv) {
     atexit([]() { console::cleanup(); });
 #endif

-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i));

View File

@@ -43,7 +43,7 @@ int main(int argc, char ** argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -52,8 +52,10 @@ int main(int argc, char ** argv) {
         }
     }

+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_SPM) {
         return 99;
     }

@@ -63,7 +65,7 @@ int main(int argc, char ** argv) {
     atexit([]() { console::cleanup(); });
 #endif

-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i), true);

View File

@@ -76,7 +76,7 @@ class LibLlamaModel:
         self.ffi = libllama.ffi
         if isinstance(mparams, dict):
             mparams = libllama.model_default_params(**mparams)
-        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
+        self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
         if not self.model:
             raise RuntimeError("error: failed to load model '%s'" % path_model)
         if isinstance(cparams, dict):
@@ -92,7 +92,7 @@ class LibLlamaModel:
         if self.ctx:
             self.lib.llama_free(self.ctx)
         if self.model:
-            self.lib.llama_free_model(self.model)
+            self.lib.llama_model_free(self.model)
         self.ctx = None
         self.model = None
         self.lib = None