llama : add llama_vocab, functions -> methods, naming (#11110)

* llama : functions -> methods (#11110)

* llama : add struct llama_vocab to the API (#11156)

ggml-ci

* hparams : move vocab params to llama_vocab (#11159)

ggml-ci

* vocab : more pimpl (#11165)

ggml-ci

* vocab : minor tokenization optimizations (#11160)

ggml-ci

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* lora : update API names (#11167)

ggml-ci

* llama : update API names to use correct prefix (#11174)

* llama : update API names to use correct prefix

ggml-ci

* cont

ggml-ci

* cont

ggml-ci

* minor [no ci]

* vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174)

ggml-ci

* vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174)

ggml-ci

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
commit afa8a9ec9b (parent c05e8c9934)
Author: Georgi Gerganov
Date:   2025-01-12 11:32:42 +02:00 (committed by GitHub)

68 changed files with 5855 additions and 5400 deletions

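Taken together, the renames above amount to the following usage pattern. This is a hedged sketch, not code from the commit: the model path is a placeholder and error handling is omitted.

    // load a model, obtain its vocab, and query the renamed vocab getters
    llama_backend_init();

    llama_model   * model = llama_model_load_from_file("model.gguf", llama_model_default_params()); // placeholder path
    llama_context * ctx   = llama_init_from_model(model, llama_context_default_params());           // was llama_new_context_with_model

    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int32_t n_tokens = llama_vocab_n_tokens(vocab);    // was llama_vocab_n_vocab / llama_n_vocab
    const bool    add_bos  = llama_vocab_get_add_bos(vocab); // was llama_vocab_add_bos

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();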
View File

@@ -14,7 +14,7 @@ int main(int argc, char ** argv) {
     std::thread([&model_path]() {
         llama_backend_init();
         auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
-        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        auto * ctx = llama_init_from_model(model, llama_context_default_params());
         llama_free(ctx);
         llama_model_free(model);
         llama_backend_free();

View File

@@ -157,7 +157,7 @@ int main(void) {
     }

     // test invalid chat template
-    res = llama_chat_apply_template(nullptr, "INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
+    res = llama_chat_apply_template("INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
     assert(res < 0);

     for (size_t i = 0; i < templates.size(); i++) {
@@ -165,7 +165,6 @@ int main(void) {
         std::string expected = expected_output[i];
         formatted_chat.resize(1024);
         res = llama_chat_apply_template(
-            nullptr,
             custom_template.c_str(),
             conversation,
             message_count,

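For reference, a minimal sketch of calling the updated llama_chat_apply_template, which no longer takes a model/nullptr argument. The template name and messages below are illustrative, not taken from the test:

    llama_chat_message msgs[] = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };

    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template("chatml", msgs, 2, /*add_ass=*/true, buf.data(), buf.size());
    if (res > (int32_t) buf.size()) {
        // the formatted chat did not fit: grow the buffer and retry
        buf.resize(res);
        res = llama_chat_apply_template("chatml", msgs, 2, /*add_ass=*/true, buf.data(), buf.size());
    }
    std::string formatted(buf.data(), res);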
View File

@@ -161,7 +161,7 @@ int main(int argc, char **argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());

View File

@@ -55,7 +55,7 @@ int main(int argc, char **argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -64,8 +64,10 @@ int main(int argc, char **argv) {
         }
     }

-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
+    //GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_BPE);
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
         return 99;
     }

@@ -75,7 +77,7 @@ int main(int argc, char **argv) {
     atexit([]() { console::cleanup(); });
 #endif

-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i));

View File

@@ -43,7 +43,7 @@ int main(int argc, char ** argv) {

         auto cparams = llama_context_default_params();

-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);

         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -52,8 +52,10 @@ int main(int argc, char ** argv) {
         }
     }

+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_SPM) {
         return 99;
     }

@@ -63,7 +65,7 @@ int main(int argc, char ** argv) {
     atexit([]() { console::cleanup(); });
 #endif

-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i), true);

View File

@@ -76,7 +76,7 @@ class LibLlamaModel:
         self.ffi = libllama.ffi
         if isinstance(mparams, dict):
             mparams = libllama.model_default_params(**mparams)
-        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
+        self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
         if not self.model:
             raise RuntimeError("error: failed to load model '%s'" % path_model)
         if isinstance(cparams, dict):
@@ -92,7 +92,7 @@ class LibLlamaModel:
         if self.ctx:
             self.lib.llama_free(self.ctx)
         if self.model:
-            self.lib.llama_free_model(self.model)
+            self.lib.llama_model_free(self.model)
         self.ctx = None
         self.model = None
         self.lib = None