llama : cache llama_token_to_piece (#7587)

* llama : cache llama_token_to_piece ggml-ci
* llama : use vectors and avoid has_cache ggml-ci
* llama : throw on unknown tokenizer types ggml-ci
* llama : print a log of the total cache size
2025-08-19 22:36:13 -04:00 · 2024-05-30 19:01:41 +03:00
parent 5dcdf94676
commit 5921b8f089
2 changed files with 119 additions and 84 deletions
--- a/llama.h
+++ b/llama.h
@@ -424,8 +424,8 @@ extern "C" {

    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model   * model);
-    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model   * model);
+    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

    LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
    LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);