From bea6d0617ee48504f62f2205a3e941e8248cbdf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 7 Jul 2025 07:24:28 +0200
Subject: [PATCH] add adapter metadata api

---
 common/common.cpp     |  4 +--
 common/common.h       |  4 +--
 include/llama.h       | 20 +++++++++---
 src/llama-adapter.cpp | 74 +++++++++++++++++++++++++++++++++++++------
 src/llama-adapter.h   |  5 +--
 5 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 5a97fe868..9567f99b8 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -993,8 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         }
 
         la.ptr = lora.get();
-        la.task_name = llama_adapter_lora_task_name(la.ptr);
-        la.prompt_prefix = llama_adapter_lora_prompt_prefix(la.ptr);
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", la.task_name, sizeof(la.task_name));
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", la.prompt_prefix, sizeof(la.prompt_prefix));
 
         iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
     }
diff --git a/common/common.h b/common/common.h
index 705295a80..42e1c29e3 100644
--- a/common/common.h
+++ b/common/common.h
@@ -31,8 +31,8 @@ struct common_adapter_lora_info {
     std::string path;
     float scale;
 
-    std::string task_name;
-    std::string prompt_prefix;
+    char task_name[64];
+    char prompt_prefix[256];
 
     struct llama_adapter_lora * ptr;
 };
diff --git a/include/llama.h b/include/llama.h
index 7b5279179..51bc73c77 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -588,11 +588,23 @@ extern "C" {
             struct llama_model * model,
             const char * path_lora);
 
-    // Get the LoRA task name. Returns a blank string if not applicable
-    LLAMA_API const char * llama_adapter_lora_task_name(struct llama_adapter_lora * adapter);
+    // Functions to access the adapter's GGUF metadata scalar values
+    // - The functions return the length of the string on success, or -1 on failure
+    // - The output string is always null-terminated and cleared on failure
+    // - When retrieving a string, an extra byte must be allocated to account for the null terminator
+    // - GGUF array values are not supported by these functions
 
-    // Get the required LoRA prompt prefix. Returns a blank string if not applicable
-    LLAMA_API const char * llama_adapter_lora_prompt_prefix(struct llama_adapter_lora * adapter);
+    // Get metadata value as a string by key name
+    LLAMA_API int32_t llama_adapter_meta_val_str(const struct llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size);
+
+    // Get the number of metadata key/value pairs
+    LLAMA_API int32_t llama_adapter_meta_count(const struct llama_adapter_lora * adapter);
+
+    // Get metadata key name by index
+    LLAMA_API int32_t llama_adapter_meta_key_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
+
+    // Get metadata value as a string by index
+    LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
 
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp
index f79f60582..772ce1b44 100644
--- a/src/llama-adapter.cpp
+++ b/src/llama-adapter.cpp
@@ -163,13 +163,38 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 
     // check metadata
     {
+        const gguf_context * gguf_ctx = ctx_gguf.get();
+
+        LLAMA_LOG_INFO("%s: Dumping metadata keys/values.\n", __func__);
+
+        // get metadata as string
+        for (int i = 0; i < gguf_get_n_kv(gguf_ctx); i++) {
+            gguf_type type = gguf_get_kv_type(gguf_ctx, i);
+            const std::string type_name =
+                type == GGUF_TYPE_ARRAY
+                ? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(gguf_ctx, i)), gguf_get_arr_n(gguf_ctx, i))
+                : gguf_type_name(type);
+            const char * name = gguf_get_key(gguf_ctx, i);
+            const std::string value = gguf_kv_to_str(gguf_ctx, i);
+
+            if (type != GGUF_TYPE_ARRAY) {
+                adapter.gguf_kv.emplace(name, value);
+            }
+
+            const size_t MAX_VALUE_LEN = 40;
+            std::string print_value = value.size() > MAX_VALUE_LEN ? format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()) : value;
+            replace_all(print_value, "\n", "\\n");
+
+            LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), print_value.c_str());
+        }
+
         auto get_kv_str = [&](const std::string & key) -> std::string {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf.get(), id));
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? "" : std::string(gguf_get_val_str(gguf_ctx, id));
         };
         auto get_kv_f32 = [&](const std::string & key) -> float {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf.get(), id);
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? 0.0f : gguf_get_val_f32(gguf_ctx, id);
         };
 
         LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
@@ -190,8 +215,6 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
         }
 
         adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
-        adapter.task_name = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_TASK_NAME));
-        adapter.prompt_prefix = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_PROMPT_PREFIX));
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
@@ -385,12 +408,43 @@ llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * p
     return nullptr;
 }
 
-const char * llama_adapter_lora_task_name(llama_adapter_lora * adapter) {
-    return adapter->task_name.c_str();
+int32_t llama_adapter_meta_val_str(const llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size) {
+    const auto & it = adapter->gguf_kv.find(key);
+    if (it == adapter->gguf_kv.end()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-const char * llama_adapter_lora_prompt_prefix(llama_adapter_lora * adapter) {
-    return adapter->prompt_prefix.c_str();
+int32_t llama_adapter_meta_count(const llama_adapter_lora * adapter) {
+    return (int)adapter->gguf_kv.size();
+}
+
+int32_t llama_adapter_meta_key_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = adapter->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = adapter->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
}
 
 void llama_adapter_lora_free(llama_adapter_lora * adapter) {
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
index c95618e1b..9084e7cab 100644
--- a/src/llama-adapter.h
+++ b/src/llama-adapter.h
@@ -66,8 +66,9 @@ struct llama_adapter_lora {
     std::vector<ggml_backend_buffer_ptr> bufs;
 
     float alpha;
-    std::string task_name;
-    std::string prompt_prefix;
+
+    // gguf metadata
+    std::unordered_map<std::string, std::string> gguf_kv;
 
     llama_adapter_lora() = default;
     ~llama_adapter_lora() = default;
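
Usage sketch (not part of the patch): the example below shows how the new
metadata calls can be driven from client code, mirroring the existing
llama_model_meta_* pattern. "model.gguf" and "adapter.gguf" are placeholder
paths, and the model-loading boilerplate is ordinary llama.h API rather than
anything added here.

    // enumerate and query adapter GGUF metadata via the new API
    #include "llama.h"

    #include <cstdio>

    int main() {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
        if (model == nullptr) {
            return 1;
        }

        llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf"); // placeholder path
        if (adapter == nullptr) {
            llama_model_free(model);
            return 1;
        }

        // dump every key/value pair by index
        const int32_t n_kv = llama_adapter_meta_count(adapter);
        for (int32_t i = 0; i < n_kv; i++) {
            char key[128];
            char val[256];
            if (llama_adapter_meta_key_by_index(adapter, i, key, sizeof(key)) >= 0 &&
                llama_adapter_meta_val_str_by_index(adapter, i, val, sizeof(val)) >= 0) {
                printf("%s = %s\n", key, val);
            }
        }

        // direct lookup by key name; returns -1 if the key is absent
        char task_name[64];
        if (llama_adapter_meta_val_str(adapter, "adapter.lora.task_name", task_name, sizeof(task_name)) >= 0) {
            printf("task name: %s\n", task_name);
        }

        llama_adapter_lora_free(adapter);
        llama_model_free(model);
        return 0;
    }

Since the getters return the full length of the value (snprintf semantics),
a caller that needs exact sizing can first call with buf_size == 0 to obtain
the length, then allocate length + 1 bytes for the second call.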