clip : refactor clip_init, add tests (#12757)

* refactor clip_init

* fix loading file

* fix style

* test ok

* better test with report

* add missing headers

* clarify

* add KEY_MM_PATCH_MERGE_TYPE

* remove bool has_* pattern

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update examples/llava/clip.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* use ggml_soft_max_ext

* refactor logging system

* add minicpm-v-o 2.6 for testing

* use nullptr everywhere

* fix Yi-VL model

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Xuan-Son Nguyen
2025-04-05 17:17:40 +02:00
committed by GitHub
parent c6ff5d2a8d
commit 0364178ca2
9 changed files with 916 additions and 887 deletions

View File

@ -241,7 +241,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m
prompt = "describe the image in detail.";
}
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
llama_context_params ctx_params = common_context_params_to_llama(*params);
ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings