Fixes a Qwen2.5VL segfault during inference introduced by https://github.com/ggml-org/llama.cpp/pull/12402, where the migration away from has_qwen2vl_merger was incomplete: several code paths still checked only PROJECTOR_TYPE_QWEN2VL and did not handle PROJECTOR_TYPE_QWEN25VL.

This commit is contained in:
Concedo
2025-04-27 18:36:57 +08:00
parent ca2bb89eac
commit 37ae6a281a

View File

@@ -1718,7 +1718,8 @@ struct clip_model_loader {
     if (ctx_clip.proj_type == PROJECTOR_TYPE_MINICPMV
         || ctx_clip.proj_type == PROJECTOR_TYPE_GLM_EDGE
-        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN2VL) {
+        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN2VL
+        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN25VL) {
         n_layer += 1;
     }
@@ -2744,7 +2745,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
         }
         return true;
     }
-    else if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL) {
+    else if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
         clip_image_u8 resized;
         auto patch_size = clip_get_patch_size(ctx) * 2;
         int nx = ceil((float)img->nx / patch_size) * patch_size;
@@ -3139,7 +3140,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     else {
         // non-minicpmv models
-        if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL) {
+        if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
             // pw * ph = number of tokens output by ViT after apply patch merger
             // ipw * ipw = number of vision token been processed inside ViT
             const int merge_ratio = 2;
@@ -3279,7 +3280,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
             }
         }
-        if (use_window_attn && ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
+        if (use_window_attn && (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL)) {
             struct ggml_tensor * window_idx = ggml_graph_get_tensor(gf, "window_idx");
             struct ggml_tensor * inv_window_idx = ggml_graph_get_tensor(gf, "inv_window_idx");
             struct ggml_tensor * window_mask = ggml_graph_get_tensor(gf, "window_mask");