llava: Fix cpu-only clip image encoding segfault (#12907)

* llava: Fix cpu-only clip image encoding

* clip : no smart ptr for ggml_backend_t

* Fix for backend_ptr push_back

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
Author:    Matt Clayton
Committed: GitHub
Date:      2025-04-12 01:29:03 -04:00
parent     c94085df28
commit     e59ea539b8
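
Why this segfaulted: in a CPU-only build the old clip_ctx constructor ran backend = std::move(backend_cpu);, leaving the backend_cpu smart pointer empty, and the later call ggml_backend_cpu_set_n_threads(ctx->backend_cpu.get(), n_threads) in clip_image_batch_encode then received a null backend. The diff below replaces the ggml_backend_ptr smart pointers with raw ggml_backend_t handles, freed explicitly in the new ~clip_ctx() destructor, so backend_cpu stays valid in both the GPU and CPU-only paths. The following is a minimal, self-contained sketch of the moved-from-pointer failure mode; the types and names are hypothetical stand-ins for illustration, not the real ggml API or the actual clip.cpp code.

#include <cstdio>
#include <memory>

// Hypothetical stand-ins for ggml_backend / ggml_backend_ptr, for illustration only.
struct fake_backend { int n_threads = 1; };
using fake_backend_ptr = std::unique_ptr<fake_backend>;

int main() {
    fake_backend_ptr backend_cpu(new fake_backend());
    fake_backend_ptr backend;               // no GPU backend in a CPU-only build

    // Old constructor logic: fall back to the CPU backend by moving it.
    backend = std::move(backend_cpu);       // backend_cpu is now empty

    // Later, the encode path still uses backend_cpu directly:
    fake_backend * raw = backend_cpu.get(); // nullptr after the move
    if (raw == nullptr) {
        std::puts("backend_cpu is null -> dereferencing it here is the segfault");
        return 1;
    }
    raw->n_threads = 8;                     // never reached in the CPU-only path
    return 0;
}
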


@@ -323,8 +323,8 @@ struct clip_ctx {
     std::vector<ggml_backend_t> backend_ptrs;
     std::vector<ggml_backend_buffer_type_t> backend_buft;
-    ggml_backend_ptr backend;
-    ggml_backend_ptr backend_cpu;
+    ggml_backend_t backend;
+    ggml_backend_t backend_cpu;
     ggml_backend_buffer_ptr buf;
     ggml_backend_sched_ptr sched;
@@ -332,27 +332,34 @@ struct clip_ctx {
     clip_image_size load_image_size;

     clip_ctx(clip_context_params & ctx_params) {
-        backend_cpu = ggml_backend_ptr(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr));
-        backend     = ggml_backend_ptr(ctx_params.use_gpu
-                        ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
-                        : nullptr);
+        backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
+        backend     = ctx_params.use_gpu
+                        ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
+                        : nullptr;

         if (backend) {
-            LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend.get()));
-            backend_ptrs.push_back(backend.get());
-            backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));
+            LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend));
+            backend_ptrs.push_back(backend);
+            backend_buft.push_back(ggml_backend_get_default_buffer_type(backend));
         } else {
-            backend = std::move(backend_cpu);
+            backend = backend_cpu;
             LOG_INF("%s: CLIP using CPU backend\n", __func__);
         }

-        backend_ptrs.push_back(backend_cpu.get());
-        backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu.get()));
+        backend_ptrs.push_back(backend_cpu);
+        backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu));

         sched.reset(
             ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false)
         );
     }

+    ~clip_ctx() {
+        ggml_backend_free(backend);
+        if (backend != backend_cpu) {
+            ggml_backend_free(backend_cpu);
+        }
+    }
+
 };

 static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
@@ -1428,7 +1435,7 @@ struct clip_model_loader {
         }

         // alloc memory and offload data
-        ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend.get());
+        ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend);
         ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft));
         ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
         for (auto & t : tensors_to_load) {
@@ -2610,7 +2617,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         }
     }

-    ggml_backend_cpu_set_n_threads(ctx->backend_cpu.get(), n_threads);
+    ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);

     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
     if (status != GGML_STATUS_SUCCESS) {