arg : add --no-mmproj-offload (#13093)

* arg : add --no-mmproj-offload

* Update common/arg.cpp
Author:    Xuan-Son Nguyen
Date:      2025-04-24 14:04:14 +02:00
Committer: GitHub
Parent:    80982e815e
Commit:    7c727fbe39
3 changed files with 12 additions and 3 deletions

common/arg.cpp

@@ -2140,6 +2140,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.no_mmproj = true;
         }
     ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj-offload"},
+        "do not offload multimodal projector to GPU",
+        [](common_params & params) {
+            params.mmproj_use_gpu = false;
+        }
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--image"}, "FILE",
         "path to an image file. use with multimodal models. Specify multiple times for batching",

common/common.h

@@ -342,6 +342,7 @@ struct common_params {
     // multimodal models (see examples/llava)
     struct common_params_model mmproj;
+    bool mmproj_use_gpu = true;     // use GPU for multimodal model
     bool no_mmproj = false;         // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
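
Note that the two booleans answer different questions: no_mmproj disables the multimodal projector entirely, while the new mmproj_use_gpu only decides where it runs. A minimal sketch of that decision, with a hypothetical struct standing in for common_params (not llama.cpp's actual loading code):

    #include <cstdio>

    // Illustrative subset of common_params; the field names are real, the type is not.
    struct common_params_subset {
        bool mmproj_use_gpu = true;  // --no-mmproj-offload flips this to false
        bool no_mmproj      = false; // set to true by the option shown in the context lines above
    };

    int main() {
        common_params_subset params;
        if (params.no_mmproj) {
            std::printf("text-only: projector not loaded\n");
        } else {
            std::printf("projector runs on %s\n", params.mmproj_use_gpu ? "GPU" : "CPU");
        }
        return 0;
    }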

examples/llava/mtmd-cli.cpp

@@ -40,7 +40,8 @@ static void show_additional_info(int /*argc*/, char ** argv) {
         "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
         "  -m and --mmproj are required\n"
         "  -hf user/repo can replace both -m and --mmproj in most cases\n"
-        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n",
+        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+        "  to disable using GPU for mmproj model, add --no-mmproj-offload\n",
         argv[0]
     );
 }
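
With that help text in place, a typical run that keeps the projector on the CPU just appends the new flag. An illustrative invocation (file names are placeholders, and llama-mtmd-cli is assumed as the built binary name):

    llama-mtmd-cli -m model.gguf --mmproj mmproj.gguf --image photo.jpg -p "describe this image" --no-mmproj-offload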
@@ -112,10 +113,10 @@ struct mtmd_cli_context {
     void init_vision_context(common_params & params) {
         const char * clip_path = params.mmproj.path.c_str();
         ctx_vision.reset(mtmd_init_from_file(clip_path, model, mtmd_context_params{
-            /* use_gpu */   true,
+            /* use_gpu */   params.mmproj_use_gpu,
             /* timings */   true,
             /* n_threads */ params.cpuparams.n_threads,
-            /* verbosity */ GGML_LOG_LEVEL_INFO,
+            /* verbosity */ params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO,
         }));
         if (!ctx_vision.get()) {
             LOG_ERR("Failed to load vision model from %s\n", clip_path);
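
The second replaced line derives the GGML log level from the CLI's integer verbosity. The same mapping written as a standalone helper, a hedged sketch with a hypothetical name (the commit itself just inlines the ternary):

    #include "ggml.h"

    // Hypothetical helper, not part of the commit: any verbosity above 0
    // enables debug logging; otherwise stay at the default info level.
    static enum ggml_log_level mtmd_cli_log_level(int verbosity) {
        return verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
    }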