mtmd : Support Pixtral 12B (#13065)

* add pixtral text model (vision is wip) * cgraph ok, just missing 2D RoPE * fix bad rebase * first working version * fix problem with img_break token * support dynamic image size * update docs * update test script
2025-08-16 05:02:58 -04:00 · 2025-04-23 20:21:59 +02:00
parent eb1776b15a
commit ecda2ec4b3
14 changed files with 643 additions and 31 deletions
--- a/examples/llava/tests.sh
+++ b/examples/llava/tests.sh
@@ -13,6 +13,14 @@ mkdir -p $SCRIPT_DIR/output
 PROJ_ROOT="$SCRIPT_DIR/../.."
 cd $PROJ_ROOT

+# Check if the first argument is "big", then run test with big models
+# This is useful if we're running the script on a larger machine, so we can test the big models
+RUN_BIG_TESTS=false
+if [ "${1:-}" = "big" ]; then
+    RUN_BIG_TESTS=true
+    echo "Include BIG models..."
+fi
+
 ###############

 arr_bin=()
@@ -28,6 +36,12 @@ add_test() {
    arr_tmpl+=("$tmpl")
 }

+add_test_big() {
+    if [ "$RUN_BIG_TESTS" = true ]; then
+        add_test "$@"
+    fi
+}
+
 add_test "llama-mtmd-cli"  "ggml-org/SmolVLM-500M-Instruct-GGUF:Q8_0"
 add_test "llama-mtmd-cli"  "ggml-org/SmolVLM2-2.2B-Instruct-GGUF:Q4_K_M"
 add_test "llama-mtmd-cli"  "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF:Q8_0"
@@ -42,6 +56,9 @@ add_test "llama-mtmd-cli"  "openbmb/MiniCPM-V-2_6-gguf:Q2_K"
 add_test "llama-mtmd-cli"  "openbmb/MiniCPM-o-2_6-gguf:Q4_0"
 add_test "llama-qwen2vl-cli"  "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"

+# to test the big models, run: ./tests.sh big
+add_test_big "llama-mtmd-cli" "ggml-org/pixtral-12b-GGUF:Q4_K_M"
+
 # these models always give the wrong answer, not sure why
 # add_test "llama-mtmd-cli"  "ggml-org/SmolVLM-Instruct-GGUF:Q4_K_M"
 # add_test "llama-mtmd-cli"  "ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0"