mtmd : rename llava directory to mtmd (#13311)

* mv llava to mtmd

* change ref everywhere
This commit is contained in:
Xuan-Son Nguyen
2025-05-05 16:02:55 +02:00
committed by GitHub
parent 5215b91e93
commit 9b61acf060
37 changed files with 44 additions and 44 deletions

View File

@ -1394,36 +1394,36 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
libllava.a: tools/llava/llava.cpp \
tools/llava/llava.h \
tools/llava/clip.cpp \
tools/llava/clip.h \
libllava.a: tools/mtmd/llava.cpp \
tools/mtmd/llava.h \
tools/mtmd/clip.cpp \
tools/mtmd/clip.h \
common/stb_image.h \
common/base64.hpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
llama-llava-cli: tools/llava/llava-cli.cpp \
tools/llava/llava.cpp \
tools/llava/llava.h \
tools/llava/clip.cpp \
tools/llava/clip.h \
llama-llava-cli: tools/mtmd/llava-cli.cpp \
tools/mtmd/llava.cpp \
tools/mtmd/llava.h \
tools/mtmd/clip.cpp \
tools/mtmd/clip.h \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
llama-minicpmv-cli: tools/llava/minicpmv-cli.cpp \
tools/llava/llava.cpp \
tools/llava/llava.h \
tools/llava/clip.cpp \
tools/llava/clip.h \
llama-minicpmv-cli: tools/mtmd/minicpmv-cli.cpp \
tools/mtmd/llava.cpp \
tools/mtmd/llava.h \
tools/mtmd/clip.cpp \
tools/mtmd/clip.h \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
llama-qwen2vl-cli: tools/llava/qwen2vl-cli.cpp \
tools/llava/llava.cpp \
tools/llava/llava.h \
tools/llava/clip.cpp \
tools/llava/clip.h \
llama-qwen2vl-cli: tools/mtmd/qwen2vl-cli.cpp \
tools/mtmd/llava.cpp \
tools/mtmd/llava.h \
tools/mtmd/clip.cpp \
tools/mtmd/clip.h \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

View File

@ -2211,14 +2211,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
add_opt(common_arg(
{"--mmproj"}, "FILE",
"path to a multimodal projector file. see tools/llava/README.md",
"path to a multimodal projector file. see tools/mtmd/README.md",
[](common_params & params, const std::string & value) {
params.mmproj.path = value;
}
).set_examples(mmproj_examples));
add_opt(common_arg(
{"--mmproj-url"}, "URL",
"URL to a multimodal projector file. see tools/llava/README.md",
"URL to a multimodal projector file. see tools/mtmd/README.md",
[](common_params & params, const std::string & value) {
params.mmproj.url = value;
}

View File

@ -340,7 +340,7 @@ struct common_params {
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
// multimodal models (see tools/llava)
// multimodal models (see tools/mtmd)
struct common_params_model mmproj;
bool mmproj_use_gpu = true; // use GPU for multimodal model
bool no_mmproj = false; // explicitly disable multimodal model

View File

@ -33,13 +33,13 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
2. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:
```sh
python ./tools/llava/llava_surgery.py -m path/to/MobileVLM-1.7B
python ./tools/mtmd/llava_surgery.py -m path/to/MobileVLM-1.7B
```
3. Use `convert_image_encoder_to_gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF:
```sh
python ./tools/llava/convert_image_encoder_to_gguf.py \
python ./tools/mtmd/convert_image_encoder_to_gguf.py \
-m path/to/clip-vit-large-patch14-336 \
--llava-projector path/to/MobileVLM-1.7B/llava.projector \
--output-dir path/to/MobileVLM-1.7B \
@ -47,7 +47,7 @@ python ./tools/llava/convert_image_encoder_to_gguf.py \
```
```sh
python ./tools/llava/convert_image_encoder_to_gguf.py \
python ./tools/mtmd/convert_image_encoder_to_gguf.py \
-m path/to/clip-vit-large-patch14-336 \
--llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \
--output-dir path/to/MobileVLM-1.7B_V2 \
@ -69,10 +69,10 @@ Now both the LLaMA part and the image encoder is in the `MobileVLM-1.7B` directo
## Android compile and run
### compile
refer to `tools/llava/android/build_64.sh`
refer to `tools/mtmd/android/build_64.sh`
```sh
mkdir tools/llava/android/build_64
cd tools/llava/android/build_64
mkdir tools/mtmd/android/build_64
cd tools/mtmd/android/build_64
../build_64.sh
```
### run on Android

View File

@ -25,13 +25,13 @@ git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/T
2. Use `glmedge-surgery.py` to split the GLMV-EDGE model to LLM and multimodel projector constituents:
```sh
python ./tools/llava/glmedge-surgery.py -m ../model_path
python ./tools/mtmd/glmedge-surgery.py -m ../model_path
```
4. Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF:
```sh
python ./tools/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
python ./tools/mtmd/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
```
5. Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF:

View File

@ -37,19 +37,19 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
2. Install the required Python packages:
```sh
pip install -r tools/llava/requirements.txt
pip install -r tools/mtmd/requirements.txt
```
3. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:
```sh
python ./tools/llava/llava_surgery.py -m ../llava-v1.5-7b
python ./tools/mtmd/llava_surgery.py -m ../llava-v1.5-7b
```
4. Use `convert_image_encoder_to_gguf.py` to convert the LLaVA image encoder to GGUF:
```sh
python ./tools/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
python ./tools/mtmd/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
```
5. Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF:
@ -69,12 +69,12 @@ git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
2) Install the required Python packages:
```sh
pip install -r tools/llava/requirements.txt
pip install -r tools/mtmd/requirements.txt
```
3) Use `llava_surgery_v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
```console
python tools/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
python tools/mtmd/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
```
- you will find a llava.projector and a llava.clip file in your model directory
@ -88,7 +88,7 @@ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.jso
5) Create the visual gguf model:
```console
python ./tools/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
python ./tools/mtmd/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
```
- This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP

View File

@ -29,8 +29,8 @@ cmake --build build --config Release
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) by us)
```bash
python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6
python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-o-2_6
python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
# quantize int4 version

View File

@ -28,8 +28,8 @@ cmake --build build --config Release
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) by us)
```bash
python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5
python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2
python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5
python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2
python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model
# quantize int4 version

View File

@ -28,8 +28,8 @@ cmake --build build --config Release
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) by us)
```bash
python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-V-2_6
python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3
python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-V-2_6
python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3
python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model
# quantize int4 version

View File

@ -1,4 +1,4 @@
-r ../tools/llava/requirements.txt
-r ../tools/mtmd/requirements.txt
-r ../tools/server/bench/requirements.txt
-r ../tools/server/tests/requirements.txt

View File

@ -27,7 +27,7 @@ else()
add_subdirectory(run)
add_subdirectory(tokenize)
add_subdirectory(tts)
add_subdirectory(llava)
add_subdirectory(mtmd)
if (GGML_RPC)
add_subdirectory(rpc)
endif()

View File

Before

Width:  |  Height:  |  Size: 121 KiB

After

Width:  |  Height:  |  Size: 121 KiB