mtmd : rename llava directory to mtmd (#13311)

* mv llava to mtmd

* change ref everywhere
Author: Xuan-Son Nguyen
Date:   2025-05-05 16:02:55 +02:00
Committed by: GitHub
Parent: 5215b91e93
Commit: 9b61acf060

37 changed files with 44 additions and 44 deletions


@@ -1394,36 +1394,36 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-libllava.a: tools/llava/llava.cpp \
-    tools/llava/llava.h \
-    tools/llava/clip.cpp \
-    tools/llava/clip.h \
+libllava.a: tools/mtmd/llava.cpp \
+    tools/mtmd/llava.h \
+    tools/mtmd/clip.cpp \
+    tools/mtmd/clip.h \
     common/stb_image.h \
     common/base64.hpp \
     $(OBJ_ALL)
     $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
-llama-llava-cli: tools/llava/llava-cli.cpp \
-    tools/llava/llava.cpp \
-    tools/llava/llava.h \
-    tools/llava/clip.cpp \
-    tools/llava/clip.h \
+llama-llava-cli: tools/mtmd/llava-cli.cpp \
+    tools/mtmd/llava.cpp \
+    tools/mtmd/llava.h \
+    tools/mtmd/clip.cpp \
+    tools/mtmd/clip.h \
     $(OBJ_ALL)
     $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
-llama-minicpmv-cli: tools/llava/minicpmv-cli.cpp \
-    tools/llava/llava.cpp \
-    tools/llava/llava.h \
-    tools/llava/clip.cpp \
-    tools/llava/clip.h \
+llama-minicpmv-cli: tools/mtmd/minicpmv-cli.cpp \
+    tools/mtmd/llava.cpp \
+    tools/mtmd/llava.h \
+    tools/mtmd/clip.cpp \
+    tools/mtmd/clip.h \
     $(OBJ_ALL)
     $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
-llama-qwen2vl-cli: tools/llava/qwen2vl-cli.cpp \
-    tools/llava/llava.cpp \
-    tools/llava/llava.h \
-    tools/llava/clip.cpp \
-    tools/llava/clip.h \
+llama-qwen2vl-cli: tools/mtmd/qwen2vl-cli.cpp \
+    tools/mtmd/llava.cpp \
+    tools/mtmd/llava.h \
+    tools/mtmd/clip.cpp \
+    tools/mtmd/clip.h \
     $(OBJ_ALL)
     $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
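Only the prerequisite paths change in these rules; the target names stay the same, so existing build invocations keep working. A minimal sketch, assuming the legacy Makefile build is still in use from the repository root:

```sh
# rebuild the multimodal CLI tools after the rename; the target names are unchanged
make llama-llava-cli llama-minicpmv-cli llama-qwen2vl-cli
```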


@@ -2211,14 +2211,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file. see tools/llava/README.md",
+        "path to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
     ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file. see tools/llava/README.md",
+        "URL to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
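Only the README path in the help text changes; the `--mmproj` flag itself (and its `--mmproj-url` counterpart for downloading the projector) is untouched. A hedged usage sketch with placeholder file names, using flag names taken from the option definitions above:

```sh
# placeholder model/projector/image names; --mmproj points at the multimodal projector file
llama-llava-cli -m model.gguf --mmproj mmproj.gguf --image input.jpg -p "describe this image"
```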


@@ -340,7 +340,7 @@ struct common_params {
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;

-    // multimodal models (see tools/llava)
+    // multimodal models (see tools/mtmd)
     struct common_params_model mmproj;
     bool mmproj_use_gpu = true;  // use GPU for multimodal model
     bool no_mmproj = false;      // explicitly disable multimodal model


@@ -33,13 +33,13 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
 2. Use `llava_surgery.py` to split the LLaVA model into LLaMA and multimodal projector constituents:
 ```sh
-python ./tools/llava/llava_surgery.py -m path/to/MobileVLM-1.7B
+python ./tools/mtmd/llava_surgery.py -m path/to/MobileVLM-1.7B
 ```
 3. Use `convert_image_encoder_to_gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF:
 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py \
+python ./tools/mtmd/convert_image_encoder_to_gguf.py \
     -m path/to/clip-vit-large-patch14-336 \
     --llava-projector path/to/MobileVLM-1.7B/llava.projector \
     --output-dir path/to/MobileVLM-1.7B \

@@ -47,7 +47,7 @@ python ./tools/llava/convert_image_encoder_to_gguf.py \
 ```
 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py \
+python ./tools/mtmd/convert_image_encoder_to_gguf.py \
     -m path/to/clip-vit-large-patch14-336 \
     --llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \
     --output-dir path/to/MobileVLM-1.7B_V2 \

@@ -69,10 +69,10 @@ Now both the LLaMA part and the image encoder are in the `MobileVLM-1.7B` directo
 ## Android compile and run
 ### compile
-refer to `tools/llava/android/build_64.sh`
+refer to `tools/mtmd/android/build_64.sh`
 ```sh
-mkdir tools/llava/android/build_64
-cd tools/llava/android/build_64
+mkdir tools/mtmd/android/build_64
+cd tools/mtmd/android/build_64
 ../build_64.sh
 ```
 ### run on Android


@@ -25,13 +25,13 @@ git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/T
 2. Use `glmedge-surgery.py` to split the GLMV-EDGE model into LLM and multimodal projector constituents:
 ```sh
-python ./tools/llava/glmedge-surgery.py -m ../model_path
+python ./tools/mtmd/glmedge-surgery.py -m ../model_path
 ```
 4. Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF:
 ```sh
-python ./tools/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
+python ./tools/mtmd/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
 ```
 5. Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF:
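Step 5 is unaffected by the rename. A hedged sketch of that step, assuming the script is run from the repository root (as in the MiniCPM examples further below) on the same `../model_path` directory, with output path and type left at the script defaults:

```sh
# convert the LLM part of GLMV-EDGE to GGUF
python ./convert_hf_to_gguf.py ../model_path
```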


@@ -37,19 +37,19 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
 2. Install the required Python packages:
 ```sh
-pip install -r tools/llava/requirements.txt
+pip install -r tools/mtmd/requirements.txt
 ```
 3. Use `llava_surgery.py` to split the LLaVA model into LLaMA and multimodal projector constituents:
 ```sh
-python ./tools/llava/llava_surgery.py -m ../llava-v1.5-7b
+python ./tools/mtmd/llava_surgery.py -m ../llava-v1.5-7b
 ```
 4. Use `convert_image_encoder_to_gguf.py` to convert the LLaVA image encoder to GGUF:
 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
+python ./tools/mtmd/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
 ```
 5. Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF:

@@ -69,12 +69,12 @@ git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
 2) Install the required Python packages:
 ```sh
-pip install -r tools/llava/requirements.txt
+pip install -r tools/mtmd/requirements.txt
 ```
 3) Use `llava_surgery_v2.py`, which also supports llava-1.5 variants, in both pytorch and safetensor formats:
 ```console
-python tools/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
+python tools/mtmd/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
 ```
 - you will find a llava.projector and a llava.clip file in your model directory

@@ -88,7 +88,7 @@ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.jso
 5) Create the visual gguf model:
 ```console
-python ./tools/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
+python ./tools/mtmd/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
 ```
 - This is similar to llava-1.5; the difference is that we tell the encoder that we are working with the pure vision model part of CLIP


@@ -29,8 +29,8 @@ cmake --build build --config Release
 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) by us)
 ```bash
-python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6
-python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
+python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-o-2_6
+python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
 python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
 # quantize int4 version


@@ -28,8 +28,8 @@ cmake --build build --config Release
 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) by us)
 ```bash
-python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5
-python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2
+python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5
+python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2
 python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model
 # quantize int4 version


@@ -28,8 +28,8 @@ cmake --build build --config Release
 Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) by us)
 ```bash
-python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-V-2_6
-python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3
+python ./tools/mtmd/minicpmv-surgery.py -m ../MiniCPM-V-2_6
+python ./tools/mtmd/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3
 python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model
 # quantize int4 version
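The trailing `# quantize int4 version` comment in these MiniCPM snippets refers to quantizing the converted model with `llama-quantize`. A sketch with assumed file names (the actual output name of the conversion step may differ):

```sh
# Q4_K_M is one common int4-class quantization type; adjust paths to the files produced above
./build/bin/llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
```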


@@ -1,4 +1,4 @@
--r ../tools/llava/requirements.txt
+-r ../tools/mtmd/requirements.txt
 -r ../tools/server/bench/requirements.txt
 -r ../tools/server/tests/requirements.txt


@@ -27,7 +27,7 @@ else()
     add_subdirectory(run)
     add_subdirectory(tokenize)
     add_subdirectory(tts)
-    add_subdirectory(llava)
+    add_subdirectory(mtmd)
     if (GGML_RPC)
         add_subdirectory(rpc)
     endif()
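With the subdirectory renamed, a CMake build picks the multimodal tools up from `tools/mtmd` automatically. A minimal rebuild sketch, assuming the tool targets named in the Makefile hunk above are also defined by the CMake project:

```sh
# reconfigure and rebuild only the multimodal CLI targets
cmake -B build
cmake --build build --config Release -t llama-llava-cli llama-minicpmv-cli llama-qwen2vl-cli
```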

(binary image file: 121 KiB before and after)