mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
tests : add test-tokenizers-repo (#14017)
This commit is contained in:
@ -42,6 +42,34 @@ function(llama_test target)
|
|||||||
set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
|
set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
# Registers an existing command (for example a script) as a CTest test.
# Nothing is built here: `target` is handed to add_test() as the command to run.
# Optional args:
# - NAME: name of the test (defaults to `target`)
# - LABEL: CTest label attached to the test (defaults to "main")
# - WORKING_DIRECTORY: directory the command is run in (defaults to ".")
# - ARGS: arguments appended to the command line
function(llama_test_cmd target)
    include(CMakeParseArguments)

    # No boolean options; NAME/LABEL/WORKING_DIRECTORY take one value; ARGS takes a list.
    cmake_parse_arguments(LLAMA_TEST "" "NAME;LABEL;WORKING_DIRECTORY" "ARGS" ${ARGN})

    # An explicit NAME overrides the command itself as the test name.
    set(TEST_NAME ${target})
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_NAME ${LLAMA_TEST_NAME})
    endif()

    # Fill in defaults for the keywords the caller left out.
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()

    # COMMAND stays last so that every entry of ARGS is taken as a command argument.
    add_test(
        NAME ${TEST_NAME}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND ${target} ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
|
||||||
|
|
||||||
# Builds and runs a test source file.
|
# Builds and runs a test source file.
|
||||||
# Optional args:
|
# Optional args:
|
||||||
# - NAME: name of the executable & test target (defaults to the source file name without extension)
|
# - NAME: name of the executable & test target (defaults to the source file name without extension)
|
||||||
@ -97,8 +125,14 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE
|
|||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
||||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
||||||
|
|
||||||
# TODO: missing HF tokenizer for this model in convert_hf_to_gguf_update.py, see https://github.com/ggml-org/llama.cpp/pull/13847
|
if (NOT WIN32)
|
||||||
# llama_test(test-tokenizer-0 NAME test-tokenizer-0-nomic-bert-moe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-nomic-bert-moe.gguf)
|
# Runs test-tokenizers-repo.sh, which clones/updates the vocabs repository and
# runs the tokenizer test executable on every vocab that has .inp/.out files.
# The executable is passed explicitly as the script's optional third argument,
# so the script does not fall back to "./test-tokenizer-0" relative to the
# working directory (which is wrong when a multi-config generator places
# binaries in a per-configuration subdirectory).
# NOTE(review): assumes test-tokenizer-0 is an executable target defined earlier
# in this file - confirm before applying.
llama_test_cmd(
    ${CMAKE_CURRENT_SOURCE_DIR}/test-tokenizers-repo.sh
    NAME test-tokenizers-ggml-vocabs
    WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
    ARGS
        https://huggingface.co/ggml-org/vocabs
        ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocabs
        $<TARGET_FILE:test-tokenizer-0>
)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (LLAMA_LLGUIDANCE)
|
if (LLAMA_LLGUIDANCE)
|
||||||
llama_build_and_test(test-grammar-llguidance.cpp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
|
llama_build_and_test(test-grammar-llguidance.cpp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
|
||||||
|
36
tests/test-tokenizers-repo.sh
Executable file
36
tests/test-tokenizers-repo.sh
Executable file
@ -0,0 +1,36 @@
|
|||||||
|
#!/bin/bash
#
# Runs a tokenizer test executable against every vocab in a git repository.
#
# Usage: test-tokenizers-repo.sh <git-repo> <target-folder> [<test-exe>]
#   <git-repo>       repository to clone when <target-folder> is not a checkout yet
#   <target-folder>  local checkout; updated with `git pull` when it already exists
#   <test-exe>       tokenizer test executable (defaults to ./test-tokenizer-0)
#
# Every *.gguf file below <target-folder> that has matching .inp and .out files
# is passed to the test executable; other *.gguf files are reported and skipped.
#
# Exit status: non-zero when the arguments are invalid, the test executable is
# missing, the clone fails, or at least one tokenizer run fails.

if [ $# -lt 2 ]; then
    printf 'Usage: %s <git-repo> <target-folder> [<test-exe>]\n' "$0"
    exit 1
fi

repo=$1
folder=$2
toktest=${3:-./test-tokenizer-0}

if [ ! -x "$toktest" ]; then
    printf 'Test executable "%s" not found!\n' "$toktest"
    exit 1
fi

if [ -d "$folder/.git" ]; then
    # Updating is best effort: an existing checkout is still usable when the pull fails.
    (cd "$folder" && git pull) || printf 'Warning: could not update "%s", using the existing checkout\n' "$folder"
else
    # Without a checkout there is nothing to test, so a failed clone is fatal.
    git clone "$repo" "$folder" || exit 1
fi

# globstar makes ** recurse into subdirectories (requires bash >= 4);
# nullglob makes the loop run zero times when nothing matches.
shopt -s globstar
shopt -s nullglob

tested=0
failed=0
for gguf in "$folder"/**/*.gguf; do
    if [ -f "$gguf.inp" ] && [ -f "$gguf.out" ]; then
        tested=$((tested + 1))
        # Keep going after a failure so every vocab is reported, but remember it.
        if ! "$toktest" "$gguf"; then
            printf 'Tokenizer test failed for "%s"\n' "$gguf"
            failed=1
        fi
    else
        printf 'Found "%s" without matching inp/out files, ignoring...\n' "$gguf"
    fi
done

if [ "$tested" -eq 0 ]; then
    printf 'Warning: no testable .gguf files found in "%s"\n' "$folder"
fi

exit "$failed"
|
||||||
|
|
Reference in New Issue
Block a user