mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
llama : lookup word in vocab before doing BPE merges (#7193)
* fix: llama-3 ignore_merges * test: add test for llama-3 bpe ignore_merges * fix: set ignore_merges only for llama-3 * fix: test-tokenizer-1-bpe --ingore-merges detection * fix: copy to fix fallthrough * fix: change ignore_merges to bool * fix: add ignore merges tests to cmake * llama : alternative merge ignore logic --------- Co-authored-by: Haoxiang Fei <feihaoxiang@idea.edu.cn> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@ -92,7 +92,7 @@ target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
|
||||
install(TARGETS test-tokenizer-1-bpe RUNTIME)
|
||||
|
||||
# TODO: disabled due to slowness
|
||||
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
|
||||
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
|
||||
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
||||
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
||||
|
Reference in New Issue
Block a user