mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
mtmd : fix glm-edge redundant token count (#13139)
* mtmd : fix glm-edge redundant token count * fix chat template * temporary disable GLMEdge test chat tmpl
This commit is contained in:
@ -203,9 +203,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// llava-1.5, llava-1.6, Yi-VL, Yi-34B, granite: don't need to add prefix and suffix
|
// llava-1.5, llava-1.6, Yi-VL, Yi-34B, granite: don't need to add prefix and suffix
|
||||||
// for glm-edge, we don't need to add because the tokens are already in the returned embeddings
|
|
||||||
|
|
||||||
// TODO @ngxson : glm-edge : remove BOI / EOI tokens embeddings, decode them as normal tokens
|
|
||||||
|
|
||||||
std::vector<std::string> parts = string_split_str(prompt_modified, ctx->image_marker);
|
std::vector<std::string> parts = string_split_str(prompt_modified, ctx->image_marker);
|
||||||
output.clear();
|
output.clear();
|
||||||
@ -246,7 +243,7 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (const auto & part : parts) {
|
for (const auto & part : parts) {
|
||||||
//printf("tokenizing part: %s\n", part.c_str());
|
// printf("tokenizing part: %s\n", part.c_str());
|
||||||
bool add_bos = &parts.front() == ∂
|
bool add_bos = &parts.front() == ∂
|
||||||
auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special);
|
auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special);
|
||||||
if (tokens.empty()) {
|
if (tokens.empty()) {
|
||||||
@ -338,11 +335,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
|
|||||||
LOG_DBG("image_tokens->ny = %d\n", image_tokens->ny);
|
LOG_DBG("image_tokens->ny = %d\n", image_tokens->ny);
|
||||||
LOG_DBG("batch_f32 size = %d\n", (int)image_tokens->batch_f32.entries.size());
|
LOG_DBG("batch_f32 size = %d\n", (int)image_tokens->batch_f32.entries.size());
|
||||||
|
|
||||||
if (clip_is_glm(ctx->ctx_clip)) {
|
|
||||||
// glm-edge
|
|
||||||
image_tokens->nx += 2; // add 2 for the begin_of_image and end_of_image token embeddings
|
|
||||||
}
|
|
||||||
|
|
||||||
mtmd_input_chunk chunk{
|
mtmd_input_chunk chunk{
|
||||||
MTMD_INPUT_CHUNK_TYPE_IMAGE,
|
MTMD_INPUT_CHUNK_TYPE_IMAGE,
|
||||||
{},
|
{},
|
||||||
|
@ -447,7 +447,7 @@ int32_t llm_chat_apply_template(
|
|||||||
if (add_ass) {
|
if (add_ass) {
|
||||||
ss << "<|assistant|>";
|
ss << "<|assistant|>";
|
||||||
}
|
}
|
||||||
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
|
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4 || tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
|
||||||
ss << "[gMASK]" << "<sop>";
|
ss << "[gMASK]" << "<sop>";
|
||||||
for (auto message : chat) {
|
for (auto message : chat) {
|
||||||
std::string role(message->role);
|
std::string role(message->role);
|
||||||
@ -456,14 +456,6 @@ int32_t llm_chat_apply_template(
|
|||||||
if (add_ass) {
|
if (add_ass) {
|
||||||
ss << "<|assistant|>";
|
ss << "<|assistant|>";
|
||||||
}
|
}
|
||||||
} else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
|
|
||||||
for (auto message : chat) {
|
|
||||||
std::string role(message->role);
|
|
||||||
ss << "<|" << role << "|>" << "\n" << message->content;
|
|
||||||
}
|
|
||||||
if (add_ass) {
|
|
||||||
ss << "<|assistant|>";
|
|
||||||
}
|
|
||||||
} else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
|
} else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
|
||||||
// MiniCPM-3B-OpenHermes-2.5-v2-GGUF
|
// MiniCPM-3B-OpenHermes-2.5-v2-GGUF
|
||||||
for (auto message : chat) {
|
for (auto message : chat) {
|
||||||
|
@ -187,14 +187,15 @@ int main(void) {
|
|||||||
/* .bos_token= */ "",
|
/* .bos_token= */ "",
|
||||||
/* .eos_token= */ "",
|
/* .eos_token= */ "",
|
||||||
},
|
},
|
||||||
{
|
// TODO @ngxson : GLMEdge produces poor result without `[gMASK]<sop>`, so we're temporarily using GLM4 template for it. We should fix this in the future.
|
||||||
/* .name= */ "GLMEdge",
|
// {
|
||||||
/* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
|
// /* .name= */ "GLMEdge",
|
||||||
/* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
|
// /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
|
||||||
/* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
|
// /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
|
||||||
/* .bos_token= */ "",
|
// /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
|
||||||
/* .eos_token= */ "",
|
// /* .bos_token= */ "",
|
||||||
},
|
// /* .eos_token= */ "",
|
||||||
|
// },
|
||||||
{
|
{
|
||||||
/* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
|
/* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
|
||||||
/* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
|
/* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
|
||||||
|
Reference in New Issue
Block a user