llama : improve sep token handling (#14272)

This commit is contained in:
Sigbjørn Skjæret
2025-06-20 14:04:09 +02:00
committed by GitHub
parent e28c1b93fd
commit 88fc854b4b
15 changed files with 161 additions and 29 deletions

View File

@@ -271,12 +271,20 @@ static llama_tokens format_rerank(const struct llama_vocab * vocab, const llama_
}
result.reserve(doc.size() + query.size() + 4);
result.push_back(llama_vocab_bos(vocab));
if (llama_vocab_get_add_bos(vocab)) {
result.push_back(llama_vocab_bos(vocab));
}
result.insert(result.end(), query.begin(), query.end());
result.push_back(eos_token);
result.push_back(llama_vocab_sep(vocab));
if (llama_vocab_get_add_eos(vocab)) {
result.push_back(eos_token);
}
if (llama_vocab_get_add_sep(vocab)) {
result.push_back(llama_vocab_sep(vocab));
}
result.insert(result.end(), doc.begin(), doc.end());
result.push_back(eos_token);
if (llama_vocab_get_add_eos(vocab)) {
result.push_back(eos_token);
}
return result;
}