mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 20:45:04 +00:00
Fix token count accounting
This commit is contained in:
2
main.cpp
2
main.cpp
@@ -976,6 +976,8 @@ int main(int argc, char ** argv) {
|
|||||||
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
|
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
|
||||||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||||
|
|
||||||
|
remaining_tokens -= line_inp.size();
|
||||||
|
|
||||||
input_noecho = true; // do not echo this again
|
input_noecho = true; // do not echo this again
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user