mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
Fix token count accounting
This commit is contained in:
2
main.cpp
2
main.cpp
@@ -976,6 +976,8 @@ int main(int argc, char ** argv) {
|
||||
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
|
||||
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||
|
||||
remaining_tokens -= line_inp.size();
|
||||
|
||||
input_noecho = true; // do not echo this again
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user