Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-01 13:05:52 +00:00)
ggml : fix rope + llama minor optimizations (#3560)
* Minor fixes and fixed a memory leak
* Use const auto references in range-based loops (C++17)
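The grammar-sampling hunks below all make the same change: for (auto tok : candidates) copy-constructs a llama_grammar_candidate on every iteration, while for (const auto & tok : candidates) binds a read-only reference and copies nothing. A minimal sketch of the difference; the candidate struct here is a hypothetical stand-in, not the real llama.cpp type:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for llama_grammar_candidate.
struct candidate {
    size_t           index;
    const uint32_t * code_points;
};

static size_t count_pending(const std::vector<candidate> & candidates) {
    size_t n = 0;
    // for (auto tok : candidates)         -- copy-constructs a candidate each iteration
    // for (const auto & tok : candidates) -- binds a reference: no copy, no mutation
    for (const auto & tok : candidates) {
        if (*tok.code_points != 0) {
            n++;
        }
    }
    return n;
}

The reference form matters most when the element type owns heap storage; for a small trivially copyable struct the win is modest, which fits the commit's framing of these as minor optimizations.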
@@ -6324,7 +6324,6 @@ struct llm_tokenizer_bpe {
                 llm_symbol sym;
                 size_t char_len = std::min(word.size() - offset, (size_t) ::utf8_len(word[offset]));
                 sym.text = word.c_str() + offset;
-                sym.n = 1;
                 sym.n = char_len;
                 offset += sym.n;
                 sym.prev = index - 1;
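The only change in this hunk is dropping sym.n = 1;, a dead store that was immediately overwritten by sym.n = char_len;. For context, a self-contained sketch of the splitting loop this hunk sits in; the symbol struct and split_utf8 wrapper are illustrative stand-ins, not the llama.cpp types, and utf8_len is the usual leading-byte lookup:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// UTF-8 sequence length from the first byte, by high nibble.
static size_t utf8_len(char src) {
    const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
    uint8_t highbits = static_cast<uint8_t>(src) >> 4;
    return lookup[highbits];
}

// Illustrative stand-in for llm_symbol: a non-owning slice of the word.
struct symbol {
    const char * text;
    size_t       n;
};

// Split a word into UTF-8 codepoint slices, as the tokenizer loop does.
static std::vector<symbol> split_utf8(const std::string & word) {
    std::vector<symbol> symbols;
    size_t offset = 0;
    while (offset < word.size()) {
        symbol sym;
        // clamp so a truncated trailing sequence cannot run past the end
        size_t char_len = std::min(word.size() - offset, utf8_len(word[offset]));
        sym.text = word.c_str() + offset;
        sym.n    = char_len;   // the deleted "sym.n = 1;" was dead: overwritten here
        offset  += sym.n;
        symbols.push_back(sym);
    }
    return symbols;
}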
@@ -7054,7 +7053,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
     std::vector<llama_grammar_candidate> rejects;
 
     if (stack.empty()) {
-        for (auto tok : candidates) {
+        for (const auto & tok : candidates) {
             if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
                 rejects.push_back(tok);
             }
@@ -7065,7 +7064,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
     const llama_grammar_element * stack_pos = stack.back();
 
     std::vector<llama_grammar_candidate> next_candidates;
-    for (auto tok : candidates) {
+    for (const auto & tok : candidates) {
         if (*tok.code_points == 0) {
             // reached end of full codepoints in token, reject iff it ended in a partial sequence
             // that cannot satisfy this position in grammar
@@ -7091,7 +7090,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_stack(
     llama_grammar_advance_stack(rules, stack_after, next_stacks);
 
     auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
-    for (auto tok : next_rejects) {
+    for (const auto & tok : next_rejects) {
         rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
     }
 