mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-15 12:42:40 -04:00
rework, targeting llama-server
This commit is contained in:
@@ -580,6 +580,7 @@ std::string string_from(const struct llama_context * ctx, const std::vector<llam
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
/*
|
||||
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
|
||||
std::stringstream buf;
|
||||
|
||||
@@ -614,6 +615,7 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
*/
|
||||
|
||||
void string_process_escapes(std::string & input) {
|
||||
std::size_t input_len = input.length();
|
||||
@@ -1608,27 +1610,20 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string &, cons
|
||||
// Batch utils
|
||||
//
|
||||
|
||||
void common_batch_clear(struct llama_batch & batch) {
|
||||
batch.n_tokens = 0;
|
||||
void common_batch_clear(struct llama_batch * batch) {
|
||||
llama_batch_clear(batch);
|
||||
}
|
||||
|
||||
void common_batch_add(
|
||||
struct llama_batch & batch,
|
||||
struct llama_batch * batch,
|
||||
llama_token id,
|
||||
llama_pos pos,
|
||||
const std::vector<llama_seq_id> & seq_ids,
|
||||
bool logits) {
|
||||
GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
|
||||
|
||||
batch.token [batch.n_tokens] = id;
|
||||
batch.pos [batch.n_tokens] = pos;
|
||||
batch.n_seq_id[batch.n_tokens] = seq_ids.size();
|
||||
for (size_t i = 0; i < seq_ids.size(); ++i) {
|
||||
batch.seq_id[batch.n_tokens][i] = seq_ids[i];
|
||||
int32_t res = llama_batch_add_text_token(batch, id, pos, seq_ids.data(), seq_ids.size(), logits);
|
||||
if (res == -1) {
|
||||
LOG_ERR("%s: llama_batch size exceeded\n", __func__);
|
||||
}
|
||||
batch.logits [batch.n_tokens] = logits;
|
||||
|
||||
batch.n_tokens++;
|
||||
}
|
||||
|
||||
//
|
||||
|
Reference in New Issue
Block a user