diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 84f9ccab4..9e77fe6d8 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -298,7 +298,7 @@ llama_context::llama_context( cross.v_embd.clear(); - // reserve pp graph first so that buffers are only allocated once + // reserve pp (prompt processing) graph first so that buffers are only allocated once { auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get()); if (!gf) { @@ -309,7 +309,7 @@ llama_context::llama_context( n_nodes_pp = ggml_graph_n_nodes(gf); } - // reserve with tg graph to get the number of splits and nodes + // reserve with tg (token generation) graph to get the number of splits and nodes { auto * gf = graph_reserve(n_seqs, n_seqs, n_seqs, mctx.get()); if (!gf) {