Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 03:55:20 +00:00)
@@ -89,6 +89,13 @@ int main(int argc, char ** argv) {
     common_init();
 
     params.embedding = true;
 
+    // utilize the full context
+    if (params.n_batch < params.n_ctx) {
+        LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);
+        params.n_batch = params.n_ctx;
+    }
+
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
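For context, a self-contained sketch of the behavior this hunk adds: when the configured batch size is smaller than the context size, it is raised to the context size, and the ubatch size is then pinned to the batch size (required for non-causal embedding models). The params_t struct and its default values below are stand-ins for common_params, not llama.cpp code:

#include <cstdio>

struct params_t {
    int n_ctx    = 4096; // hypothetical values for illustration
    int n_batch  = 2048; // deliberately smaller than n_ctx
    int n_ubatch = 512;
};

int main() {
    params_t params;

    // utilize the full context (same logic as the hunk above)
    if (params.n_batch < params.n_ctx) {
        std::printf("%s: setting batch size to %d\n", __func__, params.n_ctx);
        params.n_batch = params.n_ctx;
    }

    // for non-causal models, batch size must be equal to ubatch size
    params.n_ubatch = params.n_batch;

    // prints: n_ctx=4096 n_batch=4096 n_ubatch=4096
    std::printf("n_ctx=%d n_batch=%d n_ubatch=%d\n",
                params.n_ctx, params.n_batch, params.n_ubatch);
    return 0;
}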
@@ -134,7 +141,6 @@ int main(int argc, char ** argv) {
 
     // max batch size
     const uint64_t n_batch = params.n_batch;
-    GGML_ASSERT(params.n_batch >= params.n_ctx);
 
     // tokenize the prompts and trim
    std::vector<std::vector<int32_t>> inputs;
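Why the assert could be dropped: the clamp added in the first hunk guarantees params.n_batch >= params.n_ctx before execution ever reaches this point, so the GGML_ASSERT became redundant. A minimal stand-alone sketch of the invariant, using plain assert and hypothetical values rather than the llama.cpp types:

#include <cassert>

int main() {
    int n_ctx   = 4096; // hypothetical values for illustration
    int n_batch = 2048;

    // the clamp introduced by the first hunk
    if (n_batch < n_ctx) {
        n_batch = n_ctx;
    }

    // the condition the removed GGML_ASSERT used to check;
    // it now holds by construction
    assert(n_batch >= n_ctx);
    return 0;
}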