llama : add --completion-bash option (#11846)
This commit adds a new option, `--completion-bash`, to llama.cpp, which outputs a source-able bash completion script. The motivation for this change is to provide a more user-friendly experience for users of the llama.cpp command-line tools. The completion is currently basic: the same full option list is offered for every llama executable, but this can be refined in the future if needed.

Example usage:

```console
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
$ source ~/.llama-completion.bash
$ ./build/bin/llama-server --m<TAB>
--main-gpu         --min-p            --mirostat         --mirostat-ent     --mirostat-lr
--mlock            --model            --model-url        --multiline-input
```
README.md (+14 lines)

````diff
@@ -521,3 +521,17 @@ If your issue is with model generation quality, then please at least scan the fo
 #### References
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```
````
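As a quick sanity check (not part of the commit), after sourcing the script you can ask bash what it has registered; `type` and `complete -p` are standard bash builtins:

```bash
source ~/.llama-completion.bash
type _llama_completions    # prints the completion function definition
complete -p llama-cli      # prints: complete -F _llama_completions llama-cli
```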
common/arg.cpp (113 lines changed)

```diff
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto& exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
```
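For reference, the script emitted by these `printf` calls looks roughly like the sketch below. The real `opts` string is built from every registered argument, so only a few options are shown here; the `compgen -f -X '!*.gguf'` idiom filters file completions down to names matching `*.gguf` (`-X` removes matches of the pattern, and the leading `!` negates it), with `compgen -d` adding directories so the user can keep navigating:

```bash
_llama_completions() {
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"

    # abbreviated; the generated list contains every option registered in arg.cpp
    opts="--model --grammar-file --main-gpu --mirostat --threads"

    case "$prev" in
        --model)
            # after --model, offer *.gguf files plus directories
            COMPREPLY=( $(compgen -f -X '!*.gguf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        --grammar-file)
            # after --grammar-file, offer *.gbnf files plus directories
            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        *)
            # otherwise complete against the flat option list
            COMPREPLY=( $(compgen -W "${opts}" -- "$cur") )
            return 0
            ;;
    esac
}

complete -F _llama_completions llama-cli
complete -F _llama_completions llama-server
# ...one `complete` line is printed for each executable in the set above
```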
```diff
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
```
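The new branch mirrors the existing `--usage` handling: the script is printed and the process exits before any model is loaded, so generating completions is effectively instant. A quick way to eyeball the output:

```bash
# print the first few lines of the generated script without loading a model
build/bin/llama-cli --completion-bash | head -n 5
```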
```diff
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
```
common/common.h

```diff
@@ -298,6 +298,7 @@ struct common_params {
     bool kl_divergence = false; // compute KL divergence
 
     bool usage         = false; // print usage
+    bool completion    = false; // print source-able completion script
     bool use_color     = false; // use color to distinguish generations and inputs
     bool special       = false; // enable special token output
     bool interactive   = false; // interactive mode
```