From c48f630d1c1942fce08aa7cb18a53ace443cd611 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Thu, 13 Feb 2025 14:46:59 +0100
Subject: [PATCH] llama : add --completion-bash option (#11846)

This commit adds a new option `--completion-bash` to llama.cpp that
outputs a source-able bash completion script.

The motivation for this change is to provide a more user-friendly
experience for users who use the command-line interface of llama.cpp.

The completion is currently basic: all options are displayed for all
llama executables, but this can be improved in the future if needed.

Example usage:
```console
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
$ source ~/.llama-completion.bash
$ ./build/bin/llama-server --m
--main-gpu         --mirostat         --mirostat-lr      --model            --multiline-input
--min-p            --mirostat-ent     --mlock            --model-url
```
---
 README.md       |  14 ++++++
 common/arg.cpp  | 113 +++++++++++++++++++++++++++++++++++++++++++++++
 common/common.h |   1 +
 3 files changed, 128 insertions(+)

diff --git a/README.md b/README.md
index 43290f2c5..ebbd11a02 100644
--- a/README.md
+++ b/README.md
@@ -521,3 +521,17 @@ If your issue is with model generation quality, then please at least scan the fo
 
 #### References
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```

diff --git a/common/arg.cpp b/common/arg.cpp
index cda03cb77..144e93fa9 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
"llama-eval-callback", + "llama-export-lora", + "llama-gbnf-validator", + "llama-gen-docs", + "llama-gguf", + "llama-gguf-hash", + "llama-gguf-split", + "llama-gritlm", + "llama-imatrix", + "llama-infill", + "llama-llava-cli", + "llama-llava-clip-quantize-cli", + "llama-lookahead", + "llama-lookup", + "llama-lookup-create", + "llama-lookup-merge", + "llama-lookup-stats", + "llama-minicpmv-cli", + "llama-parallel", + "llama-passkey", + "llama-perplexity", + "llama-q8dot", + "llama-quantize", + "llama-quantize-stats", + "llama-qwen2vl-cli", + "llama-retrieval", + "llama-run", + "llama-save-load-state", + "llama-server", + "llama-simple", + "llama-simple-chat", + "llama-speculative", + "llama-speculative-simple", + "llama-tokenize", + "llama-tts", + "llama-vdot" + }; + + for (const auto& exe : executables) { + printf("complete -F _llama_completions %s\n", exe.c_str()); + } +} + static std::vector parse_device_list(const std::string & value) { std::vector devices; auto dev_names = string_split(value, ','); @@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e } exit(0); } + if (ctx_arg.params.completion) { + common_params_print_completion(ctx_arg); + exit(0); + } } catch (const std::invalid_argument & ex) { fprintf(stderr, "%s\n", ex.what()); ctx_arg.params = params_org; @@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex exit(0); } )); + add_opt(common_arg( + {"--completion-bash"}, + "print source-able bash completion script for llama.cpp", + [](common_params & params) { + params.completion = true; + } + )); add_opt(common_arg( {"--verbose-prompt"}, string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), diff --git a/common/common.h b/common/common.h index 5c0f46284..98b9a4464 100644 --- a/common/common.h +++ b/common/common.h @@ -298,6 +298,7 @@ struct common_params { bool kl_divergence = false; // compute KL divergence bool usage = false; // print usage + bool completion = false; // print source-able completion script bool use_color = false; // use color to distinguish generations and inputs bool special = false; // enable special token output bool interactive = false; // interactive mode