llama : add --completion-bash option (#11846)
This commit adds a new option, `--completion-bash`, to llama.cpp, which outputs a source-able bash completion script. The motivation for this change is to provide a more user-friendly experience for users of the llama.cpp command-line tools. The completion is currently basic: the same full option list is offered for every llama executable, but this can be refined in the future if needed.

Example usage:

```console
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
$ source ~/.llama-completion.bash
$ ./build/bin/llama-server --m<TAB>
--main-gpu         --min-p            --mirostat         --mirostat-ent     --mirostat-lr
--mlock            --model            --model-url        --multiline-input
```
README.md (+14 lines)

````diff
@@ -521,3 +521,17 @@ If your issue is with model generation quality, then please at least scan the fo
 #### References
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```
````
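As a quick sanity check (not part of the commit), after sourcing the script you can ask bash what it has registered; `type` and `complete -p` are standard bash builtins:

```bash
source ~/.llama-completion.bash
type _llama_completions    # prints the completion function definition
complete -p llama-cli      # prints: complete -F _llama_completions llama-cli
```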
common/arg.cpp (113 lines changed)

```diff
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto& exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
```
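For reference, the script emitted by these `printf` calls looks roughly like the sketch below. The real `opts` string is built from every registered argument, so only a few options are shown here; the `compgen -f -X '!*.gguf'` idiom filters file completions down to names matching `*.gguf` (`-X` removes matches of the pattern, and the leading `!` negates it), with `compgen -d` adding directories so the user can keep navigating:

```bash
_llama_completions() {
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"

    # abbreviated; the generated list contains every option registered in arg.cpp
    opts="--model --grammar-file --main-gpu --mirostat --threads"

    case "$prev" in
        --model)
            # after --model, offer *.gguf files plus directories
            COMPREPLY=( $(compgen -f -X '!*.gguf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        --grammar-file)
            # after --grammar-file, offer *.gbnf files plus directories
            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        *)
            # otherwise complete against the flat option list
            COMPREPLY=( $(compgen -W "${opts}" -- "$cur") )
            return 0
            ;;
    esac
}

complete -F _llama_completions llama-cli
complete -F _llama_completions llama-server
# ...one `complete` line is printed for each executable in the set above
```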
```diff
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
```
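The new branch mirrors the existing `--usage` handling: the script is printed and the process exits before any model is loaded, so generating completions is effectively instant. A quick way to eyeball the output:

```bash
# print the first few lines of the generated script without loading a model
build/bin/llama-cli --completion-bash | head -n 5
```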
```diff
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
```
common/common.h

```diff
@@ -298,6 +298,7 @@ struct common_params {
     bool kl_divergence = false; // compute KL divergence
 
     bool usage         = false; // print usage
+    bool completion    = false; // print source-able completion script
     bool use_color     = false; // use color to distinguish generations and inputs
     bool special       = false; // enable special token output
     bool interactive   = false; // interactive mode
```