mirror of https://github.com/ggml-org/llama.cpp.git
main : add option to save full output to session (#1338)
* main : add option to save full output to session
* split behavior into --session and --prompt-cache
* restore original implementation with new names
* PR comments
* move the check for incompatible parameters to gpt_params_parse
* Fix whitespace

Co-authored-by: DannyDaemonic <DannyDaemonic@gmail.com>
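The last bullet refers to validating flag combinations at argument-parse time rather than later in main. Below is a minimal, self-contained C++ sketch of what such a check could look like, using only the fields this diff touches; the struct subset, the helper name check_prompt_cache_params, and the exact conditions and error messages are illustrative assumptions, not the upstream code.

#include <cstdio>
#include <cstdlib>
#include <string>

// Illustrative subset of gpt_params; field names match the diff below,
// but this is a trimmed-down sketch, not the upstream struct.
struct gpt_params {
    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
    bool prompt_cache_all  = false;     // save user input and generations to prompt cache
    bool interactive       = false;     // interactive mode
    bool interactive_first = false;     // wait for user input immediately
};

// Sketch of an incompatible-parameter check of the kind the commit message
// says was moved into gpt_params_parse: caching the full output only makes
// sense for a non-interactive run with a cache file set. (Hypothetical
// helper; the real conditions and messages may differ.)
static void check_prompt_cache_params(const gpt_params & params) {
    if (params.prompt_cache_all && params.path_prompt_cache.empty()) {
        fprintf(stderr, "error: --prompt-cache-all requires --prompt-cache\n");
        exit(1);
    }
    if (params.prompt_cache_all && (params.interactive || params.interactive_first)) {
        fprintf(stderr, "error: --prompt-cache-all not supported in interactive mode\n");
        exit(1);
    }
}

int main() {
    gpt_params params;
    params.prompt_cache_all  = true;        // as if --prompt-cache-all was passed
    params.path_prompt_cache = "cache.bin"; // as if --prompt-cache cache.bin was passed
    check_prompt_cache_params(params);      // passes: non-interactive, cache file set
    return 0;
}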
@@ -46,9 +46,9 @@ struct gpt_params {
     std::string model = "models/lamma-7B/ggml-model.bin"; // model path
     std::string prompt = "";
-    std::string path_session = "";       // path to file for saving/loading model eval state
+    std::string path_prompt_cache = "";  // path to file for saving/loading prompt eval state
     std::string input_prefix = "";       // string to prefix user inputs with
     std::string input_suffix = "";       // string to suffix user inputs with
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted

     std::string lora_adapter = ""; // lora adapter path
@@ -58,6 +58,7 @@ struct gpt_params {
     bool random_prompt = false; // do not randomize prompt if none provided
     bool use_color = false;    // use color to distinguish generations and inputs
     bool interactive = false;  // interactive mode
+    bool prompt_cache_all = false; // save user input and generations to prompt cache

     bool embedding = false;        // get only sentence embedding
     bool interactive_first = false; // wait for user input immediately