#include "arg.h"
#include "log.h"
#include "common.h"
#include "sampling.h"
#include "llama.h"
#include "ggml.h"
#include "console.h"
#include "chat.h"
#include "mtmd.h"
#include "mtmd-helper.h"

// NOTE(review): the angle-bracket header names were stripped from this file by
// text extraction ("#include #include #include"); the three below are
// reconstructed from context — confirm against the repository before merging.
#include <vector>
#include <limits.h>
#include <cinttypes>

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
// POSIX branch: signal() for SIGINT handling, unistd.h for terminal checks.
// (Header names reconstructed; the #if condition makes these two unambiguous.)
#include <signal.h>
#include <unistd.h>
#elif defined (_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <signal.h>
#endif

// Flags shared between the SIGINT handler and the main loop.
// volatile, because of signal being an interrupt: the handler writes these
// asynchronously while the main loop polls them, so the compiler must not
// cache them in registers. (NOTE(review): volatile alone is not a thread-sync
// primitive; for a signal handler in the same thread this is the established
// C idiom — consider sig_atomic_t/atomic_bool if handlers run concurrently.)
static volatile bool g_is_generating = false;
static volatile bool g_is_interrupted = false;

/**
 * Please note that this is NOT a production-ready stuff.
 * It is a playground for trying multimodal support in llama.cpp.
 * For contributors: please keep this code simple and easy to understand.
 */

// Prints CLI usage for the multimodal tool. NOTE(review): the extraction also
// stripped the <model>/<mmproj>/<image>/<audio> placeholders from the usage
// string below; the fragment is additionally cut off mid-string at this chunk
// boundary and continues past this view, so it is reproduced verbatim up to
// the cut point rather than guessed at.
static void show_additional_info(int /*argc*/, char ** argv) { LOG( "Experimental CLI for multimodal\n\n" "Usage: %s [options] -m --mmproj --image --audio