Adding UTF-8 support to llama.cpp (#12111)

For emojis, non-alpha characters, etc.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
This commit is contained in:
Eric Curtin
2025-03-03 12:44:56 +00:00
committed by GitHub
parent 7b69003af7
commit c950a1f692
2 changed files with 917 additions and 263 deletions

File diff suppressed because it is too large Load Diff

View File

@ -47,27 +47,27 @@ extern "C" {
#include <stddef.h> /* For size_t. */ #include <stddef.h> /* For size_t. */
#include <stdlib.h> #include <stdlib.h>
extern const char *linenoiseEditMore; extern const char * linenoiseEditMore;
/* The linenoiseState structure represents the state during line editing. /* The linenoiseState structure represents the state during line editing.
* We pass this state to functions implementing specific editing * We pass this state to functions implementing specific editing
* functionalities. */ * functionalities. */
struct linenoiseState { struct linenoiseState {
int in_completion; /* The user pressed TAB and we are now in completion int in_completion; /* The user pressed TAB and we are now in completion
* mode, so input is handled by completeLine(). */ * mode, so input is handled by completeLine(). */
size_t completion_idx; /* Index of next completion to propose. */ size_t completion_idx; /* Index of next completion to propose. */
int ifd; /* Terminal stdin file descriptor. */ int ifd; /* Terminal stdin file descriptor. */
int ofd; /* Terminal stdout file descriptor. */ int ofd; /* Terminal stdout file descriptor. */
char *buf; /* Edited line buffer. */ char * buf; /* Edited line buffer. */
size_t buflen; /* Edited line buffer size. */ size_t buflen; /* Edited line buffer size. */
const char *prompt; /* Prompt to display. */ const char * prompt; /* Prompt to display. */
size_t plen; /* Prompt length. */ size_t plen; /* Prompt length. */
size_t pos; /* Current cursor position. */ size_t pos; /* Current cursor position. */
size_t oldpos; /* Previous refresh cursor position. */ size_t oldcolpos; /* Previous refresh cursor column position. */
size_t len; /* Current edited line length. */ size_t len; /* Current edited line length. */
size_t cols; /* Number of columns in terminal. */ size_t cols; /* Number of columns in terminal. */
size_t oldrows; /* Rows used by last refrehsed line (multiline mode) */ size_t oldrows; /* Rows used by last refreshed line (multiline mode) */
int history_index; /* The history index we are currently editing. */ int history_index; /* The history index we are currently editing. */
}; };
struct linenoiseCompletions { struct linenoiseCompletions {
@ -89,19 +89,20 @@ struct linenoiseCompletions {
}; };
/* Non blocking API. */ /* Non blocking API. */
int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt); int linenoiseEditStart(struct linenoiseState * l, int stdin_fd, int stdout_fd, char * buf, size_t buflen,
const char *linenoiseEditFeed(struct linenoiseState *l); const char * prompt);
void linenoiseEditStop(struct linenoiseState *l); const char * linenoiseEditFeed(struct linenoiseState * l);
void linenoiseHide(struct linenoiseState *l); void linenoiseEditStop(struct linenoiseState * l);
void linenoiseShow(struct linenoiseState *l); void linenoiseHide(struct linenoiseState * l);
void linenoiseShow(struct linenoiseState * l);
/* Blocking API. */ /* Blocking API. */
const char *linenoise(const char *prompt); const char * linenoise(const char * prompt);
void linenoiseFree(void *ptr); void linenoiseFree(void * ptr);
/* Completion API. */ /* Completion API. */
typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *); typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *);
typedef const char*(linenoiseHintsCallback)(const char *, int *color, int *bold); typedef const char *(linenoiseHintsCallback) (const char *, int * color, int * bold);
typedef void(linenoiseFreeHintsCallback)(const char *); typedef void(linenoiseFreeHintsCallback)(const char *);
void linenoiseSetCompletionCallback(linenoiseCompletionCallback *); void linenoiseSetCompletionCallback(linenoiseCompletionCallback *);
void linenoiseSetHintsCallback(linenoiseHintsCallback *); void linenoiseSetHintsCallback(linenoiseHintsCallback *);
@ -109,10 +110,10 @@ void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
void linenoiseAddCompletion(linenoiseCompletions *, const char *); void linenoiseAddCompletion(linenoiseCompletions *, const char *);
/* History API. */ /* History API. */
int linenoiseHistoryAdd(const char *line); int linenoiseHistoryAdd(const char * line);
int linenoiseHistorySetMaxLen(int len); int linenoiseHistorySetMaxLen(int len);
int linenoiseHistorySave(const char *filename); int linenoiseHistorySave(const char * filename);
int linenoiseHistoryLoad(const char *filename); int linenoiseHistoryLoad(const char * filename);
/* Other utilities. */ /* Other utilities. */
void linenoiseClearScreen(void); void linenoiseClearScreen(void);
@ -121,6 +122,14 @@ void linenoisePrintKeyCodes(void);
void linenoiseMaskModeEnable(void); void linenoiseMaskModeEnable(void);
void linenoiseMaskModeDisable(void); void linenoiseMaskModeDisable(void);
/* Encoding functions. */
typedef size_t(linenoisePrevCharLen)(const char * buf, size_t buf_len, size_t pos, size_t * col_len);
typedef size_t(linenoiseNextCharLen)(const char * buf, size_t buf_len, size_t pos, size_t * col_len);
typedef size_t(linenoiseReadCode)(int fd, char * buf, size_t buf_len, int * c);
void linenoiseSetEncodingFunctions(linenoisePrevCharLen * prevCharLenFunc, linenoiseNextCharLen * nextCharLenFunc,
linenoiseReadCode * readCodeFunc);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif