mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-18 05:56:00 -04:00
server : add SWA checkpoints (#15293)
* server : add SWA checkpoints ggml-ci * cont : server clean-up * server : handle state restore fails * llama : add extended llama_state_seq_ API * server : do not make checkpoints if --swa-full ggml-ci * llama : remove flags value for NONE * server : configure number of SWA checkpoints with CLI arg ggml-ci * args : fix scope of new argument
This commit is contained in:
@@ -870,6 +870,29 @@ extern "C" {
|
||||
size_t n_token_capacity,
|
||||
size_t * n_token_count_out);
|
||||
|
||||
#define LLAMA_STATE_SEQ_FLAGS_SWA_ONLY 1
|
||||
|
||||
typedef uint32_t llama_state_seq_flags;
|
||||
|
||||
LLAMA_API size_t llama_state_seq_get_size_ext(
|
||||
struct llama_context * ctx,
|
||||
llama_seq_id seq_id,
|
||||
llama_state_seq_flags flags);
|
||||
|
||||
LLAMA_API size_t llama_state_seq_get_data_ext(
|
||||
struct llama_context * ctx,
|
||||
uint8_t * dst,
|
||||
size_t size,
|
||||
llama_seq_id seq_id,
|
||||
llama_state_seq_flags flags);
|
||||
|
||||
LLAMA_API size_t llama_state_seq_set_data_ext(
|
||||
struct llama_context * ctx,
|
||||
const uint8_t * src,
|
||||
size_t size,
|
||||
llama_seq_id dest_seq_id,
|
||||
llama_state_seq_flags flags);
|
||||
|
||||
//
|
||||
// Decoding
|
||||
//
|
||||
|
Reference in New Issue
Block a user