mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-19 14:31:06 -04:00
llama : streamline embeddings from "non-embedding" models (#8087)
This commit is contained in:
@@ -99,6 +99,7 @@ struct gpt_params {
|
||||
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
||||
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
|
||||
|
||||
// // sampling parameters
|
||||
struct llama_sampling_params sparams;
|
||||
|
Reference in New Issue
Block a user