Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-28 12:25:03 +00:00)
context : allow cache-less context for embeddings (#13108)
* context : allow cache-less context for embeddings (ggml-ci)
* context : enable reranking with encode() (ggml-ci)
* context : encode() clears embd_seq (ggml-ci)
* examples : use llama_encode() when appropriate (ggml-ci)
* models : nomic bert moe does not require KV cache
* llama : update comments for llama_decode/llama_encode (ggml-ci)
* context : update warning log [no ci]
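As a quick illustration of what the commit enables (this is not code from the PR itself): the sketch below runs an encoder-only embedding model through llama_encode() on a cache-less context and reads back a pooled embedding. The model path is hypothetical, and the exact C API names used here (llama_model_load_from_file, llama_init_from_model, llama_model_n_embd, ...) have shifted between llama.cpp releases, so treat this as a sketch against a roughly contemporary llama.h.

    // sketch: sequence embedding via llama_encode() on a cache-less context
    #include "llama.h"
    #include <stdio.h>

    int main(void) {
        llama_backend_init();

        // hypothetical model file; substitute any GGUF embedding model
        struct llama_model_params mparams = llama_model_default_params();
        struct llama_model * model = llama_model_load_from_file("nomic-embed.gguf", mparams);
        if (!model) { return 1; }

        struct llama_context_params cparams = llama_context_default_params();
        cparams.embeddings   = true;                    // request embedding output
        cparams.pooling_type = LLAMA_POOLING_TYPE_MEAN; // sequence-level pooling

        struct llama_context * ctx = llama_init_from_model(model, cparams);

        // tokenize the input (fixed-size buffer is a simplification for the sketch)
        const struct llama_vocab * vocab = llama_model_get_vocab(model);
        llama_token tokens[512];
        const int n = llama_tokenize(vocab, "hello world", 11, tokens, 512, true, false);
        if (n < 0) { return 1; }

        // encoder-only models go through llama_encode(), not llama_decode()
        struct llama_batch batch = llama_batch_get_one(tokens, n);
        if (llama_encode(ctx, batch) != 0) { return 1; }

        // pooled embedding for sequence 0
        const float * embd = llama_get_embeddings_seq(ctx, 0);
        printf("first dim: %f (n_embd = %d)\n",
               embd ? embd[0] : 0.0f, llama_model_n_embd(model));

        llama_free(ctx);
        llama_model_free(model);
        llama_backend_free();
        return 0;
    }

The core of the change is the new BERT-family cases in llama_model::create_memory():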
@@ -12852,6 +12852,13 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
     llama_memory_i * res;

     switch (arch) {
+        case LLM_ARCH_BERT:
+        case LLM_ARCH_JINA_BERT_V2:
+        case LLM_ARCH_NOMIC_BERT:
+        case LLM_ARCH_NOMIC_BERT_MOE:
+            {
+                res = nullptr;
+            } break;
         case LLM_ARCH_MAMBA:
         case LLM_ARCH_RWKV6:
         case LLM_ARCH_RWKV6QWEN2:
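Returning nullptr from create_memory() is what makes the context cache-less: for these encoder-only BERT-family architectures no KV-cache memory module is allocated, and inference goes through llama_encode() instead of llama_decode(), per the commit message above.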