mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-26 19:23:37 -04:00
llama : avoid redundant state copy for Mamba 1 and 2
This commit is contained in:
@ -1833,7 +1833,8 @@ extern "C" {
|
||||
struct ggml_tensor * A,
|
||||
struct ggml_tensor * B,
|
||||
struct ggml_tensor * C,
|
||||
struct ggml_tensor * D);
|
||||
struct ggml_tensor * D,
|
||||
struct ggml_tensor * ids);
|
||||
|
||||
// partition into non-overlapping windows with padding if needed
|
||||
// example:
|
||||
|
Reference in New Issue
Block a user