mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 11:45:21 +00:00
CUDA: noncont MMVQ + batched bs1 MUL_MAT_ID (#13014)
* CUDA: noncont MMVQ + batched bs1 MUL_MAT_ID
* fix logic for RoPE support, CUDA graphs
This commit is contained in:
@@ -2071,7 +2071,7 @@ struct test_mul_mat_id : public test_case {
|
||||
const ggml_type type_b;
|
||||
const int n_mats;
|
||||
const int n_used;
|
||||
const bool b; // brodcast b matrix
|
||||
const bool b; // broadcast b matrix
|
||||
const int64_t m;
|
||||
const int64_t n;
|
||||
const int64_t k;
|
||||
|
Reference in New Issue
Block a user