mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-18 05:56:00 -04:00
CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ * fix undefined memory reads for small matrices
This commit is contained in:
@@ -652,8 +652,8 @@ static int get_mmq_x_max_host(const int cc) {
|
||||
}
|
||||
|
||||
// Round rows to this value for --split-mode row:
|
||||
static int get_mmq_y_host(const int cc, const int mmq_x) {
|
||||
return cc >= CC_VOLTA && mmq_x >= 32 ? 128 : 64;
|
||||
static int get_mmq_y_host(const int cc) {
|
||||
return cc >= CC_VOLTA ? 128 : 64;
|
||||
}
|
||||
|
||||
//////////////////////
|
||||
|
Reference in New Issue
Block a user