cuda : support Falcon-H1 state size for SSM_SCAN (#14602)

This commit is contained in:
compilade
2025-07-09 23:54:38 -04:00
committed by GitHub
parent cb9178f885
commit a57d1bcb3c
3 changed files with 16 additions and 4 deletions

View File

@@ -3335,8 +3335,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_SSM_SCAN: {
if (op->src[3]->ne[0] == 1) {
// Mamba2
// (kernel only supports d_state == 128 && d_head % 16 == 0)
return op->src[0]->ne[0] == 128 && op->src[0]->ne[1] % 16 == 0;
// (kernel only supports (d_state == 128 || d_state == 256) && d_head % 16 == 0)
return (op->src[0]->ne[0] == 128 || op->src[0]->ne[0] == 256) && op->src[0]->ne[1] % 16 == 0;
} else {
// Mamba
// (kernel only supports d_state == 16, d_head == 1, n_head % 128 == 0, n_group == 1)