ggml : fix FA mask dim 2 and 3 (#14505)
* ggml : fix FA mask dim 2 and 3 (ggml-ci)
* backends : remove support for batched FA in CUDA and Vulkan (ggml-ci)
* vulkan : disable FA for mask->ne[2] != 1
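The Vulkan change amounts to an extra capability guard: FA ops whose KQ mask has more than one row in dim 2 are reported as unsupported. A minimal sketch of such a guard, assuming the mask is src[3] of the FA op; the helper name and placement are illustrative, not from the commit:

    #include <stddef.h>
    #include "ggml.h"

    // hypothetical guard mirroring the "disable FA for mask->ne[2] != 1" bullet:
    // reject flash-attention ops whose optional KQ mask is not flat in dim 2
    static bool fa_mask_supported(const struct ggml_tensor * op) {
        const struct ggml_tensor * mask = op->src[3]; // src[3] holds the KQ mask
        return mask == NULL || mask->ne[2] == 1;
    }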
@@ -3637,7 +3637,7 @@ struct test_flash_attn_ext : public test_case {
 
         ggml_tensor * m = nullptr;
 
         if (mask) {
-            m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), nr23[1], 1);
+            m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), nr23[0], nr23[1]);
             ggml_set_name(m, "m");
         }
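Before the fix, the test built the mask as [kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), nr23[1], 1], i.e. dim 2 carried the dim-3 repeat count and dim 3 was hardcoded to 1; the corrected shape lines up the mask's dims 2 and 3 with the repeat factors of Q. A minimal sketch of an FA call built this way, assuming Q/K/V shapes like those in the test (the helper and its F32/F16 type choices are illustrative):

    #include <math.h>
    #include "ggml.h"

    // build a flash-attention op whose mask carries explicit dims 2 and 3
    static struct ggml_tensor * build_fa_with_mask(struct ggml_context * ctx,
            int64_t hsk, int64_t hsv, int64_t nh,  // head sizes and head count
            int64_t nr2, int64_t nr3,              // repeat factors in dims 2 and 3
            int64_t nb,  int64_t kv) {             // batch rows and KV length
        struct ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, hsk, nb, nh*nr2, nr3);
        struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, hsk, kv, nh, nr3);
        struct ggml_tensor * v = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, hsv, kv, nh, nr3);
        // the fix: mask ne[2]/ne[3] follow the repeat factors instead of {nr3, 1}
        struct ggml_tensor * m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv,
                GGML_PAD(nb, GGML_KQ_MASK_PAD), nr2, nr3);
        return ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf((float) hsk), 0.0f, 0.0f);
    }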
@@ -4751,7 +4751,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                 test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, {1, 1}, scale, max_bias));
 
                 if (ne0 <= 32 && ne1 <= 32) {
-                    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, {3, 1}, scale, max_bias));
+                    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 3}, mask, m_prec, {3, 1}, scale, max_bias));
                     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, {2, 3}, scale, max_bias));
                 }
             }
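The soft_max hunk adds coverage for an input with a non-trivial dim 3 ({ne0, ne1, 1, 3}) combined with mask broadcast factors in dims 2 and 3 (the {3, 1} and {2, 3} arguments). A minimal sketch of what such a case exercises, assuming ggml_soft_max_ext broadcasts a mask that is smaller in dims 2 and 3 across the input (the concrete shapes are illustrative, not from the test):

    #include "ggml.h"

    // soft_max over a 4-D input with the mask repeated 3x along dim 2
    static struct ggml_tensor * soft_max_bcast_mask(struct ggml_context * ctx) {
        struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 3, 2);
        struct ggml_tensor * m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 64, 64, 1, 2);
        return ggml_soft_max_ext(ctx, a, m, 1.0f, 0.0f); // scale = 1, no ALiBi bias
    }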