graph : make FA compatible with MLA + add initial Metal kernels (#12953)

* graph : make mla compatible with FA

* metal : add exp FA kernels for DeepSeek models

ggml-ci

* llama : minor naming updates

ggml-ci

* ggml : disable FA for DS head sizes

* tests : add FA tests for MLA shapes

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-04-17 18:16:36 +03:00
committed by GitHub
parent 207c22ec2d
commit 2f74c354c0
8 changed files with 117 additions and 26 deletions

View File

@ -4428,10 +4428,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_timestep_embedding());
test_cases.emplace_back(new test_leaky_relu());
for (int hsk : { 64, 80, 128, 192, 256, }) {
for (int hsv : { 64, 80, 128, 192, 256, }) {
if (hsk != 192 && hsk != hsv) continue;
for (int hsk : { 64, 80, 128, 192, 256, 576 }) {
for (int hsv : { 64, 80, 128, 192, 256, 512 }) {
if (hsk != 192 && hsk != 576 && hsk != hsv) continue;
if (hsk == 192 && (hsv != 128 && hsv != 192)) continue;
if (hsk == 576 && hsv != 512) continue; // DeepSeek MLA
for (bool mask : { true, false } ) {
for (float max_bias : { 0.0f, 8.0f }) {