CUDA: generalize FP16 fattn vec kernel (#7061)

* CUDA: generalize FP16 fattn vec kernel

* disable unsupported head sizes for AMD in test

* try AMD fix

* fix batch size 2-8

* partially revert changes
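The squash-commit list above is terse, so as background: "generalizing" a FP16 flash-attention vec kernel in this way typically means templating the kernel on the head size so the inner loop unrolls at compile time, then selecting the matching instantiation with a runtime switch. The following is a minimal sketch of that dispatch pattern only, not the actual llama.cpp kernel; the names (qk_scores_f16, launch_qk_scores) and the list of instantiated head sizes are illustrative assumptions.

// Minimal sketch of the head-size dispatch pattern (hypothetical names,
// NOT the llama.cpp kernel): the kernel is templated on the head size D
// so the reduction loop fully unrolls, and a host-side switch picks the
// matching instantiation at runtime.
#include <cuda_fp16.h>
#include <cstdio>

template <int D> // head size, fixed at compile time
__global__ void qk_scores_f16(const half  * __restrict__ Q,      // [D]
                              const half  * __restrict__ K,      // [n_kv, D]
                              float       * __restrict__ scores, // [n_kv]
                              const int n_kv) {
    const int j = blockIdx.x*blockDim.x + threadIdx.x; // one KV row per thread
    if (j >= n_kv) {
        return;
    }
    float sum = 0.0f;
#pragma unroll
    for (int d = 0; d < D; ++d) {
        sum += __half2float(Q[d]) * __half2float(K[j*D + d]);
    }
    scores[j] = sum;
}

// Runtime dispatch over the head sizes the template was instantiated for.
// The size list (64/128/256) is an assumption for illustration.
static void launch_qk_scores(const half * Q, const half * K, float * scores,
                             const int n_kv, const int head_size, cudaStream_t stream) {
    const dim3 block(128);
    const dim3 grid((n_kv + block.x - 1)/block.x);
    switch (head_size) {
        case  64: qk_scores_f16< 64><<<grid, block, 0, stream>>>(Q, K, scores, n_kv); break;
        case 128: qk_scores_f16<128><<<grid, block, 0, stream>>>(Q, K, scores, n_kv); break;
        case 256: qk_scores_f16<256><<<grid, block, 0, stream>>>(Q, K, scores, n_kv); break;
        default:  fprintf(stderr, "unsupported head size: %d\n", head_size); break;
    }
}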
Author: Johannes Gäßler
Date: 2024-05-09 14:32:02 +02:00
Committed by: GitHub
Parent: f31ec120bc
Commit: a743d76a01
4 changed files with 374 additions and 220 deletions

@@ -15519,13 +15519,6 @@ struct llama_context * llama_new_context_with_model(
         cparams.flash_attn = false;
     }
 
-#ifdef GGML_USE_HIPBLAS
-    if (cparams.flash_attn) {
-        LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
-        cparams.flash_attn = false;
-    }
-#endif
-
     if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
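The hunk above deletes the build-time guard that forced flash_attn off for every HIPBLAS build: with the vec kernel generalized, AMD builds can now run flash attention, and per the commit message only the still-unsupported head sizes are skipped in the test. A hedged sketch of what such per-case gating could look like follows; the helper name and the size list are assumptions, not taken from this diff.

// Hypothetical test-side gate (name and sizes are assumptions): rather than
// disabling flash_attn wholesale on HIP builds, individual head sizes the
// generalized vec kernel does not yet cover on AMD are skipped per test case.
static bool fattn_case_supported(const int head_size, const bool is_hip_build) {
    if (is_hip_build) {
        return head_size == 64 || head_size == 128; // assumed AMD subset
    }
    return true; // NVIDIA path: all tested head sizes assumed supported
}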