CUDA: generalize FP16 fattn vec kernel (#7061)
* CUDA: generalize FP16 fattn vec kernel
* disable unsupported head sizes for AMD in test
* try AMD fix
* fix batch size 2-8
* partially revert changes
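Loosely sketched below is the general pattern such a head-size generalization tends to follow in CUDA code: a single kernel template parameterized on the head size, plus a host-side switch that picks the instantiation at runtime. This is an illustrative assumption only; the kernel and launcher names (flash_attn_vec_f16, launch_flash_attn_vec_f16) are hypothetical and not taken from the llama.cpp sources.

// Hypothetical sketch (not the actual llama.cpp sources): one FP16
// flash-attention vector kernel templated on the head size, with a
// host-side launcher that selects the instantiation at runtime.
template <int head_size>
__global__ void flash_attn_vec_f16(/* Q, K, V, mask, dst, ... */) {
    // head_size is a compile-time constant here, so per-head loops can be
    // fully unrolled for each supported size.
}

static void launch_flash_attn_vec_f16(int head_size /*, launch params ... */) {
    switch (head_size) {
        case  64: /* flash_attn_vec_f16< 64><<<grid, block>>>(...); */ break;
        case  80: /* flash_attn_vec_f16< 80><<<grid, block>>>(...); */ break;
        case 128: /* flash_attn_vec_f16<128><<<grid, block>>>(...); */ break;
        case 256: /* flash_attn_vec_f16<256><<<grid, block>>>(...); */ break;
        default:  /* head size not supported by the vec kernel */      break;
    }
}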
@@ -2175,7 +2175,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_timestep_embedding());
     test_cases.emplace_back(new test_leaky_relu());
 
+#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
+    for (int hs : { 64, 128, }) { // other head sizes not implemented
+#else
     for (int hs : { 64, 80, 128, 256, }) {
+#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
         for (int nh : { 32, }) {
             for (int kv : { 512, 1024, }) {
                 for (int nb : { 1, 2, 4, 8, }) {
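For orientation, here is a small standalone sketch (not part of the commit) that enumerates the same (hs, nh, kv, nb) combinations as the non-AMD branch of the hunk above and counts them; the HIP/AMD branch drops head sizes 80 and 256, halving the count from 32 to 16.

#include <cstdio>
#include <vector>

int main() {
    // Same value sets as the non-HIP branch of the test loop above.
    std::vector<int> head_sizes  = { 64, 80, 128, 256 };
    std::vector<int> num_heads   = { 32 };
    std::vector<int> kv_lengths  = { 512, 1024 };
    std::vector<int> batch_sizes = { 1, 2, 4, 8 };

    int count = 0;
    for (int hs : head_sizes)
        for (int nh : num_heads)
            for (int kv : kv_lengths)
                for (int nb : batch_sizes) {
                    std::printf("hs=%3d nh=%2d kv=%4d nb=%d\n", hs, nh, kv, nb);
                    ++count;
                }
    std::printf("total: %d configurations\n", count); // 4*1*2*4 = 32
    return 0;
}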