mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
vulkan: fix mul_mat_vec failure in backend tests (#12529)
The out-of-bounds (OOB) calculation could be wrong if the last iteration fell inside one of the unrolled loops. Adjust the unrolling counts to avoid this. Add a couple of new backend tests that hit this failure on NVIDIA GPUs.
This commit is contained in:
@ -105,6 +105,16 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
|
|||||||
int unroll_count = 4;
|
int unroll_count = 4;
|
||||||
uint unrolled_iters = num_iters & ~(unroll_count - 1);
|
uint unrolled_iters = num_iters & ~(unroll_count - 1);
|
||||||
|
|
||||||
|
#if K_PER_ITER == 2
|
||||||
|
// If the K dimension is odd, we need lastiter==true on the last iteration
|
||||||
|
// so OOB is computed correctly. Skip some unrolling to make that happen.
|
||||||
|
if ((p.ncols & 1) != 0 &&
|
||||||
|
unrolled_iters == num_iters &&
|
||||||
|
unrolled_iters > 0) {
|
||||||
|
unrolled_iters -= unroll_count;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
uint i = 0;
|
uint i = 0;
|
||||||
while (i < unrolled_iters) {
|
while (i < unrolled_iters) {
|
||||||
// Manually partially unroll the loop
|
// Manually partially unroll the loop
|
||||||
@ -113,8 +123,18 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unroll_count = 2;
|
unroll_count = 2;
|
||||||
unrolled_iters = num_iters & ~(unroll_count - 1);
|
unrolled_iters = num_iters & ~(unroll_count - 1);
|
||||||
|
|
||||||
|
#if K_PER_ITER == 2
|
||||||
|
if ((p.ncols & 1) != 0 &&
|
||||||
|
unrolled_iters == num_iters &&
|
||||||
|
unrolled_iters > 0) {
|
||||||
|
unrolled_iters -= unroll_count;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
while (i < unrolled_iters) {
|
while (i < unrolled_iters) {
|
||||||
// Manually partially unroll the loop
|
// Manually partially unroll the loop
|
||||||
[[unroll]] for (uint k = 0; k < unroll_count; ++k) {
|
[[unroll]] for (uint k = 0; k < unroll_count; ++k) {
|
||||||
|
@ -4204,6 +4204,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1}, {4, 1}));
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1}, {4, 1}));
|
||||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
|
||||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
|
||||||
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 193, {1, 1}, {4, 1}, {0, 2, 1, 3}));
|
||||||
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 67, {1, 1}, {4, 1}, {0, 2, 1, 3}));
|
||||||
|
|
||||||
for (auto bs : {1,2,4,8}) {
|
for (auto bs : {1,2,4,8}) {
|
||||||
for (auto nr : {1,4}) {
|
for (auto nr : {1,4}) {
|
||||||
|
Reference in New Issue
Block a user