sync : ggml (#13268)

* vulkan : kernels for depthwise 2D convolution (CONV_2D_DW) (ggml/1204)

* vulkan : add kernels for depthwise 2d convolution (OP_CONV_2D_DW)

* review: remove src_x/y < 0 checks; add performance tests

* sync : ggml

ggml-ci

* vulkan : fix lint (#0)

---------

Co-authored-by: Acly <aclysia@gmail.com>
This commit is contained in:
Georgi Gerganov
2025-05-02 20:54:30 +03:00
committed by GitHub
parent a75cb30dc9
commit b34443923c
5 changed files with 225 additions and 1 deletions

View File

@ -2765,6 +2765,48 @@ struct test_im2col : public test_case {
}
};
// GGML_OP_CONV_2D_DW
// Test case that builds a graph containing a single ggml_conv_2d_dw_direct node.
// The input and kernel are F32 tensors with the configured shapes; the same
// stride / padding / dilation value is passed for both of the corresponding
// arguments of the convolution. When `cwhn` is set, the operands are first
// re-laid-out in memory (see build_graph) while keeping their logical shape.
struct test_conv_2d_dw : public test_case {
    const std::array<int64_t, 4> ne_input;
    const std::array<int64_t, 4> ne_kernel;
    const int stride;
    const int padding;
    const int dilation;
    const bool cwhn;

    // Parameter string reported for this test case.
    std::string vars() override {
        return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
    }

    test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
                    std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
                    int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
        : ne_input(ne_input),
          ne_kernel(ne_kernel),
          stride(stride),
          padding(padding),
          dilation(dilation),
          cwhn(cwhn) {}

    // Creates the two operand tensors in `ctx`, optionally converts their
    // memory layout, and returns the convolution result tensor.
    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * src = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
        ggml_set_name(src, "input");

        ggml_tensor * knl = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
        ggml_set_name(knl, "kernel");

        if (cwhn) {
            // Make the channel dimension the most contiguous one in memory (CWHN)
            // by materializing a permuted copy, then permute the view back so the
            // reported NE is the same as that of the original tensor.
            ggml_tensor * src_permuted = ggml_permute(ctx, src, 1, 2, 0, 3);
            ggml_tensor * src_cwhn     = ggml_cont(ctx, src_permuted);
            src = ggml_permute(ctx, src_cwhn, 2, 0, 1, 3);

            ggml_tensor * knl_permuted = ggml_permute(ctx, knl, 2, 3, 1, 0);
            ggml_tensor * knl_cwhn     = ggml_cont(ctx, knl_permuted);
            knl = ggml_permute(ctx, knl_cwhn, 3, 2, 0, 1);
        }

        ggml_tensor * result = ggml_conv_2d_dw_direct(
            ctx, knl, src,
            stride, stride,
            padding, padding,
            dilation, dilation);
        ggml_set_name(result, "out");
        return result;
    }
};
// GGML_OP_CONCAT
struct test_concat : public test_case {
const ggml_type type;
@ -3975,6 +4017,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
test_cases.emplace_back(new test_conv_transpose_1d());
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
@ -4549,6 +4596,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
}
}
test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
return test_cases;
}