mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-27 19:53:42 -04:00
@ -2353,9 +2353,12 @@ struct test_bin_bcast : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const std::array<int, 4> nr;
|
||||
int nf; // number of fused ops, nf == 1 -> single op (no fusion)
|
||||
|
||||
// Overrides the test_case hook and always returns true.
// NOTE(review): presumably this makes the harness evaluate the entire graph rather than a single node, which the fused (nf > 1) cases would need — confirm against test_case.
bool run_whole_graph() override { return true; }
|
||||
|
||||
// Returns the test parameters as a string produced by the VARS_TO_STR macros.
// NOTE(review): this span is a rendered diff hunk with the +/- markers lost. The
// VARS_TO_STR3 return is the pre-change line; the VARS_TO_STR4 return, which also
// reports nf, is its replacement.
std::string vars() override {
|
||||
return VARS_TO_STR3(type, ne, nr);
|
||||
return VARS_TO_STR4(type, ne, nr, nf);
|
||||
}
|
||||
|
||||
size_t op_size(ggml_tensor * t) override {
|
||||
@ -2364,24 +2367,35 @@ struct test_bin_bcast : public test_case {
|
||||
|
||||
// Constructs a binary-op broadcast test case.
//   op   - the binary operation under test
//   type - tensor element type (defaults to GGML_TYPE_F32)
//   ne   - shape of the right-hand operand(s); build_graph sizes the left operand as ne[i]*nr[i]
//   nr   - per-dimension multipliers applied to ne for the left operand
//   nf   - number of fused ops; 1 means a single op (no fusion). Defaults to 1, so call
//          sites that omit it are unaffected.
// NOTE(review): this span is a rendered diff hunk with the +/- markers lost. The first
// `nr = {1, 2, 1, 1})` line and the initializer list without nf are the pre-change text;
// the following `nr`/`nf` lines and the initializer list ending in nf(nf) replace them.
test_bin_bcast(op_t op, ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {10, 10, 1, 1},
|
||||
std::array<int, 4> nr = {1, 2, 1, 1})
|
||||
: op(op), type(type), ne(ne), nr(nr) {}
|
||||
std::array<int, 4> nr = {1, 2, 1, 1},
|
||||
int nf = 1)
|
||||
: op(op), type(type), ne(ne), nr(nr), nf(nf) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
GGML_ASSERT(nf <= 8);
|
||||
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
// The backward pass supports broadcasting only for GGML_ADD:
|
||||
const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
|
||||
if (grad_supported) {
|
||||
ggml_set_param(a);
|
||||
ggml_set_param(b);
|
||||
ggml_tensor * b[8];
|
||||
for (int i = 0; i < nf; ++i) {
|
||||
b[i] = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(b[i], (std::string("b") + std::to_string(i)).c_str());
|
||||
}
|
||||
|
||||
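// The backward pass supports broadcasting only for GGML_ADD:
// NOTE(review): the condition below requires op == ggml_add AND identical shapes AND nf == 1, so parameters are no longer marked for broadcast adds or for other ops with matching shapes. The line it replaces used `op == ggml_add || ggml_are_same_shape(a, b)`. Confirm the narrowing is intended; otherwise `(op == ggml_add || ggml_are_same_shape(a, b[0])) && nf == 1` matches the comment above.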
|
||||
const bool grad_supported = op == ggml_add && ggml_are_same_shape(a, b[0]) && nf == 1;
|
||||
if (grad_supported) {
|
||||
ggml_set_param(a);
|
||||
ggml_set_param(b[0]);
|
||||
}
|
||||
|
||||
ggml_tensor * out = a;
|
||||
|
||||
for (int i = 0; i < nf; ++i) {
|
||||
out = op(ctx, out, b[i]);
|
||||
}
|
||||
|
||||
ggml_tensor * out = op(ctx, a, b);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
@ -2622,15 +2636,15 @@ struct test_rms_norm_back : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_RMS_NORM + GGML_OP_MUL
|
||||
struct test_rms_norm_mul : public test_case {
|
||||
// GGML_OP_RMS_NORM + GGML_OP_MUL + GGML_OP_ADD
|
||||
struct test_rms_norm_mul_add : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const float eps;
|
||||
|
||||
// Returns a fixed description string for this test case; the tensor argument is
// deliberately unused (GGML_UNUSED).
// NOTE(review): this span is a rendered diff hunk with the +/- markers lost.
// "RMS_NORM_MUL" is the pre-change return value; "RMS_NORM_MUL_ADD" replaces it.
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "RMS_NORM_MUL";
|
||||
return "RMS_NORM_MUL_ADD";
|
||||
}
|
||||
|
||||
// Overrides the test_case hook and always returns true.
// NOTE(review): presumably this makes the harness evaluate the entire fused graph rather than a single node — confirm against test_case.
bool run_whole_graph() override { return true; }
|
||||
@ -2639,7 +2653,7 @@ struct test_rms_norm_mul : public test_case {
|
||||
return VARS_TO_STR3(type, ne, eps);
|
||||
}
|
||||
|
||||
// Constructs the fused RMS-norm test case.
//   type - tensor element type (defaults to GGML_TYPE_F32)
//   ne   - shape used for every input tensor (defaults to {64, 5, 4, 3})
//   eps  - epsilon forwarded to ggml_rms_norm in build_graph (defaults to 1e-6f)
// NOTE(review): this span is a rendered diff hunk with the +/- markers lost. The
// `test_rms_norm_mul(` line is the pre-change constructor name; `test_rms_norm_mul_add(`
// replaces it, in line with the struct rename.
test_rms_norm_mul(ggml_type type = GGML_TYPE_F32,
|
||||
test_rms_norm_mul_add(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {64, 5, 4, 3},
|
||||
float eps = 1e-6f)
|
||||
: type(type), ne(ne), eps(eps) {}
|
||||
@ -2647,14 +2661,17 @@ struct test_rms_norm_mul : public test_case {
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * c = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
ggml_set_param(b);
|
||||
ggml_set_name(b, "b");
|
||||
ggml_set_param(c);
|
||||
ggml_set_name(c, "c");
|
||||
|
||||
// Use a and b early, so we don't end up with an OP_NONE between rms_norm and mul
|
||||
a = ggml_add(ctx, a, b);
|
||||
ggml_tensor * out = ggml_mul(ctx, ggml_rms_norm(ctx, a, eps), b);
|
||||
// Use a, b and c early, so we don't end up with an OP_NONE between rms_norm and mul
|
||||
a = ggml_add(ctx, ggml_add(ctx, a, b), c);
|
||||
ggml_tensor * out = ggml_add(ctx, ggml_mul(ctx, ggml_rms_norm(ctx, a, eps), b), c);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
@ -5151,6 +5168,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
//add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1});
|
||||
}
|
||||
|
||||
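// fusion: chains of nf = 2..8 consecutive ggml_add ops, with the left operand broadcast along different dimensions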
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {2, 1, 1, 1}, 2));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {16, 5, 4, 3}, {1, 2, 1, 1}, 3));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 1}, 4));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {16, 5, 4, 3}, {1, 1, 1, 2}, 5));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 2}, 6));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 2, 2}, 7));
|
||||
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {16, 5, 4, 3}, {2, 2, 2, 2}, 8));
|
||||
|
||||
test_cases.emplace_back(new test_add1());
|
||||
test_cases.emplace_back(new test_scale());
|
||||
test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {10, 10, 10, 10}, 2.0f, 1.0f));
|
||||
@ -5165,7 +5191,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_l2_norm (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
}
|
||||
for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) {
|
||||
test_cases.emplace_back(new test_rms_norm_mul(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));
|
||||
|
Reference in New Issue
Block a user