ggml : upgrade init_tensor API to return a ggml_status (#11854)

* Upgrade init_tensor API to return a ggml_status

To prepare for an 'abort-free' ggml
(ggml no longer aborting on OOM but returning an OOM status),
as agreed with Diego in the ggml repo,
upgrade the init_tensor() and view_init() APIs
to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <slarengh@gmail.com>
commit 70680c48e5 (parent c43a3e7996)
Author: William Tambellini
Date: 2025-02-28 05:41:47 -08:00
Committed by: GitHub

16 changed files with 136 additions and 97 deletions
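
For context, the core of the change is the return type of the buffer interface's
init_tensor hook (and of ggml_backend_view_init()), which previously returned void,
so an OOM could only abort. A minimal sketch of the new shape in C, assuming the
surrounding ggml-backend-impl.h declarations (the full patch touches 16 files):

    // buffer interface: init_tensor now reports failure instead of aborting,
    // e.g. GGML_STATUS_ALLOC_FAILED on OOM
    enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);

    // callers propagate the status rather than assume success:
    enum ggml_status status = ggml_backend_buffer_init_tensor(buffer, tensor);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "init_tensor failed: %s\n", ggml_status_to_string(status));
        return status; // bubble the error up instead of calling abort()
    }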


@@ -18,6 +18,7 @@
 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
+#include <ggml-cpp.h>
 #include <algorithm>
 #include <array>
@@ -467,6 +468,7 @@ struct test_case {
 // allocate
 ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
 if (buf == NULL) {
 printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
 ggml_free(ctx);
@@ -588,14 +590,13 @@ struct test_case {
 /* .mem_base = */ NULL,
 /* .no_alloc = */ true,
 };
-ggml_context * ctx = ggml_init(params);
+ggml_context_ptr ctx(ggml_init(params)); // smart ptr
 GGML_ASSERT(ctx);
-ggml_tensor * out = build_graph(ctx);
+ggml_tensor * out = build_graph(ctx.get());
 if (op_name != nullptr && op_desc(out) != op_name) {
 //printf("  %s: skipping\n", op_desc(out).c_str());
-ggml_free(ctx);
 return true;
 }
@@ -605,7 +606,6 @@ struct test_case {
 // check if backends support op
 if (!ggml_backend_supports_op(backend, out)) {
 printf("not supported\n");
-ggml_free(ctx);
 return true;
 }
@@ -618,22 +618,26 @@ struct test_case {
 printf("%*s", last - len, "");
 // allocate
-ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
 if (buf == NULL) {
 printf("failed to allocate tensors\n");
-ggml_free(ctx);
 return false;
 }
 // randomize tensors
-initialize_tensors(ctx);
+initialize_tensors(ctx.get());
 // build graph
-ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_nodes, false);
+ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), graph_nodes, false);
 ggml_build_forward_expand(gf, out);
 // warmup run
-ggml_backend_graph_compute(backend, gf);
+ggml_status status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 // determine number of runs
 int n_runs;
@@ -684,7 +688,11 @@ struct test_case {
 int total_runs = 0;
 do {
 int64_t start_time = ggml_time_us();
-ggml_backend_graph_compute(backend, gf);
+ggml_status status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 int64_t end_time = ggml_time_us();
 total_time_us += end_time - start_time;
@@ -722,10 +730,6 @@ struct test_case {
 }
 printf("\n");
-ggml_backend_buffer_free(buf);
-ggml_free(ctx);
 return true;
 }
@@ -738,17 +742,16 @@ struct test_case {
 /* .mem_base = */ NULL,
 /* .no_alloc = */ true,
 };
-ggml_context * ctx = ggml_init(params);
+ggml_context_ptr ctx(ggml_init(params)); // smart ptr
 GGML_ASSERT(ctx);
-gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
-gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
+gb = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
-ggml_tensor * out = build_graph(ctx);
+ggml_tensor * out = build_graph(ctx.get());
 if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
 //printf("  %s: skipping\n", op_desc(out).c_str());
-ggml_free(ctx);
 return true;
 }
@@ -756,7 +759,6 @@ struct test_case {
 fflush(stdout);
 if (out->type != GGML_TYPE_F32) {
-ggml_free(ctx);
 printf("not supported [%s->type != FP32]\n", out->name);
 return true;
 }
@@ -764,7 +766,7 @@ struct test_case {
 // check if the backend supports the ops
 bool supported = true;
 bool any_params = false;
-for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
 if (!ggml_backend_supports_op(backend, t)) {
 printf("not supported [%s] ", ggml_backend_name(backend));
 supported = false;
@@ -785,40 +787,38 @@ struct test_case {
 }
 if (!supported) {
 printf("\n");
-ggml_free(ctx);
 return true;
 }
 int64_t ngrads = 0;
-for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
 if (t->flags & GGML_TENSOR_FLAG_PARAM) {
 ngrads += ggml_nelements(t);
 }
 }
 if (ngrads > grad_nmax()) {
 printf("skipping large tensors for speed \n");
-ggml_free(ctx);
 return true;
 }
 if (!ggml_is_scalar(out)) {
-out = ggml_sum(ctx, out);
+out = ggml_sum(ctx.get(), out);
 ggml_set_name(out, "sum_of_out");
 }
 ggml_set_loss(out);
 ggml_build_forward_expand(gf, out);
 ggml_graph_cpy(gf, gb);
-ggml_build_backward_expand(ctx, ctx, gb, false);
+ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false);
 if (expect.size() != 1 || expect[0] != 0.0f) {
 GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
-for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
 GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
 }
 }
-for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
 if (!ggml_backend_supports_op(backend, t)) {
 printf("not supported [%s] ", ggml_backend_name(backend));
 supported = false;
@@ -832,27 +832,32 @@ struct test_case {
 }
 if (!supported) {
 printf("\n");
-ggml_free(ctx);
 return true;
 }
 // allocate
-ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
 if (buf == NULL) {
 printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
-ggml_free(ctx);
 return false;
 }
-initialize_tensors(ctx); // Randomizes all tensors (including gradients).
+initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
 ggml_graph_reset(gb);    // Sets gradients to 1 if loss, 0 otherwise.
-ggml_backend_graph_compute(backend, gf);
-ggml_backend_graph_compute(backend, gb);
+ggml_status status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
+status = ggml_backend_graph_compute(backend, gb);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 bool ok = true;
-for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) {
 if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
 continue;
 }
@@ -897,20 +902,36 @@ struct test_case {
 float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
 ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
-ggml_backend_graph_compute(backend, gf);
+status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
 ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
-ggml_backend_graph_compute(backend, gf);
+status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
 if (grad_precise()) {
 ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
-ggml_backend_graph_compute(backend, gf);
+status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
 ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
-ggml_backend_graph_compute(backend, gf);
+status = ggml_backend_graph_compute(backend, gf);
+if (status != GGML_STATUS_SUCCESS) {
+fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+return false;
+}
 ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
 gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
@@ -936,10 +957,6 @@ struct test_case {
 printf("compare failed ");
 }
-ggml_backend_buffer_free(buf);
-ggml_free(ctx);
 if (ok) {
 printf("\033[1;32mOK\033[0m\n");
 return true;