Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-26 19:55:04 +00:00)
ggml : upgrade init_tensor API to return a ggml_status (#11854)
* Upgrade init_tensor API to return a ggml_status

  To prepare for an 'abort-free' ggml (ggml not aborting on OOM but returning an OOM status), as agreed with Diego in the ggml repo, upgrade the init_tensor() and view_init() APIs to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <slarengh@gmail.com>
commit 70680c48e5
parent c43a3e7996
committed by GitHub
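For context, a minimal caller-side sketch of what the new contract enables. Assumptions: the public ggml_backend_buffer_init_tensor() wrapper forwards the status as this patch intends, and the helper name init_all_tensors is made up for illustration; this is not code from the commit.

    // Sketch: propagate tensor-initialization failures instead of relying on aborts.
    // Assumes ggml_backend_buffer_init_tensor() now returns a ggml_status (per this patch).
    #include <stdio.h>
    #include <ggml.h>
    #include <ggml-backend.h>

    static enum ggml_status init_all_tensors(struct ggml_context * ctx, ggml_backend_buffer_t buf) {
        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
            enum ggml_status status = ggml_backend_buffer_init_tensor(buf, t);
            if (status != GGML_STATUS_SUCCESS) {
                // e.g. GGML_STATUS_ALLOC_FAILED on OOM, instead of an abort inside ggml
                fprintf(stderr, "init_tensor failed for %s: %s\n", t->name, ggml_status_to_string(status));
                return status;
            }
        }
        return GGML_STATUS_SUCCESS;
    }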
@@ -18,6 +18,7 @@
 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
+#include <ggml-cpp.h>
 
 #include <algorithm>
 #include <array>
@@ -467,6 +468,7 @@ struct test_case {
 
         // allocate
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
+
         if (buf == NULL) {
             printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
             ggml_free(ctx);
@@ -588,14 +590,13 @@ struct test_case {
             /* .mem_base = */ NULL,
             /* .no_alloc = */ true,
         };
-        ggml_context * ctx = ggml_init(params);
+        ggml_context_ptr ctx(ggml_init(params)); // smart ptr
         GGML_ASSERT(ctx);
 
-        ggml_tensor * out = build_graph(ctx);
+        ggml_tensor * out = build_graph(ctx.get());
 
         if (op_name != nullptr && op_desc(out) != op_name) {
             //printf(" %s: skipping\n", op_desc(out).c_str());
-            ggml_free(ctx);
             return true;
         }
 
@@ -605,7 +606,6 @@ struct test_case {
         // check if backends support op
         if (!ggml_backend_supports_op(backend, out)) {
            printf("not supported\n");
-            ggml_free(ctx);
            return true;
        }
 
@@ -618,22 +618,26 @@ struct test_case {
         printf("%*s", last - len, "");
 
         // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
+
         if (buf == NULL) {
             printf("failed to allocate tensors\n");
-            ggml_free(ctx);
             return false;
         }
 
         // randomize tensors
-        initialize_tensors(ctx);
+        initialize_tensors(ctx.get());
 
         // build graph
-        ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_nodes, false);
+        ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), graph_nodes, false);
         ggml_build_forward_expand(gf, out);
 
         // warmup run
-        ggml_backend_graph_compute(backend, gf);
+        ggml_status status = ggml_backend_graph_compute(backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
 
         // determine number of runs
         int n_runs;
@@ -684,7 +688,11 @@ struct test_case {
         int total_runs = 0;
         do {
             int64_t start_time = ggml_time_us();
-            ggml_backend_graph_compute(backend, gf);
+            ggml_status status = ggml_backend_graph_compute(backend, gf);
+            if (status != GGML_STATUS_SUCCESS) {
+                fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                return false;
+            }
             int64_t end_time = ggml_time_us();
 
             total_time_us += end_time - start_time;
@@ -722,10 +730,6 @@ struct test_case {
         }
         printf("\n");
 
-        ggml_backend_buffer_free(buf);
-
-        ggml_free(ctx);
-
         return true;
     }
 
@@ -738,17 +742,16 @@ struct test_case {
             /* .mem_base = */ NULL,
             /* .no_alloc = */ true,
         };
-        ggml_context * ctx = ggml_init(params);
+        ggml_context_ptr ctx(ggml_init(params)); // smart ptr
         GGML_ASSERT(ctx);
 
-        gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
-        gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+        gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
+        gb = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
 
-        ggml_tensor * out = build_graph(ctx);
+        ggml_tensor * out = build_graph(ctx.get());
 
         if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
             //printf(" %s: skipping\n", op_desc(out).c_str());
-            ggml_free(ctx);
             return true;
         }
 
@@ -756,7 +759,6 @@ struct test_case {
         fflush(stdout);
 
         if (out->type != GGML_TYPE_F32) {
-            ggml_free(ctx);
             printf("not supported [%s->type != FP32]\n", out->name);
             return true;
         }
@@ -764,7 +766,7 @@ struct test_case {
         // check if the backend supports the ops
         bool supported = true;
         bool any_params = false;
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!ggml_backend_supports_op(backend, t)) {
                 printf("not supported [%s] ", ggml_backend_name(backend));
                 supported = false;
@@ -785,40 +787,38 @@ struct test_case {
         }
         if (!supported) {
             printf("\n");
-            ggml_free(ctx);
             return true;
         }
 
         int64_t ngrads = 0;
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (t->flags & GGML_TENSOR_FLAG_PARAM) {
                 ngrads += ggml_nelements(t);
             }
         }
         if (ngrads > grad_nmax()) {
             printf("skipping large tensors for speed \n");
-            ggml_free(ctx);
             return true;
         }
 
 
         if (!ggml_is_scalar(out)) {
-            out = ggml_sum(ctx, out);
+            out = ggml_sum(ctx.get(), out);
             ggml_set_name(out, "sum_of_out");
         }
         ggml_set_loss(out);
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx, ctx, gb, false);
+        ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
-            for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
                 GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
             }
         }
 
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!ggml_backend_supports_op(backend, t)) {
                 printf("not supported [%s] ", ggml_backend_name(backend));
                 supported = false;
@@ -832,27 +832,32 @@ struct test_case {
         }
         if (!supported) {
             printf("\n");
-            ggml_free(ctx);
             return true;
         }
 
         // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
         if (buf == NULL) {
             printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
-            ggml_free(ctx);
             return false;
         }
 
-
-        initialize_tensors(ctx); // Randomizes all tensors (including gradients).
+        initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
         ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise.
 
-        ggml_backend_graph_compute(backend, gf);
-        ggml_backend_graph_compute(backend, gb);
+        ggml_status status = ggml_backend_graph_compute(backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
+        status = ggml_backend_graph_compute(backend, gb);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
 
         bool ok = true;
-        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
                 continue;
             }
@@ -897,20 +902,36 @@ struct test_case {
                 float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
 
                 ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
-                ggml_backend_graph_compute(backend, gf);
+                status = ggml_backend_graph_compute(backend, gf);
+                if (status != GGML_STATUS_SUCCESS) {
+                    fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                    return false;
+                }
                 ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
 
                 ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
-                ggml_backend_graph_compute(backend, gf);
+                status = ggml_backend_graph_compute(backend, gf);
+                if (status != GGML_STATUS_SUCCESS) {
+                    fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                    return false;
+                }
                 ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
 
                 if (grad_precise()) {
                     ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
-                    ggml_backend_graph_compute(backend, gf);
+                    status = ggml_backend_graph_compute(backend, gf);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                        return false;
+                    }
                     ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
 
                     ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
-                    ggml_backend_graph_compute(backend, gf);
+                    status = ggml_backend_graph_compute(backend, gf);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                        return false;
+                    }
                     ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
 
                     gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
@@ -936,10 +957,6 @@ struct test_case {
             printf("compare failed ");
         }
 
-        ggml_backend_buffer_free(buf);
-
-        ggml_free(ctx);
-
         if (ok) {
             printf("\033[1;32mOK\033[0m\n");
             return true;