ggml : hide ggml_object, ggml_cgraph, ggml_hash_set

ggml-ci
2025-08-17 21:51:27 -04:00 · 2024-09-10 16:38:06 +03:00
parent 00ba2ff781
commit c8a3f291fe
13 changed files with 131 additions and 92 deletions
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -183,7 +183,7 @@ int main(int argc, char ** argv)  {

    ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);

-    TENSOR_DUMP(gf->nodes[0]);
+    TENSOR_DUMP(ggml_graph_node(gf, 0));

    printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));

@@ -224,7 +224,7 @@ int main(int argc, char ** argv)  {


    // Let's use the F32 result from above as a reference for the quantized multiplication
-    float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]);
+    float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));

    printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
    printf("=====================================================================================\n");
@@ -252,7 +252,7 @@ int main(int argc, char ** argv)  {

        // Check that the matrix multiplication result is in the right ballpark
        // We cannot use the exact value from the F32 multiplication because the quantization will be slightly different
-        float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]);
+        float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
        float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
        float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; //  Let's accept an epsilon of 10^-6

--- a/examples/cvector-generator/pca.hpp
+++ b/examples/cvector-generator/pca.hpp
@@ -226,8 +226,8 @@ static ggml_status compute_piter(
        result.eigenvectors.resize(params.n_batch);
        result.distances.resize(params.n_batch);
        // get output nodes
-        for (int i = 0; i < gf->n_nodes; ++i) {
-            auto node = gf->nodes[i];
+        for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
+            auto node = ggml_graph_node(gf, i);
            int iter = -1;
            // find b_tensor (without copying data from device)
            if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -370,7 +370,7 @@ struct lora_merge_ctx {

        // write data to output file
        {
-            auto result = gf->nodes[gf->n_nodes - 1];
+            auto * result = ggml_graph_node(gf, -1);
            size_t len = ggml_nbytes(result);
            if (read_buf.size() < len) {
                read_buf.resize(len);
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    ggml_backend_graph_compute(ctx->backend, gf);

    // the last node is the embedding tensor
-    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);

    // copy the embeddings to the location passed by the user
    ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
    ggml_build_forward_expand(gf, flatten);
    ggml_graph_compute_with_ctx(model.ctx, gf, 1);
-    struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor* result = ggml_graph_node(gf, -1);

    memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
    // append without newline tokens (default behavior in llava_arch when not using unpad ):