Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-26 19:55:04 +00:00)
ggml : upgrade init_tensor API to return a ggml_status (#11854)
* Upgrade init_tensor API to return a ggml_status

  To prepare for an 'abort-free' ggml (ggml not aborting on OOM but returning an OOM status), as agreed with Diego in the ggml repo, upgrade the init_tensor() and view_init() APIs to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <slarengh@gmail.com>
commit 70680c48e5
parent c43a3e7996
committed by GitHub
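For context, a minimal caller-side sketch of what the new contract enables. Assumptions: the public ggml_backend_buffer_init_tensor() wrapper forwards the status as this patch intends, and the helper name init_all_tensors is made up for illustration; this is not code from the commit.

    // Sketch: propagate tensor-initialization failures instead of relying on aborts.
    // Assumes ggml_backend_buffer_init_tensor() now returns a ggml_status (per this patch).
    #include <stdio.h>
    #include <ggml.h>
    #include <ggml-backend.h>

    static enum ggml_status init_all_tensors(struct ggml_context * ctx, ggml_backend_buffer_t buf) {
        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
            enum ggml_status status = ggml_backend_buffer_init_tensor(buf, t);
            if (status != GGML_STATUS_SUCCESS) {
                // e.g. GGML_STATUS_ALLOC_FAILED on OOM, instead of an abort inside ggml
                fprintf(stderr, "init_tensor failed for %s: %s\n", t->name, ggml_status_to_string(status));
                return status;
            }
        }
        return GGML_STATUS_SUCCESS;
    }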
@@ -18,6 +18,7 @@
 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
+#include <ggml-cpp.h>
 
 #include <algorithm>
 #include <array>
@@ -467,6 +468,7 @@ struct test_case {
 
         // allocate
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
+
         if (buf == NULL) {
             printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
             ggml_free(ctx);
@@ -588,14 +590,13 @@ struct test_case {
             /* .mem_base = */ NULL,
             /* .no_alloc = */ true,
         };
-        ggml_context * ctx = ggml_init(params);
+        ggml_context_ptr ctx(ggml_init(params)); // smart ptr
         GGML_ASSERT(ctx);
 
-        ggml_tensor * out = build_graph(ctx);
+        ggml_tensor * out = build_graph(ctx.get());
 
         if (op_name != nullptr && op_desc(out) != op_name) {
             //printf(" %s: skipping\n", op_desc(out).c_str());
-            ggml_free(ctx);
             return true;
         }
 
@@ -605,7 +606,6 @@ struct test_case {
         // check if backends support op
         if (!ggml_backend_supports_op(backend, out)) {
            printf("not supported\n");
-            ggml_free(ctx);
            return true;
        }
 
@@ -618,22 +618,26 @@ struct test_case {
         printf("%*s", last - len, "");
 
         // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
+
         if (buf == NULL) {
             printf("failed to allocate tensors\n");
-            ggml_free(ctx);
             return false;
         }
 
         // randomize tensors
-        initialize_tensors(ctx);
+        initialize_tensors(ctx.get());
 
         // build graph
-        ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_nodes, false);
+        ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), graph_nodes, false);
         ggml_build_forward_expand(gf, out);
 
         // warmup run
-        ggml_backend_graph_compute(backend, gf);
+        ggml_status status = ggml_backend_graph_compute(backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
 
         // determine number of runs
         int n_runs;
@@ -684,7 +688,11 @@ struct test_case {
         int total_runs = 0;
         do {
             int64_t start_time = ggml_time_us();
-            ggml_backend_graph_compute(backend, gf);
+            ggml_status status = ggml_backend_graph_compute(backend, gf);
+            if (status != GGML_STATUS_SUCCESS) {
+                fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                return false;
+            }
             int64_t end_time = ggml_time_us();
 
             total_time_us += end_time - start_time;
@@ -722,10 +730,6 @@ struct test_case {
         }
         printf("\n");
 
-        ggml_backend_buffer_free(buf);
-
-        ggml_free(ctx);
-
         return true;
     }
 
@@ -738,17 +742,16 @@ struct test_case {
             /* .mem_base = */ NULL,
             /* .no_alloc = */ true,
         };
-        ggml_context * ctx = ggml_init(params);
+        ggml_context_ptr ctx(ggml_init(params)); // smart ptr
         GGML_ASSERT(ctx);
 
-        gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
-        gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+        gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
+        gb = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
 
-        ggml_tensor * out = build_graph(ctx);
+        ggml_tensor * out = build_graph(ctx.get());
 
         if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
             //printf(" %s: skipping\n", op_desc(out).c_str());
-            ggml_free(ctx);
             return true;
         }
 
@@ -756,7 +759,6 @@ struct test_case {
         fflush(stdout);
 
         if (out->type != GGML_TYPE_F32) {
-            ggml_free(ctx);
             printf("not supported [%s->type != FP32]\n", out->name);
             return true;
         }
@@ -764,7 +766,7 @@ struct test_case {
         // check if the backend supports the ops
         bool supported = true;
         bool any_params = false;
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!ggml_backend_supports_op(backend, t)) {
                 printf("not supported [%s] ", ggml_backend_name(backend));
                 supported = false;
@@ -785,40 +787,38 @@ struct test_case {
         }
         if (!supported) {
             printf("\n");
-            ggml_free(ctx);
             return true;
         }
 
         int64_t ngrads = 0;
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (t->flags & GGML_TENSOR_FLAG_PARAM) {
                 ngrads += ggml_nelements(t);
             }
         }
         if (ngrads > grad_nmax()) {
             printf("skipping large tensors for speed \n");
-            ggml_free(ctx);
             return true;
         }
 
 
         if (!ggml_is_scalar(out)) {
-            out = ggml_sum(ctx, out);
+            out = ggml_sum(ctx.get(), out);
             ggml_set_name(out, "sum_of_out");
         }
         ggml_set_loss(out);
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx, ctx, gb, false);
+        ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
-            for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
                 GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
             }
         }
 
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!ggml_backend_supports_op(backend, t)) {
                 printf("not supported [%s] ", ggml_backend_name(backend));
                 supported = false;
@@ -832,27 +832,32 @@ struct test_case {
         }
         if (!supported) {
             printf("\n");
-            ggml_free(ctx);
             return true;
         }
 
         // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
         if (buf == NULL) {
             printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
-            ggml_free(ctx);
             return false;
         }
 
-
-        initialize_tensors(ctx); // Randomizes all tensors (including gradients).
+        initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
         ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise.
 
-        ggml_backend_graph_compute(backend, gf);
-        ggml_backend_graph_compute(backend, gb);
+        ggml_status status = ggml_backend_graph_compute(backend, gf);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
+        status = ggml_backend_graph_compute(backend, gb);
+        if (status != GGML_STATUS_SUCCESS) {
+            fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+            return false;
+        }
 
         bool ok = true;
-        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) {
             if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
                 continue;
             }
@@ -897,20 +902,36 @@ struct test_case {
                 float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
 
                 ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
-                ggml_backend_graph_compute(backend, gf);
+                status = ggml_backend_graph_compute(backend, gf);
+                if (status != GGML_STATUS_SUCCESS) {
+                    fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                    return false;
+                }
                 ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
 
                 ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
-                ggml_backend_graph_compute(backend, gf);
+                status = ggml_backend_graph_compute(backend, gf);
+                if (status != GGML_STATUS_SUCCESS) {
+                    fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                    return false;
+                }
                 ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
 
                 if (grad_precise()) {
                     ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
-                    ggml_backend_graph_compute(backend, gf);
+                    status = ggml_backend_graph_compute(backend, gf);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                        return false;
+                    }
                     ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
 
                     ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
-                    ggml_backend_graph_compute(backend, gf);
+                    status = ggml_backend_graph_compute(backend, gf);
+                    if (status != GGML_STATUS_SUCCESS) {
+                        fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+                        return false;
+                    }
                     ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
 
                     gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
@@ -936,10 +957,6 @@ struct test_case {
             printf("compare failed ");
         }
 
-        ggml_backend_buffer_free(buf);
-
-        ggml_free(ctx);
-
         if (ok) {
             printf("\033[1;32mOK\033[0m\n");
             return true;