mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
ggml : upgrade init_tensor API to return a ggml_status (#11854)
* Upgrade init_tensor API to return a ggml_status To prepare for an 'abort-free' ggml (ggml not to abort on OOMs but return a OOM status), as agreeed with Diego in the ggml repo, upgrade the init_tensor() and view_init() APIs to return a ggml_status. * misc fixes --------- Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
committed by
GitHub
parent
c43a3e7996
commit
70680c48e5
@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
||||
return talloc;
|
||||
}
|
||||
|
||||
void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
||||
enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
||||
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
|
||||
size = GGML_PAD(size, talloc->alignment);
|
||||
|
||||
@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
|
||||
|
||||
assert(((uintptr_t)addr % talloc->alignment) == 0);
|
||||
|
||||
ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
||||
return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
|
||||
}
|
||||
|
||||
// dynamic tensor allocator
|
||||
@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
||||
|
||||
// utils
|
||||
|
||||
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
|
||||
for (size_t i = 0; i < *n_buffers; i++) {
|
||||
ggml_backend_buffer_free((*buffers)[i]);
|
||||
}
|
||||
free(*buffers);
|
||||
}
|
||||
|
||||
static bool alloc_tensor_range(struct ggml_context * ctx,
|
||||
struct ggml_tensor * first, struct ggml_tensor * last,
|
||||
ggml_backend_buffer_type_t buft, size_t size,
|
||||
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
|
||||
|
||||
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
|
||||
if (buffer == NULL) {
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
|
||||
#endif
|
||||
for (size_t i = 0; i < *n_buffers; i++) {
|
||||
ggml_backend_buffer_free((*buffers)[i]);
|
||||
}
|
||||
free(*buffers);
|
||||
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
|
||||
free_buffers(buffers, n_buffers);
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
||||
|
||||
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
||||
if (t->data == NULL) {
|
||||
if (t->view_src == NULL) {
|
||||
ggml_tallocr_alloc(&tallocr, t);
|
||||
} else if (t->buffer == NULL) {
|
||||
ggml_backend_view_init(t);
|
||||
}
|
||||
} else {
|
||||
if (t->view_src != NULL && t->buffer == NULL) {
|
||||
// view of a pre-allocated tensor
|
||||
ggml_backend_view_init(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
||||
(*buffers)[(*n_buffers)++] = buffer;
|
||||
|
||||
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
||||
|
||||
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
||||
enum ggml_status status = GGML_STATUS_SUCCESS;
|
||||
if (t->data == NULL) {
|
||||
if (t->view_src == NULL) {
|
||||
status = ggml_tallocr_alloc(&tallocr, t);
|
||||
} else if (t->buffer == NULL) {
|
||||
status = ggml_backend_view_init(t);
|
||||
}
|
||||
} else {
|
||||
if (t->view_src != NULL && t->buffer == NULL) {
|
||||
// view of a pre-allocated tensor
|
||||
status = ggml_backend_view_init(t);
|
||||
}
|
||||
}
|
||||
if (status != GGML_STATUS_SUCCESS) {
|
||||
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
|
||||
free_buffers(buffers, n_buffers);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user