mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-19 14:31:06 -04:00
metal : handle buffers larger than device's maxBufferLength (#1826)
* metal : handle buffers larger than device's maxBufferLength
* metal : print more verbose device info + handle errors
* metal : fix prints for overlapping views
* metal : minimize view overlap to try to utilize device memory better
This commit is contained in:
5
ggml.h
5
ggml.h
@@ -500,8 +500,9 @@ extern "C" {
|
||||
GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
|
||||
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
|
||||
|
||||
GGML_API void * ggml_get_mem_buffer(struct ggml_context * ctx);
|
||||
GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
|
||||
GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
|
||||
GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
|
||||
GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_new_tensor(
|
||||
struct ggml_context * ctx,
|
||||
|
Reference in New Issue
Block a user