From 67ae5312e255ae97852a4a216e2245580bfafd72 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 21 Jun 2025 08:04:18 +0300 Subject: [PATCH] metal : fix thread-safety (#14300) ggml-ci --- ggml/src/ggml-metal/ggml-metal.m | 88 ++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index 4e7f373cb..19f4d59e5 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -48,22 +48,28 @@ static struct ggml_backend_metal_device_context { int mtl_device_ref_count; id mtl_library; + NSLock * mtl_lock; + bool has_simdgroup_reduction; bool has_simdgroup_mm; bool has_residency_sets; bool has_bfloat; bool use_bfloat; + size_t max_size; + char name[128]; } g_ggml_ctx_dev_main = { /*.mtl_device =*/ nil, /*.mtl_device_ref_count =*/ 0, /*.mtl_library =*/ nil, + /*.mtl_lock =*/ nil, /*.has_simdgroup_reduction =*/ false, /*.has_simdgroup_mm =*/ false, /*.has_residency_sets =*/ false, /*.has_bfloat =*/ false, /*.use_bfloat =*/ false, + /*.max_size =*/ 0, /*.name =*/ "", }; @@ -71,6 +77,10 @@ static struct ggml_backend_metal_device_context { static id ggml_backend_metal_device_acq(struct ggml_backend_metal_device_context * ctx) { assert(ctx != NULL); + if (ctx->mtl_lock == nil) { + ctx->mtl_lock = [[NSLock alloc] init]; + } + if (ctx->mtl_device == nil) { ctx->mtl_device = MTLCreateSystemDefaultDevice(); } @@ -94,6 +104,8 @@ static id ggml_backend_metal_device_acq(struct ggml_backend_metal_dev ctx->use_bfloat = false; #endif + ctx->max_size = ctx->mtl_device.maxBufferLength; + strncpy(ctx->name, [[ctx->mtl_device name] UTF8String], sizeof(ctx->name) - 1); } @@ -110,6 +122,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte ctx->mtl_device_ref_count--; if (ctx->mtl_device_ref_count == 0) { + if (ctx->mtl_lock) { + [ctx->mtl_lock release]; + ctx->mtl_lock = nil; + } + if (ctx->mtl_library) { [ctx->mtl_library release]; ctx->mtl_library = nil; @@ -977,7 +994,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context)); struct ggml_backend_metal_device_context * ctx_dev = dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + id device = ctx_dev->mtl_device; GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); @@ -991,9 +1008,16 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); // load library - if (ctx_dev->mtl_library == nil) { - ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat); + { + [ctx_dev->mtl_lock lock]; + + if (ctx_dev->mtl_library == nil) { + ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat); + } + + [ctx_dev->mtl_lock unlock]; } + id metal_library = ctx_dev->mtl_library; if (metal_library == nil) { GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__); @@ -5284,7 +5308,6 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer) } ggml_backend_metal_buffer_rset_free(ctx); - ggml_backend_metal_device_rel(buffer->buft->device->context); if (ctx->owned) { #if TARGET_OS_OSX @@ -5393,7 +5416,10 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba } struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; ctx->all_data = ggml_metal_host_malloc(size_aligned); ctx->all_size = size_aligned; @@ -5416,14 +5442,12 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) { GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5434,17 +5458,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { return 32; + GGML_UNUSED(buft); } static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - id device = ggml_backend_metal_device_acq(buft->device->context); - const size_t max_size = device.maxBufferLength; - ggml_backend_metal_device_rel(buft->device->context); + const size_t max_size = ((struct ggml_backend_metal_device_context *)buft->device->context)->max_size; return max_size; - - GGML_UNUSED(buft); } static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) { @@ -5517,7 +5538,10 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz } struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; // the buffer fits into the max buffer size allowed by the device if (size_aligned <= device.maxBufferLength) { @@ -5573,7 +5597,6 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5589,10 +5612,8 @@ static const char * ggml_backend_metal_name(ggml_backend_t backend) { } static void ggml_backend_metal_free(ggml_backend_t backend) { - struct ggml_backend_metal_context * ctx = backend->context; - struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; + struct ggml_backend_metal_context * ctx = backend->context; - ggml_backend_metal_device_rel(ctx_dev); ggml_metal_free(ctx); free(backend); @@ -5732,6 +5753,8 @@ bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) { struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; + GGML_ASSERT(ctx_dev->mtl_device != nil); + return [ctx_dev->mtl_device supportsFamily:(MTLGPUFamilyApple1 + family - 1)]; } @@ -5751,10 +5774,7 @@ static const char * ggml_backend_metal_device_get_name(ggml_backend_dev_t dev) { } static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) { - // acq/rel just to populate ctx->name in case it hasn't been done yet struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - ggml_backend_metal_device_acq(ctx_dev); - ggml_backend_metal_device_rel(ctx_dev); return ctx_dev->name; } @@ -5762,12 +5782,10 @@ static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { if (@available(macOS 10.12, iOS 16.0, *)) { struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + id device = ctx_dev->mtl_device; *total = device.recommendedMaxWorkingSetSize; *free = *total - device.currentAllocatedSize; - - ggml_backend_metal_device_rel(ctx_dev); } else { *free = 1; *total = 1; @@ -5845,7 +5863,10 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back } struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; // the buffer fits into the max buffer size allowed by the device if (size_aligned <= device.maxBufferLength) { @@ -5901,7 +5922,6 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5915,8 +5935,9 @@ static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const } static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { - return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || - buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; + return + buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || + buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; GGML_UNUSED(dev); } @@ -6001,8 +6022,19 @@ static struct ggml_backend_reg_i ggml_backend_metal_reg_i = { /* .get_proc_address = */ ggml_backend_metal_get_proc_address, }; +// called upon program exit +static void ggml_metal_cleanup(void) { + ggml_backend_metal_device_rel(&g_ggml_ctx_dev_main); +} + +// TODO: make thread-safe ggml_backend_reg_t ggml_backend_metal_reg(void) { - // TODO: make this thread-safe somehow? + ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main); + + // register cleanup callback + // TODO: not ideal, but not sure if there is a better way to do this in Objective-C + atexit(ggml_metal_cleanup); + { g_ggml_backend_metal_reg = (struct ggml_backend_reg) { /* .api_version = */ GGML_BACKEND_API_VERSION,