From 8d56c155f9f34dfc25710a9683c6a5ef8c15a9e3 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 16:58:58 -0700
Subject: [PATCH 1/4] set page_free to 0 by default

---
 src/options.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/options.c b/src/options.c
index 580e1887..b9794dcb 100644
--- a/src/options.c
+++ b/src/options.c
@@ -60,14 +60,14 @@ static mi_option_desc_t options[_mi_option_last] =
 // the following options are experimental and not all combinations make sense.
 { 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
- { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },
- { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset)
+ { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
+ { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit)
 { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
 { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
 { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
 { 0, UNINIT, MI_OPTION(reserve_os_memory) },
 { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
- { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
+ { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
 { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
 { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
 #if defined(__NetBSD__)

From cb0369452d93054d89008f3b3edb6254207a2d13 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 17:54:33 -0700
Subject: [PATCH 2/4] use exponentially sized arenas

---
 include/mimalloc/internal.h | 1 +
 include/mimalloc/prim.h | 11 ++++---
 src/arena.c | 62 +++++++++++++++++++++++--------------
 src/os.c | 8 ++++-
 src/prim/unix/prim.c | 1 +
 src/prim/wasi/prim.c | 1 +
 src/prim/windows/prim.c | 1 +
 7 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 9b73c92c..155fd862 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -92,6 +92,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to f
 size_t _mi_os_page_size(void);
 size_t _mi_os_good_alloc_size(size_t size);
 bool _mi_os_has_overcommit(void);
+bool _mi_os_has_virtual_reserve(void);

 bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
 bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);

diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index f07bb4bd..094d7ab9 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -22,11 +22,12 @@ terms of the MIT license. A copy of the license can be found in the file

 // OS memory configuration
 typedef struct mi_os_mem_config_s {
- size_t page_size; // 4KiB
- size_t large_page_size; // 2MiB
- size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
- bool has_overcommit; // can we reserve more memory than can be actually committed?
- bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc)
+ size_t page_size; // 4KiB
+ size_t large_page_size; // 2MiB
+ size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
+ bool has_overcommit; // can we reserve more memory than can be actually committed?
+ bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc)
+ bool has_virtual_reserve; // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
 } mi_os_mem_config_t;

 // Initialize

diff --git a/src/arena.c b/src/arena.c
index 5a3dfb91..6ad9a5a1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1,7 +1,5 @@
-
-
 /* ----------------------------------------------------------------------------
-Copyright (c) 2019-2022, Microsoft Research, Daan Leijen
+Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -25,7 +23,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 #include "mimalloc/atomic.h"

 #include <string.h> // memset
-#include <errno.h> // ENOMEM
+#include <errno.h> // ENOMEM

 #include "bitmap.h" // atomic bitmap

@@ -38,7 +36,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 typedef uintptr_t mi_block_info_t;
 #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
 #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
-#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
+#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)

 // A memory arena descriptor
 typedef struct mi_arena_s {
@@ -277,6 +275,35 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
 return NULL;
 }

+// try to reserve a fresh arena
+static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id)
+{
+ if (_mi_preloading()) return false;
+ if (req_arena_id != _mi_arena_id_none()) return false;
+
+ const size_t arena_count = mi_atomic_load_relaxed(&mi_arena_count);
+ if (arena_count > (MI_MAX_ARENAS - 4)) return false;
+
+ size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
+ if (arena_reserve == 0) return false;
+
+ if (!_mi_os_has_virtual_reserve()) {
+ arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example)
+ }
+ arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
+ if (arena_count >= 8 && arena_count <= 128) {
+ arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
+ }
+ if (arena_reserve < size) return false;
+
+ // commit eagerly?
+ bool arena_commit = false;
+ if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
+ else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
+
+ return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
+}
+

 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
 mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
 {
@@ -296,24 +323,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
 if (p != NULL) return p;

 // otherwise, try to first eagerly reserve a new arena
- size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
- arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
- if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough?
- req_arena_id == _mi_arena_id_none() && // not exclusive?
- mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already?
- !_mi_preloading() ) // and not before main runs
- {
- mi_arena_id_t arena_id = 0;
-
- // commit eagerly?
- bool arena_commit = false;
- if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
- else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
-
- if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) {
- p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
- if (p != NULL) return p;
- }
+ mi_arena_id_t arena_id = 0;
+ if (mi_arena_reserve(size,*large,req_arena_id,&arena_id)) {
+ // and try to allocate in there
+ p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ if (p != NULL) return p;
 }
 }

@@ -334,6 +348,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b
 return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 }

+
 void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
 if (size != NULL) *size = 0;
 size_t arena_index = mi_arena_id_index(arena_id);
@@ -344,6 +359,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
 return arena->start;
 }

+
 /* -----------------------------------------------------------
 Arena purge
 ----------------------------------------------------------- */

diff --git a/src/os.c b/src/os.c
index e639c751..4710b809 100644
--- a/src/os.c
+++ b/src/os.c
@@ -21,13 +21,19 @@ static mi_os_mem_config_t mi_os_mem_config = {
 0, // large page size (usually 2MiB)
 4096, // allocation granularity
 true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
- false // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
+ false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
+ true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
 };

 bool _mi_os_has_overcommit(void) {
 return mi_os_mem_config.has_overcommit;
 }

+bool _mi_os_has_virtual_reserve(void) {
+ return mi_os_mem_config.has_virtual_reserve;
+}
+
+
 // OS (small) page size
 size_t _mi_os_page_size(void) {
 return mi_os_mem_config.page_size;

diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index eec6ca6d..e3a6f8a9 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -134,6 +134,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
 config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
 config->has_overcommit = unix_detect_overcommit();
 config->must_free_whole = false; // mmap can free in parts
+ config->has_virtual_reserve = true; // todo: check if this is true for NetBSD? (for anonymous mmap with PROT_NONE)
 }

diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index 3f2659dd..bf78a258 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -21,6 +21,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
 config->alloc_granularity = 16;
 config->has_overcommit = false;
 config->must_free_whole = true;
+ config->has_virtual_reserve = false;
 }

 //---------------------------------------------

diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 514fe647..af6af5fe 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -113,6 +113,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
 config->has_overcommit = false;
 config->must_free_whole = true;
+ config->has_virtual_reserve = true;
 // get the page size
 SYSTEM_INFO si;
 GetSystemInfo(&si);

From 4c4f2f4084a7c42c595e8f95fefee904a40c5ea1 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 18:13:35 -0700
Subject: [PATCH 3/4] clean up arena function names

---
 src/arena.c | 42 ++++++++++++++++++++++--------------------
 src/segment.c | 2 +-
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 6ad9a5a1..9eedcfdb 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -133,7 +133,9 @@ static size_t mi_arena_block_size(size_t bcount) {
 /* -----------------------------------------------------------
 Thread safe allocation in an arena
 ----------------------------------------------------------- */
-static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
+
+// claim the `blocks_inuse` bits
+static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
 {
 size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
 if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
@@ -148,7 +150,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
 Arena Allocation
 ----------------------------------------------------------- */

-static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
+static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
 bool* commit, bool* large, bool* is_pinned, bool* is_zero,
 mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
 {
 if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
 mi_bitmap_index_t bitmap_index;
- if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
+ if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;

 // claimed it!
 void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index));
@@ -199,9 +201,9 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
 }

 // allocate in a specific arena
-static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment,
- bool* commit, bool* large, bool* is_pinned, bool* is_zero,
- mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
+static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment,
+ bool* commit, bool* large, bool* is_pinned, bool* is_zero,
+ mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
 {
 MI_UNUSED_RELEASE(alignment);
 mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
@@ -216,12 +218,12 @@ static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t siz
 if (arena == NULL) return NULL;
 if (arena->numa_node >= 0 && arena->numa_node != numa_node) return NULL;
 if (!(*large) && arena->is_large) return NULL;
- return mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ return mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 }


 // allocate from an arena with fallback to the OS
-static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
+static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
 bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
 {
@@ -233,14 +235,14 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
 mi_assert_internal(size <= mi_arena_block_size(bcount));
 size_t arena_index = mi_arena_id_index(req_arena_id);
- if (arena_index < MI_MAX_ARENAS) {
+ if (arena_index < MI_MAX_ARENAS && arena_index < max_arena) {
 // try a specific arena if requested
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
 if ((arena != NULL) &&
- (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+ // (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
+ (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages
 {
- void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ void* p = mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 mi_assert_internal((uintptr_t)p % alignment == 0);
 if (p != NULL) return p;
 }
 }

@@ -251,9 +253,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
 if (arena == NULL) break; // end reached
 if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -264,9 +266,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; // end reached if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -276,7 +278,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size } // try to reserve a fresh arena -static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; if (req_arena_id != _mi_arena_id_none()) return false; @@ -294,7 +296,7 @@ static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_ar if (arena_count >= 8 && arena_count <= 128) { arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially } - if (arena_reserve < size) return false; + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size // commit eagerly? 
 bool arena_commit = false;
 if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
 else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }

@@ -319,14 +321,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
 // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
 if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
- void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ void* p = mi_arenas_alloc(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 if (p != NULL) return p;

 // otherwise, try to first eagerly reserve a new arena
 mi_arena_id_t arena_id = 0;
 if (mi_arena_reserve(size,*large,req_arena_id,&arena_id)) {
 // and try to allocate in there
- p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ p = mi_arena_alloc_at_id(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 if (p != NULL) return p;
 }
 }

diff --git a/src/segment.c b/src/segment.c
index 0eec0727..bfd2b75b 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -575,7 +575,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
 const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && // don't delay for large objects
 // !_mi_os_has_overcommit() && // never delay on overcommit systems
 _mi_current_thread_count() > 1 && // do not delay for the first N threads
- tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
+ tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay));
 const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit);
 bool commit = eager; // || (page_kind >= MI_PAGE_LARGE);
 bool is_zero = false;

From 83aa63548561d3b17f77ed7c94d6945341129597 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 18:44:08 -0700
Subject: [PATCH 4/4] implement arena destroy on program exit

---
 src/arena.c | 100 +++++++++++++++++++++++++++++++++++++++++-----------
 src/heap.c | 2 +-
 src/init.c | 2 +-
 3 files changed, 82 insertions(+), 22 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 9eedcfdb..e8c7418e 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -42,13 +42,16 @@ typedef uintptr_t mi_block_info_t;
 typedef struct mi_arena_s {
 mi_arena_id_t id; // arena id; 0 for non-specific
 bool exclusive; // only allow allocations if specifically for this arena
+ bool owned; // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set.
 _Atomic(uint8_t*) start; // the start of the memory area
 size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
 size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
+ size_t meta_size; // size of the arena structure itself including the bitmaps
 int numa_node; // associated NUMA node
 bool is_zero_init; // is the arena zero initialized?
 bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
 bool is_large; // large- or huge OS pages (always committed)
+ bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages`
 _Atomic(size_t) search_idx; // optimization to start the search for free blocks
 _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
 mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
@@ -63,6 +66,8 @@
 static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
 static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0

+static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept;
+

 /* -----------------------------------------------------------
 Arena id's
 0 is used for non-arena's (like OS memory)
@@ -130,6 +135,12 @@ static size_t mi_arena_block_size(size_t bcount) {
 return (bcount * MI_ARENA_BLOCK_SIZE);
 }

+static size_t mi_arena_size(mi_arena_t* arena) {
+ return mi_arena_block_size(arena->block_count);
+}
+
+
+
 /* -----------------------------------------------------------
 Thread safe allocation in an arena
 ----------------------------------------------------------- */
@@ -251,8 +262,8 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t
 // try numa affine allocation
 for (size_t i = 0; i < max_arena; i++) {
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena == NULL) break; // end reached
- if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
+ if (arena != NULL &&
+ (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
 (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages
 {
 void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
@@ -264,8 +275,8 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t
 // try from another numa node instead..
 for (size_t i = 0; i < max_arena; i++) {
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena == NULL) break; // end reached
- if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
+ if (arena != NULL &&
+ (arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
 (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages
 {
 void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
@@ -294,7 +305,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
 }
 arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
 if (arena_count >= 8 && arena_count <= 128) {
- arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
+ arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
 }
 if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size

@@ -499,9 +510,11 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats )
 size_t max_purge_count = (visit_all ? max_arena : 1);
 for (size_t i = 0; i < max_arena; i++) {
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (mi_arena_try_purge(arena, now, force, stats)) {
- if (max_purge_count <= 1) break;
- max_purge_count--;
+ if (arena != NULL) {
+ if (mi_arena_try_purge(arena, now, force, stats)) {
+ if (max_purge_count <= 1) break;
+ max_purge_count--;
+ }
 }
 }
 }

@@ -574,10 +587,41 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset,
 mi_arenas_try_purge(false, false, stats);
 }

+// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
+// for dynamic libraries that are unloaded and need to release all their allocated memory.
+static void mi_arenas_destroy(void) {
+ const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+ size_t new_max_arena = 0;
+ for (size_t i = 0; i < max_arena; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+ if (arena != NULL) {
+ if (arena->owned && arena->start != NULL) {
+ mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
+ if (arena->is_huge_alloc) {
+ _mi_os_free_huge_pages(arena->start, mi_arena_size(arena), &_mi_stats_main);
+ }
+ else {
+ _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main);
+ }
+ _mi_os_free(arena, arena->meta_size, &_mi_stats_main);
+ }
+ else {
+ new_max_arena = i;
+ }
+ }
+ }
+
+ // try to lower the max arena.
+ size_t expected = max_arena;
+ mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena);
+}
+

 void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) {
- MI_UNUSED(free_arenas); // todo
- mi_arenas_try_purge(force_decommit, true, stats);
+ if (free_arenas) {
+ mi_arenas_destroy();
+ }
+ mi_arenas_try_purge(force_decommit, true, stats);
 }

@@ -585,7 +629,7 @@ bool _mi_arena_contains(const void* p) {
 const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
 for (size_t i = 0; i < max_arena; i++) {
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
+ if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
 return true;
 }
 }

@@ -601,20 +645,20 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
 mi_assert_internal(arena != NULL);
 mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
 mi_assert_internal(arena->block_count > 0);
- if (arena_id != NULL) *arena_id = -1;
+ if (arena_id != NULL) { *arena_id = -1; }

 size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
 if (i >= MI_MAX_ARENAS) {
 mi_atomic_decrement_acq_rel(&mi_arena_count);
 return false;
 }
- mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
 arena->id = mi_arena_id_create(i);
- if (arena_id != NULL) *arena_id = arena->id;
+ mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
+ if (arena_id != NULL) { *arena_id = arena->id; }
 return true;
 }

-bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
+static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
 if (arena_id != NULL) *arena_id = _mi_arena_id_none();
 if (size < MI_ARENA_BLOCK_SIZE) return false;
@@ -638,11 +682,14 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
 // _mi_memzero(arena, asize);
 arena->id = _mi_arena_id_none();
 arena->exclusive = exclusive;
+ arena->owned = owned;
+ arena->meta_size = asize;
 arena->block_count = bcount;
 arena->field_count = fields;
 arena->start = (uint8_t*)start;
 arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
 arena->is_large = is_large;
+ arena->is_huge_alloc= is_huge_alloc;
 arena->is_zero_init = is_zero;
 arena->allow_decommit = allow_decommit;
 arena->purge_expire = 0;
@@ -667,8 +714,13 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
 }

+bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
+ return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id);
+}
+
+
 // Reserve a range of regular OS memory
-int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
+static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
 if (arena_id != NULL) *arena_id = _mi_arena_id_none();
 size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
@@ -676,7 +728,7 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
 bool is_zero;
 void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main);
 if (start==NULL) return ENOMEM;
- if (!mi_manage_os_memory_ex(start, size, (large || commit), large, is_zero, -1, exclusive, arena_id)) {
+ if (!mi_manage_os_memory_ex2(start, size, (large || commit), large, false, is_zero, -1, exclusive, owned, arena_id)) {
 _mi_os_free_ex(start, size, commit, &_mi_stats_main);
 _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
 return ENOMEM;
 }
 return 0;
 }

-bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
- return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL);
+// Reserve a range of regular OS memory
+int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
+ return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id);
 }
+
+
+// Manage a range of regular OS memory
+bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
+ return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
+}
+
+// Reserve a range of regular OS memory
 int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept {
 return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL);
 }

@@ -746,7 +806,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
 }
 _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);

- if (!mi_manage_os_memory_ex(p, hsize, true, true, is_zero, numa_node, exclusive, arena_id)) {
+ if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) {
 _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
 return ENOMEM;
 }

diff --git a/src/heap.c b/src/heap.c
index 08b27f3d..53923cf6 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -158,7 +158,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)

 // collect regions on program-exit (or shared library unload)
 if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
- _mi_arena_collect(false,true,&heap->tld->stats);
+ _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats);
 }
 }

diff --git a/src/init.c b/src/init.c
index 177d3034..f878789c 100644
--- a/src/init.c
+++ b/src/init.c
@@ -592,7 +592,7 @@ static void mi_cdecl mi_process_done(void) {
 // or C-runtime termination code.
 if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
 _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!)
- _mi_arena_collect(true,true,&_mi_heap_main_get()->tld->stats);
+ _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats);
 }

 if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
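
A note on the sizing logic these patches introduce: mi_arena_reserve doubles the reserve size once for every 8 arenas allocated (patch 4 additionally widens the shift to ((size_t)1 << (arena_count/8)) so the multiplier stays in size_t arithmetic), which means the reserved address space grows roughly exponentially while the arena count stays well below MI_MAX_ARENAS. The standalone sketch below mirrors only that scaling step; it is not part of the patches, and the 64 MiB base is an assumption chosen purely for illustration (mimalloc reads the real base from the mi_option_arena_reserve option, and may shrink it when _mi_os_has_virtual_reserve() is false).

  // standalone sketch: mirrors the reserve-size scaling in mi_arena_reserve.
  // The 64 MiB base below is assumed for illustration only.
  #include <stdio.h>
  #include <stddef.h>

  int main(void) {
    const size_t base = (size_t)64 * 1024 * 1024; // assumed base reserve (64 MiB)
    size_t total = 0;
    for (size_t arena_count = 0; arena_count < 32; arena_count++) {
      size_t reserve = base;
      // same rule as the patches: double the reserve once every 8 arenas
      if (arena_count >= 8 && arena_count <= 128) {
        reserve = ((size_t)1 << (arena_count / 8)) * reserve;
      }
      total += reserve;
      printf("arena %2zu: %5zu MiB reserved (%6zu MiB total)\n",
             arena_count, reserve / (1024 * 1024), total / (1024 * 1024));
    }
    return 0;
  }

With these numbers, arenas 0-7 each reserve the base amount, arenas 8-15 twice that, and so on: few, increasingly large OS reservations rather than many small ones, which is what lets patch 2 raise MI_MAX_ARENAS to 112 without frequent trips to the OS.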