diff --git a/CMakeLists.txt b/CMakeLists.txt index bcfe91d8..fca04263 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ set(mi_sources src/alloc-aligned.c src/alloc-posix.c src/arena.c + src/arena-abandoned.c src/bitmap.c src/heap.c src/init.c @@ -61,7 +62,7 @@ set(mi_sources set(mi_cflags "") set(mi_cflags_static "") # extra flags for a static library build set(mi_cflags_dynamic "") # extra flags for a shared-object library build -set(mi_defines "") +set(mi_defines "") set(mi_libraries "") # ----------------------------------------------------------------------------- @@ -327,7 +328,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM list(APPEND mi_cflags_dynamic -ftls-model=initial-exec) message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)") else() - list(APPEND mi_cflags -ftls-model=initial-exec) + list(APPEND mi_cflags -ftls-model=initial-exec) endif() endif() if(MI_OVERRIDE) @@ -345,18 +346,18 @@ endif() # extra needed libraries -# we prefer -l test over `find_library` as sometimes core libraries +# we prefer -l test over `find_library` as sometimes core libraries # like `libatomic` are not on the system path (see issue #898) -function(find_link_library libname outlibname) - check_linker_flag(C "-l${libname}" mi_has_lib${libname}) +function(find_link_library libname outlibname) + check_linker_flag(C "-l${libname}" mi_has_lib${libname}) if (mi_has_lib${libname}) message(VERBOSE "link library: -l${libname}") - set(${outlibname} ${libname} PARENT_SCOPE) + set(${outlibname} ${libname} PARENT_SCOPE) else() find_library(MI_LIBPATH libname) if (MI_LIBPATH) message(VERBOSE "link library ${libname} at ${MI_LIBPATH}") - set(${outlibname} ${MI_LIBPATH} PARENT_SCOPE) + set(${outlibname} ${MI_LIBPATH} PARENT_SCOPE) else() message(VERBOSE "link library not found: ${libname}") set(${outlibname} "" PARENT_SCOPE) @@ -365,19 +366,19 @@ function(find_link_library libname outlibname) endfunction() if(WIN32) - list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) + list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) else() find_link_library("pthread" MI_LIB_PTHREAD) - if(MI_LIB_PTHREAD) + if(MI_LIB_PTHREAD) list(APPEND mi_libraries "${MI_LIB_PTHREAD}") endif() find_link_library("rt" MI_LIB_RT) - if(MI_LIB_RT) + if(MI_LIB_RT) list(APPEND mi_libraries "${MI_LIB_RT}") endif() find_link_library("atomic" MI_LIB_ATOMIC) - if(MI_LIB_ATOMIC) - list(APPEND mi_libraries "${MI_LIB_ATOMIC}") + if(MI_LIB_ATOMIC) + list(APPEND mi_libraries "${MI_LIB_ATOMIC}") endif() endif() diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index df2a0816..f83b0dee 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -236,6 +236,16 @@ + + + + + + + + + + diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters index dd69c827..a9f66c35 100644 --- a/ide/vs2022/mimalloc-override.vcxproj.filters +++ b/ide/vs2022/mimalloc-override.vcxproj.filters @@ -58,6 +58,9 @@ Sources + + Sources + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 3e11d0fe..dfbdec64 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -213,6 +213,16 @@ + + + + + + + + + + false @@ -256,6 +266,7 @@ + diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters index a387f5a5..cb0303e0 100644 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -61,6 +61,9 @@ Sources + + Sources + @@ -90,6 +93,9 @@ Headers + + Headers + diff --git a/src/arena-abandoned.c b/src/arena-abandoned.c new file mode 100644 index 00000000..91327d5f --- /dev/null +++ b/src/arena-abandoned.c @@ -0,0 +1,342 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "arena.h" + +/* ----------------------------------------------------------- + Abandoned blocks/segments: + + _mi_arena_segment_clear_abandoned + _mi_arena_segment_mark_abandoned + + This is used to atomically abandon/reclaim segments + (and crosses the arena API but it is convenient to have here). + + Abandoned segments still have live blocks; they get reclaimed + when a thread frees a block in it, or when a thread needs a fresh + segment. + + Abandoned segments are atomically marked in the `block_abandoned` + bitmap of arenas. Any segments allocated outside arenas are put + in the sub-process `abandoned_os_list`. This list is accessed + using locks but this should be uncommon and generally uncontended. + Reclaim and visiting either scan through the `block_abandoned` + bitmaps of the arena's, or visit the `abandoned_os_list` + + A potentially nicer design is to use arena's for everything + and perhaps have virtual arena's to map OS allocated memory + but this would lack the "density" of our current arena's. TBC. +----------------------------------------------------------- */ + + +// reclaim a specific OS abandoned segment; `true` on success. +// sets the thread_id. +static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) { + mi_assert(segment->memid.memkind != MI_MEM_ARENA); + // not in an arena, remove from list of abandoned os segments + mi_subproc_t* const subproc = segment->subproc; + if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) { + return false; // failed to acquire the lock, we just give up + } + // remove atomically from the abandoned os list (if possible!) + bool reclaimed = false; + mi_segment_t* const next = segment->abandoned_os_next; + mi_segment_t* const prev = segment->abandoned_os_prev; + if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) { + #if MI_DEBUG>3 + // find ourselves in the abandoned list (and check the count) + bool found = false; + size_t count = 0; + for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) { + if (current == segment) { found = true; } + count++; + } + mi_assert_internal(found); + mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count)); + #endif + // remove (atomically) from the list and reclaim + if (prev != NULL) { prev->abandoned_os_next = next; } + else { subproc->abandoned_os_list = next; } + if (next != NULL) { next->abandoned_os_prev = prev; } + else { subproc->abandoned_os_list_tail = prev; } + segment->abandoned_os_next = NULL; + segment->abandoned_os_prev = NULL; + mi_atomic_decrement_relaxed(&subproc->abandoned_count); + mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count); + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + reclaimed = true; + } + if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); } + return reclaimed; +} + +// reclaim a specific abandoned segment; `true` on success. +// sets the thread_id. +bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) { + if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { + return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */); + } + // arena segment: use the blocks_abandoned bitmap. + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + mi_assert_internal(arena != NULL); + // reclaim atomically + bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); + if (was_marked) { + mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0); + mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count); + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + } + // mi_assert_internal(was_marked); + mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return was_marked; +} + + +// mark a specific OS segment as abandoned +static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { + mi_assert(segment->memid.memkind != MI_MEM_ARENA); + // not in an arena; we use a list of abandoned segments + mi_subproc_t* const subproc = segment->subproc; + if (!mi_lock_acquire(&subproc->abandoned_os_lock)) { + _mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment"); + // we can continue but cannot visit/reclaim such blocks.. + } + else { + // push on the tail of the list (important for the visitor) + mi_segment_t* prev = subproc->abandoned_os_list_tail; + mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL); + mi_assert_internal(segment->abandoned_os_prev == NULL); + mi_assert_internal(segment->abandoned_os_next == NULL); + if (prev != NULL) { prev->abandoned_os_next = segment; } + else { subproc->abandoned_os_list = segment; } + subproc->abandoned_os_list_tail = segment; + segment->abandoned_os_prev = prev; + segment->abandoned_os_next = NULL; + mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count); + mi_atomic_increment_relaxed(&subproc->abandoned_count); + // and release the lock + mi_lock_release(&subproc->abandoned_os_lock); + } + return; +} + +// mark a specific segment as abandoned +// clears the thread_id. +void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) +{ + mi_assert_internal(segment->used == segment->abandoned); + mi_atomic_store_release(&segment->thread_id, 0); // mark as abandoned for multi-thread free's + if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { + mi_arena_segment_os_mark_abandoned(segment); + return; + } + // segment is in an arena, mark it in the arena `blocks_abandoned` bitmap + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + mi_assert_internal(arena != NULL); + // set abandonment atomically + const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + if (was_unmarked) { mi_atomic_increment_relaxed(&segment->subproc->abandoned_count); } + mi_assert_internal(was_unmarked); + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); +} + + +/* ----------------------------------------------------------- + Iterate through the abandoned blocks/segments using a cursor. + This is used for reclaiming and abandoned block visiting. +----------------------------------------------------------- */ + +// start a cursor at a randomized arena +void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) { + mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc); + current->bitmap_idx = 0; + current->subproc = subproc; + current->visit_all = visit_all; + current->hold_visit_lock = false; + const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count); + const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count); + const size_t max_arena = mi_arena_get_count(); + if (heap != NULL && heap->arena_id != _mi_arena_id_none()) { + // for a heap that is bound to one arena, only visit that arena + current->start = mi_arena_id_index(heap->arena_id); + current->end = current->start + 1; + current->os_list_count = 0; + } + else { + // otherwise visit all starting at a random location + if (abandoned_count > abandoned_list_count && max_arena > 0) { + current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->end = current->start + max_arena; + } + else { + current->start = 0; + current->end = 0; + } + current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list + } + mi_assert_internal(current->start <= max_arena); +} + +void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) { + if (current->hold_visit_lock) { + mi_lock_release(¤t->subproc->abandoned_os_visit_lock); + current->hold_visit_lock = false; + } +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) { + // try to reclaim an abandoned segment in the arena atomically + if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL; + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); + // check that the segment belongs to our sub-process + // note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled. + // without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process. + // for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock. + if (segment->subproc != subproc) { + // it is from another sub-process, re-mark it and continue searching + const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + mi_assert_internal(was_zero); MI_UNUSED(was_zero); + return NULL; + } + else { + // success, we unabandoned a segment in our sub-process + mi_atomic_decrement_relaxed(&subproc->abandoned_count); + return segment; + } +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) { + const size_t max_arena = mi_arena_get_count(); + size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); + size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1; + // visit arena's (from the previous cursor) + for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) { + // index wraps around + size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + if (arena != NULL) { + bool has_lock = false; + // visit the abandoned fields (starting at previous_idx) + for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) { + size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); + if mi_unlikely(field != 0) { // skip zero fields quickly + // we only take the arena lock if there are actually abandoned segments present + if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) { + has_lock = (previous->visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); + if (!has_lock) { + if (previous->visit_all) { + _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock"); + } + // skip to next arena + break; + } + } + mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned)); + // visit each set bit in the field (todo: maybe use `ctz` here?) + for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { + // pre-check if the bit is set + size_t mask = ((size_t)1 << bit_idx); + if mi_unlikely((field & mask) == mask) { + previous->bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); + mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, previous->bitmap_idx); + if (segment != NULL) { + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } + return segment; + } + } + } + } + } + if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } + } + } + return NULL; +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) { + // go through the abandoned_os_list + // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`. + // The lock is released when the cursor is released. + if (!previous->hold_visit_lock) { + previous->hold_visit_lock = (previous->visit_all ? mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock) + : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); + if (!previous->hold_visit_lock) { + if (previous->visit_all) { + _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock"); + } + return NULL; // we cannot get the lock, give up + } + } + // One list entry at a time + while (previous->os_list_count > 0) { + previous->os_list_count--; + const bool has_lock = mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` + if (has_lock) { + mi_segment_t* segment = previous->subproc->abandoned_os_list; + // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) + if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { + mi_lock_release(&previous->subproc->abandoned_os_lock); + return segment; + } + // already abandoned, try again + mi_lock_release(&previous->subproc->abandoned_os_lock); + } + else { + _mi_error_message(EFAULT, "failed to acquire abandoned OS list lock during abandoned block visit\n"); + return NULL; + } + } + // done + mi_assert_internal(previous->os_list_count == 0); + return NULL; +} + + +// reclaim abandoned segments +// this does not set the thread id (so it appears as still abandoned) +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) { + if (previous->start < previous->end) { + // walk the arena + mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous); + if (segment != NULL) { return segment; } + } + // no entries in the arena's anymore, walk the abandoned OS list + mi_assert_internal(previous->start == previous->end); + return mi_arena_segment_clear_abandoned_next_list(previous); +} + + +bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + // (unfortunately) the visit_abandoned option must be enabled from the start. + // This is to avoid taking locks if abandoned list visiting is not required (as for most programs) + if (!mi_option_is_enabled(mi_option_visit_abandoned)) { + _mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON"); + return false; + } + mi_arena_field_cursor_t current; + _mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, ¤t); + mi_segment_t* segment; + bool ok = true; + while (ok && (segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL) { + ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg); + _mi_arena_segment_mark_abandoned(segment); + } + _mi_arena_field_cursor_done(¤t); + return ok; +} diff --git a/src/arena.c b/src/arena.c index 77f42825..8fa018ab 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -10,70 +10,27 @@ terms of the MIT license. A copy of the license can be found in the file large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. - -Arenas are used to for huge OS page (1GiB) reservations or for reserving -OS memory upfront which can be improve performance or is sometimes needed -on embedded devices. We can also employ this with WASI or `sbrk` systems -to reserve large arenas upfront and be able to reuse the memory more effectively. - -The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" +#include "arena.h" -#include // memset -#include // ENOMEM - -#include "bitmap.h" // atomic bitmap - -/* ----------------------------------------------------------- - Arena allocation ------------------------------------------------------------ */ - -// Block info: bit 0 contains the `in_use` bit, the upper bits the -// size in count of arena blocks. -typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB #define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss) -// A memory arena descriptor -typedef struct mi_arena_s { - mi_arena_id_t id; // arena id; 0 for non-specific - mi_memid_t memid; // memid of the memory area - _Atomic(uint8_t*) start; // the start of the memory area - size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) - size_t meta_size; // size of the arena structure itself (including its bitmaps) - mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) - int numa_node; // associated NUMA node - bool exclusive; // only allow allocations if specifically for this arena - bool is_large; // memory area consists of large- or huge OS pages (always committed) - mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited - _Atomic(size_t) search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) - mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) - mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) - // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. -} mi_arena_t; - - // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 -//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; /* ----------------------------------------------------------- Arena id's id = arena_index + 1 ----------------------------------------------------------- */ -static size_t mi_arena_id_index(mi_arena_id_t id) { +size_t mi_arena_id_index(mi_arena_id_t id) { return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); } @@ -100,6 +57,15 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i } } +size_t mi_arena_get_count(void) { + return mi_atomic_load_relaxed(&mi_arena_count); +} + +mi_arena_t* mi_arena_from_index(size_t idx) { + mi_assert_internal(idx < mi_arena_get_count()); + return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]); +} + /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the @@ -126,7 +92,7 @@ static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_ return memid; } -static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { +bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid.memkind == MI_MEM_ARENA); *arena_index = mi_arena_id_index(memid.mem.arena.id); *bitmap_index = memid.mem.arena.block_index; @@ -200,7 +166,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) { } } -static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { +void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); } @@ -290,7 +256,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no mi_assert_internal(size <= mi_arena_block_size(bcount)); // Check arena suitability - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = mi_arena_from_index(arena_index); if (arena == NULL) return NULL; if (!allow_large && arena->is_large) return NULL; if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; @@ -900,7 +866,7 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, arena->blocks_committed, arena->field_count); } if (show_abandoned) { - abandoned_total += mi_debug_show_bitmap(" ", "abandoned blocks", arena->block_count, arena->blocks_abandoned, arena->field_count); + abandoned_total += mi_debug_show_bitmap(" ", "abandoned blocks", arena->block_count, arena->blocks_abandoned, arena->field_count); } if (show_purge && arena->blocks_purge != NULL) { purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, arena->blocks_purge, arena->field_count); @@ -980,335 +946,3 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv } -/* ----------------------------------------------------------- - Abandoned blocks/segments: - - _mi_arena_segment_clear_abandoned - _mi_arena_segment_mark_abandoned - - This is used to atomically abandon/reclaim segments - (and crosses the arena API but it is convenient to have here). - - Abandoned segments still have live blocks; they get reclaimed - when a thread frees a block in it, or when a thread needs a fresh - segment. - - Abandoned segments are atomically marked in the `block_abandoned` - bitmap of arenas. Any segments allocated outside arenas are put - in the sub-process `abandoned_os_list`. This list is accessed - using locks but this should be uncommon and generally uncontended. - Reclaim and visiting either scan through the `block_abandoned` - bitmaps of the arena's, or visit the `abandoned_os_list` - - A potentially nicer design is to use arena's for everything - and perhaps have virtual arena's to map OS allocated memory - but this would lack the "density" of our current arena's. TBC. ------------------------------------------------------------ */ - -// reclaim a specific OS abandoned segment; `true` on success. -// sets the thread_id. -static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) { - mi_assert(segment->memid.memkind != MI_MEM_ARENA); - // not in an arena, remove from list of abandoned os segments - mi_subproc_t* const subproc = segment->subproc; - if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) { - return false; // failed to acquire the lock, we just give up - } - // remove atomically from the abandoned os list (if possible!) - bool reclaimed = false; - mi_segment_t* const next = segment->abandoned_os_next; - mi_segment_t* const prev = segment->abandoned_os_prev; - if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) { - #if MI_DEBUG>3 - // find ourselves in the abandoned list (and check the count) - bool found = false; - size_t count = 0; - for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) { - if (current == segment) { found = true; } - count++; - } - mi_assert_internal(found); - mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count)); - #endif - // remove (atomically) from the list and reclaim - if (prev != NULL) { prev->abandoned_os_next = next; } - else { subproc->abandoned_os_list = next; } - if (next != NULL) { next->abandoned_os_prev = prev; } - else { subproc->abandoned_os_list_tail = prev; } - segment->abandoned_os_next = NULL; - segment->abandoned_os_prev = NULL; - mi_atomic_decrement_relaxed(&subproc->abandoned_count); - mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count); - mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); - reclaimed = true; - } - if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); } - return reclaimed; -} - -// reclaim a specific abandoned segment; `true` on success. -// sets the thread_id. -bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) { - if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { - return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */); - } - // arena segment: use the blocks_abandoned bitmap. - size_t arena_idx; - size_t bitmap_idx; - mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); - mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); - mi_assert_internal(arena != NULL); - // reclaim atomically - bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); - if (was_marked) { - mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0); - mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count); - mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); - } - // mi_assert_internal(was_marked); - mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); - //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); - return was_marked; -} - - -// mark a specific OS segment as abandoned -static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { - mi_assert(segment->memid.memkind != MI_MEM_ARENA); - // not in an arena; we use a list of abandoned segments - mi_subproc_t* const subproc = segment->subproc; - if (!mi_lock_acquire(&subproc->abandoned_os_lock)) { - _mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment"); - // we can continue but cannot visit/reclaim such blocks.. - } - else { - // push on the tail of the list (important for the visitor) - mi_segment_t* prev = subproc->abandoned_os_list_tail; - mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL); - mi_assert_internal(segment->abandoned_os_prev == NULL); - mi_assert_internal(segment->abandoned_os_next == NULL); - if (prev != NULL) { prev->abandoned_os_next = segment; } - else { subproc->abandoned_os_list = segment; } - subproc->abandoned_os_list_tail = segment; - segment->abandoned_os_prev = prev; - segment->abandoned_os_next = NULL; - mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count); - mi_atomic_increment_relaxed(&subproc->abandoned_count); - // and release the lock - mi_lock_release(&subproc->abandoned_os_lock); - } - return; -} - -// mark a specific segment as abandoned -// clears the thread_id. -void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) -{ - mi_assert_internal(segment->used == segment->abandoned); - mi_atomic_store_release(&segment->thread_id, 0); // mark as abandoned for multi-thread free's - if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { - mi_arena_segment_os_mark_abandoned(segment); - return; - } - // segment is in an arena, mark it in the arena `blocks_abandoned` bitmap - size_t arena_idx; - size_t bitmap_idx; - mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); - mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); - mi_assert_internal(arena != NULL); - // set abandonment atomically - const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); - if (was_unmarked) { mi_atomic_increment_relaxed(&segment->subproc->abandoned_count); } - mi_assert_internal(was_unmarked); - mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); -} - - -/* ----------------------------------------------------------- - Iterate through the abandoned blocks/segments using a cursor. - This is used for reclaiming and abandoned block visiting. ------------------------------------------------------------ */ - -// start a cursor at a randomized arena -void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) { - mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc); - current->bitmap_idx = 0; - current->subproc = subproc; - current->visit_all = visit_all; - current->hold_visit_lock = false; - const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count); - const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count); - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - if (heap != NULL && heap->arena_id != _mi_arena_id_none()) { - // for a heap that is bound to one arena, only visit that arena - current->start = mi_arena_id_index(heap->arena_id); - current->end = current->start + 1; - current->os_list_count = 0; - } - else { - // otherwise visit all starting at a random location - if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); - current->end = current->start + max_arena; - } - else { - current->start = 0; - current->end = 0; - } - current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list - } - mi_assert_internal(current->start <= max_arena); -} - -void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) { - if (current->hold_visit_lock) { - mi_lock_release(¤t->subproc->abandoned_os_visit_lock); - current->hold_visit_lock = false; - } -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) { - // try to reclaim an abandoned segment in the arena atomically - if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL; - mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); - mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); - mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); - // check that the segment belongs to our sub-process - // note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled. - // without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process. - // for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock. - if (segment->subproc != subproc) { - // it is from another sub-process, re-mark it and continue searching - const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); - mi_assert_internal(was_zero); MI_UNUSED(was_zero); - return NULL; - } - else { - // success, we unabandoned a segment in our sub-process - mi_atomic_decrement_relaxed(&subproc->abandoned_count); - return segment; - } -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); - size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1; - // visit arena's (from the previous cursor) - for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) { - // index wraps around - size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start); - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); - if (arena != NULL) { - bool has_lock = false; - // visit the abandoned fields (starting at previous_idx) - for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) { - size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); - if mi_unlikely(field != 0) { // skip zero fields quickly - // we only take the arena lock if there are actually abandoned segments present - if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) { - has_lock = (previous->visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); - if (!has_lock) { - if (previous->visit_all) { - _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock"); - } - // skip to next arena - break; - } - } - mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned)); - // visit each set bit in the field (todo: maybe use `ctz` here?) - for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { - // pre-check if the bit is set - size_t mask = ((size_t)1 << bit_idx); - if mi_unlikely((field & mask) == mask) { - previous->bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); - mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, previous->bitmap_idx); - if (segment != NULL) { - //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); - if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } - return segment; - } - } - } - } - } - if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } - } - } - return NULL; -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) { - // go through the abandoned_os_list - // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`. - // The lock is released when the cursor is released. - if (!previous->hold_visit_lock) { - previous->hold_visit_lock = (previous->visit_all ? mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock) - : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); - if (!previous->hold_visit_lock) { - if (previous->visit_all) { - _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock"); - } - return NULL; // we cannot get the lock, give up - } - } - // One list entry at a time - while (previous->os_list_count > 0) { - previous->os_list_count--; - const bool has_lock = mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` - if (has_lock) { - mi_segment_t* segment = previous->subproc->abandoned_os_list; - // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) - if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { - mi_lock_release(&previous->subproc->abandoned_os_lock); - return segment; - } - // already abandoned, try again - mi_lock_release(&previous->subproc->abandoned_os_lock); - } - else { - _mi_error_message(EFAULT, "failed to acquire abandoned OS list lock during abandoned block visit\n"); - return NULL; - } - } - // done - mi_assert_internal(previous->os_list_count == 0); - return NULL; -} - - -// reclaim abandoned segments -// this does not set the thread id (so it appears as still abandoned) -mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) { - if (previous->start < previous->end) { - // walk the arena - mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous); - if (segment != NULL) { return segment; } - } - // no entries in the arena's anymore, walk the abandoned OS list - mi_assert_internal(previous->start == previous->end); - return mi_arena_segment_clear_abandoned_next_list(previous); -} - - -bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { - // (unfortunately) the visit_abandoned option must be enabled from the start. - // This is to avoid taking locks if abandoned list visiting is not required (as for most programs) - if (!mi_option_is_enabled(mi_option_visit_abandoned)) { - _mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON"); - return false; - } - mi_arena_field_cursor_t current; - _mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, ¤t); - mi_segment_t* segment; - bool ok = true; - while (ok && (segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL) { - ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg); - _mi_arena_segment_mark_abandoned(segment); - } - _mi_arena_field_cursor_done(¤t); - return ok; -} diff --git a/src/arena.h b/src/arena.h new file mode 100644 index 00000000..0b923d43 --- /dev/null +++ b/src/arena.h @@ -0,0 +1,63 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MI_ARENA_H +#define MI_ARENA_H + +/* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. + +Arenas are also used to for huge OS page (1GiB) reservations or for reserving +OS memory upfront which can be improve performance or is sometimes needed +on embedded devices. We can also employ this with WASI or `sbrk` systems +to reserve large arenas upfront and be able to reuse the memory more effectively. + +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +// A memory arena descriptor +typedef struct mi_arena_s { + mi_arena_id_t id; // arena id; 0 for non-specific + mi_memid_t memid; // memid of the memory area + _Atomic(uint8_t*) start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself (including its bitmaps) + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) + int numa_node; // associated NUMA node + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) + mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited + _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. +} mi_arena_t; + + +// Minimal exports for arena-abandoned. +size_t mi_arena_id_index(mi_arena_id_t id); +mi_arena_t* mi_arena_from_index(size_t idx); +size_t mi_arena_get_count(void); +void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex); +bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index); + +#endif \ No newline at end of file diff --git a/src/static.c b/src/static.c index bf025eb7..5e1b826c 100644 --- a/src/static.c +++ b/src/static.c @@ -24,6 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "alloc-aligned.c" #include "alloc-posix.c" #include "arena.c" +#include "arena-abandoned.c" #include "bitmap.c" #include "heap.c" #include "init.c" @@ -31,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "options.c" #include "os.c" #include "page.c" // includes page-queue.c -#include "random.c" +#include "random.c" #include "segment.c" #include "segment-map.c" #include "stats.c"