From db52999d8567c628154fcb002cf636bc1c1697d9 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 8 Mar 2024 09:01:26 -0800 Subject: [PATCH 01/12] update used block comment (issue #861) --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 771059bf..049e68e7 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -295,7 +295,7 @@ typedef struct mi_page_s { uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t used; // number of blocks in use (including blocks in `thread_free`) uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) From a42707908fdaa4ff5fd09788d74ad70311e9bf85 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 17 Mar 2024 06:33:40 -0700 Subject: [PATCH 02/12] fix typo, issue #866 --- doc/mimalloc-doc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 4c23a5fa..01b13904 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -168,7 +168,7 @@ void* mi_expand(void* p, size_t newsize); /// @returns A pointer to a block of \a count * \a size bytes, or \a NULL /// if out of memory or if \a count * \a size overflows. /// -/// If there is no overflow, it behaves exactly like `mi_malloc(p,count*size)`. +/// If there is no overflow, it behaves exactly like `mi_malloc(count*size)`. /// @see mi_calloc() /// @see mi_zallocn() void* mi_mallocn(size_t count, size_t size); From 18ebeb8a83386ade978b2d867fafbd3b39641826 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 17 Mar 2024 08:33:09 -0700 Subject: [PATCH 03/12] fix (benign) race condition on the page flags has_aligned flag and refactor free-ing code (issue 865) --- src/alloc.c | 323 ++++++++++++++++++++++++++++------------------------ 1 file changed, 175 insertions(+), 148 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 8a76d3d3..2576206f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -26,7 +26,9 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { +// Note: in release mode the (inlined) routine is about 7 instructions with a single test. 
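+// Callers pass `size` with MI_PADDING_SIZE already included; `zero` requests zero-initialized memory.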
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +{ mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { @@ -61,43 +63,43 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } } -#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } -#elif (MI_SECURE!=0) + #elif (MI_SECURE!=0) if (!zero) { block->next = 0; } // don't leak internal data -#endif + #endif -#if (MI_STAT>0) + #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); -#if (MI_STAT>1) + #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, normal_bins[bin], 1); -#endif + #endif } -#endif + #endif -#if MI_PADDING // && !MI_TRACK_ENABLED + #if MI_PADDING // && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); - #if (MI_DEBUG>=2) - mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - #endif + #if (MI_DEBUG>=2) + mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + #endif mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); - #if MI_PADDING_CHECK - if (!mi_page_is_huge(page)) { - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } - } + #if MI_PADDING_CHECK + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } + #endif #endif -#endif return block; } @@ -112,9 +114,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, #if (MI_PADDING) if (size == 0) { size = sizeof(void*); } #endif + mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); + #if MI_STAT>1 if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } @@ -403,115 +407,31 @@ static void mi_stat_huge_free(const mi_page_t* page) { // Free // ------------------------------------------------------ -// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) +// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); + +// regular free of a (thread local) block pointer +// fast path written carefully to prevent spilling on the stack +static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool check_full) { - // first see if the segment was abandoned and we can reclaim it - mi_segment_t* const segment = _mi_page_segment(page); - if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && - #if MI_HUGE_PAGE_ABANDON - segment->page_kind != MI_PAGE_HUGE && - #endif - mi_atomic_load_relaxed(&segment->thread_id) == 0) - { - // the segment is abandoned, try to reclaim it into our heap - if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { - mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_free(block); // recursively free as now it will be a local free in our heap - return; - } - } - - // The padding check may access the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). + // owning thread can free a block directly + if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); - _mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - - if (segment->page_kind == MI_PAGE_HUGE) { - #if MI_HUGE_PAGE_ABANDON - // huge page segments are always abandoned and can be freed immediately - mi_stat_huge_free(page); - _mi_segment_huge_page_free(segment, page, block); - return; - #else - // huge pages are special as they occupy the entire segment - // as these are large we reset the memory occupied by the page so it is available to other threads - // (as the owning thread needs to actually free the memory later). - _mi_segment_huge_page_reset(segment, page, block); - #endif - } - - #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + mi_stat_free(page, block); + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif - - // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
- mi_thread_free_t tfreex; - bool use_delayed; - mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if mi_unlikely(use_delayed) { - // unlikely: this only happens on the first concurrent free in a page that is in the full list - tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); - } - else { - // usual: directly add to page thread_free list - mi_block_set_next(page, block, mi_tf_block(tfree)); - tfreex = mi_tf_set_block(tfree,block); - } - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - - if mi_unlikely(use_delayed) { - // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); - mi_assert_internal(heap != NULL); - if (heap != NULL) { - // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - do { - mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); - } - - // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - tfreex = tfree; - mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + _mi_page_retire(page); } + else if mi_unlikely(check_full && mi_page_is_in_full(page)) { + _mi_page_unfull(page); + } } -// regular free -static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) -{ - // and push it on the free list - //const size_t bsize = mi_page_block_size(page); - if mi_likely(local) { - // owning thread can free a block directly - if mi_unlikely(mi_check_is_double_free(page, block)) return; - mi_check_padding(page, block); - #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - page->used--; - if mi_unlikely(mi_page_all_free(page)) { - _mi_page_retire(page); - } - else if mi_unlikely(mi_page_is_in_full(page)) { - _mi_page_unfull(page); - } - } - else { - _mi_free_block_mt(page,block); - } -} - - // Adjust a block that was allocated aligned, to the actual start of the block in the page. 
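// (for example, with a block size of 80 a pointer that is 100 bytes past the page start gives adjust = 100 % 80 = 20, so the block starts at p - 20)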
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); @@ -520,17 +440,27 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p return (mi_block_t*)((uintptr_t)p - adjust); } - -void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { +// free a local pointer +static void mi_decl_noinline mi_free_generic_local(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); // stat_free may access the padding - mi_track_free_size(block, mi_page_usable_size_of(page,block)); - _mi_free_block(page, is_local, block); + mi_free_block_local(page, block, true); +} + +// free a pointer owned by another thread +static void mi_decl_noinline mi_free_generic_mt(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { + mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865) + mi_free_block_mt(segment, page, block); +} + +// generic free (for runtime integration) +void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { + if (is_local) mi_free_generic_local(segment,page,p); + else mi_free_generic_mt(segment,page,p); } // Get the segment data belonging to a pointer -// This is just a single `and` in assembly but does further checks in debug mode -// (and secure mode) if this was a valid pointer. +// This is just a single `and` in release mode but does further checks in debug mode +// (and secure mode) to see if this was a valid pointer. static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { MI_UNUSED(msg); @@ -566,7 +496,7 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms } // Free a block -// fast path written carefully to prevent spilling on the stack +// Fast path written carefully to prevent register spilling on the stack void mi_free(void* p) mi_attr_noexcept { if mi_unlikely(p == NULL) return; @@ -574,31 +504,20 @@ void mi_free(void* p) mi_attr_noexcept const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); - if mi_likely(is_local) { // thread-local free? - if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) - { + if mi_likely(is_local) { // thread-local free? 
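+      // `full_aligned` covers both the `in_full` and `has_aligned` flags, so a single test handles both cases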
+ if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) + // thread-local, aligned, and not a full page mi_block_t* const block = (mi_block_t*)p; - if mi_unlikely(mi_check_is_double_free(page, block)) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); - } + mi_free_block_local(page,block,false /* no need to check if the page is full */); } else { // page is full or contains (inner) aligned blocks; use generic path - _mi_free_generic(segment, page, true, p); + mi_free_generic_local(segment, page, p); } } else { // not thread-local; use generic path - _mi_free_generic(segment, page, false, p); + mi_free_generic_mt(segment, page, p); } } @@ -623,10 +542,118 @@ bool _mi_free_delayed_block(mi_block_t* block) { _mi_page_free_collect(page, false); // and free the block (possibly freeing the page as well since used is updated) - _mi_free_block(page, true, block); + mi_free_block_local(page, block, true); return true; } +// ------------------------------------------------------ +// Multi-threaded Free (`_mt`) +// ------------------------------------------------------ + +// Push a block that is owned by another thread on its page-local thread free +// list or it's heap delayed free list. Such blocks are later collected by +// the owning thread in `_mi_free_delayed_block`. +static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block ) +{ + // Try to put the block on either the page-local thread free list, + // or the heap delayed free list (if this is the first non-local free in that page) + mi_thread_free_t tfreex; + bool use_delayed; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); + if mi_unlikely(use_delayed) { + // unlikely: this only happens on the first concurrent free in a page that is in the full list + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + } + else { + // usual: directly add to page thread_free list + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); + } + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + // If this was the first non-local free, we need to push it on the heap delayed free list instead + if mi_unlikely(use_delayed) { + // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); + mi_assert_internal(heap != NULL); + if (heap != NULL) { + // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + do { + mi_block_set_nextx(heap,block,dfree, heap->keys); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } + + // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + tfreex = tfree; + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + } +} + +// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) +{ + // first see if the segment was abandoned and if we can reclaim it into our thread + if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + #if MI_HUGE_PAGE_ABANDON + segment->page_kind != MI_PAGE_HUGE && + #endif + mi_atomic_load_relaxed(&segment->thread_id) == 0) + { + // the segment is abandoned, try to reclaim it into our heap + if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { + mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_free(block); // recursively free as now it will be a local free in our heap + return; + } + } + + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). + mi_check_padding(page, block); + + // adjust stats (after padding check and potential recursive `mi_free` above) + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page,block)); + + // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + _mi_padding_shrink(page, block, sizeof(mi_block_t)); + + if (segment->page_kind == MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + // huge page segments are always abandoned and can be freed immediately + mi_stat_huge_free(page); + _mi_segment_huge_page_free(segment, page, block); + return; + #else + // huge pages are special as they occupy the entire segment + // as these are large we reset the memory occupied by the page so it is available to other threads + // (as the owning thread needs to actually free the memory later). 
+ _mi_segment_huge_page_reset(segment, page, block); + #endif + } + else { + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + } + + // and finally free the actual block by pushing it on the owning heap + // thread_delayed free list (or heap delayed free list) + mi_free_block_delayed_mt(page,block); +} + + +// ------------------------------------------------------ +// Usable size +// ------------------------------------------------------ + // Bytes available in a block mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); From 355f44f373e765f4eb2bbfc91ade0735525e3a31 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 17 Mar 2024 08:44:11 -0700 Subject: [PATCH 04/12] split free routines in a separate file --- src/alloc.c | 511 +-------------------------------------------------- src/free.c | 519 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 520 insertions(+), 510 deletions(-) create mode 100644 src/free.c diff --git a/src/alloc.c b/src/alloc.c index 2576206f..76d68d13 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -18,6 +18,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_IN_ALLOC_C #include "alloc-override.c" +#include "free.c" #undef MI_IN_ALLOC_C // ------------------------------------------------------ @@ -194,516 +195,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept } -// ------------------------------------------------------ -// Check for double free in secure and debug mode -// This is somewhat expensive so only enabled for secure mode 4 -// ------------------------------------------------------ - -#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) -// linear check if the free list contains a specific element -static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { - while (list != NULL) { - if (elem==list) return true; - list = mi_block_next(page, list); - } - return false; -} - -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { - // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_page_thread_free(page), block)) - { - _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); - return true; - } - return false; -} - -#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } - -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - bool is_double_free = false; - mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { - // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
- // (continue in separate function to improve code generation) - is_double_free = mi_check_is_double_freex(page, block); - } - return is_double_free; -} -#else -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); - return false; -} -#endif - -// --------------------------------------------------------------------------- -// Check for heap block overflow by setting up padding at the end of the block -// --------------------------------------------------------------------------- - -#if MI_PADDING // && !MI_TRACK_ENABLED -static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { - *bsize = mi_page_usable_block_size(page); - const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); - *delta = padding->delta; - uint32_t canary = padding->canary; - uintptr_t keys[2]; - keys[0] = page->keys[0]; - keys[1] = page->keys[1]; - bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); - mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); - return ok; -} - -// Return the exact usable size of a block. -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); mi_assert_internal(delta <= bsize); - return (ok ? bsize - delta : 0); -} - -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. -void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); - padding->delta = (uint32_t)new_delta; - mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); -} -#else -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(block); - return mi_page_usable_block_size(page); -} - -void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - MI_UNUSED(page); - MI_UNUSED(block); - MI_UNUSED(min_size); -} -#endif - -#if MI_PADDING && MI_PADDING_CHECK - -static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - *size = *wrong = bsize; - if (!ok) return false; - mi_assert_internal(bsize >= delta); - *size = bsize - delta; - if (!mi_page_is_huge(page)) { - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - mi_track_mem_defined(fill, maxpad); - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - ok = false; - break; - } - } - mi_track_mem_noaccess(fill, maxpad); - } - return ok; -} - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - size_t size; - size_t wrong; - if (!mi_verify_padding(page,block,&size,&wrong)) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); - } -} - -#else - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); -} - -#endif - -// only maintain stats for smaller objects if requested -#if (MI_STAT>0) -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) - MI_UNUSED(block); -#endif - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) - mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif - } -#if !MI_HUGE_PAGE_ABANDON - else { - const size_t bpsize = mi_page_block_size(page); - if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bpsize); - } - else { - mi_heap_stat_decrease(heap, giant, bpsize); - } - } -#endif -} -#else -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); MI_UNUSED(block); -} -#endif - -#if MI_HUGE_PAGE_ABANDON -#if (MI_STAT>0) -// maintain stats for huge objects -static void mi_stat_huge_free(const mi_page_t* page) { - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` - if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bsize); - } - else { - mi_heap_stat_decrease(heap, giant, bsize); - } -} -#else -static void mi_stat_huge_free(const mi_page_t* page) { - MI_UNUSED(page); -} -#endif -#endif - -// ------------------------------------------------------ -// Free -// ------------------------------------------------------ - -// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); - -// regular free of a (thread local) block pointer -// fast path written carefully to prevent spilling on the stack -static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool check_full) -{ - // owning thread can free a block directly - if mi_unlikely(mi_check_is_double_free(page, block)) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); - } - else if mi_unlikely(check_full && 
mi_page_is_in_full(page)) { - _mi_page_unfull(page); - } -} - -// Adjust a block that was allocated aligned, to the actual start of the block in the page. -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { - mi_assert_internal(page!=NULL && p!=NULL); - const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - const size_t adjust = (diff % mi_page_block_size(page)); - return (mi_block_t*)((uintptr_t)p - adjust); -} - -// free a local pointer -static void mi_decl_noinline mi_free_generic_local(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { - mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_free_block_local(page, block, true); -} - -// free a pointer owned by another thread -static void mi_decl_noinline mi_free_generic_mt(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { - mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865) - mi_free_block_mt(segment, page, block); -} - -// generic free (for runtime integration) -void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { - if (is_local) mi_free_generic_local(segment,page,p); - else mi_free_generic_mt(segment,page,p); -} - -// Get the segment data belonging to a pointer -// This is just a single `and` in release mode but does further checks in debug mode -// (and secure mode) to see if this was a valid pointer. -static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) -{ - MI_UNUSED(msg); - mi_assert(p != NULL); - -#if (MI_DEBUG>0) - if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { - _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); - return NULL; - } -#endif - - mi_segment_t* const segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); - -#if (MI_DEBUG>0) - if mi_unlikely(!mi_is_in_heap_region(p)) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } - } -#endif -#if (MI_DEBUG>0 || MI_SECURE>=4) - if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { - _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); - return NULL; - } -#endif - - return segment; -} - -// Free a block -// Fast path written carefully to prevent register spilling on the stack -void mi_free(void* p) mi_attr_noexcept -{ - if mi_unlikely(p == NULL) return; - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_page_t* const page = _mi_segment_page_of(segment, p); - - if mi_likely(is_local) { // thread-local free? 
- if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) - // thread-local, aligned, and not a full page - mi_block_t* const block = (mi_block_t*)p; - mi_free_block_local(page,block,false /* no need to check if the page is full */); - } - else { - // page is full or contains (inner) aligned blocks; use generic path - mi_free_generic_local(segment, page, p); - } - } - else { - // not thread-local; use generic path - mi_free_generic_mt(segment, page, p); - } -} - -// return true if successful -bool _mi_free_delayed_block(mi_block_t* block) { - // get segment and page - const mi_segment_t* const segment = _mi_ptr_segment(block); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* const page = _mi_segment_page_of(segment, block); - - // Clear the no-delayed flag so delayed freeing is used again for this page. - // This must be done before collecting the free lists on this page -- otherwise - // some blocks may end up in the page `thread_free` list with no blocks in the - // heap `thread_delayed_free` list which may cause the page to be never freed! - // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) - if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { - return false; - } - - // collect all other non-local frees to ensure up-to-date `used` count - _mi_page_free_collect(page, false); - - // and free the block (possibly freeing the page as well since used is updated) - mi_free_block_local(page, block, true); - return true; -} - -// ------------------------------------------------------ -// Multi-threaded Free (`_mt`) -// ------------------------------------------------------ - -// Push a block that is owned by another thread on its page-local thread free -// list or it's heap delayed free list. Such blocks are later collected by -// the owning thread in `_mi_free_delayed_block`. -static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block ) -{ - // Try to put the block on either the page-local thread free list, - // or the heap delayed free list (if this is the first non-local free in that page) - mi_thread_free_t tfreex; - bool use_delayed; - mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if mi_unlikely(use_delayed) { - // unlikely: this only happens on the first concurrent free in a page that is in the full list - tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); - } - else { - // usual: directly add to page thread_free list - mi_block_set_next(page, block, mi_tf_block(tfree)); - tfreex = mi_tf_set_block(tfree,block); - } - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - - // If this was the first non-local free, we need to push it on the heap delayed free list instead - if mi_unlikely(use_delayed) { - // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); - mi_assert_internal(heap != NULL); - if (heap != NULL) { - // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - do { - mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); - } - - // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - tfreex = tfree; - mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - } -} - -// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) -{ - // first see if the segment was abandoned and if we can reclaim it into our thread - if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && - #if MI_HUGE_PAGE_ABANDON - segment->page_kind != MI_PAGE_HUGE && - #endif - mi_atomic_load_relaxed(&segment->thread_id) == 0) - { - // the segment is abandoned, try to reclaim it into our heap - if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { - mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_free(block); // recursively free as now it will be a local free in our heap - return; - } - } - - // The padding check may access the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). - mi_check_padding(page, block); - - // adjust stats (after padding check and potential recursive `mi_free` above) - mi_stat_free(page, block); // stat_free may access the padding - mi_track_free_size(block, mi_page_usable_size_of(page,block)); - - // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - _mi_padding_shrink(page, block, sizeof(mi_block_t)); - - if (segment->page_kind == MI_PAGE_HUGE) { - #if MI_HUGE_PAGE_ABANDON - // huge page segments are always abandoned and can be freed immediately - mi_stat_huge_free(page); - _mi_segment_huge_page_free(segment, page, block); - return; - #else - // huge pages are special as they occupy the entire segment - // as these are large we reset the memory occupied by the page so it is available to other threads - // (as the owning thread needs to actually free the memory later). 
- _mi_segment_huge_page_reset(segment, page, block); - #endif - } - else { - #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - #endif - } - - // and finally free the actual block by pushing it on the owning heap - // thread_delayed free list (or heap delayed free list) - mi_free_block_delayed_mt(page,block); -} - - -// ------------------------------------------------------ -// Usable size -// ------------------------------------------------------ - -// Bytes available in a block -mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { - const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); - const size_t size = mi_page_usable_size_of(page, block); - const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; - mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); - return (size - adjust); -} - -static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { - if (p == NULL) return 0; - const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - const mi_page_t* const page = _mi_segment_page_of(segment, p); - if mi_likely(!mi_page_has_aligned(page)) { - const mi_block_t* block = (const mi_block_t*)p; - return mi_page_usable_size_of(page, block); - } - else { - // split out to separate routine for improved code generation - return mi_page_usable_aligned_size_of(segment, page, p); - } -} - -mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { - return _mi_usable_size(p, "mi_usable_size"); -} - - -// ------------------------------------------------------ -// Allocation extensions -// ------------------------------------------------------ - -void mi_free_size(void* p, size_t size) mi_attr_noexcept { - MI_UNUSED_RELEASE(size); - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); - mi_free(p); -} - -void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { - MI_UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free_size(p,size); -} - -void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { - MI_UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free(p); -} - mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; diff --git a/src/free.c b/src/free.c new file mode 100644 index 00000000..7761cb6a --- /dev/null +++ b/src/free.c @@ -0,0 +1,519 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#if !defined(MI_IN_ALLOC_C) +#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)" +#endif + +// ------------------------------------------------------ +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, mi_page_thread_free(page), block)) + { + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + return true; + } + return false; +} + +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? + { + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? + // (continue in separate function to improve code generation) + is_double_free = mi_check_is_double_freex(page, block); + } + return is_double_free; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); + return false; +} +#endif + +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + +#if MI_PADDING // && !MI_TRACK_ENABLED +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + *delta = padding->delta; + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? 
bsize - delta : 0); +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + *size = *wrong = bsize; + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } + } + mi_track_mem_noaccess(fill, maxpad); + } + return ok; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +#else + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); +} + +#endif + +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { +#if (MI_STAT < 2) + MI_UNUSED(block); +#endif + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); +#if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc, usize); +#endif + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize); +#if (MI_STAT > 1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); +#endif + } +#if !MI_HUGE_PAGE_ABANDON + else { + const size_t bpsize = mi_page_block_size(page); + if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bpsize); + } + else { + mi_heap_stat_decrease(heap, giant, bpsize); + } + } +#endif +} +#else +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); MI_UNUSED(block); +} +#endif + +#if MI_HUGE_PAGE_ABANDON +#if (MI_STAT>0) +// maintain stats for huge objects +static void mi_stat_huge_free(const mi_page_t* page) { + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` + if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bsize); + } + else { + mi_heap_stat_decrease(heap, giant, bsize); + } +} +#else +static void mi_stat_huge_free(const mi_page_t* page) { + MI_UNUSED(page); +} +#endif +#endif + +// ------------------------------------------------------ +// Free +// ------------------------------------------------------ + +// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); + +// regular free of a (thread local) block pointer +// fast path written carefully to prevent spilling on the stack +static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool check_full) +{ + // owning thread can free a block directly + if mi_unlikely(mi_check_is_double_free(page, block)) return; + mi_check_padding(page, block); + mi_stat_free(page, block); + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + _mi_page_retire(page); + } + else if mi_unlikely(check_full && 
mi_page_is_in_full(page)) { + _mi_page_unfull(page); + } +} + +// Adjust a block that was allocated aligned, to the actual start of the block in the page. +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { + mi_assert_internal(page!=NULL && p!=NULL); + const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + const size_t adjust = (diff % mi_page_block_size(page)); + return (mi_block_t*)((uintptr_t)p - adjust); +} + +// free a local pointer +static void mi_decl_noinline mi_free_generic_local(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { + mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + mi_free_block_local(page, block, true); +} + +// free a pointer owned by another thread +static void mi_decl_noinline mi_free_generic_mt(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { + mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865) + mi_free_block_mt(segment, page, block); +} + +// generic free (for runtime integration) +void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { + if (is_local) mi_free_generic_local(segment,page,p); + else mi_free_generic_mt(segment,page,p); +} + +// Get the segment data belonging to a pointer +// This is just a single `and` in release mode but does further checks in debug mode +// (and secure mode) to see if this was a valid pointer. +static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +{ + MI_UNUSED(msg); + mi_assert(p != NULL); + +#if (MI_DEBUG>0) + if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { + _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); + return NULL; + } +#endif + + mi_segment_t* const segment = _mi_ptr_segment(p); + mi_assert_internal(segment != NULL); + +#if (MI_DEBUG>0) + if mi_unlikely(!mi_is_in_heap_region(p)) { + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } + } +#endif +#if (MI_DEBUG>0 || MI_SECURE>=4) + if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { + _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); + return NULL; + } +#endif + + return segment; +} + +// Free a block +// Fast path written carefully to prevent register spilling on the stack +void mi_free(void* p) mi_attr_noexcept +{ + if mi_unlikely(p == NULL) return; + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); + const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_page_t* const page = _mi_segment_page_of(segment, p); + + if mi_likely(is_local) { // thread-local free? 
+ if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) + // thread-local, aligned, and not a full page + mi_block_t* const block = (mi_block_t*)p; + mi_free_block_local(page,block,false /* no need to check if the page is full */); + } + else { + // page is full or contains (inner) aligned blocks; use generic path + mi_free_generic_local(segment, page, p); + } + } + else { + // not thread-local; use generic path + mi_free_generic_mt(segment, page, p); + } +} + +// return true if successful +bool _mi_free_delayed_block(mi_block_t* block) { + // get segment and page + const mi_segment_t* const segment = _mi_ptr_segment(block); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(_mi_thread_id() == segment->thread_id); + mi_page_t* const page = _mi_segment_page_of(segment, block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! + // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { + return false; + } + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + mi_free_block_local(page, block, true); + return true; +} + +// ------------------------------------------------------ +// Multi-threaded Free (`_mt`) +// ------------------------------------------------------ + +// Push a block that is owned by another thread on its page-local thread free +// list or it's heap delayed free list. Such blocks are later collected by +// the owning thread in `_mi_free_delayed_block`. +static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block ) +{ + // Try to put the block on either the page-local thread free list, + // or the heap delayed free list (if this is the first non-local free in that page) + mi_thread_free_t tfreex; + bool use_delayed; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); + if mi_unlikely(use_delayed) { + // unlikely: this only happens on the first concurrent free in a page that is in the full list + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + } + else { + // usual: directly add to page thread_free list + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); + } + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + // If this was the first non-local free, we need to push it on the heap delayed free list instead + if mi_unlikely(use_delayed) { + // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); + mi_assert_internal(heap != NULL); + if (heap != NULL) { + // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + do { + mi_block_set_nextx(heap,block,dfree, heap->keys); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } + + // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + tfreex = tfree; + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + } +} + +// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) +{ + // first see if the segment was abandoned and if we can reclaim it into our thread + if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + #if MI_HUGE_PAGE_ABANDON + segment->page_kind != MI_PAGE_HUGE && + #endif + mi_atomic_load_relaxed(&segment->thread_id) == 0) + { + // the segment is abandoned, try to reclaim it into our heap + if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { + mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_free(block); // recursively free as now it will be a local free in our heap + return; + } + } + + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). + mi_check_padding(page, block); + + // adjust stats (after padding check and potential recursive `mi_free` above) + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page,block)); + + // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + _mi_padding_shrink(page, block, sizeof(mi_block_t)); + + if (segment->page_kind == MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + // huge page segments are always abandoned and can be freed immediately + mi_stat_huge_free(page); + _mi_segment_huge_page_free(segment, page, block); + return; + #else + // huge pages are special as they occupy the entire segment + // as these are large we reset the memory occupied by the page so it is available to other threads + // (as the owning thread needs to actually free the memory later). 
+ _mi_segment_huge_page_reset(segment, page, block); + #endif + } + else { + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + } + + // and finally free the actual block by pushing it on the owning heap + // thread_delayed free list (or heap delayed free list) + mi_free_block_delayed_mt(page,block); +} + + +// ------------------------------------------------------ +// Usable size +// ------------------------------------------------------ + +// Bytes available in a block +mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { + const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); + const size_t size = mi_page_usable_size_of(page, block); + const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + return (size - adjust); +} + +static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + if (p == NULL) return 0; + const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); + const mi_page_t* const page = _mi_segment_page_of(segment, p); + if mi_likely(!mi_page_has_aligned(page)) { + const mi_block_t* block = (const mi_block_t*)p; + return mi_page_usable_size_of(page, block); + } + else { + // split out to separate routine for improved code generation + return mi_page_usable_aligned_size_of(segment, page, p); + } +} + +mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { + return _mi_usable_size(p, "mi_usable_size"); +} + + +// ------------------------------------------------------ +// Allocation extensions +// ------------------------------------------------------ + +void mi_free_size(void* p, size_t size) mi_attr_noexcept { + MI_UNUSED_RELEASE(size); + mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + mi_free(p); +} + +void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free_size(p,size); +} + +void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free(p); +} From cc809b0cd4b99a564b00224cb2e66e4d881f62cd Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 18 Mar 2024 01:40:03 -0700 Subject: [PATCH 05/12] take 16 bits from used field to create a fast unalign path --- include/mimalloc/internal.h | 2 +- include/mimalloc/types.h | 24 +++++++++++++----------- src/alloc.c | 2 +- src/free.c | 17 +++++++++++------ src/init.c | 18 ++++++++++-------- src/page.c | 12 +++++++++++- 6 files changed, 47 insertions(+), 28 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 96f3922e..72544c3d 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -202,7 +202,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); -void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +void _mi_free_generic(mi_segment_t* segment, mi_page_t* 
page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); // "libc.c" diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 049e68e7..c624e5b4 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -273,7 +273,7 @@ typedef uintptr_t mi_thread_free_t; // and 12 are still good for address calculation) // - To limit the structure size, the `xblock_size` is 32-bits only; for // blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size -// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is @@ -295,19 +295,21 @@ typedef struct mi_page_s { uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) - uint32_t used; // number of blocks in use (including blocks in `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - + uint16_t used; // number of blocks in use (including blocks in `thread_free`) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift == block_size)` (used for quick block start finding for aligned pointers) + uint8_t block_offset_adj; // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0)` (used for quick block start finding for aligned pointers) + uint32_t xblock_size; // size available in each block (always `>0`) + #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary - #endif + #endif _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + struct mi_page_s* next; // next page owned by the heap with the same `block_size` + struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` } mi_page_t; @@ -386,8 +388,8 @@ typedef struct mi_segment_s { uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -446,8 +448,6 @@ typedef struct mi_padding_s { // A heap owns a set of pages. 
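// note (editorial, not part of the patch): the hunk below moves the two large arrays
// (`pages_free_direct` and `pages`) from the front of `mi_heap_s` to its end --
// presumably so that the scalar fields touched on hot paths (`thread_delayed_free`,
// `thread_id`, the keys) sit at small offsets; the set of fields itself is unchanged.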
struct mi_heap_s { mi_tld_t* tld; - mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. - mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) @@ -459,6 +459,8 @@ struct mi_heap_s { size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") }; diff --git a/src/alloc.c b/src/alloc.c index 76d68d13..3a38a226 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -37,8 +37,8 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list - page->used++; page->free = mi_block_next(page, block); + page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); #if MI_DEBUG>3 if (page->free_is_zero) { diff --git a/src/free.c b/src/free.c index 7761cb6a..d0fcf133 100644 --- a/src/free.c +++ b/src/free.c @@ -249,25 +249,30 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool // Adjust a block that was allocated aligned, to the actual start of the block in the page. mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - const size_t adjust = (diff % mi_page_block_size(page)); + const size_t diff = (mi_likely(page->block_offset_adj != 0) + ? (uint8_t*)p - (uint8_t*)page - 8*(page->block_offset_adj-1) + : (uint8_t*)p - _mi_page_start(segment, page, NULL)); + + const size_t adjust = (mi_likely(page->block_size_shift != 0) + ? diff & (((size_t)1 << page->block_size_shift) - 1) + : diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } // free a local pointer -static void mi_decl_noinline mi_free_generic_local(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { +static void mi_decl_noinline mi_free_generic_local(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_free_block_local(page, block, true); } // free a pointer owned by another thread -static void mi_decl_noinline mi_free_generic_mt(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { +static void mi_decl_noinline mi_free_generic_mt(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865) mi_free_block_mt(segment, page, block); } // generic free (for runtime integration) -void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { +void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { if (is_local) mi_free_generic_local(segment,page,p); else mi_free_generic_mt(segment,page,p); } @@ -469,7 +474,7 @@ static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* // ------------------------------------------------------ // Bytes available in a block -mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { +static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); const size_t size = mi_page_usable_size_of(page, block); const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; diff --git a/src/init.c b/src/init.c index 7ec6e01e..11471760 100644 --- a/src/init.c +++ b/src/init.c @@ -21,9 +21,11 @@ const mi_page_t _mi_page_empty = { false, // is_zero 0, // retire_expire NULL, // free - 0, // used - 0, // xblock_size NULL, // local_free + 0, // used + 0, // block size shift + 0, // block offset adj + 0, // xblock_size #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif @@ -93,8 +95,6 @@ const mi_page_t _mi_page_empty = { mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie @@ -104,7 +104,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next - false + false, + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY }; @@ -130,8 +132,6 @@ static mi_tld_t tld_main = { mi_heap_t _mi_heap_main = { &tld_main, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie @@ -141,7 +141,9 @@ mi_heap_t _mi_heap_main = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap - false // can reclaim + false, // can reclaim + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. diff --git a/src/page.c b/src/page.c index 5fefc3b5..5930a430 100644 --- a/src/page.c +++ b/src/page.c @@ -660,7 +660,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_page_set_heap(page, heap); size_t page_size; const void* page_start = _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - MI_UNUSED(page_start); mi_track_mem_noaccess(page_start,page_size); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); @@ -677,6 +676,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); } #endif + if (_mi_is_power_of_two(block_size) && block_size > 0) { + page->block_size_shift = (uint32_t)(mi_ctz((uintptr_t)block_size)); + } + const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page; + const ptrdiff_t start_adjust = start_offset % block_size; + if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) { + page->block_offset_adj = (uint8_t)((start_adjust/8) + 1); + } + mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); @@ -690,6 +698,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif + mi_assert_internal(page->block_size_shift == 0 || (block_size == (1UL << page->block_size_shift))); + mi_assert_internal(page->block_offset_adj == 0 || (((uint8_t*)page_start - (uint8_t*)page - 8*(page->block_offset_adj-1))) % block_size == 0); mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list From d08b4219e9bdbc83c92cd50e1d54b24a939a2271 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 18 Mar 2024 03:32:06 -0700 Subject: [PATCH 06/12] improve used decrement code gen --- src/free.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/free.c b/src/free.c index d0fcf133..6cdac123 100644 --- a/src/free.c +++ b/src/free.c @@ -238,7 +238,9 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned mi_block_set_next(page, block, page->local_free); page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + const uint32_t used = page->used - 1; + page->used = used; + if mi_unlikely(used == 0) { // generates better code than: --page->used == 0 _mi_page_retire(page); } else if mi_unlikely(check_full && mi_page_is_in_full(page)) { From 34d37fa2048f3bd65d017bf4f295b3b5b97ae98e Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 18 Mar 2024 03:47:54 -0700 Subject: [PATCH 07/12] nicer organisation of free.c --- src/free.c | 431 +++++++++++++++++++++++++++-------------------------- 1 file changed, 222 insertions(+), 209 deletions(-) diff --git a/src/free.c b/src/free.c index 6cdac123..e7547aa3 100644 --- a/src/free.c +++ b/src/free.c @@ -8,214 +8,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)" #endif -// ------------------------------------------------------ -// Check for double free in secure and debug mode -// This is somewhat expensive so only enabled for secure mode 4 -// ------------------------------------------------------ +// forward declarations +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block); +static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block); +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block); +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block); -#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) -// linear check if the free list contains a specific element -static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { - while (list != NULL) { - if (elem==list) return true; - list = mi_block_next(page, list); - } - return false; -} - -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { - // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_page_thread_free(page), block)) - { - _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); - return true; - } - return false; -} - -#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } - -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - bool is_double_free = false; - mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { - // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? - // (continue in separate function to improve code generation) - is_double_free = mi_check_is_double_freex(page, block); - } - return is_double_free; -} -#else -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); - return false; -} -#endif - -// --------------------------------------------------------------------------- -// Check for heap block overflow by setting up padding at the end of the block -// --------------------------------------------------------------------------- - -#if MI_PADDING // && !MI_TRACK_ENABLED -static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { - *bsize = mi_page_usable_block_size(page); - const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); - *delta = padding->delta; - uint32_t canary = padding->canary; - uintptr_t keys[2]; - keys[0] = page->keys[0]; - keys[1] = page->keys[1]; - bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); - mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); - return ok; -} - -// Return the exact usable size of a block. 
-static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); mi_assert_internal(delta <= bsize); - return (ok ? bsize - delta : 0); -} - -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. -void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); - padding->delta = (uint32_t)new_delta; - mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); -} -#else -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(block); - return mi_page_usable_block_size(page); -} - -void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - MI_UNUSED(page); - MI_UNUSED(block); - MI_UNUSED(min_size); -} -#endif - -#if MI_PADDING && MI_PADDING_CHECK - -static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - *size = *wrong = bsize; - if (!ok) return false; - mi_assert_internal(bsize >= delta); - *size = bsize - delta; - if (!mi_page_is_huge(page)) { - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - mi_track_mem_defined(fill, maxpad); - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - ok = false; - break; - } - } - mi_track_mem_noaccess(fill, maxpad); - } - return ok; -} - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - size_t size; - size_t wrong; - if (!mi_verify_padding(page,block,&size,&wrong)) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); - } -} - -#else - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); -} - -#endif - -// only maintain stats for smaller objects if requested -#if (MI_STAT>0) -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) - MI_UNUSED(block); -#endif - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) - mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif - } -#if !MI_HUGE_PAGE_ABANDON - else { - const size_t bpsize = mi_page_block_size(page); - if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bpsize); - } - else { - mi_heap_stat_decrease(heap, giant, bpsize); - } - } -#endif -} -#else -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); MI_UNUSED(block); -} -#endif - -#if MI_HUGE_PAGE_ABANDON -#if (MI_STAT>0) -// maintain stats for huge objects -static void mi_stat_huge_free(const mi_page_t* page) { - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` - if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bsize); - } - else { - mi_heap_stat_decrease(heap, giant, bsize); - } -} -#else -static void mi_stat_huge_free(const mi_page_t* page) { - MI_UNUSED(page); -} -#endif -#endif // ------------------------------------------------------ // Free @@ -417,8 +215,12 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block } } +#if MI_HUGE_PAGE_ABANDON +static void mi_stat_huge_free(const mi_page_t* page); +#endif + // Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) +static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // first see if the segment was abandoned and if we can reclaim it into our thread if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && @@ -504,7 +306,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { // ------------------------------------------------------ -// Allocation extensions +// Free variants // ------------------------------------------------------ void mi_free_size(void* p, size_t size) mi_attr_noexcept { @@ -524,3 +326,214 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { mi_assert(((uintptr_t)p % alignment) == 0); mi_free(p); } + + +// ------------------------------------------------------ +// Check for double free in secure and 
debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, mi_page_thread_free(page), block)) + { + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + return true; + } + return false; +} + +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? + { + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? + // (continue in separate function to improve code generation) + is_double_free = mi_check_is_double_freex(page, block); + } + return is_double_free; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); + return false; +} +#endif + + +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + +#if MI_PADDING // && !MI_TRACK_ENABLED +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + *delta = padding->delta; + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. 
If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + *size = *wrong = bsize; + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } + } + mi_track_mem_noaccess(fill, maxpad); + } + return ok; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +#else + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); +} + +#endif + +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { +#if (MI_STAT < 2) + MI_UNUSED(block); +#endif + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); +#if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc, usize); +#endif + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize); +#if (MI_STAT > 1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); +#endif + } +#if !MI_HUGE_PAGE_ABANDON + else { + const size_t bpsize = mi_page_block_size(page); + if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bpsize); + } + else { + mi_heap_stat_decrease(heap, giant, bpsize); + } + } +#endif +} +#else +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); MI_UNUSED(block); +} +#endif + +#if MI_HUGE_PAGE_ABANDON +#if (MI_STAT>0) +// maintain stats for huge 
objects +static void mi_stat_huge_free(const mi_page_t* page) { + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` + if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bsize); + } + else { + mi_heap_stat_decrease(heap, giant, bsize); + } +} +#else +static void mi_stat_huge_free(const mi_page_t* page) { + MI_UNUSED(page); +} +#endif +#endif From 9085596eab602d868129ec44b62a8f6ea7c40d16 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 23 Mar 2024 08:57:29 -0700 Subject: [PATCH 08/12] update comment --- include/mimalloc/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index c624e5b4..7ab0a325 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -297,8 +297,8 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) uint16_t used; // number of blocks in use (including blocks in `thread_free`) - uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift == block_size)` (used for quick block start finding for aligned pointers) - uint8_t block_offset_adj; // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0)` (used for quick block start finding for aligned pointers) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift == block_size)` (used for fast path in `free.c:_mi_page_ptr_unalign`) + uint8_t block_offset_adj; // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0)` (used for fast path in `free.c:_mi_page_ptr_unalign`) uint32_t xblock_size; // size available in each block (always `>0`) #if (MI_ENCODE_FREELIST || MI_PADDING) From 60c4a0fe5608603c2a7c0d45eaa1c73ca830c275 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 08:10:35 -0700 Subject: [PATCH 09/12] fix compilation warnings for new uint16_t size for used field --- include/mimalloc/internal.h | 10 ++--- include/mimalloc/types.h | 12 +++--- src/free.c | 75 ++++++++++++++++++++++--------------- src/init.c | 16 ++++---- src/page.c | 8 ++-- 5 files changed, 68 insertions(+), 53 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 72544c3d..3aad1ba4 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) -#define mi_decl_weak +#define mi_decl_weak #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread @@ -40,7 +40,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align -#define mi_decl_weak +#define mi_decl_weak #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) @@ -91,7 +91,7 @@ void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); @@ -132,8 +132,8 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment); size_t _mi_arena_segment_abandoned_count(void); typedef struct mi_arena_field_cursor_s { // abstract - mi_arena_id_t start; - int count; + mi_arena_id_t start; + int count; size_t bitmap_idx; } mi_arena_field_cursor_t; void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 7ab0a325..ad0aabe9 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -300,14 +300,14 @@ typedef struct mi_page_s { uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift == block_size)` (used for fast path in `free.c:_mi_page_ptr_unalign`) uint8_t block_offset_adj; // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0)` (used for fast path in `free.c:_mi_page_ptr_unalign`) uint32_t xblock_size; // size available in each block (always `>0`) - + #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary - #endif + #endif _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - + struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` } mi_page_t; @@ -373,7 +373,7 @@ typedef struct mi_segment_s { bool allow_decommit; bool allow_purge; size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` - + // segment fields struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` struct mi_segment_s* prev; @@ -450,7 +450,7 @@ struct mi_heap_s { mi_tld_t* tld; _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too - mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -460,7 +460,7 @@ struct mi_heap_s { mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
- mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") }; diff --git a/src/free.c b/src/free.c index e7547aa3..4e031028 100644 --- a/src/free.c +++ b/src/free.c @@ -6,6 +6,11 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #if !defined(MI_IN_ALLOC_C) #error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)" +// add includes help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() #endif // forward declarations @@ -26,7 +31,7 @@ static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* // fast path written carefully to prevent spilling on the stack static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool check_full) { - // owning thread can free a block directly + // checks if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); mi_stat_free(page, block); @@ -34,47 +39,57 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned + + // actual free: push on the local free list mi_block_set_next(page, block, page->local_free); page->local_free = block; - const uint32_t used = page->used - 1; - page->used = used; - if mi_unlikely(used == 0) { // generates better code than: --page->used == 0 + if mi_unlikely(--page->used == 0) { _mi_page_retire(page); } else if mi_unlikely(check_full && mi_page_is_in_full(page)) { _mi_page_unfull(page); - } + } } // Adjust a block that was allocated aligned, to the actual start of the block in the page. mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - const size_t diff = (mi_likely(page->block_offset_adj != 0) - ? (uint8_t*)p - (uint8_t*)page - 8*(page->block_offset_adj-1) - : (uint8_t*)p - _mi_page_start(segment, page, NULL)); - - const size_t adjust = (mi_likely(page->block_size_shift != 0) - ? diff & (((size_t)1 << page->block_size_shift) - 1) - : diff % mi_page_block_size(page)); + + size_t diff; + if mi_likely(page->block_offset_adj != 0) { + diff = (uint8_t*)p - (uint8_t*)page - 8 * (page->block_offset_adj - 1); + } + else { + diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + } + + size_t adjust; + if mi_likely(page->block_size_shift != 0) { + adjust = diff & (((size_t)1 << page->block_size_shift) - 1); + } + else { + adjust = diff % mi_page_block_size(page); + } + return (mi_block_t*)((uintptr_t)p - adjust); } -// free a local pointer -static void mi_decl_noinline mi_free_generic_local(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { +// free a local pointer (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_free_block_local(page, block, true); } -// free a pointer owned by another thread -static void mi_decl_noinline mi_free_generic_mt(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept { +// free a pointer owned by another thread (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865) mi_free_block_mt(segment, page, block); } // generic free (for runtime integration) void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { - if (is_local) mi_free_generic_local(segment,page,p); - else mi_free_generic_mt(segment,page,p); + if (is_local) mi_free_generic_local(page,segment,p); + else mi_free_generic_mt(page,segment,p); } // Get the segment data belonging to a pointer @@ -127,16 +142,16 @@ void mi_free(void* p) mi_attr_noexcept if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) // thread-local, aligned, and not a full page mi_block_t* const block = (mi_block_t*)p; - mi_free_block_local(page,block,false /* no need to check if the page is full */); + mi_free_block_local(page, block, false /* no need to check if the page is full */); } else { // page is full or contains (inner) aligned blocks; use generic path - mi_free_generic_local(segment, page, p); + mi_free_generic_local(page, segment, p); } } else { // not thread-local; use generic path - mi_free_generic_mt(segment, page, p); + mi_free_generic_mt(page, segment, p); } } @@ -174,7 +189,7 @@ bool _mi_free_delayed_block(mi_block_t* block) { // the owning thread in `_mi_free_delayed_block`. static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block ) { - // Try to put the block on either the page-local thread free list, + // Try to put the block on either the page-local thread free list, // or the heap delayed free list (if this is the first non-local free in that page) mi_thread_free_t tfreex; bool use_delayed; @@ -217,17 +232,17 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block #if MI_HUGE_PAGE_ABANDON static void mi_stat_huge_free(const mi_page_t* page); -#endif +#endif // Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // first see if the segment was abandoned and if we can reclaim it into our thread - if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && #if MI_HUGE_PAGE_ABANDON - segment->page_kind != MI_PAGE_HUGE && + segment->page_kind != MI_PAGE_HUGE && #endif - mi_atomic_load_relaxed(&segment->thread_id) == 0) + mi_atomic_load_relaxed(&segment->thread_id) == 0) { // the segment is abandoned, try to reclaim it into our heap if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { @@ -240,13 +255,13 @@ static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t* // The padding check may access the non-thread-owned page for the key values. 
// that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); - + // adjust stats (after padding check and potential recursive `mi_free` above) mi_stat_free(page, block); // stat_free may access the padding mi_track_free_size(block, mi_page_usable_size_of(page,block)); - + // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - _mi_padding_shrink(page, block, sizeof(mi_block_t)); + _mi_padding_shrink(page, block, sizeof(mi_block_t)); if (segment->page_kind == MI_PAGE_HUGE) { #if MI_HUGE_PAGE_ABANDON @@ -266,7 +281,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t* memset(block, MI_DEBUG_FREED, mi_usable_size(block)); #endif } - + // and finally free the actual block by pushing it on the owning heap // thread_delayed free list (or heap delayed free list) mi_free_block_delayed_mt(page,block); diff --git a/src/init.c b/src/init.c index 11471760..8a20daca 100644 --- a/src/init.c +++ b/src/init.c @@ -224,7 +224,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { is_zero = memid.initially_zero; } } - + if (td != NULL && !is_zero) { _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); } @@ -399,23 +399,23 @@ void mi_thread_done(void) mi_attr_noexcept { _mi_thread_done(NULL); } -void _mi_thread_done(mi_heap_t* heap) +void _mi_thread_done(mi_heap_t* heap) { // calling with NULL implies using the default heap - if (heap == NULL) { - heap = mi_prim_get_default_heap(); + if (heap == NULL) { + heap = mi_prim_get_default_heap(); if (heap == NULL) return; } // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699) if (!mi_heap_is_initialized(heap)) { - return; + return; } // adjust stats mi_atomic_decrement_relaxed(&thread_count); _mi_stat_decrease(&_mi_stats_main.threads, 1); - + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; @@ -437,7 +437,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. - _mi_prim_thread_associate_default_heap(heap); + _mi_prim_thread_associate_default_heap(heap); } @@ -597,7 +597,7 @@ static void mi_cdecl mi_process_done(void) { // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread _mi_prim_thread_done_auto_done(); - + #ifndef MI_SKIP_COLLECT_ON_EXIT #if (MI_DEBUG || !defined(MI_SHARED_LIB)) // free all memory if possible on process exit. 
This is not needed for a stand-alone process diff --git a/src/page.c b/src/page.c index 5930a430..8721a063 100644 --- a/src/page.c +++ b/src/page.c @@ -192,8 +192,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page) if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uint32_t max_count = page->capacity; // cannot collect more than capacity - uint32_t count = 1; + size_t max_count = page->capacity; // cannot collect more than capacity + size_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -211,7 +211,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - page->used -= count; + page->used -= (uint16_t)count; } void _mi_page_free_collect(mi_page_t* page, bool force) { @@ -677,7 +677,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi } #endif if (_mi_is_power_of_two(block_size) && block_size > 0) { - page->block_size_shift = (uint32_t)(mi_ctz((uintptr_t)block_size)); + page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); } const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page; const ptrdiff_t start_adjust = start_offset % block_size; From 4f809aadb7663d67758db84c12d2fcb8b877b46b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 08:29:56 -0700 Subject: [PATCH 10/12] use free field for expiration instead of used --- src/free.c | 2 +- src/page.c | 23 ++++++++++---------- src/segment.c | 59 +++++++++++++++++++++++++++++++-------------------- 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/src/free.c b/src/free.c index 4e031028..9579eecb 100644 --- a/src/free.c +++ b/src/free.c @@ -57,7 +57,7 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p size_t diff; if mi_likely(page->block_offset_adj != 0) { - diff = (uint8_t*)p - (uint8_t*)page - 8 * (page->block_offset_adj - 1); + diff = (uint8_t*)p - (uint8_t*)page - (8*(page->block_offset_adj - 1)); } else { diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); diff --git a/src/page.c b/src/page.c index 8721a063..d9e416b2 100644 --- a/src/page.c +++ b/src/page.c @@ -261,7 +261,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif - + // TODO: push on full queue immediately if it is full? 
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); @@ -676,16 +676,17 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); } #endif - if (_mi_is_power_of_two(block_size) && block_size > 0) { + if (block_size > 0 && _mi_is_power_of_two(block_size)) { page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); } - const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page; - const ptrdiff_t start_adjust = start_offset % block_size; - if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) { - page->block_offset_adj = (uint8_t)((start_adjust/8) + 1); + if (block_size > 0) { + const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page; + const ptrdiff_t start_adjust = start_offset % block_size; + if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) { + page->block_offset_adj = (uint8_t)((start_adjust/8) + 1); + } } - - + mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); @@ -723,7 +724,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p while (page != NULL) { mi_page_t* next = page->next; // remember next - #if MI_STAT + #if MI_STAT count++; #endif @@ -880,7 +881,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme else { // otherwise find a page with free blocks in our size segregated queues #if MI_PADDING - mi_assert_internal(size >= MI_PADDING_SIZE); + mi_assert_internal(size >= MI_PADDING_SIZE); #endif return mi_find_free_page(heap, size); } @@ -896,7 +897,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // initialize if necessary if mi_unlikely(!mi_heap_is_initialized(heap)) { - heap = mi_heap_get_default(); // calls mi_thread_init + heap = mi_heap_get_default(); // calls mi_thread_init if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/segment.c b/src/segment.c index a6522028..7d406a96 100644 --- a/src/segment.c +++ b/src/segment.c @@ -237,12 +237,12 @@ static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tl mi_assert_internal(!page->segment_in_use); if (!segment->allow_purge) return; mi_assert_internal(page->used == 0); + mi_assert_internal(page->free == NULL); mi_assert_expensive(!mi_pages_purge_contains(page, tld)); size_t psize; void* start = mi_segment_raw_page_start(segment, page, &psize); const bool needs_recommit = _mi_os_purge(start, psize, tld->stats); if (needs_recommit) { page->is_committed = false; } - page->used = 0; } static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { @@ -258,6 +258,7 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_ if (!ok) return false; // failed to commit! page->is_committed = true; page->used = 0; + page->free = NULL; page->is_zero_init = is_zero; if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); @@ -270,18 +271,30 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_ The free page queue ----------------------------------------------------------- */ -// we re-use the `used` field for the expiration counter. 
Since this is a -// a 32-bit field while the clock is always 64-bit we need to guard -// against overflow, we use substraction to check for expiry which work +// we re-use the `free` field for the expiration counter. Since this is a +// a pointer size field while the clock is always 64-bit we need to guard +// against overflow, we use substraction to check for expiry which works // as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) -static void mi_page_purge_set_expire(mi_page_t* page) { - mi_assert_internal(page->used == 0); - uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay); - page->used = expire; +static uint32_t mi_page_get_expire( mi_page_t* page ) { + return (uint32_t)((uintptr_t)page->free); } +static void mi_page_set_expire( mi_page_t* page, uint32_t expire ) { + page->free = (mi_block_t*)((uintptr_t)expire); +} + +static void mi_page_purge_set_expire(mi_page_t* page) { + mi_assert_internal(mi_page_get_expire(page)==0); + uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay); + mi_page_set_expire(page, expire); +} + +// we re-use the `free` field for the expiration counter. Since this is a +// a pointer size field while the clock is always 64-bit we need to guard +// against overflow, we use substraction to check for expiry which work +// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) static bool mi_page_purge_is_expired(mi_page_t* page, mi_msecs_t now) { - int32_t expire = (int32_t)(page->used); + int32_t expire = (int32_t)mi_page_get_expire(page); return (((int32_t)now - expire) >= 0); } @@ -320,14 +333,14 @@ static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) { mi_page_queue_t* pq = &tld->pages_purge; mi_assert_internal(pq!=NULL); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(page->used != 0); + mi_assert_internal(mi_page_get_expire(page) != 0); mi_assert_internal(mi_pages_purge_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; if (page == pq->first) pq->first = page->next; page->next = page->prev = NULL; - page->used = 0; + mi_page_set_expire(page,0); } static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) { @@ -493,7 +506,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se } MI_UNUSED(fully_committed); mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size)); - + _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) _mi_arena_free(segment, segment_size, committed_size, segment->memid, tld->stats); } @@ -592,7 +605,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->memid.is_pinned ? segment->memid.initially_committed : true); - + // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); _mi_memzero((uint8_t*)segment + ofs, info_size - ofs); @@ -746,21 +759,21 @@ Abandonment When threads terminate, they can leave segments with live blocks (reached through other threads). 
Such segments are "abandoned" and will be reclaimed by other threads to
-reuse their pages and/or free them eventually. The
+reuse their pages and/or free them eventually. The
`thread_id` of such segments is 0. When a block is freed in an abandoned segment, the segment
-is reclaimed into that thread.
+is reclaimed into that thread.
Moreover, if threads are looking for a fresh segment, they will first consider abandoned segments -- these can be found
-by scanning the arena memory
-(segments outside arena memoryare only reclaimed by a free).
+by scanning the arena memory
+(segments outside arena memory are only reclaimed by a free).
----------------------------------------------------------- */

// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
-  // nothing needed
+  // nothing needed
}

/* -----------------------------------------------------------
@@ -914,12 +927,12 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,

// attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`)
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
-  if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false;  // it is not abandoned
+  if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
   // don't reclaim more from a free than half the current segments
   // this is to prevent a pure free-ing thread to start owning too many segments
-  if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
+  if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
   if (_mi_arena_segment_clear_abandoned(segment)) {  // atomically unabandon
-    mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments);
+    mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments);
     mi_assert_internal(res == segment);
     return (res != NULL);
   }
@@ -946,11 +959,11 @@ static long mi_segment_get_reclaim_tries(void) {

static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld)
{
-  *reclaimed = false;
+  *reclaimed = false;
   mi_segment_t* segment;
   mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
   long max_tries = mi_segment_get_reclaim_tries();
-  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
+  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
   {
     segment->abandoned_visits++;
     // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
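For illustration, here is a minimal standalone sketch of the two ideas behind the purge-expiration changes above: stashing a 32-bit expiration stamp in the otherwise-unused pointer-sized `free` field, and testing expiry with wrap-around-safe subtraction. Every name here (`ex_page_t`, `ex_purge_is_expired`, and so on) is a hypothetical illustration, not mimalloc's API.

#include <stdbool.h>
#include <stdint.h>

// Hypothetical page descriptor: while a page sits in the purge queue its
// free list is empty, so the pointer-sized field can hold a 32-bit stamp.
typedef struct ex_page_s {
  void* free;
} ex_page_t;

static uint32_t ex_get_expire(const ex_page_t* page) {
  return (uint32_t)((uintptr_t)page->free);
}

static void ex_set_expire(ex_page_t* page, uint32_t expire) {
  page->free = (void*)(uintptr_t)expire;
}

// Record a deadline `delay_msecs` in the future; truncating the 64-bit clock
// to 32 bits is intentional.
static void ex_purge_set_expire(ex_page_t* page, uint64_t now_msecs, uint32_t delay_msecs) {
  ex_set_expire(page, (uint32_t)now_msecs + delay_msecs);
}

// Wrap-around-safe expiry test: compare via 32-bit subtraction instead of
// `now >= expire`, which stays correct across 32-bit wrap-around as long as
// the configured delay is far below 2^31 milliseconds.
static bool ex_purge_is_expired(const ex_page_t* page, uint64_t now_msecs) {
  const uint32_t expire = ex_get_expire(page);
  return ((int32_t)((uint32_t)now_msecs - expire) >= 0);
}

The subtraction form is what makes the (2^30 - 1) millisecond bound in the comment sufficient: with delays that small, the 32-bit difference stays well away from the sign boundary at 2^31.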
From ea6137a5017a407ffedafd2757ee6d4a840668fc Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Sun, 24 Mar 2024 09:01:58 -0700
Subject: [PATCH 11/12] use MI_MAX_ALIGN_SIZE to adjust block_offset_adj

---
 include/mimalloc/types.h | 4 ++--
 src/page.c               | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index ad0aabe9..5bc49aa0 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -297,8 +297,8 @@ typedef struct mi_page_s {
   mi_block_t*  free;              // list of available free blocks (`malloc` allocates from this list)
   mi_block_t*  local_free;        // list of deferred free blocks by this thread (migrates to `free`)
   uint16_t     used;              // number of blocks in use (including blocks in `thread_free`)
-  uint8_t      block_size_shift;  // if not zero, then `(1 << block_size_shift == block_size)` (used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t      block_offset_adj;  // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0)` (used for fast path in `free.c:_mi_page_ptr_unalign`)
+  uint8_t      block_size_shift;  // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
+  uint8_t      block_offset_adj;  // if not zero, then `((mi_page_start(_,page,_) - (uint8_t*)page - MI_MAX_ALIGN_SIZE*(block_offset_adj-1)) % block_size) == 0` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
   uint32_t     xblock_size;       // size available in each block (always `>0`)

 #if (MI_ENCODE_FREELIST || MI_PADDING)
diff --git a/src/page.c b/src/page.c
index d9e416b2..912f969a 100644
--- a/src/page.c
+++ b/src/page.c
@@ -682,8 +682,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   if (block_size > 0) {
     const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page;
     const ptrdiff_t start_adjust = start_offset % block_size;
-    if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) {
-      page->block_offset_adj = (uint8_t)((start_adjust/8) + 1);
+    if (start_offset >= 0 && (start_adjust % MI_MAX_ALIGN_SIZE) == 0 && (start_adjust / MI_MAX_ALIGN_SIZE) < 255) {
+      const ptrdiff_t adjust = (start_adjust / MI_MAX_ALIGN_SIZE);
+      mi_assert_internal(adjust + 1 == (uint8_t)(adjust + 1));
+      page->block_offset_adj = (uint8_t)(adjust + 1);
     }
   }

@@ -700,7 +702,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->keys[1] != 0);
 #endif
   mi_assert_internal(page->block_size_shift == 0 || (block_size == (1UL << page->block_size_shift)));
-  mi_assert_internal(page->block_offset_adj == 0 || (((uint8_t*)page_start - (uint8_t*)page - 8*(page->block_offset_adj-1))) % block_size == 0);
+  mi_assert_internal(page->block_offset_adj == 0 || (((uint8_t*)page_start - (uint8_t*)page - MI_MAX_ALIGN_SIZE*(page->block_offset_adj-1))) % block_size == 0);

   mi_assert_expensive(mi_page_is_valid_init(page));

   // initialize an initial free list

From 86475a7b9bbd5fdd756cd89a579c3d8368708e2f Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Sun, 24 Mar 2024 09:03:01 -0700
Subject: [PATCH 12/12] use MI_MAX_ALIGN_SIZE to adjust block_offset_adj

---
 src/free.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/free.c b/src/free.c
index 9579eecb..7a5a7806 100644
--- a/src/free.c
+++ b/src/free.c
@@ -57,7 +57,7 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p

   size_t diff;
   if mi_likely(page->block_offset_adj != 0) {
-    diff = (uint8_t*)p - (uint8_t*)page - (8*(page->block_offset_adj - 1));
+    diff = (uint8_t*)p - (uint8_t*)page - (MI_MAX_ALIGN_SIZE*(page->block_offset_adj - 1));
   }
   else {
     diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
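To see how `block_offset_adj` and `block_size_shift` combine on the free path, here is a self-contained sketch of the idea behind `_mi_page_ptr_unalign`: round an interior pointer down to the start of its block, using the precomputed adjustment instead of a page-start lookup and a mask instead of a modulo when the block size is a power of two. The `ex_*` names and `EX_MAX_ALIGN_SIZE` are hypothetical stand-ins, not the actual mimalloc code.

#include <stddef.h>
#include <stdint.h>

#define EX_MAX_ALIGN_SIZE 16   // stand-in for MI_MAX_ALIGN_SIZE

// Hypothetical page descriptor with only the fields this fast path needs.
// When non-zero, block_offset_adj encodes ((page_start - (uint8_t*)page) % block_size)
// as EX_MAX_ALIGN_SIZE*(block_offset_adj - 1), which is how mi_page_init computes it above.
typedef struct ex_page_s {
  uint8_t  block_size_shift;   // non-zero only if block_size == ((size_t)1 << block_size_shift)
  uint8_t  block_offset_adj;   // non-zero only if the adjustment above is encodable
  size_t   block_size;
  uint8_t* page_start;         // slow-path fallback: actual start of the block area
} ex_page_t;

// Round an interior pointer `p` down to the start of its block.
static void* ex_page_ptr_unalign(const ex_page_t* page, void* p) {
  size_t diff;
  if (page->block_offset_adj != 0) {
    // Fast path: `diff` differs from (p - page_start) by a multiple of block_size,
    // so the rounding below still lands on the block start without ever computing
    // page_start (assumes the descriptor precedes the block area, as checked at init).
    diff = (size_t)((uint8_t*)p - (const uint8_t*)page)
         - (size_t)EX_MAX_ALIGN_SIZE * (size_t)(page->block_offset_adj - 1);
  }
  else {
    diff = (size_t)((uint8_t*)p - page->page_start);
  }
  size_t adjust;
  if (page->block_size_shift != 0) {
    adjust = diff & (((size_t)1 << page->block_size_shift) - 1);  // mask instead of modulo
  }
  else {
    adjust = diff % page->block_size;
  }
  return (void*)((uint8_t*)p - adjust);
}

The mask in the power-of-two branch is what `block_size_shift` adds on top of `block_offset_adj`; size classes that are not a power of two still fall back to the modulo.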