From f141ca12a49da87b7740a9cbd67ebf14d31d145c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 10:53:09 -0700 Subject: [PATCH 1/6] add extra runtime check to ensure we never insert large or huge pages in the segment free queue (issue #870) --- src/segment.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 7d406a96..e7e7d2cc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -746,8 +746,10 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) } else if (segment->used + 1 == segment->capacity) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages - // move back to segments free list - mi_segment_insert_in_free_queue(segment,tld); + if (segment->page_kind <= MI_PAGE_MEDIUM) { + // move back to segments free list + mi_segment_insert_in_free_queue(segment,tld); + } } } } From 6688b45fbdd2b3091a852e6db37454a3a7897061 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 10:57:02 -0700 Subject: [PATCH 2/6] rename MI_ALIGNMENT_MAX to MI_BLOCK_ALIGNMENT_MAX for clarity --- doc/mimalloc-doc.h | 4 ++-- include/mimalloc/types.h | 4 ++-- src/alloc-aligned.c | 10 ++++----- src/os.c | 48 ++++++++++++++++++++-------------------- src/page.c | 2 +- src/segment.c | 2 +- test/test-api.c | 16 +++++++------- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 01b13904..47a8a6b9 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -499,11 +499,11 @@ void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_m /// \{ /// The maximum supported alignment size (currently 1MiB). -#define MI_ALIGNMENT_MAX (1024*1024UL) +#define MI_BLOCK_ALIGNMENT_MAX (1024*1024UL) /// Allocate \a size bytes aligned by \a alignment. /// @param size number of bytes to allocate. -/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_ALIGNMENT_MAX. +/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_BLOCK_ALIGNMENT_MAX. /// @returns pointer to the allocated memory or \a NULL if out of memory. /// The returned pointer is aligned by \a alignment, i.e. /// `(uintptr_t)p % alignment == 0`. diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 5bc49aa0..d088b305 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -193,8 +193,8 @@ typedef int32_t mi_ssize_t; // Used as a special value to encode block sizes in 32 bits. 
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) -// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments -#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) // ------------------------------------------------------ diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 0907811e..5f60b2fc 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -33,7 +33,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* p; size_t oversize; - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { + if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) @@ -47,7 +47,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block // zero afterwards as only the area from the aligned_p may be committed! - if (p == NULL) return NULL; + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -73,9 +73,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(mi_usable_size(aligned_p)>=size); mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); - + // now zero the block if needed - if (alignment > MI_ALIGNMENT_MAX) { + if (alignment > MI_BLOCK_ALIGNMENT_MAX) { // for the tracker, on huge aligned allocations only from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { @@ -85,7 +85,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* if (p != aligned_p) { mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); - } + } return aligned_p; } diff --git a/src/os.c b/src/os.c index 21ab9243..09ae367d 100644 --- a/src/os.c +++ b/src/os.c @@ -29,7 +29,7 @@ bool _mi_os_has_overcommit(void) { return mi_os_mem_config.has_overcommit; } -bool _mi_os_has_virtual_reserve(void) { +bool _mi_os_has_virtual_reserve(void) { return mi_os_mem_config.has_virtual_reserve; } @@ -180,7 +180,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me } } else { - // nothing to do + // nothing to do mi_assert(memid.memkind < MI_MEM_OS); } } @@ -203,25 +203,25 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo if (!commit) { allow_large = false; } if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning *is_zero = false; - void* p = NULL; + void* p = NULL; int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); if (err != 0) { _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } - + 
MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) { - _mi_stat_increase(&stats->committed, size); + if (commit) { + _mi_stat_increase(&stats->committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { mi_track_mem_defined(p,size); } else { mi_track_mem_undefined(p,size); } #endif - } + } } return p; } @@ -258,7 +258,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // over-allocate uncommitted (virtual) memory p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; - + // set p to the aligned part in the full region // note: this is dangerous on Windows as VirtualFree needs the actual base pointer // this is handled though by having the `base` field in the memid's @@ -274,7 +274,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // overallocate... p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; - + // and selectively unmap parts around the over-allocated area. (noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; @@ -285,7 +285,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; - *base = aligned_p; // since we freed the pre part, `*base == p`. + *base = aligned_p; // since we freed the pre part, `*base == p`. } } @@ -307,7 +307,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); if (p != NULL) { *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); - } + } return p; } @@ -318,7 +318,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - + bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; @@ -333,7 +333,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used - for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc + for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc page where the object can be aligned at an offset from the start of the segment. As we may need to overallocate, we need to free such pointers using `mi_free_aligned` to use the actual start of the memory region. @@ -396,7 +396,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; + mi_stats_t* stats = &_mi_stats_main; if (is_zero != NULL) { *is_zero = false; } _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. 
decommit _mi_stat_counter_increase(&stats->commit_calls, 1); @@ -406,21 +406,21 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); if (csize == 0) return true; - // commit + // commit bool os_is_zero = false; - int err = _mi_prim_commit(start, csize, &os_is_zero); + int err = _mi_prim_commit(start, csize, &os_is_zero); if (err != 0) { _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); return false; } - if (os_is_zero && is_zero != NULL) { + if (os_is_zero && is_zero != NULL) { *is_zero = true; mi_assert_expensive(mi_mem_is_zero(start, csize)); } // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) #ifdef MI_TRACK_ASAN if (os_is_zero) { mi_track_mem_defined(start,csize); } - else { mi_track_mem_undefined(start,csize); } + else { mi_track_mem_undefined(start,csize); } #endif return true; } @@ -434,11 +434,11 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_ // page align size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0) return true; + if (csize == 0) return true; // decommit *needs_recommit = true; - int err = _mi_prim_decommit(start,csize,needs_recommit); + int err = _mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } @@ -456,7 +456,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); @@ -476,7 +476,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { } -// either resets or decommits memory, returns true if the memory needs +// either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) { @@ -489,7 +489,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) { bool needs_recommit = true; mi_os_decommit_ex(p, size, &needs_recommit, stats); - return needs_recommit; + return needs_recommit; } else { if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed @@ -499,7 +499,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) } } -// either resets or decommits memory, returns true if the memory needs +// either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. 
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { return _mi_os_purge_ex(p, size, true, stats); diff --git a/src/page.c b/src/page.c index 912f969a..63780d63 100644 --- a/src/page.c +++ b/src/page.c @@ -831,7 +831,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // Because huge pages contain just one block, and the segment contains // just that page, we always treat them as abandoned and any thread // that frees the block can free the whole page and segment directly. -// Huge pages are also use if the requested alignment is very large (> MI_ALIGNMENT_MAX). +// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX). static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); diff --git a/src/segment.c b/src/segment.c index e7e7d2cc..359815ce 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1189,7 +1189,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { + if mi_unlikely(page_alignment > MI_BLOCK_ALIGNMENT_MAX) { mi_assert_internal(_mi_is_power_of_two(page_alignment)); mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); diff --git a/test/test-api.c b/test/test-api.c index 8dd24e1b..6dd2bc7f 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -34,7 +34,7 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include "mimalloc.h" // #include "mimalloc/internal.h" -#include "mimalloc/types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX +#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX #include "testhelper.h" @@ -59,7 +59,7 @@ bool mem_is_zero(uint8_t* p, size_t size) { // --------------------------------------------------------------------------- int main(void) { mi_option_disable(mi_option_verbose); - + // --------------------------------------------------- // Malloc // --------------------------------------------------- @@ -154,7 +154,7 @@ int main(void) { }; CHECK_BODY("malloc-aligned6") { bool ok = true; - for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { + for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { ps[i] = mi_malloc_aligned(align*13 // size @@ -170,16 +170,16 @@ int main(void) { result = ok; }; CHECK_BODY("malloc-aligned7") { - void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); + void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX); mi_free(p); - result = ((uintptr_t)p % MI_ALIGNMENT_MAX) == 0; + result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0; }; CHECK_BODY("malloc-aligned8") { bool ok = true; for (int i = 0; i < 5 && ok; i++) { int n = (1 << i); - void* p = mi_malloc_aligned(1024, n * MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX); + ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0; mi_free(p); } result = ok; @@ -187,7 +187,7 @@ int main(void) { CHECK_BODY("malloc-aligned9") { bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGNMENT_MAX, MI_ALIGNMENT_MAX + 1, 2 * MI_ALIGNMENT_MAX, 8 * MI_ALIGNMENT_MAX, 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { From b5665f0eec417a73a8abcdb00f3a95165b165527 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 14:17:17 -0700 Subject: [PATCH 3/6] add full block_size and page_start to page info --- ide/vs2022/mimalloc.vcxproj | 6 +++ ide/vs2022/mimalloc.vcxproj.filters | 3 ++ include/mimalloc/internal.h | 21 +++----- include/mimalloc/types.h | 31 +++++------ src/alloc.c | 10 ++-- src/free.c | 16 +----- src/heap.c | 14 ++--- src/init.c | 9 ++-- src/page-queue.c | 30 ++++++----- src/page.c | 83 ++++++++++++----------------- src/segment.c | 37 +++++++------ src/stats.c | 7 +-- 12 files changed, 119 insertions(+), 148 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 11da11c3..3e11d0fe 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -217,6 +217,12 @@ false + + true + true + true + true + diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters index bb5c8ce9..a387f5a5 100644 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -58,6 +58,9 @@ Sources + + Sources + diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 02200594..21dc9d62 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -147,7 +147,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment); mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* 
os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size, size_t* pre_size); // page start for any page #if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); @@ -452,10 +452,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const } // Quick page start for initialized pages -static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->xblock_size; - mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); - return _mi_segment_page_start(segment, page, bsize, page_size, NULL); +static inline uint8_t* mi_page_start(const mi_page_t* page) { + mi_assert_internal(page->page_start != NULL); + return page->page_start; } // Get the page containing the pointer @@ -466,16 +465,8 @@ static inline mi_page_t* _mi_ptr_page(void* p) { // Get the block size of a page (special case for huge objects) static inline size_t mi_page_block_size(const mi_page_t* page) { - const size_t bsize = page->xblock_size; - mi_assert_internal(bsize > 0); - if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) { - return bsize; - } - else { - size_t psize; - _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); - return psize; - } + mi_assert_internal(page->block_size > 0); + return page->block_size; } static inline bool mi_page_is_huge(const mi_page_t* page) { diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index d088b305..69d59527 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -181,7 +181,6 @@ typedef int32_t mi_ssize_t; #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB #define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2MiB #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) @@ -190,9 +189,6 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// Used as a special value to encode block sizes in 32 bits. -#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) - // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) @@ -258,7 +254,6 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// // `used - |thread_free|` == actual blocks that are in use (alive) // `used - |thread_free| + |free| + |local_free| == capacity` // @@ -266,16 +261,13 @@ typedef uintptr_t mi_thread_free_t; // the number of memory accesses in the `mi_page_all_free` function(s). 
// // Notes: -// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` // - Using `uint16_t` does not seem to slow things down -// - The size is 8 words on 64-bit which helps the page index calculations -// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 -// and 12 are still good for address calculation) -// - To limit the structure size, the `xblock_size` is 32-bits only; for -// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - The size is 10 words on 64-bit which helps the page index calculations +// (and 14 words on 32-bit, and encoded free lists add 2 words) // - `xthread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning -// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is // at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages @@ -290,16 +282,16 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory + uint16_t used; // number of blocks in use (including blocks in `thread_free`) mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire:7; // expiration count for retired blocks - + // padding mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - uint16_t used; // number of blocks in use (including blocks in `thread_free`) - uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) - uint8_t block_offset_adj; // if not zero, then `(mi_page_start(_,page,_) - (uint8_t*)page - MI_MAX_ALIGN_SIZE*(block_offset_adj-1)) % block_size == 0)` (only used for fast path in `free.c:_mi_page_ptr_unalign`) - uint32_t xblock_size; // size available in each block (always `>0`) + size_t block_size; // size available in each block (always `>0`) + uint8_t* page_start; // start of the page area containing the blocks #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary @@ -310,6 +302,10 @@ typedef struct mi_page_s { struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` + + #if MI_INTPTR_SIZE==4 // pad to 14 words on 32-bit + void* padding[1]; + #endif } mi_page_t; @@ -548,7 +544,6 @@ typedef struct mi_stats_s { mi_stat_counter_t searches; mi_stat_counter_t normal_count; mi_stat_counter_t huge_count; - mi_stat_counter_t giant_count; mi_stat_counter_t arena_count; mi_stat_counter_t arena_crossover_count; mi_stat_counter_t arena_rollback_count; diff --git a/src/alloc.c 
b/src/alloc.c index 3a38a226..8b6c4de0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Note: in release mode the (inlined) routine is about 7 instructions with a single test. extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { - mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); + mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { return _mi_malloc_generic(heap, size, zero, 0); @@ -53,14 +53,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { - mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); + mi_assert_internal(page->block_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) + mi_assert_internal(page->block_size >= MI_PADDING_SIZE); if (page->free_is_zero) { block->next = 0; - mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE); + mi_track_mem_defined(block, page->block_size - MI_PADDING_SIZE); } else { - _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); + _mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE); } } diff --git a/src/free.c b/src/free.c index 0e560e53..c66de6f6 100644 --- a/src/free.c +++ b/src/free.c @@ -55,14 +55,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - size_t diff; - if mi_likely(page->block_offset_adj != 0) { - diff = (uint8_t*)p - (uint8_t*)page - (MI_MAX_ALIGN_SIZE*(page->block_offset_adj - 1)); - } - else { - diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - } - + size_t diff = (uint8_t*)p - page->page_start; size_t adjust; if mi_likely(page->block_size_shift != 0) { adjust = diff & (((size_t)1 << page->block_size_shift) - 1); @@ -519,12 +512,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { #if !MI_HUGE_PAGE_ABANDON else { const size_t bpsize = mi_page_block_size(page); - if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bpsize); - } - else { - mi_heap_stat_decrease(heap, giant, bpsize); - } + mi_heap_stat_decrease(heap, huge, bpsize); } #endif } diff --git a/src/heap.c b/src/heap.c index 18cfc706..21cdfa46 100644 --- a/src/heap.c +++ b/src/heap.c @@ -289,12 +289,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ // stats const size_t bsize = mi_page_block_size(page); if (bsize > MI_LARGE_OBJ_SIZE_MAX) { - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, giant, bsize); - } - else { - mi_heap_stat_decrease(heap, huge, bsize); - } + mi_heap_stat_decrease(heap, huge, bsize); } #if (MI_STAT) _mi_page_free_collect(page, false); // update used count @@ -467,8 +462,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa MI_UNUSED(heap); MI_UNUSED(pq); bool* found = (bool*)vfound; - mi_segment_t* segment = _mi_page_segment(page); - void* start = _mi_page_start(segment, page, NULL); + void* start = mi_page_start(page); void* end = 
(uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found @@ -514,7 +508,7 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v const size_t bsize = mi_page_block_size(page); const size_t ubsize = mi_page_usable_block_size(page); // without padding size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize, NULL); if (page->capacity == 1) { // optimize page with one block @@ -581,7 +575,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa xarea.page = page; xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; - xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); + xarea.area.blocks = mi_page_start(page); xarea.area.used = page->used; // number of blocks in use (#553) xarea.area.block_size = ubsize; xarea.area.full_block_size = bsize; diff --git a/src/init.c b/src/init.c index 8a20daca..79175f81 100644 --- a/src/init.c +++ b/src/init.c @@ -17,15 +17,15 @@ const mi_page_t _mi_page_empty = { 0, false, false, false, 0, // capacity 0, // reserved capacity + 0, // used { 0 }, // flags + 0, // block size shift false, // is_zero 0, // retire_expire NULL, // free NULL, // local_free - 0, // used - 0, // block size shift - 0, // block offset adj - 0, // xblock_size + 0, // block_size + NULL, // page_start #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif @@ -78,7 +78,6 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 } \ diff --git a/src/page-queue.c b/src/page-queue.c index fbfe2811..095f9b81 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -11,6 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MI_IN_PAGE_C #error "this file should be included from 'page.c'" +// include to help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #endif /* ----------------------------------------------------------- @@ -138,20 +142,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(mi_page_block_size(page))); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || mi_page_block_size(page) == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); + uint8_t bin = (mi_page_is_in_full(page) ? 
MI_BIN_FULL : mi_bin(mi_page_block_size(page))); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || mi_page_block_size(page) == pq->block_size); return pq; } @@ -206,7 +210,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(mi_page_block_size(page) == queue->block_size || (mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; @@ -231,8 +235,8 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif - mi_assert_internal(page->xblock_size == queue->block_size || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -258,11 +262,13 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || - (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || - (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + const size_t bsize = mi_page_block_size(page); + MI_UNUSED(bsize); + mi_assert_internal((bsize == to->block_size && bsize == from->block_size) || + (bsize == to->block_size && mi_page_queue_is_full(from)) || + (bsize == from->block_size && mi_page_queue_is_full(to)) || + (bsize > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (bsize > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; diff --git a/src/page.c b/src/page.c index 63780d63..ef58d883 100644 --- a/src/page.c +++ b/src/page.c @@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) { static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; - uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); + uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize, NULL); mi_block_t* start = (mi_block_t*)page_area; mi_block_t* end = (mi_block_t*)(page_area + psize); while(p != NULL) { @@ -78,14 +78,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } 
static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->xblock_size > 0); + mi_assert_internal(mi_page_block_size(page) > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); - uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); + uint8_t* start = mi_page_start(page); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -283,10 +283,9 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size #if MI_HUGE_PAGE_ABANDON mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif - mi_assert_internal(pq!=NULL || page->xblock_size != 0); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); // a fresh page was found, initialize it - const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); @@ -425,7 +424,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE #define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks @@ -448,10 +447,12 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue? + const size_t bsize = mi_page_block_size(page); + if mi_likely(bsize < MI_MAX_RETIRE_SIZE) { // not too large && not full or huge queue? + mi_assert_internal(!mi_page_queue_is_special(pq)); if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; @@ -514,7 +515,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co #endif mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + void* const page_area = mi_page_start(page); // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -572,7 +573,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co #endif mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + void* const page_area = mi_page_start(page); mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); @@ -616,15 +617,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) size_t page_size; //uint8_t* page_start = - _mi_page_start(_mi_page_segment(page), page, &page_size); + _mi_segment_page_start(_mi_page_segment(page), page, &page_size, NULL); mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count - const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); + const size_t bsize = mi_page_block_size(page); size_t extend = page->reserved - page->capacity; mi_assert_internal(extend > 0); - size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize); if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } mi_assert_internal(max_extend > 0); @@ -658,10 +659,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); + page->block_size = block_size; size_t page_size; - const void* page_start = _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - mi_track_mem_noaccess(page_start,page_size); - page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); + page->page_start = _mi_segment_page_start(segment, page, &page_size, NULL); + mi_track_mem_noaccess(page->page_start,page_size); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); mi_assert_internal(page->reserved > 0); @@ -673,20 +674,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi #if MI_DEBUG>2 if (page->is_zero_init) { mi_track_mem_defined(page_start, page_size); - mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); + mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page->page_start, page_size)); } #endif if (block_size > 0 && _mi_is_power_of_two(block_size)) { page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); } - if (block_size > 0) { - const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page; - const ptrdiff_t start_adjust = start_offset % block_size; - if (start_offset >= 0 && (start_adjust % MI_MAX_ALIGN_SIZE) == 0 && (start_adjust / MI_MAX_ALIGN_SIZE) < 255) { - const ptrdiff_t adjust = (start_adjust / MI_MAX_ALIGN_SIZE); - mi_assert_internal(adjust + 1 == (uint8_t)(adjust + 1)); - page->block_offset_adj = (uint8_t)(adjust + 1); - } + else { + page->block_size_shift = 0; } mi_assert_internal(page->capacity == 0); @@ -701,8 +696,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif - mi_assert_internal(page->block_size_shift == 0 || (block_size == (1UL << page->block_size_shift))); - mi_assert_internal(page->block_offset_adj == 0 || (((uint8_t*)page_start - (uint8_t*)page - MI_MAX_ALIGN_SIZE*(page->block_offset_adj-1))) % block_size == 0); + mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift))); mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list @@ -827,40 +821,31 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex General allocation ----------------------------------------------------------- */ -// A huge page is allocated directly without being in a queue. -// Because huge pages contain just one block, and the segment contains -// just that page, we always treat them as abandoned and any thread -// that frees the block can free the whole page and segment directly. -// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX). +// Huge pages contain just one block, and the segment contains just that page. +// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX) +// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`. 
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); #if MI_HUGE_PAGE_ABANDON mi_page_queue_t* pq = NULL; #else - mi_page_queue_t* pq = mi_page_queue(heap, MI_HUGE_OBJ_SIZE_MAX); // not block_size as that can be low if the page_alignment > 0 - mi_assert_internal(mi_page_queue_is_huge(pq)); + mi_page_queue_t* pq = mi_page_queue(heap, block_size); + // mi_assert_internal(mi_page_queue_is_huge(pq)); #endif - mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size,page_alignment); - if (page != NULL) { - const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already - mi_assert_internal(bsize >= size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); + if (page != NULL) { + mi_assert_internal(mi_page_block_size(page) >= size); mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); + mi_assert_internal(mi_page_is_huge(page)); mi_assert_internal(_mi_page_segment(page)->used==1); #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_page_set_heap(page, NULL); #endif - - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_increase(heap, giant, bsize); - mi_heap_stat_counter_increase(heap, giant_count, 1); - } - else { - mi_heap_stat_increase(heap, huge, bsize); - mi_heap_stat_counter_increase(heap, huge_count, 1); - } + mi_heap_stat_increase(heap, huge, mi_page_block_size(page)); + mi_heap_stat_counter_increase(heap, huge_count, 1); } return page; } @@ -927,7 +912,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al mi_assert_internal(mi_page_block_size(page) >= size); // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) - if mi_unlikely(zero && page->xblock_size == 0) { + if mi_unlikely(zero && page->block_size == 0) { // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. 
void* p = _mi_page_malloc(heap, page, size, false); mi_assert_internal(p != NULL); diff --git a/src/segment.c b/src/segment.c index 359815ce..a4fd26e9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -412,13 +412,13 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ #endif if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } // Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +static uint8_t* mi_segment_page_start_ex(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) { size_t psize; uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); @@ -437,11 +437,15 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size, size_t* pre_size) { + return mi_segment_page_start_ex(segment, page, mi_page_block_size(page), page_size, pre_size); +} + static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; @@ -707,15 +711,19 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg page->is_zero_init = false; page->segment_in_use = false; - // zero the page data, but not the segment fields and capacity, and block_size (for page size calculations) - uint32_t block_size = page->xblock_size; + // zero the page data, but not the segment fields and capacity, page start, and block_size (for page size calculations) + size_t block_size = page->block_size; + uint8_t block_size_shift = page->block_size_shift; + uint8_t* page_start = page->page_start; uint16_t capacity = page->capacity; uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->capacity = capacity; page->reserved = reserved; - page->xblock_size = block_size; + page->block_size = block_size; + page->block_size_shift = block_size_shift; + page->page_start = page_start; segment->used--; // schedule purge @@ -831,7 +839,6 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { // Possibly clear pages and check if free space is available static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) { - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; size_t pages_used = 0; size_t pages_used_empty = 0; @@ -847,7 +854,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool pages_used_empty++; has_page = true; } - else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { + else if (mi_page_block_size(page) == block_size && mi_page_has_any_available(page)) { // a page has available free blocks of the right size has_page = true; } 
@@ -901,7 +908,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, else { // otherwise reclaim it into the heap _mi_page_reclaim(heap, page); - if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { + if (requested_block_size == mi_page_block_size(page) && mi_page_has_any_available(page)) { if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } @@ -1008,7 +1015,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(page_kind <= MI_PAGE_LARGE); - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); // 1. try to reclaim an abandoned segment bool reclaimed; @@ -1077,7 +1084,7 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p mi_assert_internal(page != NULL); #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN // verify it is committed - _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; + mi_segment_page_start_ex(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif return page; } @@ -1100,7 +1107,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN - _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; + mi_segment_page_start_ex(segment, page, sizeof(void*), NULL, NULL)[0] = 0; #endif return page; } @@ -1117,11 +1124,11 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); - // for huge pages we initialize the xblock_size as we may + // for huge pages we initialize the block_size as we may // overallocate to accommodate large alignments. size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, 0, &psize, NULL); - page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + uint8_t* start = mi_segment_page_start_ex(segment, page, 0, &psize, NULL); + page->block_size = psize; // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) if (page_alignment > 0 && segment->allow_decommit && page->is_committed) { diff --git a/src/stats.c b/src/stats.c index 8fbdfc45..5dfd713b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -117,8 +117,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); - mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); - mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1); + mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) { @@ -316,12 +315,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) #endif #if MI_STAT mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 
1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &stats->normal, 1); mi_stat_add(&total, &stats->huge, 1); - mi_stat_add(&total, &stats->giant, 1); mi_stat_print(&total, "total", 1, out, arg); #endif #if MI_STAT>1 From a8a53e3e85fbe8c8f997078399ee089880614ebf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 14:50:15 -0700 Subject: [PATCH 4/6] fix double counting of free-ing for non-thread-local free calls --- include/mimalloc/internal.h | 2 +- src/alloc-aligned.c | 2 +- src/free.c | 27 ++++++++++++++------------- src/options.c | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 21dc9d62..29943357 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -200,7 +200,7 @@ void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 5f60b2fc..b63c5e43 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -69,7 +69,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // todo: expand padding if overallocated ? 
   mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
-  mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
+  mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p));
   mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
   mi_assert_internal(mi_usable_size(aligned_p)>=size);
   mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
 
diff --git a/src/free.c b/src/free.c
index c66de6f6..39443ccf 100644
--- a/src/free.c
+++ b/src/free.c
@@ -29,17 +29,17 @@ static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t*
 
 // regular free of a (thread local) block pointer
 // fast path written carefully to prevent spilling on the stack
-static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool check_full)
+static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
 {
   // checks
   if mi_unlikely(mi_check_is_double_free(page, block)) return;
   mi_check_padding(page, block);
-  mi_stat_free(page, block);
+  if (track_stats) { mi_stat_free(page, block); }
   #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
   memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
   #endif
-  mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned
-  
+  if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster than mi_usable_size as we already know the page and that p is unaligned
+
   // actual free: push on the local free list
   mi_block_set_next(page, block, page->local_free);
   page->local_free = block;
@@ -52,7 +52,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
 }
 
 // Adjust a block that was allocated aligned, to the actual start of the block in the page.
-mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
+mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
   mi_assert_internal(page!=NULL && p!=NULL);
 
   size_t diff = (uint8_t*)p - page->page_start;
@@ -69,13 +69,14 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 
 // free a local pointer (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
-  mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
-  mi_free_block_local(page, block, true);
+  MI_UNUSED(segment);
+  mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
+  mi_free_block_local(page, block, true, true);
 }
 
 // free a pointer owned by another thread (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
-  mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
+  mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
   mi_free_block_mt(segment, page, block);
 }
 
@@ -135,7 +136,7 @@ void mi_free(void* p) mi_attr_noexcept
     if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
       // thread-local, aligned, and not a full page
       mi_block_t* const block = (mi_block_t*)p;
-      mi_free_block_local(page, block, false /* no need to check if the page is full */);
+      mi_free_block_local(page, block, true, false /* no need to check if the page is full */);
     }
     else {
       // page is full or contains (inner) aligned blocks; use generic path
@@ -170,7 +171,7 @@ bool _mi_free_delayed_block(mi_block_t* block) {
   _mi_page_free_collect(page, false);
 
   // and free the block (possibly freeing the page as well since used is updated)
-  mi_free_block_local(page, block, true);
+  mi_free_block_local(page, block, false /* stats have already been adjusted */, true);
   return true;
 }
 
@@ -287,8 +288,8 @@ static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t*
 // ------------------------------------------------------
 
 // Bytes available in a block
-static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
-  const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
+static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept {
+  const mi_block_t* block = _mi_page_ptr_unalign(page, p);
   const size_t size = mi_page_usable_size_of(page, block);
   const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
   mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
@@ -305,7 +306,7 @@ static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noe
   }
   else {
     // split out to separate routine for improved code generation
-    return mi_page_usable_aligned_size_of(segment, page, p);
+    return mi_page_usable_aligned_size_of(page, p);
   }
 }
 
diff --git a/src/options.c b/src/options.c
index f8e928d0..8a84d344 100644
--- a/src/options.c
+++ b/src/options.c
@@ -91,7 +91,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 10, UNINIT, MI_OPTION(arena_purge_mult) },     // purge delay multiplier for arena's
   { 1,  UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
-  { 1,  UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
+  { 0,  UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
 };
 
 static void mi_option_init(mi_option_desc_t* desc);

From 9c96d05ee435a8931c685f33cc115f58765e530b Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Sun, 24 Mar 2024 14:52:50 -0700
Subject: [PATCH 5/6] abandoned reclaim on free is on by default

---
 src/free.c    | 6 +++---
 src/options.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/free.c b/src/free.c
index 39443ccf..87847f21 100644
--- a/src/free.c
+++ b/src/free.c
@@ -25,7 +25,7 @@
 static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
 // ------------------------------------------------------
 // forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
-static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
 
 // regular free of a (thread local) block pointer
 // fast path written carefully to prevent spilling on the stack
@@ -77,7 +77,7 @@ static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t
 // free a pointer owned by another thread (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
   mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
-  mi_free_block_mt(segment, page, block);
+  mi_free_block_mt(page, segment, block);
 }
 
 // generic free (for runtime integration)
@@ -230,7 +230,7 @@ static void mi_stat_huge_free(const mi_page_t* page);
 #endif
 
 // Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
-static void mi_decl_noinline mi_free_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block)
+static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
 {
   // first see if the segment was abandoned and if we can reclaim it into our thread
   if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) &&
diff --git a/src/options.c b/src/options.c
index 8a84d344..f8e928d0 100644
--- a/src/options.c
+++ b/src/options.c
@@ -91,7 +91,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 10, UNINIT, MI_OPTION(arena_purge_mult) },     // purge delay multiplier for arena's
   { 1,  UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
-  { 0,  UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
+  { 1,  UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
 };
 
 static void mi_option_init(mi_option_desc_t* desc);

From 006ae2d055ea6a7d847621963dc85a8c39423fa7 Mon Sep 17 00:00:00 2001
From: daanx
Date: Sun, 24 Mar 2024 17:07:28 -0700
Subject: [PATCH 6/6] add is_huge page flag to ensure the right page queue is returned (see #868)

---
 include/mimalloc/internal.h |  4 +++-
 include/mimalloc/types.h    |  5 +++--
 src/alloc.c                 |  2 +-
 src/init.c                  |  8 +++++---
 src/page-queue.c            | 34 ++++++++++++++++++----------------
 src/page.c                  | 19 +++++++++----------
 src/segment.c               |  9 +++++++--
 7 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 29943357..4df8ca68 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -470,7 +470,9 @@ static inline size_t mi_page_block_size(const mi_page_t* page) {
 }
 
 static inline bool mi_page_is_huge(const mi_page_t* page) {
-  return (_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
+  mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) ||
+                     (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE));
+  return page->is_huge;
 }
 
 // Get the usable block size of a page without fixed padding.
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 69d59527..6b22c83e 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -278,6 +278,7 @@ typedef struct mi_page_s {
   uint8_t segment_in_use:1;  // `true` if the segment allocated this page
   uint8_t is_committed:1;    // `true` if the page virtual memory is committed
   uint8_t is_zero_init:1;    // `true` if the page was initially zero initialized
+  uint8_t is_huge:1;         // `true` if the page is in a huge segment
 
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity;         // number of blocks committed, must be the first field, see `segment.c:page_clear`
@@ -285,7 +286,7 @@ typedef struct mi_page_s {
   uint16_t used;             // number of blocks in use (including blocks in `thread_free`)
   mi_page_flags_t flags;     // `in_full` and `has_aligned` flags (8 bits)
   uint8_t block_size_shift;  // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t free_is_zero:1;    // `true` if the blocks in the free list are zero initialized 
+  uint8_t free_is_zero:1;    // `true` if the blocks in the free list are zero initialized
   uint8_t retire_expire:7;   // expiration count for retired blocks
                              // padding
   mi_block_t* free;          // list of available free blocks (`malloc` allocates from this list)
diff --git a/src/alloc.c b/src/alloc.c
index 8b6c4de0..2e03eca0 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
diff --git a/src/init.c b/src/init.c
index 79175f81..604809ad 100644
--- a/src/init.c
+++ b/src/init.c
@@ -14,7 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
-  0, false, false, false,
+  0,
+  false, false, false, false,
   0,       // capacity
   0,       // reserved capacity
   0,       // used
@@ -78,9 +79,10 @@ const mi_page_t _mi_page_empty = {
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
+  MI_STAT_COUNT_NULL(), \
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
-  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
-  { 0, 0 }, { 0, 0 }, { 0, 0 } \
+  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
+  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
   MI_STAT_COUNT_END_NULL()
 
 // --------------------------------------------------------
diff --git a/src/page-queue.c b/src/page-queue.c
index 095f9b81..e4bfde14 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -1,5 +1,5 @@
 /*----------------------------------------------------------------------------
-Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -141,21 +141,21 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
 }
 #endif
 
-static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(mi_page_block_size(page)));
-  mi_heap_t* heap = mi_page_heap(page);
-  mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
+static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
+  mi_assert_internal(heap!=NULL);
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
+  mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(bin >= MI_BIN_HUGE || mi_page_block_size(page) == pq->block_size);
-  mi_assert_expensive(mi_page_queue_contains(pq, page));
+  mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) ||
+                     (mi_page_is_in_full(page) && mi_page_queue_is_full(pq)));
   return pq;
 }
 
-static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(mi_page_block_size(page)));
-  mi_assert_internal(bin <= MI_BIN_FULL);
-  mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(mi_page_is_in_full(page) || mi_page_block_size(page) == pq->block_size);
+static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
+  mi_heap_t* heap = mi_page_heap(page);
+  mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
   mi_assert_expensive(mi_page_queue_contains(pq, page));
   return pq;
 }
 
@@ -210,7 +210,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
 static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(queue, page));
-  mi_assert_internal(mi_page_block_size(page) == queue->block_size || (mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
+  mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
+                     (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;
   if (page->next != NULL) page->next->prev = page->prev;
@@ -236,7 +238,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
   mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   #endif
   mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
-                     (mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
 
   mi_page_set_in_full(page, mi_page_queue_is_full(queue));
@@ -267,8 +269,8 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
   mi_assert_internal((bsize == to->block_size && bsize == from->block_size) ||
                      (bsize == to->block_size && mi_page_queue_is_full(from)) ||
                      (bsize == from->block_size && mi_page_queue_is_full(to)) ||
-                     (bsize > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) ||
-                     (bsize > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)));
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(to)) ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_full(to)));
 
   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;
diff --git a/src/page.c b/src/page.c
index ef58d883..d36421f4 100644
--- a/src/page.c
+++ b/src/page.c
@@ -1,5 +1,5 @@
 /*----------------------------------------------------------------------------
-Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -82,7 +82,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_assert_internal(page->used <= page->capacity);
   mi_assert_internal(page->capacity <= page->reserved);
 
-  const size_t bsize = mi_page_block_size(page);
+  // const size_t bsize = mi_page_block_size(page);
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = mi_page_start(page);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL,NULL));
@@ -448,8 +448,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   // for now, we don't retire if it is the only page left of this size class.
   mi_page_queue_t* pq = mi_page_queue_of(page);
   const size_t bsize = mi_page_block_size(page);
-  if mi_likely(bsize < MI_MAX_RETIRE_SIZE) { // not too large && not full or huge queue?
-    mi_assert_internal(!mi_page_queue_is_special(pq));
+  if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
     if (pq->last==page && pq->first==page) { // the only page in the queue?
       mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
       page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
@@ -662,7 +661,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   page->block_size = block_size;
   size_t page_size;
   page->page_start = _mi_segment_page_start(segment, page, &page_size, NULL);
-  mi_track_mem_noaccess(page->page_start,page_size); 
+  mi_track_mem_noaccess(page->page_start,page_size);
   mi_assert_internal(page_size / block_size < (1L<<16));
   page->reserved = (uint16_t)(page_size / block_size);
   mi_assert_internal(page->reserved > 0);
@@ -821,7 +820,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
   General allocation
 ----------------------------------------------------------- */
 
-// Huge pages contain just one block, and the segment contains just that page. 
+// Huge pages contain just one block, and the segment contains just that page.
 // Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX)
 // so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`.
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
@@ -830,15 +829,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
   #if MI_HUGE_PAGE_ABANDON
   mi_page_queue_t* pq = NULL;
   #else
-  mi_page_queue_t* pq = mi_page_queue(heap, block_size);
-  // mi_assert_internal(mi_page_queue_is_huge(pq));
+  mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1);  // always in the huge queue regardless of the block size
+  mi_assert_internal(mi_page_queue_is_huge(pq));
   #endif
   mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
-  if (page != NULL) { 
+  if (page != NULL) {
     mi_assert_internal(mi_page_block_size(page) >= size);
     mi_assert_internal(mi_page_immediate_available(page));
-    mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
     mi_assert_internal(mi_page_is_huge(page));
+    mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
     mi_assert_internal(_mi_page_segment(page)->used==1);
     #if MI_HUGE_PAGE_ABANDON
     mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
diff --git a/src/segment.c b/src/segment.c
index a4fd26e9..cec3079e 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -142,6 +142,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
   mi_assert_internal(segment->used <= segment->capacity);
   mi_assert_internal(segment->abandoned <= segment->used);
+  mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || segment->capacity == 1);
   size_t nfree = 0;
   for (size_t i = 0; i < segment->capacity; i++) {
     const mi_page_t* const page = &segment->pages[i];
@@ -151,6 +152,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
     if (page->segment_in_use) {
       mi_assert_expensive(!mi_pages_purge_contains(page, tld));
     }
+    if (segment->page_kind == MI_PAGE_HUGE) mi_assert_internal(page->is_huge);
   }
   mi_assert_internal(nfree + segment->used == segment->capacity);
   // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
@@ -615,11 +617,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
   _mi_memzero((uint8_t*)segment + ofs, info_size - ofs);
 
   // initialize pages info
+  const bool is_huge = (page_kind == MI_PAGE_HUGE);
   for (size_t i = 0; i < capacity; i++) {
     mi_assert_internal(i <= 255);
     segment->pages[i].segment_idx = (uint8_t)i;
     segment->pages[i].is_committed = segment->memid.initially_committed;
     segment->pages[i].is_zero_init = segment->memid.initially_zero;
+    segment->pages[i].is_huge = is_huge;
   }
 
   // initialize
@@ -753,7 +757,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
     mi_segment_abandon(segment,tld);
   }
   else if (segment->used + 1 == segment->capacity) {
-    mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages
+    mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // large and huge pages are always the single page in a segment
    if (segment->page_kind <= MI_PAGE_MEDIUM) {
      // move back to segments free list
      mi_segment_insert_in_free_queue(segment,tld);
@@ -1123,13 +1127,14 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
   #endif
   mi_page_t* page = mi_segment_find_free(segment, tld);
   mi_assert_internal(page != NULL);
+  mi_assert_internal(page->is_huge);
 
   // for huge pages we initialize the block_size as we may
   // overallocate to accommodate large alignments.
   size_t psize;
   uint8_t* start = mi_segment_page_start_ex(segment, page, 0, &psize, NULL);
   page->block_size = psize;
-  
+
   // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE)
   if (page_alignment > 0 && segment->allow_decommit && page->is_committed) {
     uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment);