From 2479d168adb829f2f7edfa9562f53dddd7d0c7a7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 16:41:40 -0800 Subject: [PATCH] decommit unused prefix of large aligned blocks --- include/mimalloc-internal.h | 5 +++++ src/alloc-aligned.c | 15 ++++++++++++-- src/alloc.c | 41 ++++++++++++++++++++++--------------- src/segment.c | 14 +++++++++---- test/test-api.c | 16 ++++++++++----- 5 files changed, 63 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ea104f3d..01df65dd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -94,6 +94,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_off bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); @@ -485,6 +486,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (_mi_page_segment(page)->page_kind == MI_PAGE_HUGE); +} + // Get the usable block size of a page without fixed padding. // This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index db80baee..ffc51edc 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -44,8 +44,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* return NULL; } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); - p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block - if (p == NULL) return NULL; + p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block + // zero afterwards as only the area from the aligned_p may be committed! + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -63,6 +64,16 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + + // now zero the block if needed + if (zero && alignment > MI_ALIGNMENT_MAX) { + const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; + ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; + #if MI_PADDING + zsize -= MI_MAX_ALIGN_SIZE; + #endif + if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + } #if MI_TRACK_ENABLED if (p != aligned_p) { diff --git a/src/alloc.c b/src/alloc.c index a38da2e5..cfc623c1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -51,7 +51,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED - if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } + if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); + } #elif (MI_SECURE!=0) if (!zero) { block->next = 0; } // don't leak internal data #endif @@ -77,9 +79,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #endif padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } #endif return block; @@ -250,17 +254,19 @@ static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, si if (!ok) return false; mi_assert_internal(bsize >= delta); *size = bsize - delta; - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - mi_track_mem_defined(fill,maxpad); - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - ok = false; - break; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } } + mi_track_mem_noaccess(fill, maxpad); } - mi_track_mem_noaccess(fill,maxpad); return ok; } @@ -361,10 +367,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - #endif - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { @@ -373,6 +376,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfreex; bool use_delayed; diff --git a/src/segment.c b/src/segment.c index 798aa756..b922a50c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1265,10 +1265,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, if (page_alignment > 0) { size_t psize; size_t pre_size; - void* p = _mi_segment_page_start(segment, page, 0, &psize, &pre_size); - void* aligned_p = (void*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(page_alignment == 0 || _mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); + uint8_t* p = (uint8_t*)_mi_segment_page_start(segment, page, 0, &psize, &pre_size); + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - p) >= size); + if (!segment->mem_is_pinned && page->is_committed) { + // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) + uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_mem_decommit(decommit_start, decommit_size, os_tld); + } } #endif // for huge pages we initialize the xblock_size as we may diff --git a/test/test-api.c b/test/test-api.c index a16ef381..65578287 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -177,11 +177,17 @@ int main(void) { }; CHECK_BODY("malloc-aligned9") { bool ok = true; - for (int i = 0; i < 5 && ok; i++) { - int n = (1 << i); - void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; - mi_free(p); + void* p[8]; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGNMENT_MAX, MI_ALIGNMENT_MAX + 1, 2 * MI_ALIGNMENT_MAX, 8 * MI_ALIGNMENT_MAX, 0 }; + for (int i = 0; i < 28 && ok; i++) { + int align = (1 << i); + for (int j = 0; j < 8 && ok; j++) { + p[j] = mi_zalloc_aligned(sizes[j], align); + ok = ((uintptr_t)p[j] % align) == 0; + } + for (int j = 0; j < 8; j++) { + mi_free(p[j]); + } } result = ok; };