diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d3e689ae..6b4b86a2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -147,6 +147,7 @@ mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); @@ -426,7 +427,8 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer static inline mi_segment_t* _mi_ptr_segment(const void* p) { // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + if (p == NULL) return NULL; + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } // Segment belonging to a page @@ -440,7 +442,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); size_t idx = (size_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc.h b/include/mimalloc.h index dec0fc0c..17fd1c60 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB +#define MI_ALIGNMENT_MAX (2*1024*1024UL) // maximum supported alignment is 1MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 5672078e..f4ddbf99 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -18,41 +18,59 @@ terms of the MIT license. A copy of the license can be found in the file static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { mi_assert_internal(size <= PTRDIFF_MAX); - mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX); + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const size_t padsize = size + MI_PADDING_SIZE; // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { + if (offset == 0 && alignment <= padsize && padsize <= MI_MEDIUM_OBJ_SIZE_MAX && (padsize & align_mask) == 0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; } - - // otherwise over-allocate - const size_t oversize = size + alignment - 1; - void* p = _mi_heap_malloc_zero(heap, oversize, zero); - if (p == NULL) return NULL; + + void* p; + size_t oversize; + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { + // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) + if mi_unlikely(offset != 0) { + // todo: cannot support offset alignment for very large alignments yet + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); + #endif + return NULL; + } + oversize = size + MI_SEGMENT_SIZE - 1; + p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); + if (p == NULL) return NULL; + //mi_assert_internal(_mi_is_aligned(p, alignment)); + } + else { + // otherwise over-allocate + oversize = size + alignment - 1; + p = _mi_heap_malloc_zero(heap, oversize, zero); + if (p == NULL) return NULL; + } // .. and align within the allocation uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); - + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + #if MI_TRACK_ENABLED if (p != aligned_p) { mi_track_free(p); - mi_track_malloc(aligned_p,size,zero); + mi_track_malloc(aligned_p, size, zero); } else { - mi_track_resize(aligned_p,oversize,size); + mi_track_resize(aligned_p, oversize, size); } #endif - return aligned_p; + return aligned_p; } // Primitive aligned allocation @@ -60,18 +78,20 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0); - if mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } + /* if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); #endif return NULL; } + */ if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -82,7 +102,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX) { + if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if mi_likely(page->free != NULL && is_aligned) diff --git a/src/alloc.c b/src/alloc.c index 284db29c..9ecb9f30 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -117,8 +117,9 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -static inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { +inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); return mi_heap_malloc_small_zero(heap, size, zero); } else { diff --git a/src/page.c b/src/page.c index 49662dba..5e2ec826 100644 --- a/src/page.c +++ b/src/page.c @@ -792,7 +792,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // that frees the block can free the whole page and segment directly. static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already diff --git a/src/region.c b/src/region.c index c3f2c8a2..ea376aa4 100644 --- a/src/region.c +++ b/src/region.c @@ -376,7 +376,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, } if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); + mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif @@ -470,7 +470,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, 0, 0, arena_memid, (~commit == 0), tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); } } } diff --git a/src/segment.c b/src/segment.c index 225ecd2a..577fa4fd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -503,7 +503,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT + 1 && required > 0); capacity = 1; } else { @@ -574,8 +574,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; if (page_alignment > 0) { - align_offset = pre_size; alignment = page_alignment; + align_offset = _mi_align_up( pre_size, MI_SEGMENT_SIZE ); + segment_size += (align_offset - pre_size); } segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate @@ -1251,14 +1252,23 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT, page_alignment, tld, os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); - mi_assert_internal(page_alignment == 0 || _mi_is_aligned(_mi_page_start(segment, page, NULL),page_alignment)); +#if MI_DEBUG > 3 + if (page_alignment > 0) { + size_t psize; + size_t pre_size; + void* p = _mi_segment_page_start(segment, page, 0, &psize, &pre_size); + void* aligned_p = (void*)_mi_align_up((uintptr_t)p, page_alignment); + mi_assert_internal(page_alignment == 0 || _mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); + } +#endif return page; } @@ -1296,10 +1306,16 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (page_alignment <= MI_ALIGNMENT_MAX) { + if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); + if (page_alignment < MI_SEGMENT_SIZE) { + page_alignment = MI_SEGMENT_SIZE; + } page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); } - if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {