From 6a80fa3ac1a3ec10b79a651ced93f286abe096c6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Nov 2022 14:46:52 -0700 Subject: [PATCH 1/7] initial progress on removing alignment limit --- ide/vs2022/mimalloc-override.vcxproj | 2 +- ide/vs2022/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 12 ++++++--- include/mimalloc-types.h | 2 ++ src/alloc.c | 10 +++++--- src/arena.c | 13 +++++----- src/os.c | 37 ++++++++++++++++++++++++++++ src/page.c | 22 ++++++++--------- src/region.c | 21 ++++++++-------- src/segment.c | 36 ++++++++++++++++++--------- test/test-api.c | 8 +++--- 11 files changed, 114 insertions(+), 51 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index d674108b..87b0a1e4 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 29f732d1..9081881c 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); + MI_DEBUG=4;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 23981240..d3e689ae 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -81,9 +81,13 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free th size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); + + // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -94,7 +98,7 @@ bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page @@ -107,7 +111,7 @@ void _mi_abandoned_await_readers(void); // "page.c" -void* 
_mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks void _mi_page_unfull(mi_page_t* page); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index bca0ad61..79d04d2d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -301,6 +301,8 @@ typedef struct mi_segment_s { size_t memid; // id for the os-level memory manager bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) + size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) // segment fields _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/alloc.c b/src/alloc.c index af255f67..284db29c 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,7 +30,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { - return _mi_malloc_generic(heap, size, zero); + return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -117,14 +117,14 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { +static inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_assert_internal(p == NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { @@ -137,6 +137,10 @@ extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero } } +extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + return _mi_heap_malloc_zero_ex(heap, size, zero, 0); +} + mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, false); } diff --git a/src/arena.c b/src/arena.c index ce716089..5aef95f7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -190,7 +190,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, 
mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); @@ -201,7 +201,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && + if (alignment <= MI_SEGMENT_ALIGN && align_offset == 0 && size >= MI_ARENA_MIN_OBJ_SIZE && mi_atomic_load_relaxed(&mi_arena_count) > 0) { @@ -256,14 +256,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* } *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); + void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); if (p != NULL) *is_pinned = *large; return p; } void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, arena_id, memid, tld); } @@ -281,16 +281,17 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, all_committed, stats); + _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); } else { // allocated in an arena + mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); diff --git a/src/os.c b/src/os.c index fe9c2959..57b34a2c 100644 --- a/src/os.c +++ b/src/os.c @@ -840,8 +840,45 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); } +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_SEGMENT_SIZE so we can align + the first page at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers + using `mi_free_aligned` to use the actual start of the + memory region. 
+----------------------------------------------------------- */ +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { + mi_assert(offset <= MI_SEGMENT_SIZE); + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + if (offset > MI_SEGMENT_SIZE) return NULL; + if (offset == 0) { + return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + } + else { + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + if (start == NULL) return NULL; + void* p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra, tld_stats); + } + return p; + } +} + +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { + mi_assert(align_offset <= MI_SEGMENT_SIZE); + const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; + void* start = (uint8_t*)p - extra; + _mi_os_free_ex(start, size + extra, was_committed, tld_stats); +} + /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ diff --git a/src/page.c b/src/page.c index 26b9c9f1..49662dba 100644 --- a/src/page.c +++ b/src/page.c @@ -252,10 +252,10 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { } // allocate a fresh page from a segment -static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); mi_assert_internal(pq==NULL||block_size == pq->block_size); - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; @@ -272,7 +272,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); @@ -790,10 +790,10 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // Because huge pages contain just one block, and the segment contains // just that page, we always treat them as abandoned and any thread // that frees the block can free the whole page and segment directly. 
-static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); - mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); + mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already mi_assert_internal(bsize >= size); @@ -818,16 +818,16 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { // Allocate a page // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { +static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) ) { + if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { - return mi_huge_page_alloc(heap,size); + return mi_huge_page_alloc(heap,size,huge_alignment); } } else { @@ -839,7 +839,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. 
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { mi_assert_internal(heap != NULL); @@ -858,10 +858,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexce _mi_heap_delayed_free_partial(heap); // find (or allocate) a page of the right size - mi_page_t* page = mi_find_page(heap, size); + mi_page_t* page = mi_find_page(heap, size, huge_alignment); if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more mi_heap_collect(heap, true /* force */); - page = mi_find_page(heap, size); + page = mi_find_page(heap, size, huge_alignment); } if mi_unlikely(page == NULL) { // out of memory diff --git a/src/region.c b/src/region.c index 8b04387d..c3f2c8a2 100644 --- a/src/region.c +++ b/src/region.c @@ -50,9 +50,9 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); @@ -181,7 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, bool is_zero = false; bool is_pinned = false; size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); mi_assert_internal(!region_large || region_commit); @@ -190,7 +190,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, const size_t idx = mi_atomic_increment_acq_rel(®ions_count); if (idx >= MI_REGION_MAX) { mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); return false; } @@ -347,7 +347,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -363,7 +363,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = NULL; size_t arena_memid; const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); if (p == NULL) { _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -371,7 +371,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l } if (p == NULL) { // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); + p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); *memid = mi_memid_create_from_arena(arena_memid); } @@ -391,7 +391,7 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; @@ -402,10 +402,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re mem_region_t* region; if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); + _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); } else { // allocated in a region + mi_assert_internal(align_offset == 0); mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); @@ -469,7 +470,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, 0, 0, arena_memid, (~commit == 0), tld->stats); } } } diff --git a/src/segment.c b/src/segment.c index 68174bb2..225ecd2a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -475,7 +475,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); + _mi_mem_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, 
fully_committed, any_reset, tld->os); } // called by threads that are terminating to free cached segments @@ -495,7 +495,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // the segment parameter is non-null if it came from our cache mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); @@ -507,7 +507,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ capacity = 1; } else { - mi_assert_internal(required == 0); + mi_assert_internal(required == 0 && page_alignment == 0); size_t page_size = (size_t)1 << page_shift; capacity = MI_SEGMENT_SIZE / page_size; mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); @@ -571,7 +571,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ size_t memid; bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + size_t align_offset = 0; + size_t alignment = MI_SEGMENT_SIZE; + if (page_alignment > 0) { + align_offset = pre_size; + alignment = page_alignment; + } + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed @@ -581,7 +587,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (commit_zero) is_zero = true; if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, MI_SEGMENT_SIZE, memid, false, false, os_tld); + _mi_mem_free(segment, segment_size, alignment, align_offset, memid, false, false, os_tld); return NULL; } } @@ -589,6 +595,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->memid = memid; segment->mem_is_pinned = (mem_large || is_pinned); segment->mem_is_committed = commit; + segment->mem_alignment = alignment; + segment->mem_align_offset = align_offset; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); @@ -637,8 +645,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return segment; } -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_init(NULL, required, page_kind, page_shift, page_alignment, tld, os_tld); } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { @@ -1169,7 +1177,7 @@ static 
mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + return mi_segment_alloc(0, page_kind, page_shift, 0, tld, os_tld); } @@ -1241,15 +1249,16 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size return page; } -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); + mi_assert_internal(page_alignment == 0 || _mi_is_aligned(_mi_page_start(segment, page, NULL),page_alignment)); return page; } @@ -1285,8 +1294,11 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block Page allocation ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; + if (page_alignment <= MI_ALIGNMENT_MAX) { + page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + } if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } @@ -1297,7 +1309,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); diff --git a/test/test-api.c b/test/test-api.c index 3c2ef7e4..650056e2 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -161,10 +161,12 @@ int main(void) { result = ok; }; CHECK_BODY("malloc-aligned7") { - void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); mi_free(p); - }; + void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); + mi_free(p); + }; CHECK_BODY("malloc-aligned8") { - void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); mi_free(p); + void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); + mi_free(p); }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); From 4b91ff760ddd9c7ed7d4e2ddf33d987f81779f58 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Nov 2022 16:40:42 -0700 Subject: [PATCH 2/7] initial working large alignment --- include/mimalloc-internal.h | 6 +++-- include/mimalloc.h | 2 +- src/alloc-aligned.c | 50 ++++++++++++++++++++++++++----------- src/alloc.c | 3 ++- src/page.c | 2 +- src/region.c | 4 +-- src/segment.c | 28 ++++++++++++++++----- 7 files changed, 67 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d3e689ae..6b4b86a2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -147,6 +147,7 @@ mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); @@ -426,7 +427,8 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer static inline mi_segment_t* _mi_ptr_segment(const void* p) { // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + if (p == NULL) return NULL; + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } // Segment belonging to a page @@ -440,7 +442,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); size_t idx = (size_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc.h b/include/mimalloc.h index dec0fc0c..17fd1c60 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
// ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB +#define MI_ALIGNMENT_MAX (2*1024*1024UL) // maximum supported alignment is 1MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 5672078e..f4ddbf99 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -18,41 +18,59 @@ terms of the MIT license. A copy of the license can be found in the file static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { mi_assert_internal(size <= PTRDIFF_MAX); - mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX); + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const size_t padsize = size + MI_PADDING_SIZE; // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { + if (offset == 0 && alignment <= padsize && padsize <= MI_MEDIUM_OBJ_SIZE_MAX && (padsize & align_mask) == 0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; } - - // otherwise over-allocate - const size_t oversize = size + alignment - 1; - void* p = _mi_heap_malloc_zero(heap, oversize, zero); - if (p == NULL) return NULL; + + void* p; + size_t oversize; + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { + // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) + if mi_unlikely(offset != 0) { + // todo: cannot support offset alignment for very large alignments yet + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); + #endif + return NULL; + } + oversize = size + MI_SEGMENT_SIZE - 1; + p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); + if (p == NULL) return NULL; + //mi_assert_internal(_mi_is_aligned(p, alignment)); + } + else { + // otherwise over-allocate + oversize = size + alignment - 1; + p = _mi_heap_malloc_zero(heap, oversize, zero); + if (p == NULL) return NULL; + } // .. and align within the allocation uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); - + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + #if MI_TRACK_ENABLED if (p != aligned_p) { mi_track_free(p); - mi_track_malloc(aligned_p,size,zero); + mi_track_malloc(aligned_p, size, zero); } else { - mi_track_resize(aligned_p,oversize,size); + mi_track_resize(aligned_p, oversize, size); } #endif - return aligned_p; + return aligned_p; } // Primitive aligned allocation @@ -60,18 +78,20 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0); - if mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } + /* if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); #endif return NULL; } + */ if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -82,7 +102,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX) { + if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if mi_likely(page->free != NULL && is_aligned) diff --git a/src/alloc.c b/src/alloc.c index 284db29c..9ecb9f30 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -117,8 +117,9 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -static inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { +inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); return mi_heap_malloc_small_zero(heap, size, zero); } else { diff --git a/src/page.c b/src/page.c index 49662dba..5e2ec826 100644 --- a/src/page.c +++ b/src/page.c @@ -792,7 +792,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // that frees the block can free the whole page and segment directly. 
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already diff --git a/src/region.c b/src/region.c index c3f2c8a2..ea376aa4 100644 --- a/src/region.c +++ b/src/region.c @@ -376,7 +376,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, } if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); + mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif @@ -470,7 +470,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, 0, 0, arena_memid, (~commit == 0), tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); } } } diff --git a/src/segment.c b/src/segment.c index 225ecd2a..577fa4fd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -503,7 +503,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT + 1 && required > 0); capacity = 1; } else { @@ -574,8 +574,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; if (page_alignment > 0) { - align_offset = pre_size; alignment = page_alignment; + align_offset = _mi_align_up( pre_size, MI_SEGMENT_SIZE ); + segment_size += (align_offset - pre_size); } segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate @@ -1251,14 +1252,23 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT, page_alignment, tld, os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); - mi_assert_internal(page_alignment == 0 || _mi_is_aligned(_mi_page_start(segment, page, NULL),page_alignment)); +#if MI_DEBUG > 3 + if (page_alignment > 0) { + size_t psize; + size_t pre_size; + void* p = _mi_segment_page_start(segment, page, 0, &psize, &pre_size); + void* aligned_p = (void*)_mi_align_up((uintptr_t)p, page_alignment); + mi_assert_internal(page_alignment == 0 || _mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); + } +#endif return page; } @@ -1296,10 +1306,16 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (page_alignment <= MI_ALIGNMENT_MAX) { + if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); + if (page_alignment < MI_SEGMENT_SIZE) { + page_alignment = MI_SEGMENT_SIZE; + } page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); } - if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { From a200291ae576e1e766be8526c0d058bd9766d480 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 08:26:17 -0800 Subject: [PATCH 3/7] further progress on removing aligned limit --- src/alloc-aligned.c | 14 +++++++------- src/page.c | 4 ++-- test/test-api.c | 21 +++++++++++++++++++-- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index f4ddbf99..63acd58c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -29,22 +29,21 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; } - + void* p; size_t oversize; if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet - #if MI_DEBUG > 0 +#if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); - #endif +#endif return NULL; } - oversize = size + MI_SEGMENT_SIZE - 1; - p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); + oversize = (size <= MI_SMALL_SIZE_MAX ? 
MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); + p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block if (p == NULL) return NULL; - //mi_assert_internal(_mi_is_aligned(p, alignment)); } else { // otherwise over-allocate @@ -57,7 +56,8 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); - if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); + if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); diff --git a/src/page.c b/src/page.c index 5e2ec826..6b54eb2c 100644 --- a/src/page.c +++ b/src/page.c @@ -262,7 +262,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); + mi_page_init(heap, page, (pq == NULL ? MI_HUGE_BLOCK_SIZE : block_size), heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -643,7 +643,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_track_mem_noaccess(page_start,page_size); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (uint16_t)(page_size / block_size); + page->reserved = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint16_t)(page_size / block_size) : 1); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/test/test-api.c b/test/test-api.c index 650056e2..312b3f1b 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -163,10 +163,27 @@ int main(void) { CHECK_BODY("malloc-aligned7") { void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); mi_free(p); + result = ((uintptr_t)p % MI_ALIGNMENT_MAX) == 0; }; CHECK_BODY("malloc-aligned8") { - void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); - mi_free(p); + bool ok = true; + for (int i = 0; i < 5 && ok; i++) { + int n = (1 << i); + void* p = mi_malloc_aligned(1024, n * MI_ALIGNMENT_MAX); + ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + mi_free(p); + } + result = ok; + }; + CHECK_BODY("malloc-aligned9") { + bool ok = true; + for (int i = 0; i < 5 && ok; i++) { + int n = (1 << i); + void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); + ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + mi_free(p); + } + result = ok; }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); From f54e64365f7fe8d933944fa0ad633f14fc1539c2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 09:44:12 -0800 Subject: [PATCH 4/7] fix assertions with incorrect block size for large alignments --- src/alloc-aligned.c | 1 + src/page.c | 6 ++++-- src/segment.c | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 63acd58c..3ce01f5c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -57,6 +57,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); diff --git a/src/page.c b/src/page.c index 6b54eb2c..48ee1f56 100644 --- a/src/page.c +++ b/src/page.c @@ -262,7 +262,9 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, (pq == NULL ? MI_HUGE_BLOCK_SIZE : block_size), heap->tld); + mi_assert_internal(pq!=NULL || page->xblock_size != 0); + mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); + mi_page_init(heap, page, (pq==NULL ? mi_page_block_size(page) : block_size), heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -643,7 +645,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_track_mem_noaccess(page_start,page_size); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint16_t)(page_size / block_size) : 1); + page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/src/segment.c b/src/segment.c index 577fa4fd..12c9e108 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1269,6 +1269,11 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); } #endif + // for huge pages we initialize the xblock_size as we may + // overallocate to accommodate large alignments. + size_t psize; + _mi_segment_page_start(segment, page, 0, &psize, NULL); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : psize); return page; } From d3715132d165d8d1fefc66f3414acb897502c4fd Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 09:52:54 -0800 Subject: [PATCH 5/7] move null ptr check --- include/mimalloc-internal.h | 3 +-- src/alloc.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6b4b86a2..4620fb72 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -426,8 +426,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer static inline mi_segment_t* _mi_ptr_segment(const void* p) { - // mi_assert_internal(p != NULL); - if (p == NULL) return NULL; + mi_assert_internal(p != NULL); return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } diff --git a/src/alloc.c b/src/alloc.c index 9ecb9f30..d10bd586 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -470,8 +470,8 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms } #endif + if mi_unlikely(p == NULL) return NULL; mi_segment_t* const segment = _mi_ptr_segment(p); - if mi_unlikely(segment == NULL) return NULL; // checks also for (p==NULL) #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { From f788e3c9a3481d425cea81b037d20107541a087f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 14:18:52 -0800 Subject: [PATCH 6/7] add comment --- src/page.c | 6 ++++-- src/segment.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 48ee1f56..414438b4 100644 --- a/src/page.c +++ b/src/page.c @@ -260,11 +260,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } - // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(pq!=NULL || page->xblock_size != 0); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); - mi_page_init(heap, page, (pq==NULL ? mi_page_block_size(page) : block_size), heap->tld); + // a fresh page was found, initialize it + const size_t full_block_size = (pq == NULL ? 
mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + mi_assert_internal(full_block_size >= block_size); + mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); diff --git a/src/segment.c b/src/segment.c index 12c9e108..7f80bf8f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1273,7 +1273,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, // overallocate to accommodate large alignments. size_t psize; _mi_segment_page_start(segment, page, 0, &psize, NULL); - page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : psize); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); return page; } From 3eb616f2bf05f61d857ac3c6464f85a14e48a710 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 14:51:06 -0800 Subject: [PATCH 7/7] fix bug in secure mode where adjustment would make the page size less than the blocksize on the first page of a segment --- src/page.c | 1 + src/segment.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/page.c b/src/page.c index 48ee1f56..f587fc9a 100644 --- a/src/page.c +++ b/src/page.c @@ -646,6 +646,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + mi_assert_internal(page->reserved > 0); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/src/segment.c b/src/segment.c index 12c9e108..3c53db6f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -403,12 +403,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - if (pre_size != NULL) *pre_size = adjust; + if (psize - adjust >= block_size) { + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - mi_assert_internal((uintptr_t)p % block_size == 0); } if (page_size != NULL) *page_size = psize;
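
The core mechanism behind the series is the offset-aligned OS allocation added in patch 1 (`_mi_os_alloc_aligned_offset`): over-allocate by `extra = align_up(offset, alignment) - offset` so that the address `offset` bytes into the returned pointer lands on the requested alignment, and recompute the same `extra` on free to recover the real start of the mapping. A self-contained sketch of that arithmetic, using C11 `aligned_alloc`/`free` in place of the mimalloc OS primitives (the helper names `align_up` and `alloc_aligned_at_offset` are illustrative, not mimalloc internals):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

// Round n up to a multiple of a power-of-two alignment.
static size_t align_up(size_t n, size_t alignment) {
  return (n + alignment - 1) & ~(alignment - 1);
}

// Allocate `size` bytes such that (result + offset) is alignment-aligned.
// *start receives the raw allocation, which is what must be freed later
// (mirroring how _mi_os_free_aligned recomputes the start from the offset).
static void* alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, void** start) {
  const size_t extra = align_up(offset, alignment) - offset;
  const size_t oversize = align_up(size + extra, alignment);  // aligned_alloc wants a multiple of the alignment
  *start = aligned_alloc(alignment, oversize);
  if (*start == NULL) return NULL;
  void* p = (uint8_t*)(*start) + extra;
  assert(((uintptr_t)p + offset) % alignment == 0);
  return p;
}

int main(void) {
  void* start = NULL;
  // e.g. make the address 4096 bytes into the block 64 KiB-aligned
  void* p = alloc_aligned_at_offset(16 * 1024, 64 * 1024, 4096, &start);
  (void)p;
  free(start);  // free the raw start, not the adjusted pointer
  return 0;
}

In the patches themselves, patch 2 passes `align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE)` from the segment code, so it is the first (and only) page of the huge segment, rather than the segment header, that ends up on the requested alignment.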
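
At the user level the effect is that alignments above the former 1 MiB `MI_ALIGNMENT_MAX` limit now succeed by allocating a dedicated huge-page segment, as exercised by the new malloc-aligned8/malloc-aligned9 tests. A minimal usage sketch against the public API (the 8 MiB alignment is just an example value):

#include <mimalloc.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const size_t alignment = 8 * 1024 * 1024;  // 8 MiB, above the old 1 MiB limit
  void* p = mi_malloc_aligned(1024, alignment);
  printf("p=%p aligned=%d\n", p, (int)(p != NULL && ((uintptr_t)p % alignment) == 0));
  mi_free(p);
  return 0;
}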