From 037285ac0980b648d5c4ad6b359ac57d5f21e543 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 13:27:34 -0700 Subject: [PATCH] refactor segment cache and map in a separate source file --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 5 +- ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 29 +- include/mimalloc-types.h | 1 + src/arena.c | 229 +------------- src/bitmap.c | 2 +- src/segment-cache.c | 310 +++++++++++++++++++ src/segment.c | 152 +-------- src/static.c | 1 + 16 files changed, 378 insertions(+), 365 deletions(-) create mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index acd16a98..a9fdb259 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ set(mi_sources src/os.c src/bitmap.c src/arena.c + src/segment-cache.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index ab02a658..a87b69ac 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 2544c06b..d01f9311 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -79,5 +79,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index f212b619..41fb77c1 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -242,6 +242,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 128126c1..05417645 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -62,6 +62,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 000958ee..4136e574 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 38e83578..d6b7b5a9 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + @@ -67,7 +70,7 @@ Header Files - Source Files + Header Files diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index b1184cb5..98dee520 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -234,6 +234,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 8d071d29..92be7cb4 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7ddfa38f..11733c66 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -62,19 +62,24 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data -bool 
_mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +size_t _mi_os_good_alloc_size(size_t size); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); +// "segment-cache.c" +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -463,6 +468,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 06ab1ebe..8524de8a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -114,6 +114,7 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants #define MI_SEGMENT_SIZE (1ULL< MI_SEGMENT_ALIGN) return NULL; - - // numa node determines start field - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - if (*large) { // large allowed? 
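The cache logic removed here (mi_cache_pop, mi_commit_mask_decommit, mi_cache_purge, mi_cache_push) moves nearly verbatim into the new src/segment-cache.c further down, renamed to _mi_segment_cache_pop and _mi_segment_cache_push. The arena API itself no longer deals in commit masks: commit becomes an in/out bool*, and only the segment cache tracks commit state at sub-segment granularity. Callers compose the two, as in the segment.c hunk later in this patch:

  // from mi_segment_init in src/segment.c (this patch): try the cache first,
  // then the arena/OS path, deriving a full or empty commit mask from the
  // commit out-parameter.
  segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_zero, &memid, os_tld);
  if (segment == NULL) {
    segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);
    if (segment == NULL) return NULL;  // failed to allocate
    commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty());
  }
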
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - // ignore commit request - /* - if (commit && !mi_commit_mask_is_full(cmask)) { - bool commit_zero; - bool ok = _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); // todo: only commit needed parts? - if (!ok) { - *commit_mask = cmask; - } - else { - *commit_mask = mi_commit_mask_full(); - } - } - else { - */ - *commit_mask = cmask; - - // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(*cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(*cmask)) { - _mi_os_decommit(p, total, stats); - } - else { - // todo: one call to decommit the whole at once? - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - uintptr_t idx; - uintptr_t count; - mi_commit_mask_t mask = *cmask; - mi_commit_mask_foreach(mask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - *cmask = mi_commit_mask_empty(); -} - -static mi_decl_noinline void mi_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); - mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start - size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && now >= expire) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && now >= expire) { // safe read - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - //_mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (purged > 4) break; // bound to no more than 4 purge tries per push - } - } -} - -static mi_decl_noinline bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* 
tld) -{ - // only for segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // purge expired entries - mi_cache_purge(tld); - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = commit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; -} - - /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -333,7 +147,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren return p; } -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -349,11 +163,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -366,11 +178,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? 
mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -379,11 +189,10 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit_mask != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; @@ -392,49 +201,35 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node - // try to get from the cache - if (size == MI_SEGMENT_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); - if (p != NULL) return p; - } - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_zero, memid, tld); if (p != NULL) return p; } // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); - *commit_mask = ((p!=NULL && commit) ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); } -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, commit_mask, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, commit_mask, is_large, tld)) { - _mi_abandoned_await_readers(); // wait until safe to free - // TODO: is it safe on all platforms to free even it contains decommitted parts? (eg. 
macOS) - const size_t csize = mi_commit_mask_committed_size(commit_mask, size); - _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_os_free_ex(p, size, false /*pretend decommitted to not double count stats*/, tld->stats); - } + _mi_os_free_ex(p, size, is_committed, tld->stats); } else { // allocated in an arena diff --git a/src/bitmap.c b/src/bitmap.c index 93487a32..ad5a9552 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -40,7 +40,7 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); diff --git a/src/segment-cache.c b/src/segment-cache.c new file mode 100644 index 00000000..569e878a --- /dev/null +++ b/src/segment-cache.c @@ -0,0 +1,310 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + Implements a cache of segments to avoid expensive OS calls + and also the full memory map of all segments. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include "bitmap.h" // atomic bitmap + +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit + +#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) + +typedef struct mi_cache_slot_s { + void* p; + size_t memid; + mi_commit_mask_t commit_mask; + _Atomic(mi_msecs_t) expire; +} mi_cache_slot_t; + +static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free + + +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + // only segment blocks + if (size != MI_SEGMENT_SIZE) return NULL; + + // numa node determines start field + const int numa_node = _mi_os_numa_node(tld); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
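Both _mi_segment_cache_pop above and _mi_segment_cache_push below derive the same NUMA-local start field, so pops prefer segments that were cached by the same node before falling through to other fields. A sketch of that shared computation, factored into a helper (hypothetical name, not part of the patch):

  // With MI_CACHE_FIELDS = 16 and e.g. 4 NUMA nodes, node n starts probing at
  // field 4*n, giving each node its own quarter of the 1024 cache slots.
  static size_t mi_cache_numa_start_field(int numa_node) {  // hypothetical helper
    size_t start_field = 0;
    if (numa_node > 0) {
      start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node;
      if (start_field >= MI_CACHE_FIELDS) start_field = 0;  // wrap when nodes exceed fields
    }
    return start_field;
  }
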
+ claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; + } + if (!claimed) { + claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + mi_commit_mask_t cmask = slot->commit_mask; // copy + slot->p = NULL; + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); + *commit_mask = cmask; + + // mark the slot as free again + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; +} + +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) +{ + if (mi_commit_mask_is_empty(*cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(*cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + uintptr_t idx; + uintptr_t count; + mi_commit_mask_t mask = *cmask; + mi_commit_mask_foreach(mask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + *cmask = mi_commit_mask_empty(); +} + +#define MI_MAX_PURGE_PER_PUSH (4) + +static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +{ + UNUSED(tld); + mi_msecs_t now = _mi_clock_now(); + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); + if (expire != 0 && now >= expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + expire = mi_atomic_loadi64_acquire(&slot->expire); + if (expire != 0 && now >= expire) { // safe read + // still expired, decommit it + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + // decommit committed parts + // TODO: instead of decommit, we could also free to the OS? 
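A note on the concurrency around the expire field, which is read outside any lock:

  // The purge uses a claim-then-recheck protocol:
  //   1. read expire with a relaxed load; a non-zero timestamp in the past is
  //      only a hint, since the slot may be popped concurrently.
  //   2. claim the slot in cache_available so no concurrent pop can return it.
  //   3. re-read expire with an acquire load; only if it is still expired,
  //      clear it and decommit via mi_commit_mask_decommit below.
  //   4. unclaim cache_available so the (now decommitted) slot can be popped again.
  // Only MI_CACHE_FIELDS slots are probed per push, starting at a random index,
  // and only a handful (MI_MAX_PURGE_PER_PUSH) of expired candidates are
  // handled, keeping the cost of a push bounded. Assuming the 64MiB segments
  // noted below and a uintptr_t commit mask (64 bits on 64-bit platforms),
  // each mask bit covers 1MiB, so partial decommits proceed in 1MiB chunks.
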
+ mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + } + _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + } + if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + } + } +} + +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) +{ + // only for normal segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // numa node determines start field + int numa_node = _mi_os_numa_node(NULL); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_segment_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + slot->commit_mask = commit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + } + else { + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); + } + } + + // make it available + _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; +} + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ + + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
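A worked example of the index split computed below (assuming the 64MiB segments and 64-bit words noted in the comments above):

  // A segment at address 0x100000000 (4GiB):
  //   segindex = (0x100000000 % MI_MAX_ADDRESS) / 64MiB = 64
  //   bitidx   = 64 % 64 = 0
  //   index    = 64 / 64 = 1
  // i.e. the segment is tracked by bit 0 of word 1 of mi_segment_map.
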
+  uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE;
+  *bitidx = segindex % (8*MI_INTPTR_SIZE);
+  return (segindex / (8*MI_INTPTR_SIZE));
+}
+
+void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+  if (index==0) return;
+  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  uintptr_t newmask;
+  do {
+    newmask = (mask | ((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+void _mi_segment_map_freed_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+  if (index == 0) return;
+  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  uintptr_t newmask;
+  do {
+    newmask = (mask & ~((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
+static mi_segment_t* _mi_segment_of(const void* p) {
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
+  const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) {
+    return segment; // yes, allocated by us
+  }
+  if (index==0) return NULL;
+  // search downwards for the first segment in case it is an interior pointer
+  // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
+  // valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
+  size_t lobitidx;
+  size_t loindex;
+  uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
+  if (lobits != 0) {
+    loindex = index;
+    lobitidx = mi_bsr(lobits);    // lobits != 0
+  }
+  else {
+    uintptr_t lomask = mask;
+    loindex = index - 1;
+    while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--;
+    if (loindex==0) return NULL;
+    lobitidx = mi_bsr(lomask);    // lomask != 0
+  }
+  // take difference as the addresses could be larger than the MAX_ADDRESS space.
+  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+  segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+  if (segment == NULL) return NULL;
+  mi_assert_internal((void*)segment < p);
+  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(cookie_ok);
+  if (mi_unlikely(!cookie_ok)) return NULL;
+  if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
+  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
+  return segment;
+}
+
+// Is this a valid pointer in our heap? 
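The predicate below backs the public mi_is_in_heap_region API. A minimal driver (hypothetical, not part of the patch) shows the intended behavior:

  #include <mimalloc.h>
  #include <stdio.h>

  int main(void) {
    void* p = mi_malloc(42);
    int local = 0;
    // expected output: "heap: 1 stack: 0" -- only mimalloc-managed memory
    // (including interior pointers into it) lies inside a heap region.
    printf("heap: %d stack: %d\n",
           (int)mi_is_in_heap_region(p),
           (int)mi_is_in_heap_region(&local));
    mi_free(p);
    return 0;
  }
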
+static bool mi_is_valid_pointer(const void* p) { + return (_mi_segment_of(p) != NULL); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/src/segment.c b/src/segment.c index b010fcbb..a1a38a64 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,8 +13,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_map_allocated_at(const mi_segment_t* segment); -static void mi_segment_map_freed_at(const mi_segment_t* segment); static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); /* -------------------------------------------------------------------------------- @@ -183,11 +181,6 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ - -static size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; -} - static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } @@ -249,7 +242,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; - mi_segment_map_freed_at(segment); + _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set @@ -264,7 +257,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os); + const size_t size = mi_segment_size(segment); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os)) { + const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + if (csize > 0 && !segment->mem_is_fixed) _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_fixed /* pretend not committed to not double count decommits */, tld->os); + } } @@ -647,7 +646,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay 
= (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - const bool commit = eager || (required > 0); + bool commit = eager || (required > 0); // Try to get from our cache first bool is_zero = false; @@ -657,8 +656,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy size_t memid = 0; - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &commit_mask, &mem_large, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_zero, &memid, os_tld); + if (segment==NULL) { + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty()); + } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); @@ -674,7 +677,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->mem_is_fixed = mem_large; segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); mi_segments_track_size((long)(segment_size), tld); - mi_segment_map_allocated_at(segment); + _mi_segment_map_allocated_at(segment); } // zero the segment info? -- not always needed as it is zero initialized from the OS @@ -1368,126 +1371,3 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment } -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
- uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; - *bitidx = segindex % (8*MI_INTPTR_SIZE); - return (segindex / (8*MI_INTPTR_SIZE)); -} - -static void mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index==0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -static void mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index == 0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. -static mi_segment_t* _mi_segment_of(const void* p) { - mi_segment_t* segment = _mi_ptr_segment(p); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { - return segment; // yes, allocated by us - } - if (index==0) return NULL; - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else { - uintptr_t lomask = mask; - loindex = index - 1; - while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; - if (loindex==0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - // take difference as the addresses could be larger than the MAX_ADDRESS space. - size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if (mi_unlikely(!cookie_ok)) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? 
-static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/src/static.c b/src/static.c index e53aff1d..79c0a033 100644 --- a/src/static.c +++ b/src/static.c @@ -25,6 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" #include "bitmap.c" #include "arena.c" +#include "segment-cache.c" #include "segment.c" #include "page.c" #include "heap.c"
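Beyond the build wiring above (CMakeLists.txt and static.c both gain src/segment-cache.c), one implementation note: _mi_segment_map_allocated_at and _mi_segment_map_freed_at update the map with the standard lock-free read-modify-write loop, where mi_atomic_cas_weak_release wraps a weak compare-and-swap with release ordering. A self-contained sketch of the same pattern in portable C11 (not patch code):

  #include <stdatomic.h>
  #include <stdint.h>

  // Atomically set one bit in a word, retrying if another thread raced us.
  // On CAS failure, `expected` is reloaded with the current value, so the
  // loop recomputes `desired` from fresh state each iteration.
  static void atomic_set_bit(_Atomic(uintptr_t)* word, unsigned bit) {
    uintptr_t expected = atomic_load_explicit(word, memory_order_relaxed);
    uintptr_t desired;
    do {
      desired = expected | ((uintptr_t)1 << bit);
    } while (!atomic_compare_exchange_weak_explicit(
                 word, &expected, desired,
                 memory_order_release, memory_order_relaxed));
  }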