From 829fd872f407c5e201cd844b8f26f2c87915e89b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 11:48:41 -0800 Subject: [PATCH 01/41] initial delay slots --- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 26 +++++-- include/mimalloc.h | 1 + src/heap.c | 2 +- src/init.c | 4 +- src/memory.c | 143 +++++++++++++++++++++++++++++++----- src/options.c | 1 + src/segment.c | 31 ++++---- src/stats.c | 2 +- 9 files changed, 171 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..25a3d93d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); -void _mi_mem_collect(mi_stats_t* stats); +void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -typedef int64_t mi_msecs_t; mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..8a3ffff4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +// ------------------------------------------------------ +// Delay slots (to avoid expensive OS calls) +// ------------------------------------------------------ +typedef int64_t mi_msecs_t; + +typedef struct mi_delay_slot_s { + mi_msecs_t expire; + uint8_t* addr; + size_t size; +} mi_delay_slot_t; + +#define MI_RESET_DELAY_SLOTS (128) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -395,6 +408,12 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats + mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; +} mi_os_tld_t; // Segments thread local data typedef struct mi_segments_tld_s { @@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s { size_t cache_size; // total size of all segments in the cache mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point 
for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..e6fa9c2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_reset_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/heap.c b/src/heap.c index 162cf406..d03925d5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + _mi_mem_collect(&heap->tld->os); } } diff --git a/src/init.c b/src/init.c index ef848de4..971a93c0 100644 --- a/src/init.c +++ b/src/init.c @@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +219,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 75a1df92..e12405c1 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +// local +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); + // Constants #if (MI_INTPTR_SIZE==8) @@ -470,16 +473,19 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; + + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + size_t arena_memid = 0; size_t idx = 0; size_t bitidx = 0; if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, stats); + _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region @@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments { - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region) //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? } } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); } // TODO: should we free empty regions? currently only done _mi_mem_collect. @@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { +void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. for (size_t i = 0; i < regions_count; i++) { mem_region_t* region = ®ions[i]; @@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release mi_atomic_write(®ion->info,0); @@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) { } } +/* ---------------------------------------------------------------------------- + Delay slots +-----------------------------------------------------------------------------*/ + +typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); + +static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, + mi_msecs_t delay, uint8_t* addr, size_t size, + mi_delay_resolve_fun* resolve, void* arg) +{ + if (delay==0) { + resolve(addr, size, arg); + return; + } + + mi_msecs_t now = _mi_clock_now(); + mi_delay_slot_t* oldest = slots; + // walk through all slots, resolving expired ones. + // remember the oldest slot to insert the new entry in. + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + + if (slot->expire == 0) { + // empty slot + oldest = slot; + } + // TODO: should we handle overlapping areas too? 
+ else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses new area, increase expiration + slot->expire = now + delay; + delay = 0; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, overwrite + slot->expire = now + delay; + slot->addr = addr; + slot->size = size; + delay = 0; + } + else if (slot->expire < now) { + // expired slot, resolve now + slot->expire = 0; + resolve(slot->addr, slot->size, arg); + } + else if (oldest->expire > slot->expire) { + oldest = slot; + } + } + if (delay>0) { + // not yet registered, use the oldest slot + if (oldest->expire > 0) { + resolve(oldest->addr, oldest->size, arg); // evict if not empty + } + oldest->expire = now + delay; + oldest->addr = addr; + oldest->size = size; + } +} + +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +{ + uint8_t* addr = (uint8_t*)p; + bool done = false; + // walk through all slots + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses the area; remove it + slot->expire = 0; + done = true; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, remove it + slot->expire = 0; + } + else if ((addr <= slot->addr && addr + size > slot->addr) || + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap, remove slot + mi_assert_internal(false); + slot->expire = 0; + } + } + return done; +} + +static void mi_resolve_reset(void* p, size_t size, void* vtld) { + mi_os_tld_t* tld = (mi_os_tld_t*)vtld; + _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + (uint8_t*)p, size, &mi_resolve_reset, tld); + return true; +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } + return true; +} + + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_commit(p, size, is_zero, tld->stats); } -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { diff --git a/src/options.c b/src/options.c index 63b1612a..e098af0b 100644 --- a/src/options.c +++ b/src/options.c @@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) 
}, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 178e0eda..b9abe2b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_mem_free(segment, segment_size, segment->memid, tld->os); } @@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os); } segment->next = tld->cache; tld->cache = segment; @@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); segment->mem_is_committed = true; } if (!segment->mem_is_fixed && (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); if (reset_zero) is_zero = true; } } @@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, info_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment)); for (size_t i = 0; i < segment->capacity; i++) { @@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); + _mi_mem_commit(start,psize,&is_zero,tld->os); if (is_zero) page->is_zero_init = true; } if (page->is_reset) { mi_assert_internal(!segment->mem_is_fixed); 
page->is_reset = false; bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); + _mi_mem_unreset(start, psize, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } } @@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_mem_reset(start, psize, tld->os); } // zero the page data, but not the segment fields @@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert_expensive(mi_segment_is_valid(segment)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(segment,page,tld); } else { // otherwise reclaim it @@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); + mi_page_t* page = mi_segment_find_free(segment, tld); page->segment_in_use = true; segment->used++; mi_assert_internal(segment->used <= segment->capacity); diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From f0e02bab0344e099fe491eb24690a0b9a08cf6e0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Nov 2019 12:22:03 -0800 Subject: [PATCH 02/41] pr #168 by @zerodefect to update the install location --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59d889b8..7b455881 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ option(MI_SECURE_FULL "Use full security mitigations, may be more expensiv option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) -set(mi_install_dir "lib/mimalloc-${mi_version}") +set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") set(mi_sources src/stats.c From d2279b2a3faf7c2e084644449326306ef8d4f619 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:13:40 -0800 Subject: [PATCH 03/41] update test-stress with better object distribution --- test/test-stress.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index be2a9c67..37572d42 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. #include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); if (p != NULL) { From 21bbb1be870c8b9bd6ca057257a4cbb0ec57e6e5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 10 Nov 2019 12:36:55 -0800 Subject: [PATCH 04/41] fix warnings --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 7e704e7a..d5ec03c2 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats, {{0,NULL,0}} }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { From 83a066fd2d0d7484abf6372e41ac777c721c761a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 09:46:02 -0800 Subject: [PATCH 05/41] remove reset_decommits option --- include/mimalloc.h | 3 +-- src/memory.c | 28 ++++++++++++---------------- src/options.c | 7 +++---- src/os.c | 20 +++----------------- 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 70b6e412..4c542ee0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ 
-272,9 +272,8 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_cache_reset, - mi_option_reset_decommits, - mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_eager_commit_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index a1f94e18..ceb9a702 100644 --- a/src/memory.c +++ b/src/memory.c @@ -350,12 +350,12 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); + size = _mi_align_up(size, _mi_os_page_size()); + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? @@ -366,18 +366,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // TODO: implement delayed decommit/reset as these calls are too expensive // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? - } - } - } - if (!is_eager_committed) { + if (!is_large && + mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? 
+ } + if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); } diff --git a/src/options.c b/src/options.c index 63b1612a..75a2736a 100644 --- a/src/options.c +++ b/src/options.c @@ -65,11 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, + { 1, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output @@ -88,7 +87,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } diff --git a/src/os.c b/src/os.c index 027df6ab..5229381b 100644 --- a/src/os.c +++ b/src/os.c @@ -646,10 +646,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); -} - // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -708,22 +704,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr,size,stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) 
- } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } From 93a646338343984b86b00b1c7852322eafa7190e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:16:45 -0800 Subject: [PATCH 06/41] only allow commit delay for small and medium objects --- src/options.c | 2 +- src/segment.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/options.c b/src/options.c index 75a2736a..dbb7df79 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index b2b37fac..d089078c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -328,9 +328,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -359,7 +359,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 534e1e39ef29946e502fd0f668d2dc80ffd141da Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:42:29 -0800 Subject: [PATCH 07/41] allow allocation in committed regions even if not requested --- src/memory.c | 6 ++---- src/options.c | 4 ++-- src/segment.c | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/memory.c b/src/memory.c index ceb9a702..24239e05 100644 --- a/src/memory.c +++ b/src/memory.c @@ -210,14 +210,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo if (rnode >= 0 && rnode != numa_node) return false; } - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + // check allow-large bool is_large; bool 
is_committed; mi_region_info_read(info, &is_large, &is_committed); - - if (!commit && is_committed) return false; if (!allow_large && is_large) return false; + return true; } diff --git a/src/options.c b/src/options.c index dbb7df79..694b916b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,8 +65,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, - { 0, UNINIT, MI_OPTION(cache_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index d089078c..eb5a0390 100644 --- a/src/segment.c +++ b/src/segment.c @@ -327,12 +327,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - // Try to get it from our thread local cache first + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; + + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (MI_SECURE!=0) { From 2bb058bd25258c2e7a9fb2c1a64400ec780c2912 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:44:32 -0800 Subject: [PATCH 08/41] remove cache_reset parameter --- include/mimalloc.h | 1 - src/options.c | 1 - src/segment.c | 6 +----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c542ee0..6df889a4 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,7 +271,6 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, - mi_option_cache_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index 694b916b..1231e1c9 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free - { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index eb5a0390..ef24c660 100644 --- a/src/segment.c +++ 
b/src/segment.c @@ -280,9 +280,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); - } segment->next = tld->cache; tld->cache = segment; tld->cache_count++; @@ -351,8 +348,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); segment->mem_is_committed = true; } - if (!segment->mem_is_fixed && - (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { + if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { bool reset_zero = false; _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); if (reset_zero) is_zero = true; From db3f1c4bfadcb7007357fd61d7dc24369ae8fe31 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:26:11 -0800 Subject: [PATCH 09/41] add commit info to arenas --- src/arena.c | 66 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1b6cf4a4..02890bd6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,6 +33,7 @@ of 256MiB in practice. #include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -40,6 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -56,13 +58,15 @@ int _mi_os_numa_node_count(void); typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? + bool is_committed; // is the memory committed bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? 
+ mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -104,7 +108,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -118,31 +122,46 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; - if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; - return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); + if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! set the dirty bits (todo: no need for an atomic op here?) + void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_memid_create(arena_index, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + if (arena->is_committed) { + // always committed + *commit = true; } - return NULL; + else if (commit) { + // ensure commit now + bool any_zero; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } + return p; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -160,7 +179,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -172,7 +191,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -182,9 +201,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - if (*large) { - *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed - } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -223,7 +239,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { return; } const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; @@ -283,15 +299,17 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; + arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_map[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } mi_arena_add(arena); From 5e6754f3f7905485ca74546ab082f4c3bc5404fd Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:45:31 -0800 Subject: [PATCH 10/41] track commit status per block in a region --- src/memory.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/memory.c b/src/memory.c index 208b9b7e..8299bbc2 100644 --- a/src/memory.c +++ b/src/memory.c @@ -59,7 +59,7 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -94,8 +94,9 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b typedef struct mem_region_s { volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - mi_bitmap_field_t in_use; - mi_bitmap_field_t dirty; + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -165,20 +166,20 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; + bool region_large = (commit && allow_large); + bool is_zero = false; size_t arena_memid = 0; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); - + // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); if (idx >= MI_REGION_MAX) { @@ -191,8 +192,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->numa_node = _mi_os_numa_node(tld) + 1; r->arena_memid = arena_memid; + mi_atomic_write(&r->in_use, 0); + mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); + mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + + // and share it mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others *region = r; return true; @@ -269,20 +275,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start != NULL); - bool any_zero = false; - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, &any_zero); - if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = region_is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? - // ensure commit - _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + if (region_is_committed) { + // always committed + *commit = true; + } + else if (*commit) { + // ensure commit + bool any_zero; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); + if (commit_zero) *is_zero = true; + } } else { - *commit = region_is_committed || !any_zero; - } - + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); + } // and return the allocation mi_assert_internal(p != NULL); @@ -374,7 +388,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { mi_option_is_enabled(mi_option_segment_reset) && mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead { - _mi_os_reset(p, size, tld->stats); + // note: don't use `_mi_mem_reset` as it is shared with other threads! 
+ _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset } if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot From a0958b2da696a308f8c200f45f08bf1ab3e5f14b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:06:16 -0800 Subject: [PATCH 11/41] enable more reset delay slots --- include/mimalloc-types.h | 13 ++++++-- src/init.c | 9 ++++-- src/memory.c | 70 ++++++++++++++++++++++++++-------------- src/options.c | 2 +- src/segment.c | 4 ++- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2651fc85..0ce91339 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ typedef int64_t mi_msecs_t; +#define MI_RESET_DELAY_SLOTS (256) + typedef struct mi_delay_slot_s { mi_msecs_t expire; uint8_t* addr; size_t size; } mi_delay_slot_t; -#define MI_RESET_DELAY_SLOTS (128) +typedef struct mi_delay_slots_s { + size_t capacity; // always `MI_RESET_DELAY_SLOTS` + size_t count; // current slots used (`<= capacity`) + mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; +} mi_delay_slots_t; + // ------------------------------------------------------ // Thread Local data @@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats - mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; + mi_delay_slots_t* reset_delay; // delay slots for OS reset operations + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/src/init.c b/src/init.c index d5ec03c2..c9700cd5 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats, {{0,NULL,0}} }, // os - { MI_STATS_NULL } // stats + { 0, NULL, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_delay_slots_t reset_delay; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -211,6 +212,7 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -221,6 +223,9 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; + tld->os.reset_delay = reset_delay; + memset(reset_delay, 0, sizeof(*reset_delay)); + reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_default = heap; } return false; diff --git a/src/memory.c b/src/memory.c index 8299bbc2..f3052d6b 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); // local -static bool 
mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); +static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); // Constants @@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, Try to claim blocks in suitable regions -----------------------------------------------------------------------------*/ -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); if (info==0) return false; @@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo } -static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); @@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; - if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_region_is_suitable(r, numa_node, allow_large)) { if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mem_region_t* region; mi_bitmap_index_t bit_idx; // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay, p, size); size_t arena_memid = 0; mi_bitmap_index_t bit_idx; @@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) { typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); -static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, +static void mi_delay_insert(mi_delay_slots_t* ds, mi_msecs_t delay, uint8_t* addr, size_t size, mi_delay_resolve_fun* resolve, void* arg) { - if (delay==0) { + if (ds == NULL || delay==0 || addr==NULL || size==0) { resolve(addr, size, arg); return; } mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = slots; + mi_delay_slot_t* oldest = &ds->slots[0]; // 
walk through all slots, resolving expired ones. // remember the oldest slot to insert the new entry in. - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->expire == 0) { // empty slot @@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, } else if (oldest->expire > slot->expire) { oldest = slot; + newcount = i+1; + } + else { + newcount = i+1; } } + ds->count = newcount; if (delay>0) { - // not yet registered, use the oldest slot - if (oldest->expire > 0) { + // not yet registered, use the oldest slot (or a new one if there is space) + if (ds->count < ds->capacity) { + oldest = &ds->slots[ds->count]; + ds->count++; + } + else if (oldest->expire > 0) { resolve(oldest->addr, oldest->size, arg); // evict if not empty } + mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); oldest->expire = now + delay; oldest->addr = addr; oldest->size = size; } } -static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) { + if (ds == NULL || p==NULL || size==0) return false; + uint8_t* addr = (uint8_t*)p; bool done = false; - // walk through all slots - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + + // walk through all valid slots + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { // earlier slot encompasses the area; remove it slot->expire = 0; @@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ slot->expire = 0; } else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap, remove slot - mi_assert_internal(false); + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap + // can happen with a large object spanning onto some partial end block + // mi_assert_internal(false); slot->expire = 0; } + else { + newcount = i + 1; + } } + ds->count = newcount; return done; } @@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) { } bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), (uint8_t*)p, size, &mi_resolve_reset, tld); return true; } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { return _mi_os_unreset(p, size, is_zero, tld->stats); } return true; @@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + 
mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 81ffe88b..ff96c95b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 49dab6ba..549dd339 100644 --- a/src/segment.c +++ b/src/segment.c @@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) + // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets + { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; From 165ee4584597aebdb1a45fcd4e8b3904b6f7d396 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:31:48 -0800 Subject: [PATCH 12/41] initialize delay slots for the main thread --- src/init.c | 4 +++- src/options.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index c9700cd5..5967b4b9 100644 --- a/src/init.c +++ b/src/init.c @@ -96,11 +96,13 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) +static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; + static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, NULL, tld_main_stats }, // os + { 0, &tld_reset_delay_main, tld_main_stats }, // os { MI_STATS_NULL } // stats }; diff --git a/src/options.c b/src/options.c index ff96c95b..81ffe88b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From ef179a63770d8e17f105303a08ddfdd57085b936 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 
2019 10:16:59 -0800 Subject: [PATCH 13/41] avoid allocation at numa node detection on linux --- include/mimalloc-internal.h | 37 +++++++++++++++------ src/os.c | 65 +++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6bfabe27..668a7bd3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,18 +17,18 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) #endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn +#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) #define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn +#define mi_attr_noreturn #endif @@ -56,8 +56,6 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(mi_os_tld_t* tld); -int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); @@ -146,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -398,7 +396,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST block->next = (mi_encoded_t)next ^ cookie; #else @@ -411,12 +409,12 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page->cookie,block); // check for free list corruption: is `next` at least in our segment range? - // TODO: it is better to check if it is actually inside our page but that is more expensive + // TODO: it is better to check if it is actually inside our page but that is more expensive // to calculate. Perhaps with a relative free list this becomes feasible? 
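// (Aside: with MI_ENCODE_FREELIST the link is stored XOR-ed with a per-heap
//  cookie -- see mi_block_set_nextx above -- so an overwritten free-list
//  entry decodes to an out-of-range pointer and the cheap segment check
//  below catches it. Sketch of the encode/decode pair, assuming the same
//  cookie on both sides:
//
//    block->next = (mi_encoded_t)next ^ cookie;     // store
//    next = (mi_block_t*)(block->next ^ cookie);    // load
// )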
if (next!=NULL && !mi_is_in_same_segment(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; - } + } return next; #else UNUSED(page); @@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +int _mi_os_numa_node_count_get(void); + +extern int _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(_mi_numa_node_count == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline int _mi_os_numa_node_count(void) { + if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; + else return _mi_os_numa_node_count_get(); +} + + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/src/os.c b/src/os.c index 5229381b..d6878927 100644 --- a/src/os.c +++ b/src/os.c @@ -786,9 +786,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { @@ -818,7 +818,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; + params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } #endif @@ -838,7 +838,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we + // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -857,7 +857,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -900,7 +900,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems - + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. 
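// For context, this reservation loop is driven from process initialization
// (see the init.c change later in this series), e.g.:
//
//   if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
//     size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
//     mi_reserve_huge_os_pages_interleave(pages, pages*500);  // ~500ms budget per page
//   }
//
// so a timeout or partial failure degrades to fewer reserved pages instead
// of blocking startup.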
@@ -920,11 +920,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } break; } - + // success, record it _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + // check for timeout if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); @@ -958,7 +958,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { } /* ---------------------------------------------------------------------------- -Support NUMA aware allocation +Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { @@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif defined(__linux__) -#include -#include -#include +#include // getcpu +#include // access static int mi_os_numa_nodex(void) { #ifdef SYS_getcpu @@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) { return 0; #endif } - static int mi_os_numa_node_countx(void) { - DIR* d = opendir("/sys/devices/system/node"); - if (d==NULL) return 1; - - struct dirent* de; - int max_node_num = 0; - while ((de = readdir(d)) != NULL) { - int node_num; - if (strncmp(de->d_name, "node", 4) == 0) { - node_num = (int)strtol(de->d_name+4, NULL, 0); - if (max_node_num < node_num) max_node_num = node_num; - } + char buf[128]; + int max_node = mi_option_get(mi_option_max_numa_node); + int node = 0; + for(node = 0; node < max_node; node++) { + snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + if (access(buf,R_OK) != 0) break; } - closedir(d); - return (max_node_num + 1); + return (node+1); } #else static int mi_os_numa_nodex(void) { @@ -1016,29 +1008,30 @@ static int mi_os_numa_node_countx(void) { } #endif -int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; // cache the node count - if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); +int _mi_numa_node_count = 0; // cache the node count + +int _mi_os_numa_node_count_get(void) { + if (mi_unlikely(_mi_numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; - numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); + _mi_numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); } - mi_assert_internal(numa_node_count >= 1); - return numa_node_count; + mi_assert_internal(_mi_numa_node_count >= 1); + return _mi_numa_node_count; } -int _mi_os_numa_node(mi_os_tld_t* tld) { +int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; + if (numa_node < 0) numa_node = 0; return numa_node; } From af746ca4c1682e29dd42e8c0e6fa6db6aa04b200 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:17:39 -0800 Subject: [PATCH 14/41] inline bitmap_mask --- src/bitmap.inc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bitmap.inc.c 
b/src/bitmap.inc.c index 3847e712..81f87a79 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -8,11 +8,11 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- This file is meant to be included in other files for efficiency. It implements a bitmap that can set/reset sequences of bits atomically -and is used to concurrently claim memory ranges. +and is used to concurrently claim memory ranges. A bitmap is an array of fields where each field is a machine word (`uintptr_t`) -A current limitation is that the bit sequences cannot cross fields +A current limitation is that the bit sequences cannot cross fields and that the sequence must be smaller or equal to the bits in a field. ---------------------------------------------------------------------------- */ #pragma once @@ -59,7 +59,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { +static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); @@ -104,10 +104,10 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ -// Try to atomically claim a sequence of `count` bits in a single +// Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_read(field); @@ -136,7 +136,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con continue; } else { - // success, we claimed the bits! + // success, we claimed the bits! 
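// Caller's view (see mi_region_try_claim in src/memory.c earlier in this
// series): a region claims `blocks` contiguous bits of its `in_use` field
// and, on success, owns that range exclusively. Sketch:
//
//   mi_bitmap_index_t bit_idx;
//   if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, &bit_idx)) {
//     // bits [bit_idx, bit_idx + blocks) are claimed; the block start is
//     // computed from the bit index and the memory handed out
//   }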
*bitmap_idx = mi_bitmap_index_create(idx, bitidx); return true; } @@ -205,4 +205,4 @@ static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); } -#endif \ No newline at end of file +#endif From 867d78f877474c7f36fd19bc2ea62918f117f068 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:19:52 -0800 Subject: [PATCH 15/41] reserve huge OS pages earlier on at process_init --- src/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index 5967b4b9..473e9a32 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,7 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, + NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) @@ -246,7 +246,7 @@ static bool _mi_heap_done(void) { // switch to backing heap and free it heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); @@ -394,7 +394,7 @@ bool mi_is_redirected() mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) #ifdef __cplusplus extern "C" { #endif @@ -440,11 +440,6 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,msg); } - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); - } } // Initialize the process; called by thread_init or the process loader @@ -471,6 +466,11 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); + } } // Called when the process is done (through `at_exit`) @@ -497,7 +497,7 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done + // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); From d4f54dcf3049bd958ee262cbd9b3b0c7134d59ed Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:37:15 -0800 Subject: [PATCH 16/41] remove numaif dependency on linux --- CMakeLists.txt | 11 ----------- src/os.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18bdea5a..a2258128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanis option(MI_BUILD_TESTS "Build test executables" ON) include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(mi_install_dir "lib/mimalloc-${mi_version}") @@ -98,16 +97,6 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() -CHECK_INCLUDE_FILE("numaif.h" 
MI_HAVE_NUMA_H) -if(MI_HAVE_NUMA_H) - list(APPEND mi_defines MI_HAS_NUMA) - list(APPEND mi_libraries numa) -else() - if (NOT(WIN32)) - message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") - endif() -endif() - # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/src/os.c b/src/os.c index d6878927..7af7363b 100644 --- a/src/os.c +++ b/src/os.c @@ -827,28 +827,35 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) -#ifdef MI_HAS_NUMA -#include // mbind, and use -lnuma +#include +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif +#if defined(SYS_mbind) +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); + return 0; +} #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } - #else - UNUSED(numa_node); - #endif return p; } #else From bdb82748191ac5dbc436f0f62dcbebfd3df95157 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 12 Nov 2019 12:04:43 -0800 Subject: [PATCH 17/41] change max_numa_node to max_numa_nodes option --- include/mimalloc.h | 2 +- src/options.c | 2 +- src/os.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 67b17c73..8d029135 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,7 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_os_tag, - mi_option_max_numa_node, + mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 81ffe88b..bbea4e67 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; diff --git a/src/os.c b/src/os.c index 7af7363b..93fb8b31 100644 --- a/src/os.c +++ b/src/os.c @@ -998,9 +998,10 @@ static int mi_os_numa_nodex(void) { } static int mi_os_numa_node_countx(void) { char buf[128]; - int max_node = mi_option_get(mi_option_max_numa_node); + int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) int node = 0; - for(node = 0; node < max_node; node++) { + for(node = 0; node < max_nodes; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); if (access(buf,R_OK) != 0) break; } @@ -1022,7 +1023,7 @@ int _mi_os_numa_node_count_get(void) { int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 - int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + int nmax = (int)mi_option_get(mi_option_max_numa_nodes); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; _mi_numa_node_count = ncount; From 29919a938dbd6f070ed84b146ad4d712946240ee Mon Sep 17 00:00:00 2001 From: Marco Wang Date: Wed, 13 Nov 2019 13:19:21 +0800 Subject: [PATCH 18/41] Avoid the use of variable argument list function --- include/mimalloc-atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..10368df3 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -220,7 +220,7 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) #endif #elif defined(__wasi__) #include - static inline void mi_atomic_yield() { + static inline void mi_atomic_yield(void) { sched_yield(); } #else From d01ed42bcb755ed6c1b52bfd8a306821da098dd5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 13 Nov 2019 13:35:50 -0800 Subject: [PATCH 19/41] replace max_numa_nodes by use_numa_nodes (to help with wrong detection of numa nodes on WSL for example) --- include/mimalloc-internal.h | 8 +++--- include/mimalloc.h | 4 +-- src/arena.c | 15 +++++------ src/init.c | 2 +- src/options.c | 4 +-- src/os.c | 54 +++++++++++++++++-------------------- 6 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 668a7bd3..77045a99 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); -int _mi_os_numa_node_count_get(void); +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); -extern int _mi_numa_node_count; +extern size_t _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if (mi_likely(_mi_numa_node_count == 1)) return 0; else return _mi_os_numa_node_get(tld); } -static inline int _mi_os_numa_node_count(void) { +static inline size_t _mi_os_numa_node_count(void) { if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; else return _mi_os_numa_node_count_get(); } diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d029135..3c942849 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated @@ -274,8 +274,8 @@ typedef enum mi_option_e { mi_option_segment_reset, 
mi_option_eager_commit_delay, mi_option_reset_delay, + mi_option_use_numa_nodes, mi_option_os_tag, - mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 02890bd6..46741208 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } -// reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/init.c b/src/init.c index 473e9a32..72543b95 100644 --- a/src/init.c +++ b/src/init.c @@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } diff --git a/src/options.c b/src/options.c index bbea4e67..180f6a75 100644 --- a/src/options.c +++ b/src/options.c @@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 93fb8b31..2415a40d 100644 --- a/src/os.c +++ b/src/os.c @@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 -static int mi_os_numa_nodex() { +static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return numa_node; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (int)(numa_max + 1); + return (numa_max + 1); } #elif defined(__linux__) #include // getcpu #include // access -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { #ifdef SYS_getcpu - unsigned node = 0; - unsigned ncpu = 0; - int err = syscall(SYS_getcpu, &ncpu, &node, NULL); + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - return (int)node; + return node; #else return 0; #endif } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { char buf[128]; - int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) - int node = 0; - for(node = 0; node < max_nodes; node++) { + unsigned node = 0; + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (access(buf,R_OK) != 0) break; } return (node+1); } #else -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { return 0; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { return 1; } #endif -int _mi_numa_node_count = 0; // cache the node count +size_t _mi_numa_node_count = 0; // cache the node count -int _mi_os_numa_node_count_get(void) { +size_t _mi_os_numa_node_count_get(void) { if (mi_unlikely(_mi_numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); - int ncount0 = ncount; - // never more than max numa node and at least 1 - int nmax = (int)mi_option_get(mi_option_max_numa_nodes); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - _mi_numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically + _mi_numa_node_count = (size_t)(ncount <= 0 ? 
1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); } mi_assert_internal(_mi_numa_node_count >= 1); return _mi_numa_node_count; @@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - int numa_node = mi_os_numa_nodex(); + size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; - return numa_node; + return (int)numa_node; } From a4ed63d1273befbe2c8835395f3137564d3af7e9 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 13 Nov 2019 17:22:03 -0800 Subject: [PATCH 20/41] Adresses pr #165 and issue #164 by @colesbury: On Mac OS, the thread-local _mi_default_heap may get reset before _mi_thread_done is called, leaking the default heap on non-main threads. Now the current default heap is also stored in mi_pthread_key (or mi_fls_key on Windows). The _mi_thread_done function is called with this value. --- include/mimalloc-internal.h | 1 + src/heap.c | 6 ++-- src/init.c | 59 +++++++++++++++++++++---------------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ccf12a06..73849337 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -101,6 +101,7 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); uintptr_t _mi_heap_random(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/src/heap.c b/src/heap.c index 15c5d02a..daa9b241 100644 --- a/src/heap.c +++ b/src/heap.c @@ -223,7 +223,7 @@ static void mi_heap_free(mi_heap_t* heap) { // reset default if (mi_heap_is_default(heap)) { - _mi_heap_default = heap->tld->heap_backing; + _mi_heap_set_default_direct(heap->tld->heap_backing); } // and free the used memory mi_free(heap); @@ -354,8 +354,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (!mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = _mi_heap_default; - _mi_heap_default = heap; + mi_heap_t* old = mi_get_default_heap(); + _mi_heap_set_default_direct(heap); return old; } diff --git a/src/init.c b/src/init.c index e15d82eb..081e7ce7 100644 --- a/src/init.c +++ b/src/init.c @@ -90,6 +90,7 @@ const mi_heap_t _mi_heap_empty = { false }; +// the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; @@ -198,8 +199,8 @@ static bool _mi_heap_init(void) { if (mi_heap_is_initialized(_mi_heap_default)) return true; if (_mi_is_main_thread()) { // the main heap is statically allocated - _mi_heap_default = &_mi_heap_main; - mi_assert_internal(_mi_heap_default->tld->heap_backing == _mi_heap_default); + _mi_heap_set_default_direct(&_mi_heap_main); + mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS @@ -219,18 +220,17 @@ static bool _mi_heap_init(void) { tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->os.stats = &tld->stats; - _mi_heap_default = heap; + 
_mi_heap_set_default_direct(heap); } return false; } // Free the thread local default heap (called from `mi_thread_done`) -static bool _mi_heap_done(void) { - mi_heap_t* heap = _mi_heap_default; +static bool _mi_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap - _mi_heap_default = (_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); // todo: delete all non-backing heaps? @@ -277,6 +277,8 @@ static bool _mi_heap_done(void) { // to set up the thread local keys. // -------------------------------------------------------- +static void _mi_thread_done(mi_heap_t* default_heap); + #ifdef __wasi__ // no pthreads in the WebAssembly Standard Interface #elif !defined(_WIN32) @@ -291,14 +293,14 @@ static bool _mi_heap_done(void) { #include static DWORD mi_fls_key; static void NTAPI mi_fls_done(PVOID value) { - if (value!=NULL) mi_thread_done(); + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } #elif defined(MI_USE_PTHREADS) // use pthread locol storage keys to detect thread ending #include static pthread_key_t mi_pthread_key; static void mi_pthread_done(void* value) { - if (value!=NULL) mi_thread_done(); + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } #elif defined(__wasi__) // no pthreads in the WebAssembly Standard Interface @@ -332,6 +334,8 @@ void mi_thread_init(void) mi_attr_noexcept mi_process_init(); // initialize the thread local default heap + // (this will call `_mi_heap_set_default_direct` and thus set the + // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_heap_init()) return; // returns true if already initialized // don't further initialize for the main thread @@ -339,33 +343,38 @@ void mi_thread_init(void) mi_attr_noexcept _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); - // set hooks so our mi_thread_done() will be called - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsSetValue(mi_fls_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_fls_done` is called - #elif defined(MI_USE_PTHREADS) - pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called - #endif - //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } void mi_thread_done(void) mi_attr_noexcept { + _mi_thread_done(mi_get_default_heap()); +} + +static void _mi_thread_done(mi_heap_t* heap) { // stats - mi_heap_t* heap = mi_get_default_heap(); if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) { _mi_stat_decrease(&heap->tld->stats.threads, 1); } - // abandon the thread local heap - if (_mi_heap_done()) return; // returns true if already ran - - //if (!_mi_is_main_thread()) { - // _mi_verbose_message("thread done: 0x%zx\n", _mi_thread_id()); - //} + if (_mi_heap_done(heap)) return; // returns true if already ran } +void _mi_heap_set_default_direct(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + _mi_heap_default = heap; + + // ensure the default heap is passed to `_mi_thread_done` + // setting to a non-NULL value also ensures `mi_thread_done` is called. 
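// The underlying pattern (sketch; the key creation itself happens during
// process init and is not part of this hunk): the value stored in the TLS
// slot is handed back to the registered destructor at thread exit, so
// storing the heap pointer both marks the thread as needing cleanup and
// delivers the right heap to _mi_thread_done:
//
//   pthread_key_create(&mi_pthread_key, &mi_pthread_done);  // once, at startup
//   pthread_setspecific(mi_pthread_key, heap);               // here, per thread
//   // at thread exit the OS invokes mi_pthread_done(heap) -> _mi_thread_done(heap)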
+ #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + FlsSetValue(mi_fls_key, heap); + #elif defined(MI_USE_PTHREADS) + pthread_setspecific(mi_pthread_key, heap); + #endif +} + + // -------------------------------------------------------- // Run functions on process init/done, and thread init/done @@ -446,7 +455,7 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = _mi_heap_default; + mi_heap_t* h = mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); From dccffea66286dfb16e642aef3fea7babee7038e3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Nov 2019 11:01:05 -0800 Subject: [PATCH 21/41] fix pr #173 by @zerodefect to use case-insensitive matching of the build type; also use MI_DEBUG_FULL option (instead of MI_CHECK_FULL) --- CMakeLists.txt | 74 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b455881..aa9c126f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,15 +6,14 @@ set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) -option(MI_SEE_ASM "Generate assembly files" OFF) -option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) -option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) -option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) +option(MI_SECURE "Use security mitigations (like guard pages, allocation randomization, and free-list corruption detection)" OFF) option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) +option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) +option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) - -set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) set(mi_sources src/stats.c @@ -29,29 +28,33 @@ set(mi_sources src/options.c src/init.c) -# Set default build type +# ----------------------------------------------------------------------------- +# Converience: set default build type depending on the build directory +# ----------------------------------------------------------------------------- + if (NOT CMAKE_BUILD_TYPE) - if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") - message(STATUS "No build type selected, default to *** Debug ***") + if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON") + message(STATUS "No build type selected, default to: Debug") set(CMAKE_BUILD_TYPE "Debug") else() - message(STATUS "No build type selected, default to *** Release ***") + message(STATUS "No build type selected, default to: Release") set(CMAKE_BUILD_TYPE "Release") endif() -else() - message(STATUS "Build type specified as *** ${CMAKE_BUILD_TYPE} ***") endif() 
if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") + message(STATUS "Default to secure build") set(MI_SECURE "ON") endif() +# ----------------------------------------------------------------------------- +# Process options +# ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID MATCHES "MSVC") set(MI_USE_CXX "ON") endif() - -# Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -84,7 +87,12 @@ if(MI_SEE_ASM MATCHES "ON") endif() if(MI_CHECK_FULL MATCHES "ON") - message(STATUS "Set debug level to full invariant checking (MI_CHECK_FULL=ON)") + message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") + set(MI_DEBUG_FULL "ON") +endif() + +if(MI_DEBUG_FULL MATCHES "ON") + message(STATUS "Set debug level to full invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() @@ -109,19 +117,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") endif() endif() -if(NOT(CMAKE_BUILD_TYPE MATCHES "Release|release|RelWithDebInfo|relwithdebinfo")) - string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type) - set(mi_basename "mimalloc-${build_type}") -else() - if(MI_SECURE MATCHES "ON") - set(mi_basename "mimalloc-secure") - else() - set(mi_basename "mimalloc") - endif() -endif() -message(STATUS "Output library name : ${mi_basename}") -message(STATUS "Installation directory: ${mi_install_dir}") - # extra needed libraries if(WIN32) list(APPEND mi_libraries psapi shell32 user32) @@ -134,9 +129,28 @@ else() endif() # ----------------------------------------------------------------------------- -# Main targets +# Install and output names # ----------------------------------------------------------------------------- +set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +if(MI_SECURE MATCHES "ON") + set(mi_basename "mimalloc-secure") +else() + set(mi_basename "mimalloc") +endif() +string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) +if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$")) + set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version +endif() +message(STATUS "") +message(STATUS "Library base name: ${mi_basename}") +message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") +message(STATUS "Install directory: ${mi_install_dir}") +message(STATUS "") + +# ----------------------------------------------------------------------------- +# Main targets +# ----------------------------------------------------------------------------- # shared library add_library(mimalloc SHARED ${mi_sources}) @@ -238,7 +252,7 @@ endif() if (MI_OVERRIDE MATCHES "ON") target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) if(NOT WIN32) - # It is only possible to override malloc on Windows when building as a DLL. (src/alloc-override.c) + # It is only possible to override malloc on Windows when building as a DLL. 
target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) endif() From 8637f113d5ed817fa93e584d716d2b5c91ca723f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 15 Nov 2019 14:09:17 -0800 Subject: [PATCH 22/41] improve test-stress to run multiple iterations --- test/test-stress.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..4b6ec22d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,8 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 20; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -159,14 +160,17 @@ int main(int argc, char** argv) { //bench_start_program(); mi_stats_reset(); - memset((void*)transfer, 0, TRANSFERS*sizeof(void*)); - run_os_threads(THREADS); - for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + for (int i = 0; i < ITER; i++) { + memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + run_os_threads(THREADS); + for (int i = 0; i < TRANSFERS; i++) { + free_items((void*)transfer[i]); + } } - #ifndef NDEBUG +#ifndef NDEBUG mi_collect(false); - #endif +#endif + mi_stats_print(NULL); //bench_end_program(); return 0; From fd3ce5dc7d22bf4155588ac2755a98e4a405303f Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 15 Nov 2019 16:28:11 -0800 Subject: [PATCH 23/41] improve stress test --- ide/vs2019/mimalloc-test-stress.vcxproj | 4 +- test/test-stress.c | 86 ++++++++++++++++--------- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj index afbb6666..ef7ab357 100644 --- a/ide/vs2019/mimalloc-test-stress.vcxproj +++ b/ide/vs2019/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/test/test-stress.c b/test/test-stress.c index 4b6ec22d..b6ceaa0a 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -6,7 +6,8 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Do not use this test as a benchmark! + but uses a random linear size distribution. Timing can also depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -18,16 +19,31 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int SCALE = 12; // scaling factor +static int ITER = 50; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors -// static int N = 100; // scaling factor +// static int SCALE = 100; // scaling factor +static bool allow_large_objects = true; // allow very large objects? +static size_t use_one_size = 0; // use single object size of N uintptr_t? 
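// (The custom_* indirection below lets the same stress test exercise the
//  system allocator for comparison: building with USE_STD_MALLOC defined,
//  e.g. -DUSE_STD_MALLOC, routes custom_malloc/custom_realloc/custom_free
//  to malloc/realloc/free instead of the mi_ variants.)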
+ + +#ifdef USE_STD_MALLOC +#define custom_malloc(s) malloc(s) +#define custom_realloc(p,s) realloc(p,s) +#define custom_free(p) free(p) +#else +#define custom_malloc(s) mi_malloc(s) +#define custom_realloc(p,s) mi_realloc(p,s) +#define custom_free(p) mi_free(p) +#endif + +// transfer pointer between threads #define TRANSFERS (1000) - static volatile void* transfer[TRANSFERS]; + #if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else @@ -64,10 +80,17 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases - uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); + uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } @@ -82,7 +105,7 @@ static void free_items(void* p) { } } } - mi_free(p); + custom_free(p); } @@ -91,12 +114,12 @@ static void stress(intptr_t tid) { uintptr_t r = tid ^ 42; const size_t max_item = 128; // in words const size_t max_item_retained = 10*max_item; - size_t allocs = 25*N*(tid%8 + 1); // some threads do more + size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more size_t retain = allocs/2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)mi_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain*sizeof(void*)); size_t retain_top = 0; while (allocs>0 || retain>0) { @@ -105,7 +128,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)mi_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size*sizeof(void*)); } data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); } @@ -121,7 +144,7 @@ static void stress(intptr_t tid) { data[idx] = NULL; } if (chance(25, &r) && data_top > 0) { - // 25% transfer-swap + // 25% exchange a local pointer with the (shared) transfer buffer. size_t data_idx = pick(&r) % data_top; size_t transfer_idx = pick(&r) % TRANSFERS; void* p = data[data_idx]; @@ -136,8 +159,8 @@ static void stress(intptr_t tid) { for (size_t i = 0; i < data_top; i++) { free_items(data[i]); } - mi_free(retained); - mi_free(data); + custom_free(retained); + custom_free(data); //bench_end_thread(); } @@ -152,25 +175,29 @@ int main(int argc, char** argv) { if (argc>=3) { char* end; long n = (strtol(argv[2], &end, 10)); - if (n > 0) N = n; + if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N); + printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); + + // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
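// (Cross-thread handoff: inside stress() each thread occasionally swaps one
//  of its local pointers with a slot of the shared `transfer` array via
//  atomic_exchange_ptr, so blocks are routinely freed by a different thread
//  than the one that allocated them; the loop below reclaims whatever is
//  still left in the buffer between iterations.)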
mi_stats_reset(); - for (int i = 0; i < ITER; i++) { - memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + uintptr_t r = 43; + for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + void* p = atomic_exchange_ptr(&transfer[i], NULL); + free_items(p); + } } } -#ifndef NDEBUG - mi_collect(false); -#endif + mi_collect(false); + mi_collect(true); mi_stats_print(NULL); //bench_end_program(); return 0; @@ -187,8 +214,8 @@ static DWORD WINAPI thread_entry(LPVOID param) { } static void run_os_threads(size_t nthreads) { - DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); - HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); + DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } @@ -198,8 +225,8 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { CloseHandle(thandles[i]); } - free(tids); - free(thandles); + custom_free(tids); + custom_free(thandles); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { @@ -220,7 +247,7 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t)); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t)*nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { @@ -229,6 +256,7 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { pthread_join(threads[i], NULL); } + custom_free(threads); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { From 94bfb4772575d43bb11247b957ee5c3741a97a1a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 12:59:26 -0800 Subject: [PATCH 24/41] update stress test for more realisitic size distribution --- test/test-stress.c | 87 ++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b6ceaa0a..6b2fb8c4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -7,7 +7,7 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -17,10 +17,12 @@ terms of the MIT license. 
#include #include +// > mimalloc-test-stress [THREADS] [SCALE] [ITER] +// // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 12; // scaling factor -static int ITER = 50; // N full iterations re-creating all threads +static int THREADS = 32; // more repeatable if THREADS <= #processors +static int SCALE = 50; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -56,21 +58,21 @@ typedef uintptr_t* random_t; static uintptr_t pick(random_t r) { uintptr_t x = *r; - #if (UINTPTR_MAX > UINT32_MAX) - // by Sebastiano Vigna, see: +#if (UINTPTR_MAX > UINT32_MAX) + // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; x ^= x >> 27; x *= 0x94d049bb133111ebUL; x ^= x >> 31; - #else - // by Chris Wellons, see: +#else + // by Chris Wellons, see: x ^= x >> 16; x *= 0x7feb352dUL; x ^= x >> 15; x *= 0x846ca68bUL; x ^= x >> 16; - #endif +#endif *r = x; return x; } @@ -81,13 +83,13 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant - else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge - else items *= 10; // 1% large objects; + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge + else items *= 100; // 1% large objects; } - if (items==40) items++; // pthreads uses that size for stack increases - if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); - uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (items == 40) items++; // pthreads uses that size for stack increases + if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); + uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; } @@ -99,7 +101,7 @@ static void free_items(void* p) { uintptr_t* q = (uintptr_t*)p; uintptr_t items = (q[0] ^ cookie); for (uintptr_t i = 0; i < items; i++) { - if ((q[i]^cookie) != items - i) { + if ((q[i] ^ cookie) != items - i) { fprintf(stderr, "memory corruption at block %p at %zu\n", p, i); abort(); } @@ -111,30 +113,30 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = tid ^ 42; - const size_t max_item = 128; // in words - const size_t max_item_retained = 10*max_item; - size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more - size_t retain = allocs/2; + uintptr_t r = tid * 43; + const size_t max_item_shift = 5; // 128 + const size_t max_item_retained_shift = max_item_shift + 2; + size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more + size_t retain = allocs / 2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)custom_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain * sizeof(void*)); size_t retain_top = 0; - while (allocs>0 || retain>0) { + while (allocs > 0 || retain > 0) { if (retain == 0 || (chance(50, &r) && allocs > 0)) { // 50%+ alloc allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - 
data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); + data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain - retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r); + retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r); retain--; } if (chance(66, &r) && data_top > 0) { @@ -167,36 +169,45 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads); int main(int argc, char** argv) { - if (argc>=2) { + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] + if (argc >= 2) { char* end; long n = strtol(argv[1], &end, 10); if (n > 0) THREADS = n; } - if (argc>=3) { + if (argc >= 3) { char* end; long n = (strtol(argv[2], &end, 10)); if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); + if (argc >= 4) { + char* end; + long n = (strtol(argv[3], &end, 10)); + if (n > 0) ITER = n; + } + printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - uintptr_t r = 43; + uintptr_t r = 43 * 43; for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } + mi_collect(false); +#ifndef NDEBUG + if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } +#endif } - mi_collect(false); mi_collect(true); mi_stats_print(NULL); //bench_end_program(); @@ -230,11 +241,11 @@ static void run_os_threads(size_t nthreads) { } static void* atomic_exchange_ptr(volatile void** p, void* newval) { - #if (INTPTR_MAX == UINT32_MAX) +#if (INTPTR_MAX == UINT32_MAX) return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); - #else +#else return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); - #endif +#endif } #else @@ -247,8 +258,8 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); - memset(threads, 0, sizeof(pthread_t)*nthreads); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); + memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); From 4d4a2885f5ef5d0b3db8de149b472380f495e729 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 13:19:17 -0800 Subject: [PATCH 25/41] use atomic read/write on the page->heap field where concurrent interaction is possible --- src/alloc.c | 2 +- src/page-queue.c | 6 +++--- src/page.c | 21 ++++++++++++++------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index d2319f82..c4863115 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -235,7 +235,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } else { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - 
mi_heap_t* heap = page->heap; + mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) diff --git a/src/page-queue.c b/src/page-queue.c index 4af70b50..95443a69 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { page->heap->page_count--; page->next = NULL; page->prev = NULL; - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); count++; } diff --git a/src/page.c b/src/page.c index aaf1cb91..a8115d27 100644 --- a/src/page.c +++ b/src/page.c @@ -343,18 +343,24 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE); +#if MI_DEBUG > 1 + mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); +#endif + + // remove from our page list + mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_page_queue_remove(pq, page); + + // page is no longer associated with our heap + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap->cookie, block)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif - // and then remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; - mi_page_queue_remove(pq, page); - // and abandon it mi_assert_internal(page->heap == NULL); _mi_segment_page_abandon(page,segments_tld); @@ -755,7 +761,8 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); From 30e2c54adba9f1d2ef32e35e4e6c4b80e5732c26 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:13:02 -0800 Subject: [PATCH 26/41] remove delayed reset option (for now) --- include/mimalloc.h | 2 +- src/memory.c | 139 ++------------------------ src/options.c | 4 +- src/os.c | 237 +++++++++++++++++++++++++-------------------- 4 files changed, 142 insertions(+), 240 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3c942849..a59b9cf7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,7 +273,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_reset_decommits, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index f3052d6b..b0bcf7a0 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,9 +53,6 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -// local -static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); - // Constants #if (MI_INTPTR_SIZE==8) @@ -354,8 +351,6 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, p, size); - size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; @@ -424,7 +419,6 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -434,142 +428,23 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } } -/* ---------------------------------------------------------------------------- - Delay slots ------------------------------------------------------------------------------*/ - -typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); - -static void mi_delay_insert(mi_delay_slots_t* ds, - mi_msecs_t delay, uint8_t* addr, size_t 
size, - mi_delay_resolve_fun* resolve, void* arg) -{ - if (ds == NULL || delay==0 || addr==NULL || size==0) { - resolve(addr, size, arg); - return; - } - - mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = &ds->slots[0]; - // walk through all slots, resolving expired ones. - // remember the oldest slot to insert the new entry in. - size_t newcount = 0; - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - - if (slot->expire == 0) { - // empty slot - oldest = slot; - } - // TODO: should we handle overlapping areas too? - else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses new area, increase expiration - slot->expire = now + delay; - delay = 0; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, overwrite - slot->expire = now + delay; - slot->addr = addr; - slot->size = size; - delay = 0; - } - else if (slot->expire < now) { - // expired slot, resolve now - slot->expire = 0; - resolve(slot->addr, slot->size, arg); - } - else if (oldest->expire > slot->expire) { - oldest = slot; - newcount = i+1; - } - else { - newcount = i+1; - } - } - ds->count = newcount; - if (delay>0) { - // not yet registered, use the oldest slot (or a new one if there is space) - if (ds->count < ds->capacity) { - oldest = &ds->slots[ds->count]; - ds->count++; - } - else if (oldest->expire > 0) { - resolve(oldest->addr, oldest->size, arg); // evict if not empty - } - mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); - oldest->expire = now + delay; - oldest->addr = addr; - oldest->size = size; - } -} - -static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) -{ - if (ds == NULL || p==NULL || size==0) return false; - - uint8_t* addr = (uint8_t*)p; - bool done = false; - size_t newcount = 0; - - // walk through all valid slots - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses the area; remove it - slot->expire = 0; - done = true; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, remove it - slot->expire = 0; - } - else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap - // can happen with a large object spanning onto some partial end block - // mi_assert_internal(false); - slot->expire = 0; - } - else { - newcount = i + 1; - } - } - ds->count = newcount; - return done; -} - -static void mi_resolve_reset(void* p, size_t size, void* vtld) { - mi_os_tld_t* tld = (mi_os_tld_t*)vtld; - _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), - (uint8_t*)p, size, &mi_resolve_reset, tld); - return true; -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } - return true; -} - - /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, 
size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 180f6a75..8c4c1707 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 2415a40d..02683a02 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512 * KiB) align_size = _mi_os_page_size(); + else if (size < 2 * MiB) align_size = 64 * KiB; + else if (size < 8 * MiB) align_size = 256 * KiB; + else if (size < 32 * MiB) align_size = 1 * MiB; + else align_size = 4 * MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. 
#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok!=0); + return (ok != 0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2*MiB; + large_os_page_size = 2 * MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
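/* A minimal sketch (illustrative names, C11 atomics) of the back-off heuristic that
   `large_page_try_ok` implements above: after the expensive large-page allocation fails
   once, the next ~N calls skip it by counting a shared atomic down, so a burst of
   allocations does not pay the failure cost every time. `expensive_large_alloc` is a
   stand-in stub, not a real mimalloc or OS function. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static _Atomic(uintptr_t) large_try_ok;      // > 0: a recent attempt failed, skip the slow path

static bool expensive_large_alloc(size_t size, void** out) {
  (void)size; *out = NULL; return false;     // stub: pretend large pages are unavailable
}

static bool try_alloc_large(size_t size, void** out) {
  uintptr_t n = atomic_load_explicit(&large_try_ok, memory_order_relaxed);
  if (n > 0) {
    // a recent attempt failed: count down and let the caller use the normal path
    atomic_compare_exchange_weak(&large_try_ok, &n, n - 1);   // racy, but it is only a heuristic
    return false;
  }
  if (expensive_large_alloc(size, out)) return true;
  atomic_store_explicit(&large_try_ok, 10, memory_order_relaxed);  // back off for ~10 calls
  return false;
}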
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); + *is_large = ((flags & MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); + size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,47 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) +#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap } - #else +#else UNUSED(try_alignment); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; +#endif + if (p == NULL) { + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_NORESERVE) +#define MAP_NORESERVE 0 +#endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; - #if defined(MAP_ALIGNED) // BSD +#if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } - #endif - #if defined(PROT_MAX) +#endif +#if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) +#endif +#if defined(VM_MAKE_TAG) +// macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); - #endif +#endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -332,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; - #ifdef MAP_ALIGNED_SUPER +#ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB +#endif +#ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB +#endif +#ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else - #endif +#endif { - #ifdef MAP_HUGE_2MB +#ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; - #endif +#endif } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB +#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif +#endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB +#ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } - #endif +#endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -375,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) +#if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -387,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } - #endif +#endif } return p; } @@ -401,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + if ((size % MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB - #endif + init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB +#endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint%try_alignment != 0) return NULL; + if (hint % try_alignment != 0) return NULL; return (void*)hint; } #else @@ -441,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif +#if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +#elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); +#else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +#endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -561,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); } @@ -613,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } - #if defined(_WIN32) +#if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -624,28 +627,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } - #elif defined(__wasi__) +#elif defined(__wasi__) // WebAssembly guests can't control memory protection - #else +#elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } +#else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } - #endif +#endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); +} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -657,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
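/* The commit/decommit/reset paths above page-align their range either "conservatively"
   or "liberally". A small sketch of the difference, with an assumed 4KiB page size and
   illustrative helper names (the real code queries the OS page size and uses
   `mi_os_page_align_area_conservative`): decommit and reset must stay strictly inside
   the range so they never touch a page shared with a neighbouring allocation, while
   commit may safely cover every overlapping page. */
#include <stdint.h>
#include <stddef.h>

#define OS_PAGE_SIZE 4096

// conservative: only the pages that lie fully inside [addr, addr+size)
static void* page_align_conservative(void* addr, size_t size, size_t* csize) {
  uintptr_t start = ((uintptr_t)addr + OS_PAGE_SIZE - 1) & ~(uintptr_t)(OS_PAGE_SIZE - 1);
  uintptr_t end   = ((uintptr_t)addr + size) & ~(uintptr_t)(OS_PAGE_SIZE - 1);
  *csize = (end > start ? (size_t)(end - start) : 0);
  return (void*)start;
}

// liberal: every page that overlaps [addr, addr+size)
static void* page_align_liberal(void* addr, size_t size, size_t* csize) {
  uintptr_t start = (uintptr_t)addr & ~(uintptr_t)(OS_PAGE_SIZE - 1);
  uintptr_t end   = ((uintptr_t)addr + size + OS_PAGE_SIZE - 1) & ~(uintptr_t)(OS_PAGE_SIZE - 1);
  *csize = (size_t)(end - start);
  return (void*)start;
}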
- #if (MI_DEBUG>1) - if (MI_SECURE==0) { +#if (MI_DEBUG>1) + if (MI_SECURE == 0) { memset(start, 0, csize); // pretend it is eagerly reset } - #endif +#endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 1 +#if 1 if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } - #endif +#endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -704,12 +721,22 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - return mi_os_resetx(addr, size, true, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr, size, stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } } @@ -721,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -753,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); + mi_assert_internal(oldsize > newsize&& p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -781,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif +#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE +#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) +#endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -821,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #endif +#endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -842,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -883,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -936,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); + if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -947,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -956,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; + if (p == NULL || size == 0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -972,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + GetNumaProcessorNodeEx(&pnum, &numa_node); return numa_node; } @@ -999,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for(node = 0; node < 256; node++) { + for (node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; + if (access(buf, R_OK) != 0) break; } - return (node+1); + return (node + 1); } #else static size_t mi_os_numa_nodex(void) { @@ -1031,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From 211f1aa5190f063ee8eef237473281535c2be79f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:55:12 -0800 Subject: [PATCH 27/41] remove reset delay slots; add reset tracking per page and segment --- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 28 +--- include/mimalloc.h | 3 +- src/arena.c | 8 +- src/bitmap.inc.c | 54 ++++++-- src/init.c | 11 +- src/memory.c | 199 +++++++++++++++------------ src/options.c | 5 +- src/os.c | 204 ++++++++++++++-------------- src/page.c | 7 +- src/segment.c | 264 ++++++++++++++++++++++-------------- 11 files changed, 443 insertions(+), 348 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d727e563..ab295e65 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -59,7 +59,7 @@ size_t _mi_os_good_alloc_size(size_t size); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -75,7 +75,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -297,7 +297,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + const size_t bsize = page->block_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0ce91339..e816c3a6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -384,31 +384,12 @@ void 
_mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - -// ------------------------------------------------------ -// Delay slots (to avoid expensive OS calls) -// ------------------------------------------------------ -typedef int64_t mi_msecs_t; - -#define MI_RESET_DELAY_SLOTS (256) - -typedef struct mi_delay_slot_s { - mi_msecs_t expire; - uint8_t* addr; - size_t size; -} mi_delay_slot_t; - -typedef struct mi_delay_slots_s { - size_t capacity; // always `MI_RESET_DELAY_SLOTS` - size_t count; // current slots used (`<= capacity`) - mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; -} mi_delay_slots_t; - - // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ +typedef int64_t mi_msecs_t; + // Queue of segments typedef struct mi_segment_queue_s { mi_segment_t* first; @@ -417,9 +398,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_delay_slots_t* reset_delay; // delay slots for OS reset operations - mi_stats_t* stats; // points to tld stats + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/include/mimalloc.h b/include/mimalloc.h index a59b9cf7..197b1734 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,8 +272,9 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_eager_commit_delay, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 46741208..4a596b2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,7 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -137,9 +137,9 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else if (commit) { // ensure commit now - bool any_zero; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); if (commit_zero) *is_zero = true; diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 81f87a79..11ada472 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -104,9 +104,29 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ +// Try to atomically claim a sequence of `count` bits at in `idx` +// in the bitmap field. 
Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); + + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if ((field & mask) == 0) { // free? + if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { + // claimed! + return true; + } + } + return false; +} + + // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; @@ -160,9 +180,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { for (size_t idx = 0; idx < bitmap_fields; idx++) { - if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } @@ -170,39 +190,51 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously +// Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_assert_internal((bitmap[idx] & mask) == mask); + // mi_assert_internal((bitmap[idx] & mask) == mask); uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); return ((prev & mask) == mask); } // Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
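/* A self-contained sketch of the mask technique these bitmap helpers are built on
   (names here are illustrative, not mimalloc's): build a run of `count` one-bits
   starting at `bitidx`, claim it with an atomic OR, release it with an atomic AND-NOT,
   and inspect the previous value to learn whether the whole run was free (or fully
   set) before the operation. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define FIELD_BITS (8 * sizeof(uintptr_t))

static inline uintptr_t run_mask(size_t count, size_t bitidx) {
  if (count == FIELD_BITS) return ~((uintptr_t)0);   // full field; avoids a shift by the word width
  return (((uintptr_t)1 << count) - 1) << bitidx;    // `count` ones starting at `bitidx`
}

// returns true if all `count` bits were 0 before (i.e. this claim "won" the run)
static bool claim_run(_Atomic(uintptr_t)* field, size_t count, size_t bitidx) {
  const uintptr_t mask = run_mask(count, bitidx);
  uintptr_t prev = atomic_fetch_or(field, mask);
  return ((prev & mask) == 0);
}

// returns true if all `count` bits were 1 before (i.e. the run was fully claimed)
static bool unclaim_run(_Atomic(uintptr_t)* field, size_t count, size_t bitidx) {
  const uintptr_t mask = run_mask(count, bitidx);
  uintptr_t prev = atomic_fetch_and(field, ~mask);
  return ((prev & mask) == mask);
}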
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } -// Returns `true` if all `count` bits were 1 -static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); - return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); } +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + #endif diff --git a/src/init.c b/src/init.c index f9735462..468fd46f 100644 --- a/src/init.c +++ b/src/init.c @@ -97,13 +97,11 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) -static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; - static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, &tld_reset_delay_main, tld_main_stats }, // os + { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -194,8 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; - mi_delay_slots_t reset_delay; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -215,7 +212,6 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -226,9 +222,6 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = 
&tld->stats; - tld->os.reset_delay = reset_delay; - memset(reset_delay, 0, sizeof(*reset_delay)); - reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_set_default_direct(heap); } return false; diff --git a/src/memory.c b/src/memory.c index b0bcf7a0..94b6348f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,6 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map @@ -73,28 +74,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - -static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} +typedef union mi_region_info_u { + uintptr_t value; + struct { + bool valid; + bool is_large; + int numa_node; + }; +} mi_region_info_t; // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) + volatile _Atomic(void*) start; // start of the memory area (and flags) mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - size_t arena_memid; // if allocated from a (huge page) arena + mi_bitmap_field_t reset; // track reset per block + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; // The region map @@ -113,24 +112,32 @@ static size_t mi_region_block_count(size_t size) { return _mi_divide_up(size, MI_SEGMENT_SIZE); } +/* // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; return _mi_align_up(size, _mi_os_large_page_size()); } +*/ // Return if a pointer points into a region reserved by us. 
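/* A short illustration of how the `mi_region_info_t` union above packs the region
   flags and NUMA node into a single machine word, so the whole record can be published
   or read with one atomic access on `region->info` (as the region code in this patch
   does with `mi_atomic_write`/`mi_atomic_read`). The helper names below are assumptions
   for the sketch; it also clears `.value` first so the unused padding bits of the word
   are well-defined. */
#include <stdint.h>
#include <stdbool.h>

typedef union region_info_u {            // same shape as mi_region_info_t above
  uintptr_t value;
  struct { bool valid; bool is_large; int numa_node; };
} region_info_t;

static uintptr_t region_info_pack(bool is_large, int numa_node) {
  region_info_t info;
  info.value = 0;                        // make the padding bits deterministic
  info.valid = true;                     // a packed value is therefore never 0
  info.is_large = is_large;
  info.numa_node = numa_node;
  return info.value;                     // store with one atomic write
}

static bool region_info_unpack(uintptr_t v, bool* is_large, int* numa_node) {
  region_info_t info; info.value = v;    // read back with one atomic read
  if (!info.valid) return false;         // value 0 means "not initialized yet"
  *is_large  = info.is_large;
  *numa_node = info.numa_node;
  return true;
}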
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); + uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; } +static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(start != NULL); + return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); +} + static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); size_t idx = region - regions; @@ -142,13 +149,10 @@ static size_t mi_memid_create_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; } -static bool mi_memid_is_arena(size_t id) { - return ((id&1)==1); -} -static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if (mi_memid_is_arena(id)) { - *arena_memid = (id>>1); +static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { + if ((id&1)==1) { + if (arena_memid != NULL) *arena_memid = (id>>1); return true; } else { @@ -159,6 +163,7 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t } } + /* ---------------------------------------------------------------------------- Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ @@ -187,16 +192,21 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; - r->numa_node = _mi_os_numa_node(tld) + 1; - r->arena_memid = arena_memid; + r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write_ptr(&r->start, start); // and share it - mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + mi_region_info_t info; + info.valid = true; + info.is_large = region_large; + info.numa_node = _mi_os_numa_node(tld); + mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -207,36 +217,33 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? 
- mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - if (info==0) return false; + mi_region_info_t info; + info.value = mi_atomic_read_relaxed(®ion->info); + if (info.value==0) return false; // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + int rnode = info.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - bool is_large; - bool is_committed; - mi_region_info_read(info, &is_large, &is_committed); - if (!allow_large && is_large) return false; + if (!allow_large && info.is_large) return false; return true; } -static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // try all regions for a free slot - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; if (mi_region_is_suitable(r, numa_node, allow_large)) { - if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; @@ -252,8 +259,9 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; - // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { + const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); + // try to claim in existing regions + if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -261,30 +269,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo } } + // found a region and claimed `blocks` at `bit_idx` mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); - mi_assert_internal(!(region_is_large && !*is_large)); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(!(info.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *is_large = region_is_large; + *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_large = info.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (region_is_committed) { - // always committed - *commit = true; - } - else if (*commit) { + + // commit + if (*commit) { // ensure commit - bool any_zero; - mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -294,6 +300,21 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } + mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + + // unreset reset blocks + if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); + bool reset_zero; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } + mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); + + #if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif // and return the allocation mi_assert_internal(p != NULL); @@ -325,7 +346,9 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); if (p != NULL) { + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif return p; } _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -346,56 +369,56 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
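// The `id` is the memid handed out at allocation time: arena allocations carry
// the arena memid shifted left with the low bit set, while region allocations
// encode the region slot with the low bit clear (the exact packing lives in
// `mi_memid_create` above). A minimal sketch of the decode, mirroring
// `mi_memid_is_arena`:
//
//   if ((id & 1) == 1) {
//     size_t arena_memid = (id >> 1);                        // direct arena allocation
//   }
//   else {
//     size_t region_idx = (id >> 1) / MI_BITMAP_FIELD_BITS;  // index into `regions`
//     size_t bit_idx    = (id >> 1) % MI_BITMAP_FIELD_BITS;  // first claimed block
//   }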
-void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; - + size = _mi_align_up(size, _mi_os_page_size()); + size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; - if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { + if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); const size_t blocks = mi_region_block_count(size); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + mi_assert_internal(info.value != 0); + void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large && - mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead - { - // note: don't use `_mi_mem_reset` as it is shared with other threads! - _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset - } - if (!is_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); + // committed? + if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); } - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? + if (any_reset) { + // set the is_reset bits if any pages were reset + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); + } + + // reset the blocks to reduce the working set. 
+ if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + { + bool any_unreset; + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); + if (any_unreset) { + _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); + } + } // and unclaim - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); } } @@ -416,13 +439,14 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); + void* start = mi_atomic_read_ptr(®ions[i].start); + size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); + memset(®ions[i], 0, sizeof(mem_region_t)); + // and release the whole region + mi_atomic_write(®ion->info, 0); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } - // and release - mi_atomic_write(®ion->info,0); } } } @@ -432,6 +456,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ + bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { return _mi_os_reset(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 8c4c1707..9b6e4cd0 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit + { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
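  // (Each entry also maps to an upper-cased `MIMALLOC_` environment variable,
  //  so the new reset behaviour can be tuned without recompiling, e.g.:
  //    MIMALLOC_PAGE_RESET=1 MIMALLOC_RESET_DELAY=100 ./program
  //  which is convenient when measuring the cost of delayed resets.)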
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 02683a02..553d72c9 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512 * KiB) align_size = _mi_os_page_size(); - else if (size < 2 * MiB) align_size = 64 * KiB; - else if (size < 8 * MiB) align_size = 256 * KiB; - else if (size < 32 * MiB) align_size = 1 * MiB; - else align_size = 4 * MiB; + if (size < 512*KiB) align_size = _mi_os_page_size(); + else if (size < 2*MiB) align_size = 64*KiB; + else if (size < 8*MiB) align_size = 256*KiB; + else if (size < 32*MiB) align_size = 1*MiB; + else align_size = 4*MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. #include -typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok != 0); + return (ok!=0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2 * MiB; + large_os_page_size = 2*MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; 
if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags & MEM_LARGE_PAGES) != 0); + *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); - size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,50 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; -#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap } -#else + #else UNUSED(try_alignment); -#endif - if (p == NULL) { - p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; + #endif + if (p==NULL) { + p = mmap(addr,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; -#if !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_NORESERVE) -#define MAP_NORESERVE 0 -#endif + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; -#if defined(MAP_ALIGNED) // BSD + #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= 
MAP_ALIGNED(n); } } -#endif -#if defined(PROT_MAX) + #endif + #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD -#endif -#if defined(VM_MAKE_TAG) -// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); -#endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -335,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; -#ifdef MAP_ALIGNED_SUPER + #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; -#endif -#ifdef MAP_HUGETLB + #endif + #ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; -#endif -#ifdef MAP_HUGE_1GB + #endif + #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else -#endif + #endif { -#ifdef MAP_HUGE_2MB + #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; -#endif + #endif } -#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; -#endif + #endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); -#ifdef MAP_HUGE_1GB + #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } -#endif + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -378,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); -#if defined(MADV_HUGEPAGE) + #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -390,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } -#endif + #endif } return p; } @@ -404,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size % MI_SEGMENT_SIZE) != 0) return NULL; + if ((size%MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB -#endif + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint % try_alignment != 0) return NULL; + if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else @@ -444,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ -#if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); -#elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); -#else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); -#endif + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -564,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -616,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } -#if defined(_WIN32) + #if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -627,9 +627,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } -#elif defined(__wasi__) + #elif defined(__wasi__) // WebAssembly guests can't control memory protection -#elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); @@ -640,10 +640,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } -#else + #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } -#endif + #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } @@ -674,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! -#if (MI_DEBUG>1) - if (MI_SECURE == 0) { + #if (MI_DEBUG>1) + if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } -#endif + #endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); -#if 1 + #if 1 if (p == start && start != NULL) { - VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } -#endif + #endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -748,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -780,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize&& p != NULL); + mi_assert_internal(oldsize > newsize && p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -808,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { -#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE -#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) -#endif + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -848,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } -#endif + #endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -869,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -910,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -963,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); - if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -974,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -983,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p == NULL || size == 0) return; + if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -999,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum, &numa_node); + GetNumaProcessorNodeEx(&pnum,&numa_node); return numa_node; } @@ -1026,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for (node = 0; node < 256; node++) { + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf, R_OK) != 0) break; + if (access(buf,R_OK) != 0) break; } - return (node + 1); + return (node+1); } #else static size_t mi_os_numa_nodex(void) { @@ -1058,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } diff --git a/src/page.c b/src/page.c index 9085ccb5..df6ecc71 100644 --- a/src/page.c +++ b/src/page.c @@ -75,7 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -229,6 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +343,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -597,7 +598,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, block_size, &page_size, NULL); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); diff --git a/src/segment.c b/src/segment.c index 549dd339..ffba8c0d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); + /* ----------------------------------------------------------- Segment allocation We allocate pages inside big OS allocated "segments" @@ -40,7 +42,6 @@ terms of the MIT license. 
A copy of the license can be found in the file Queue of segments containing free pages ----------------------------------------------------------- */ - #if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); @@ -143,31 +144,50 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif + +/* ----------------------------------------------------------- + Page reset +----------------------------------------------------------- */ + +static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + size_t psize; + void* start = mi_segment_raw_page_start(segment, page, &psize); + page->is_reset = true; + mi_assert_internal(size <= psize); + _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); +} + +static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) +{ + mi_assert_internal(page->is_reset); + mi_assert_internal(!segment->mem_is_fixed); + page->is_reset = false; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + if (is_zero) page->is_zero_init = true; +} + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) -{ +// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +// The raw start is not taking aligned block allocation into consideration. +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? 
segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; + p += segment->segment_info_size; psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } } - + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page @@ -175,19 +195,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(_mi_ptr_page(p) == page); + mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. - capacity = MI_SMALL_PAGES_PER_SEGMENT; +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +{ + size_t psize; + uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); + if (pre_size != NULL) *pre_size = 0; + if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - */ + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +{ const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; size_t isize = 0; @@ -234,7 +271,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->os); + + bool fully_committed = true; + bool any_reset = false; + for (size_t i = 0; i < segment->capacity; i++) { + const mi_page_t* page = &segment->pages[i]; + if (!page->is_committed) fully_committed = false; + if (page->is_reset) any_reset = true; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -275,7 +320,7 @@ static bool 
mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -328,31 +373,31 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); - bool protection_still_good = false; + bool pages_still_good = false; bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - if (segment->page_kind != page_kind) { + if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { + pages_still_good = true; + } + else + { + // different page kinds; unreset any reset pages, and unprotect + // TODO: optimize cache pop to return fitting pages if possible? + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } + } + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + // TODO: should we unprotect per page? (with is_protected flag?) 
_mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { - mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); - segment->mem_is_committed = true; - } - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { - bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); - if (reset_zero) is_zero = true; - } + } } else { // Allocate the segment from the OS @@ -373,27 +418,42 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t,next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); - - // guard pages - if ((MI_SECURE != 0) && !protection_still_good) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); - size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + if (!pages_still_good) { + // guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); + const size_t os_page_size = _mi_os_page_size(); + if (MI_SECURE <= 1) { + // and protect the last page too + _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + } + else { + // protect every page + for (size_t i = 0; i < capacity; i++) { + _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); + } } } + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // initialize pages info + for (uint8_t i = 0; i < capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; + } + } + else { + // zero the segment info but not the pages info (and mem fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); } // initialize @@ -404,13 +464,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } _mi_stat_increase(&tld->stats->page_committed, 
segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -463,24 +518,22 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { + // set in-use before doing unreset to prevent delayed reset + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_committed = true; - bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; - } - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_reset = false; - bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, tld->os); - if (is_zero) page->is_zero_init = true; - } + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_committed = true; + bool is_zero = false; + _mi_mem_commit(start,psize,&is_zero,tld->os); + if (is_zero) page->is_zero_init = true; } + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } return page; } } @@ -503,22 +556,21 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) - // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets - { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_mem_reset(start, psize, tld->os); - } + // calculate the used size from the raw (non-aligned) start of the page + size_t pre_size; + _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; segment->used--; + + // reset the page memory to reduce memory pressure? 
+ // note: must come after setting `segment_in_use` to false + mi_page_reset(segment, page, used_size, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -568,7 +620,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -628,6 +680,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -636,7 +690,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -666,8 +720,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - page->segment_in_use = true; - segment->used++; + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -685,7 +738,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); + mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); +#if MI_DEBUG>=2 + _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; +#endif + return page; } static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -706,6 +763,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; +#if MI_DEBUG>=2 + _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#endif return page; } @@ -717,7 +777,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + page->segment_in_use = true; return page; } From 049dbf41bacbf8a839551cd3e7710ffa1925b770 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 15:44:07 -0800 Subject: [PATCH 28/41] fix commit bits for huge page allocations --- src/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory.c b/src/memory.c index 94b6348f..214bf0d3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -181,6 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, 
®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); + mi_assert_internal(!region_large || region_commit); // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); @@ -194,8 +195,8 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); - mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); @@ -291,6 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { + mi_assert_internal(!info.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -304,6 +306,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); bool reset_zero; From 1674d551ffe5dfffd978737786fe8f94ec7b258c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 20 Nov 2019 20:45:31 -0800 Subject: [PATCH 29/41] add verbose message with secure build level --- src/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/init.c b/src/init.c index 081e7ce7..81413aa9 100644 --- a/src/init.c +++ b/src/init.c @@ -470,6 +470,7 @@ void mi_process_init(void) mi_attr_noexcept { #if (MI_DEBUG) _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif + _mi_verbose_message("secure level: %d\n", MI_SECURE); mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) } From 74dbfc30bebc2e7e48e88edf3cf66b35c057b16f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:21:23 -0800 Subject: [PATCH 30/41] improved security by encoding NULL values; double free mitigation on by default; more precise free list corruption detection --- CMakeLists.txt | 15 ++++-------- include/mimalloc-internal.h | 48 ++++++++++++++++++++++++++----------- include/mimalloc-types.h | 6 ++--- src/alloc.c | 4 ++-- src/page.c | 6 ++--- test/main-override-static.c | 6 ++--- 6 files changed, 49 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa9c126f..467fad95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,7 @@ set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) -option(MI_SECURE "Use security mitigations (like guard pages, allocation randomization, and free-list corruption detection)" OFF) -option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) +option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) option(MI_USE_CXX 
"Use the C++ compiler to compile the library" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) @@ -70,15 +69,9 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE_FULL MATCHES "ON") - message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)") +if(MI_SECURE MATCHES "ON") + message(STATUS "Set full secure build (MI_SECURE=ON)") list(APPEND mi_defines MI_SECURE=4) - set(MI_SECURE "ON") -else() - if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) - endif() endif() if(MI_SEE_ASM MATCHES "ON") @@ -92,7 +85,7 @@ if(MI_CHECK_FULL MATCHES "ON") endif() if(MI_DEBUG_FULL MATCHES "ON") - message(STATUS "Set debug level to full invariant checking (MI_DEBUG_FULL=ON)") + message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 73849337..452f0b68 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -275,14 +275,20 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { return segment; } -// Get the page containing the pointer -static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { +// used internally +static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); + return idx; +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + uintptr_t idx = _mi_segment_page_idx_of(segment, p); return &((mi_segment_t*)segment)->pages[idx]; } @@ -373,53 +379,67 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { // ------------------------------------------------------------------- // Encoding/Decoding the free list next pointers +// Note: we pass a `null` value to be used as the `NULL` value for the +// end of a free list. This is to prevent the cookie itself to ever +// be present among user blocks (as `cookie^0==cookie`). 
// ------------------------------------------------------------------- static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); } -static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) { +static inline bool mi_is_in_same_page(const void* p, const void* q) { + mi_segment_t* segmentp = _mi_ptr_segment(p); + mi_segment_t* segmentq = _mi_ptr_segment(q); + if (segmentp != segmentq) return false; + uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p); + uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q); + return (idxp == idxq); +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { #ifdef MI_ENCODE_FREELIST - return (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + if (mi_unlikely((void*)b==null)) { b = NULL; } + return b; #else - UNUSED(cookie); + UNUSED(cookie); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { #ifdef MI_ENCODE_FREELIST + if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } block->next = (mi_encoded_t)next ^ cookie; #else - UNUSED(cookie); + UNUSED(cookie); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page->cookie,block); + mi_block_t* next = mi_block_nextx(page,block,page->cookie); // check for free list corruption: is `next` at least in our segment range? - // TODO: it is better to check if it is actually inside our page but that is more expensive - // to calculate. Perhaps with a relative free list this becomes feasible? - if (next!=NULL && !mi_is_in_same_segment(block, next)) { + // TODO: check if `next` is `page->block_size` aligned? + if (next!=NULL && !mi_is_in_same_page(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(0, block); + return mi_block_nextx(page,block,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page->cookie,block,next); + mi_block_set_nextx(page,block,next, page->cookie); #else UNUSED(page); - mi_block_set_nextx(0, block, next); + mi_block_set_nextx(page,block, next,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 893dcd67..9c5d3c19 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -26,16 +26,16 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page // #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) -// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) +// #define MI_SECURE 4 // checks for double free. 
(may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode // #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. // #define MI_DEBUG 2 // + internal assertion checks -// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON) +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 diff --git a/src/alloc.c b/src/alloc.c index c4863115..e68b48d2 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page->cookie, block); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap->cookie,block,dfree); + mi_block_set_nextx(heap,block,dfree, heap->cookie); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } diff --git a/src/page.c b/src/page.c index a8115d27..437cd0a5 100644 --- a/src/page.c +++ b/src/page.c @@ -283,7 +283,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap->cookie,block); + mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -291,7 +291,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap->cookie, block, dfree); + mi_block_set_nextx(heap, block, dfree, heap->cookie); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -356,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap->cookie, block)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif diff --git a/test/main-override-static.c b/test/main-override-static.c index 19712411..b04bfeef 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -15,9 +15,9 @@ int main() { mi_version(); // detect double frees and heap corruption - //double_free1(); - //double_free2(); - //corrupt_free(); + // double_free1(); + // double_free2(); + // corrupt_free(); void* p1 = malloc(78); void* p2 = malloc(24); From bc1ff7e7fd5b3822d36b06fcfb532efd422286ef Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:28:10 -0800 Subject: [PATCH 31/41] update windows redirect, issue #170 --- bin/mimalloc-redirect.dll | Bin 46592 -> 55808 bytes bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes bin/mimalloc-redirect32.dll | Bin 33792 -> 39424 bytes bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index a1daf316b56cb3c885dc48f1c5fd1ef0ffa302b9..77e40c1391dc4e3f6fa08c30c1fe461df1b0fa80 100644 GIT binary patch literal 55808 zcmeHw3w)eqneRIxv1w_V01X61_}WhCU}>3Bg!Cqpl9aEQwnG}&h}FzAnKmQ2408zs zr`6Q7Yq}wOBI0kmuCD1ly+`+0iXvyg+6G0eoLx8W>M@7ab0$0GSdSNitLFTl_wvm* z-%Qdp2M;7petEz5_Po#SectE2e3x1GNH@ERF_w!kGQ!vy#PsLl*Rd}bP(ACGug_xt zW%~E#oUv7YZ%%cCKcoeMEn9=$Cauof+}sk@Hv6<-q*?PfYvmg@YE3QmzQqLvGwc%e z65pqP7RkN%tP1b-ym7jf=ihjCJK|q7#ZL!#eDQRc$Jae8;{VO_RsOmLYO6SPS1?xp zm($qug+Kh5D!atyYFFpXDq@eITy@wYiV-`a#DF=T!%!7<* zq@wh^C@G3E_6I=sblaGN8u!@PO2PwbnnJf&~~W?`WHrV-i-Yyvu$Qrd87Ke`Y$n3T5lnwBn8RWL#wve-;CziO0IVZ? 
z&?(3F!!)?nsU_LmEA%(C<-Lpz{Zhka@C*_rvc+;bByFD<~Oi?Z<>aS6fDhF%~VADNEJ zS#-tFm#@aui(7>qC*FR zPJMT-of4(Qb3Ub9lqwRbk$%d!^!9D`A{Isq0*2s)r>A!S7`c&{!jR9?K$3Wl8apAG zBaMv=l8T~q{z{0d#v)MbHoJpGWVU1_(icrjdHWEQ@d$W8qH(imhb8NX~K zA(yWM=AzVJqMy&j@MA1-wMCImeOazORHBrv=~wmZoi7qIj*k4qYvZGxBqOw#H`GQ( zlbNRIRYm9pwksCCPv5cX_WNOIXcoE~R`tH~WQ{Y?|J5+-tA3z&en;;tE&vD_!%P@V z9z%vJGw-)+Y%K9bwE8)w1laKZL|)kNmFi6yOnCTNd*7g1cp!udQd zYQ3fKg>t(Ga?BvTdEzuhf5jR9Eux{vZ~?bf;7NydmkJcE)k|L^b8zc#hV31SiiB!yq2KK~EnY)K_M8F>ZB9NvaK$Sajz`{l=Crvav3tp=L{e{%E#D$Y+ z2lQn~g>HrJ`%ueueZ!vWm7RHW(XeviOKUqXRCeVR@P@gz zvX1hikqiyrI7^7D?Q=S2+lD@lp$e)?iLz8O(DH*hE*{ zk5b|`zhgGMT3#u#!j2bHsw?jn0j42GR-8!=lU1T4^9%VPT1lEI5^3JSR89^QWz$!@ z7`ch+4Zq2OW%m{(&hR=D{K6MA(N)?p(WhAmH1zoLVdx#Uh2F!3`cbC}S9}*LFkz5K z)u0c?J*tKT6V8Gv?3Cpdb^?Kw*zM@VjFXjO!W5fbT`8Hc9=|b{k<$eam{K5E;R`5* zd%aNcsFlN<{U;z%oLl||@N7Xz{ z1I87;T~GyAUcr^14wm8X;_E~iw5hG=7yal`TnQvJF#>Vo83WIa1|B|%ay)qk9{a+6 zc!KdO*raPpTIJ*Y|6AGroCRV>6RsjXMZI5m`t^+n<*JTu0uo4$VFe%-|)^T@yJV*Ow; zYPo)}54{BIRv>%{G1MnZ+$=I}X>4dOWx37rMMJ6y^3E2k3GP63X$9QM-MUy!xcPiR zW<3Y?eisb~Ej|Oo3dW9J!D<9G@waH0y%0lJI9BwB#f-z-VAY{<3^3PXO3XoGrkJ)0 zKjVcYfZaT}g#v0BG$i?JRPh ziZC~iKKpb3+KO5X9HG_@fWcbo-0y?a$lWs96}M{1i{deKf>S-a+M& zyrB_{e?rfX1eV6yhi!#Be?(AHk>x~yuhp+e0iR9caB>`XN>yEf64xbhEZMeRaU+re-B&E%Z}{Nn8hg^vSYuLSWmk-Ge-dLTa+lt@)836v(%WO$rOLD_>vRnh z*EQtzYvQl{3O32?VN)2!!0M4o3Pk2&0lBLdLn#0X!dpdV zaq}q>=5wc0!bZchRjcp}*q2U+RDA8B6R2K?B4XA}qgpE`TfP%lZdezn!V~{7N$~{$ zOGJJ{tM!p>WAP1TKo{jOv8A;7DQDZSq4XjQxAHd!s48(S;amcrJDt;nK`$ki$}_UG z^y|dD-q{~t3ziam|KW^p{|(r?>G#k>SKbevqxyo^mKl4Z=vXVeQC;+ut{BaKkK(d@ zI0TMIe>XA$mwnBz$KkTMzozLy5ZZBZ03!gV7e>V3pyP;bg}a`}V{A*=nOvLR^^846 z9oto~glMgL!&pL_78TT(uasdU8YMKi;!-PuB0}){iv|L*iOUCdheF^mAqbZm2R?-_ z?93%DV2d}JIQl6LY2#l6LLC(}fNm!|d;r~!1ljfNVgcl&EU$1XQskiL5U*HO6z=+K z)LZ%OCiIm_esLryk=&(_+-2r>o{6Yc-a~@bL6|Ai^u+m;u2AY7dB6W!Ix~GA z1vB;}GnGcmO4YK`%*dLF7BFwIBXoajtW@&MUtY2okMK&n2dqlQfW_v{TDVgo1?D?JcKE(9LC%d_oU5ZHY zRbBvK1v3c#;_Kt!19KGCn%nGAR=AVT(p|+56E@6?(fC{rkypG92N~_F3J|gI^)%Y& z!By~eDM$3n|3v~7AM!Bv90?x33hjV60K5SvCce37aWgfg#7w9nA5`M!(DGc~aag!5 zaT`JBM6O3&CDk289gK|bPq75G_)98%2BlbrKZzzlGz-Q5nTlUSF$da9xfTKRkN~;? zyg!H}meLTNeDg^0YUD9VhxHw22T+imLf_H#z)O}JH)2|)y9}@iC;Bb^k3WZr!6tFb z!G`YtPY;kqj(Up#_fi}%$YR{0T>5f6LTyuk#l4o|-(V^g23rLX>C`l`VD4!&vOF+= z+X&?CGR@RLdzqsnu2)HK7tzC>fW=?`*`Ji2bw9b|BKa&@$Ob5GkMWyKPGj#_*@bgk zIBgJ;ON{nBP3%{+N8b+xh${b5$TM)Uhs=*wF)M{E(VQr07b_Grna88Jw zF`W|*et<}#MP;W88~kc)KvB+jJC2c}!qYJ*d^!g>=PJUU^(^r?v4NO!eubS0PVb2! 
zzGG{kR4LyCR)SM7SNxMd;?U#e_=8{&bq+}BVB4-E=j4J)LP26y;pHA?oM2-@4$$r#z8v73k&W%3WyloOy)-N zP{%b;XFByC3{s8zj1r<&V>NeEB8!GZOGR2NRr_L$WpbhhIxY1h#H zpWgNH8@$#**?zIXILrwblF)qW$DK0#iTU?)gJOnuksriqBz65P=#F~1O+vZi+awe} zv&4FspyE`kj*4)DOJRo#E!B7P^R|F7M9{5#k4%?37w$VyPm8-aq;={7H=G6x6v{r6 zPpBV_;YbAs0G;KwfCb(`@Q(IU7KY=XRCFc_e}pGHUEm}IxklxhPSCfCB@Hg2ns(pQha@_$y;tGv(A%D zjy5@{ZDRvjF#0k;F&}|np07r%!|<>lFg&bNTDpsX9q}lDh3kf6a|qY%|9D1bO3A;1 z%J}qT@~1m4@+Bvj?B!z1qsTYy#Q9Ep4;50!h|WXGMJboEBY7xKAUaj(%8oFWsVO~& z04=(e9hu;9Tt{9cs$pgNI>A!5(*;U6#z~rIjp4Aw?Fr)^r-U!zj;FBw%Mt}YxN>&p zc3ALuXo|w+DK2Py!%z4Oy;Wktv2+U61}et|#$5Ppu1?;`Awh9pE(J6|jue$}$ep=6 zER|7e6irMUDia6sJI<22$Ja}5pWjN_H@I}gZ}_orJACd}NGioogK+Dlx{=~jA2nNc-q191gxE)5GaLWJzY30ZYQTF~A1VH%r2#$Y zMe62@72aDlWS88|9AqV>g*(5h42}8;h53a$mEoWTH+3iBKn9B%Ztif+Sfb9NQdB{I z`ffd6IBeP@@e#qN&V0&%S`rH~3X8xg;jAbf9Hjz*0|x63_r+eJ z@tfB}SF=2j*|Ff*aQ#_;_+)nVLuJANArijJ1E$?UR&>N&c+P3Ar zu5vUA;ob`V2MXf92O?9mnL@{Cq$i}l+hdx^x1gqhU>c~*LCX|$r9uNjB*h9YkmGLTJ(+hOL{9y`RN{e30!f zh;|p-Mm~lgHHt$=sD?J}^l;eNkAo=xVS3mNq%_?1v#m-1N#stoc;>4ZUeO-Qp@+Tj zIGown4;=vb(3fygEnqGR)exb&T~!EFow>I5SRO>krBnV~4lE|C#HK`zf;Ax-Atd*z z3KKuW^CJ#boWt?7<7_iBTa~>>;0D1&4qpV7ylp+M^v5RvIJXiqF{nLSf~|;3=QG>| zohNe`hER*Mva<}Z7eryFo*ysxAv(N$)Q!5d8Pj@0Zu+qvoGrYEg&?1h1uLyNiH{IW z>8w_*?943aQSLGeZh8PC=hc4#X&6wcsFB{8oTm~eg=uoVoe#pV_Qz|e@R&bqQog2AEa7pTl-v_1>+SmC~h!^}HY)z&1Ff*VC{E~ZjjM=v-^mWpSThe!< z)6h|QL;uRvLyk^PIdCZO7(}AR^dK4aGxsgL!;vZ_GuHU*81~{Reo`{lYw@*UR9PY~ za$x&ulyEbqlKBdTTd?Ci?fjBsiJLHZpgDW@Y+GX4*YgYl@jqlBFa)-=_IMsYwb37a z8Fa7?-<><-vM#~YTbEC!=#$TI*nH8#{$^5|i)I&Y?^^un|3H5sE4A64dBs1<{?Add z^kq7A(Mbvy@9p@K|27WWOW#UHlM@?XbKv2T81d!S{kRNstzxrIF?o@~o&SQ4NUyu9 zUp`St=v;uN^DkHK_PJyQF09ruM(}VDEX?MDg^@D&T;xjpGY2f}+D>@6OzlwzE8O)A zk)(Vj<}d6bI(YsmdmG}oeL5<^rH4$`!@|B)GUv-}r2yl%a81OaEDw6f<|6V+_t2rM zgm;XXO0hR-qo$md<{wpSd3tkNyC+Qh$UB93nn&H=Q9D?);8|tL)5|thyX&yq40) z1U1a`^ss2*&d;f47-AUivn44PS5b}LIh{ssirPpXvj5alY5qxhfPmYCkO!O)QBcN1 z;X-ecNhO>cXPpHTQTBULA>qAC_%g)H{wm{w*$LOUOw zc2wDOw`0A95{Lxl(>CuApu{mOZSblS-%qT^F{x$)1_$nz3Ji{se(Dooz!~Wr!r(|^ zs5da=QA?J1l@3ug98wZSp_}pU`Npz0zLYd8N%)Ix)4-M|ciXf(V?#8sXk2-elY&`b zxCw(IuEj)1dMuDG%0b<|OfTZfB^L>7H7s%qnI?gku*h{hO>n#QA~bysP*9JSiTc-n^op``2v4)Cl zks@mCR$k31HawKqNT%c%kQ*>ML8K#gj;HZWGG-8$oO4#G?=%aiPrWiS^0rv;SiL8o zFU5*KpWg@}`vGHkk{I&Styl<*+2jsoJ2(gg1jd*T^};}}gXW%fL33|?7pnrCW3j|7 z5}Y4I2#cqoV5gw~Nw4H_WAXf}6)tJGf!ICGfd-=iQ7Vu!^PCW||&pm@?~` zlON?`wxx9yO|@YsO|z!LtNJm=LPGRqe>?|nM~Fy9+(j5qa=YLok;pibWE^r*cMwL0 zg-lN3$22M&taXK@I)&h8v0xDd#gOG@BA+yMB)+NoAUS@=-f4~9@1xc7_c1a}3QDb` z3JwWzpin_vd$_Gb$NemfN7jsj&e{F2mLIraEziF!#(@OwoY^0DQ4tNG#351_kkP38 z20?!bRaoP#BlE-jP3!aP=wOy-y9(~X!<_Ml0b_c4Ao4y^ScAM1{dvOjtA31O;}H1r zd)3i&WG+uv6`#6bLG+~dHc%vyG!jT-Vewq;(C=V4m=60^kqES8Cpfs~1t|Q6EY#!o zQsb`TFYp4`BW=(LS7IAV4;XD-$+r8AwpWR^er$?)z!>hV8jHb@$&gYm#%Cc^(t#PIo?ao-6)0=b$g7;QeWf%_Stb z;v`&GK`v<`$RPzl5r6Maq_I}O(KMwy-9-ZFY7)*s%oaF4j zhM1pJSa-82`gDj!9kNkdhDMp-Yb3lo1AO}UkwoUnM(@ke$YdRv=*96qCJ8l&*eaA& zehK|W?vGHF4-VSpFcTd(ytX#;Iqg>jjZ~ep1GxHUrwjr9Eo{T zgUtg8^uquEbVzEpm#N~#})=s5WG zkJ6M2oq^xK9SeTE{N6Q+y^n+68%Jq68ozIiH9j-gpRk>Oz#26}uZ$+2$sbG~OFvw` z3`DK{`RmN&G5(dkTkQesS1XcVjDjR-&ST(9b#gx&1=SVt^V3m~T%I2|X?e(nqdV*e zNm9OQ)v32!@ox*v=_e!^R4w}V-!k`-1wPHcW|G$oH2Lc>-=z(DSNtDwo+2hJK4G}x zpQ8hoTN5}OSoIh5j9<8fTc6?ZM~cf(eAy|5>pPVmCQ95Rcu$`#aeo5aQ|jJ_4eD~a z{c~)t03hi9^p=eDUq&&}&$x~=nOsUTIlj$I@slApPO8t*o&zkoA#z9r zH~ji0zrMw<|AFh35X!=4HIUUnRs&fLWHpf0Kvn};4P-Tt)j(DQSq)@0kkvp|1AmSh zz&Ta+@156x`c^tUe6?+^Ow!kbzm;Z04U z*1AQ}yYJdm=i695)hl{b|7qFob;NirB6qcnH=AODr}E+) zR%~0rY%_D%OxrqUyC#QSW4lh!c?tCj-wiqJ2HWQa9V(~xgzxGccD3z;LXLgNU!Q}Y 
zI9QadC;16Z;UPI!%k~sg{?jr)SLQd%nCeLnRIl(*OnIe0!b@l;Q+!Jgzq8{ zh8+CDOfE~C`xe(XHlnna&xqb|OR%=SrP-HS;&1kcsYK0}we|jxcXK0$GRl0-qZHMK zeZeN;A^VrrKvn~pG~nc8l%L2$8oBhpO6+|ZL zjICR{uB@_h!+o{oYd4myuB@o7swiJuRdFBxa3Pwc)o*yXqN-|bd4)u`WJPt=LlrAN zQn7Kx27J6A316{36FTNxv8JqYW5tT~8&+&sv&IAuw5vC4RA`fU+*NBgRIROE`$$D? z<%Y8I+6`6Z6;+IlO4nUheV<;tcAa}eRrSU(;MQ!Yy3Zu772o=nR_#tF{+BG{l5URp z8|#;A^FvZ@)gRo6qDYA@~)=xkgqNhRHbRhtv(WjRcHZkxUPW`nD=bmV3t|$9~j-&pjNR#%7NK-%y`JV*p`Ad-ag84@ zG?}O7-GUV*sLjwc2b@=!CdnrVuF>E8sBk9MQUMQok!B_Vvv9+Zos2Nh3?Q)Bs3|Yu4MiWQrZmAo%a%wA)YRPG`vuXBQOcTZBebt$iBf^uJ) zq;f~4yxkL&TdSPd_;`nxYphqw+do0MJ(E=KK_&0ECf08~lT_{!DR1cn@x}`zN8I^IH zMeeD(Zt0eC_Q?2*l3%XV2B+vec0tNHDC1!n_er^nHcU-_S9DIurODVSV~>ePO2PQOpDJ;`lygAFmt^deat}_?e$W4o^ye~m$hc3+>zShc z?UHiV$XMBNY>LXOfKIg7q4`qv@j zslFd`NjYm|9F?(4+PQ0r+W8(S=NTERao;WFR{5zG4@h~3WnA)#u=fj6ZqF3$Z2Zl-p}V*yzA>%ulOpu;*DPu-U$CBN-cj;Sc!mUreb<$*k9Z1 zdz{`yZwR(LuE-D_Q?Y>ahfrPH;PVE+<5s-lYd~|p(yECS!Y}oMx4^eH`|3rbv|N<$3nR7Wh8e46JH=w3aJESvBWp54DmQ2GKC@_)wWB$AV;CYtdxZO!0X>3F% z&CZk|DttuAkWP@Lc1m7eLimjm`8^)<`|GGqvr|zofR!$lY5;F75_Bx3qr%DF7H{8k zyD{`kh}0l9)u-qivWP(}0YFD}fg!H~NzoHmvck&gj-~(P`}`oF^qy=j?9*JXK6RAT zwSZ35e-cA_xxgTGZvb+*ly4A#pJZN(z};Eljj{~z zRlYL%cA^)*C9kLmJpfl)owdDTHL|y%R%r>+a5&e%r|c-DS9yP_ZcS;x^F=t0n~-%MXXm3lFisZm}l0XSY| z5vmcjlc-e@hclH4XRB%@9kLCHSdD1~!Q0Dkd{CJsZ4fyKNDpxmT0eRjA%*> za!f0-0#1x`UdHhmd4p`yzD>F^^rw2pRA=;-MRy#(g16MA;WO1MP7J&zT~a&P2OoKa zpI1raGT;rF2wL`s;^RujiCL#sJeImNI_7%SABu;R@u#fM!e<5;Lq22NWUy}w4U<|$ z|Bqg;)Sm2rgHEc+rS$74FlqY80A~;+;Ot*k16d7ZHIUUnRs&fLWHpf0Kvn};4P-Tt z)j(DQSq)r~2He*$_9X--eOUTh#vVr~Lb?a>#_JfmLwGkuI9Y*y|49Mmq2T#=d|M zB^bo#5j;p=Kz#E;#u|`zA^vNG0Kp)B^mf!E-Gg}EBF6fW_8|TtKD63P@OLn_-^th> zq}dY2Rv-)@?MD1vgbt)HA%1KrW8Fv(B7Wd5{1yVz0mKJC2s%i!yBRxu5A=t$vy`!C z5PFdAL)^Oz`UwLM;`^3ECn)z&jBp8QZ3SaH5lR5lgZS`D;3Ig%i&jAgNV^b+5nM=j zBR+jE`ish4jLj{B-jMbn{?KaZ6X`a@Kf4dO38tK}=MV-#YY_1t@kMke%H8-@y6b-Q zBg)$lpV6UnlozdK>=y`br11?=R)yd}8sF7qpFwCq8sC~^&mshn#`jg(MT98Q_$DD+ zfY63CzRSqm2pve{n|(HkJkN81^3j~iiiZFn58{&NkgGl4OQ1)YlIMR5Q ziOpG$7-@XxfgMH|MjG!akS(dW_eTOvOC1ooSijc;0+^iL$1Zh0u%Z?*x zNaHzYb`imWG@gNFHz7EY#`BTPi%^O*o)KkVL~tRE=Lgv<2s+Yuc9LE5AeteK=SNv3 zLJiV*wvcT@@F0!nk=R!e8j!{_c2j)T8gEZbXVNWCUAl*~V*zXW}k;c1)>`8<^Do6b6!_Ys{*}r$c1{yvq-g@08 z<1fnic^SVdu)kZ-BRVUIrYUcx{ce}Tm5aNSYw?l%vgVL~W0-yd zfxVi8vShxx<)LPOT}!=BRPUwD7m8}##^)d^6`HF(sbf_~LuO-rz>z8)VJ4~_qu{N{ejrr*sPz;C&MJ%qMY zF6lR%FgIl=pPq|333Y+JHnu%ER8yO0bA|?1-WzRd)bJZE^y?9;7A#rp#7_e>W7NYT ztXl9;^_oSc3pDW~B8~VptW^u1@P!uK`=NpvE4`tRuW2)W&;$sYL#q}H1z8rmG}t>{O$@} zS1s^{);71cJn9QB&?5e_x;l84RSULw8$-SY+8qX-m3NFzW#t_yJgvOL5Dv{(-l6n{ zjL{3(L{5w==`q(a{aDR0=ke0xuH)|G zjuXxkr6=?g+DXUBl2h6#5a|IOL`6L%J^6c!_OgBX`-=8y`-b^WxjB%_c%%2f4t`e$$jQT?8KgvJtwq{EPak7XA?oNM>3e(H z_IB*uvp2f0ZD04k!2am|j{OY>0tea-cn*^M(SvOVYYveP0*9i9+=odI4Tl4V^UseMX H*1-P-7U^^R delta 7909 zcmbtZe{@q-p1<$W*t9fl3Qb!ml-H7Alv*<7S1K(8tnO3l7!)<*`XdEeQhtX1km87q z7TtDZHES)!`>3WWAVP?-sE$*qVIOQml{oI%P0>kY7 zwK?Z|Ki~WPeSf@rUlPyjGdveWpQriY%=f?hY+KJ!l}@F1_kOPQyN`YeylY0F_hSWn zdVjCr14rX{RO#PcyV=Ej3IEt{C1lGp$z;Zx!G~1Y6e*Nuis^b%$s~dzHtPX;axhjX z_-UcAVtIHgA(x@^y9HuqCLV#5B>|lRnVP`_{NoddpT%$zVx`}RMM-e*A1uh5RoqbB z)Bwfl^=Mf5O8S+Mzv9gluuf1a$PT*dV2uSG{BRWd>hvqGmKJn5rG!2-NiRV$q#LEaZp-Dz#5z(w-UCcL` zC|HQ*@QrAawP-$((0n};O^}8El*tBB%EX+REXnK4`d?W5xlq#+NiS2oW{K|f!-Pa# z@H&wi73i}Xb7>DI#H%}Q63Ce(Es$Ri0u7Wxvj!zk-Zg0w_FOY zev$lSxl~ABOSwPe{epy1yY6Iw=BDQ7y%=CMY&4oHy_!SNS1x}0cJF9qyJmlYZb;1+ z9}3VtsV`Q%C&lZo6DLbi!%08$()}}}87DUywXIoN6dOc0UhEOZwQHV_oB7bgjM4FV z2I`#oLf+}=Sd5j4oM>2fS@u>MZPA&RhBMOsQxHzmi@GJko79vp37-z%mVR0kPrOWZ znF~bMJ+wIUfS5mb`1Q)RC|ZR4eaV(&^yp#c4Bm_XnNgl6PoLR%!%1 
z3lrAS{51t>zvSItFq1vE8I)8zxv-iESt{R}3|%=FE(ey-z`86ZP1A9yMm0rMDPw8z z9HVq#bx~Ru@4W& zoonw(UisvOAQt!C3ZorBTHY|S{VZfWEzTgMX6;ClAh*9@^g)H~{j+^|DDL@VEcSHx z(1JO1cTRRX*I1cm)*mU&Svi-9o(?z6xymt_sb4!!|CCdamx-tK+Q1`C&~G|VZ^^xZ zCeKONvJfrPOB?Caxh3;7|J3@o9=9-$&(6`a@OUF9UHV^=&SEBw=djyQ4V^!yq`AUa zh<*9xUJM2s&=|q+UkV@%*%>EM5NgxO-e-*=h@vITuHDd& zh`(mb5v_m#rB)(nC234KqnX0fBuwTZ?W>3$)kDv8VxyKVA*(Cnq~-${A#M&*MZl5< z9TJQP*eNl?uOYyLXQzXUpTbkJ{m^V4^jbbfwn}85MUm&pUoJT|X$tf3a zCuHe+QQuh_&d*8v483@DJI~ULw1=o|L2_C(6Jlja^zxUAFtorXR+Q6k7u+J&=FpYW ztKt{gbW9o-zcA2K1;t{>K)))uMQk(BC52Cj-(=CD!h_Uqnk!nf=nm83?943OTkVxb zYh^o*iKU+`eJ^Hssgz141AW`H>c${avBmHJ*+=%iU>srC{;aY<(F3!Y@PIKy^A>%Y z%v90ZF?w;)UNK1Mf6S}Ffvnvi%a}Eb;*Z6|!C7gv#|$+q1;P#}Osttau4OA~A&p5T z;mNz%(Q1)f!V=p+R;ICHA(<4NCM}^Cq~)|FwS>CO*(EEKFcOR;vR^ZijFrH;+1ayD z{ds)5$ZQiiSYA11cw)uyh*=h>rD%>g`x0GWv_Q0H&>cmM!dt`Vi}Hn=b9nkDJ!zQ9 zc`IoYCkxfOa+M{l^sdGAF&Yf9+m#CGMl*q_Tyr>5ocl zM{xXM5;tOS?YjMqsQcn4s2g*}&PiyL%LZ#Cx5-EtpPH*nVB5)?$xb2dm}`lJH?vu{$eUT0U2hFLPsQuNvPD1p zODuNfE@mq!dsA@nB!q&pym4W(Ni`K!>ASeq@loa1sd6@chuoH>om_KDqZG=rx&DuA z6DU(^iE5MwMBe@$l5S*`%xX)@#Q5cRK2Sc?n_5n%EOYc(ER8KX@5lIlkmz7pPID!O znJc$#H0tHHN+UMl1qYjS8>?>;rv+H$re#=*Cc?{d%PnQNlMkF>{l8YsP~Q%$>W*i- zI%$6)n@tN|UvLw%g=~3TFgKHiG~}?v!fd)H{}8uyv%J$017ZLf!g5f(TrcY|leNYTj;W3h~rVH|wxS&0&2ZDNK$K+cjN zFMrmg=%veWhL7efN|q^_mT<>|A`Y+AR6m2@^`riMMs7Ya4Y z*n^E}bgI*6m)k1@WwR;+HY?jaIV4P^GE0LZ%Evan1WJ@Ns&ej67mYGpNWn9E)^a}VxD-G`ws2~V^nf~K)?nxpSd(`_qn z(+^!@1LqYWG7YcH)@q~QzaNV!Bdw-3>#f(_zYCEbpyg-FDpuOhxnz{FP19NQ+EHY_vk^S(Pv3&{tPwH~%*(#uoEzXRF4Z`dP5X zKeY_%R@^MHT6270qjQiFne2>I^AK<1^J58T_T6I@n+}OI79)^_yV#-+!X-Z3Z1zZCkZ2*Ihs>6{$@IpZP zq3ho;{Ee6{WZivF#XZYv3|pTZ%e*Ua;iV1d{~YL|jz=6q((s8#Tmmhqs>s}gciAL? zB&oP=_~A|Kh5QtOq^yI=AQFSHoCs+mNfYiQ0^UtC1p|GxD!w+A}?hi&$jPpm3 z9`af}WS}h2K~O!&0{J@7Mvx2ibJy_0HQx${ws~IblQqN9P(-8ixY;iQgmf^(*M5zD zAsGA+@QNdyUTs5YuLJ3urtEN@ectWM!6VH{)kmd3ua!FieQC0SY%Ena= zRTWC8TwLW*ReGaZp^vLVs%kvp72&FJRb@+fO>>pLj#ubTsLV`+iIP=SD4}w4l}A-g zB~%?;HKM94Cbhy*u9{L+?u06>KAtO=A=ctIIwO}@REf#PB{D|`N0g|1Jd5JKU8Wse z)5OtpjioJN>EoIXj=^gzLkY_;*Nk!;zsAy+u>4NdG{ncG=cv{hRxNcB*H{=Tqn5eE zk&w8##FLQtxWunYOd&1_a~$QUX1p>pQLdR*EgIuBs+Lz~M$a`8M=Qrswq`K5TpzEw z%Vg&oC&wm^gpCuH&V;3pYy2ETD)N|m)lwJXnlXlKW=vCDqJ1fH6D%~*I4R{)R-e&6X7_n;tla?!fJNiG}q|&#(jTdGBXZk+BiBms+C(3 zSv*|h=NP`m(ym(0k8;iQH5Pt8gMIV0P4TR`9Mx=kbG)v;`4+CRU1Kqth+*d%=QS2~ zHHOK}H6D&Wj$vkjlx{XZSide)kZbUhg`yed=;4-@gk_X#ra5Z&#XSh0>yTXb^T{FftQyjIA#bb^0`VxD= zByo+Eql2T5XZNYu=QnXp2S+vQcw!_YTrMhbuH%X24wY#=Tb=6xN zcU09$JF9l!cj+ZHb=B3y!zWJW&ZL%IX~Q4vS}PPZxN7U6-zROZYOJr86vHYdSan_9 z?z;M7`croa4Y_N?+Im{B$56_mU!4=~TWag8HX(6wsw9~k>h?(uyQLkqyS7VvYa3kB z&f0i->e?gGXZGAY{9TPxY_{MB>HQ${W1;xB!8?IJfNbC`cqO`M zz|98Uofq8TA$^qUE{2fROn<-%FTtdc}9C&XYX(rR?`11(4 z1R4jg$B*xKfUIzE0#AV?@T0(t0yu(~3JKW>vVg~X2I&O3!Q;h(`~lPi9&ZvPc_H-R z@tQ))K&{~Mu0ZyJeBeC`n~@xy4s`IQKt2Ka!Q=iXq7nZzfY$>{Kq2tB{K;0(2zcD~ zpFvviIO>TRqz8|ygxn3v1&_;uJPVS*<1iyZ(5w}*vB4oVR9< zw&8!Clgm*(cnN<2y#k7W4+HPCVhX{BfVNT;0NxGkxf$yRo~*(OfyTglfK6qXDI^F3 zGs_VVyajj$lnZ_eczHFJ2E46#4c@*%R&+eT=WVDO{5bISwMfJqfFG{I(ty|B0$Bwd z!H)uOzZH7|yc_u3ZI}`8L=uz%+Gr;O>`cw;yGf5Q&Des@mPJSmX<0=``p3d8P? 
z4<GgY^lSTb`@{W_{?-BCfPK(0 zm^&m5jSoeKriO?Qdzj6QKj;tnNBj}LJJ1yH1X=^WfVI=sDRKI}LVv7M}zuoWfJ27MWKyE<7oQ(y>1JS?~=FQ((80-vnj$ra!UG6SVm!;d< XZR?gXfz#cjN86+C$?d`Y=oJ44aBk?6 diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib index 1e22ef12aaec87c9544724a5c766ddbc9bd90ac5..550db8ec56373737c44eaf236727085670796ba1 100644 GIT binary patch delta 104 zcmdlbwo7b6m>A~?1_lu3a)RPLKzh%_oR-N!jH;7gGx1N>VN{#=L3eUF<8=KKoD2+3 pTnr3*fH(lizQMu3z{|kEz{tSFz`?-Iz{0@Fz=mLLwq^Ru0RRTM6hZ(1 delta 112 zcmdlbwo7b6m>Aa{1_m&80@6Utc>;td=Cn+fV`Q5gz^Fd?H5312FJ|G%=8VDh!4D2(PEn_i((&rM#Dquqc+eq(D!H?F;+yZ5m9fPs6$OxqaDoq_PO^$u*_w4_@&p!K{o6yp#ZaJ>r*Ag;&AHEvY485$ttjbSLY-96JUT((ovn}EYKYI?H z2(ah0m!;>YY+hco(MPhJ{Nv#{u2hx8ok;%EIt4etsnaK_ChNHpU_=Q!L64`NTflLZ z457z_35B_Q#hE&2W zNWrj_roWq>iSYn&zd*w)@NY2jb_}r@815$FBTy)qryLPXiH0;krx!Ia6YkN~P7ps% zpf}A~J`OofcrnQ+v?OXwLkm^<-WKs^$Yt0?6D~0Am(ap(evM0h^Tr8dM8qP0{fYJ|6B?wej(YJMM7n8z)67v2kG^$3>HR59pyoMxM*mA9_U>ekLB%4$Kdw+cv{2|A=bcV0i}_d^ zKOAunJKU*_89U*+uvqKLlSm_bT0er$R`*wt6o#o7iSV%m9(YpNx`(KdidaI$$R7Ty zRzxXuXuWn~<=|n1I=YDYv&XG%WfGE_@EKJb)5nN%){v0tf@l2ZnXA7X9YumAugW47 zx^l$n5&!r_SO54&Rq7W;gmn`$=ekCSON(rUo8w$yUW6ae&PE z4oO!>v)kReHg}w8q|2xA3l}DBM;f~Bo*YxD{DM<|aUyxG__Fi%BB6N7<3enT0o25) zg^PC%?Z9fD?O?hYr>+L z))|D^eIbyla33XF)-|DVrek_Abc>>sM7kUk&dt0nHC^HFC*)6KEbQIg0U=>lR;7CY zYmkl6M!PGH+3Hl_biTgbsNaf0n-t=eg)Oyigc>+?QM3qYx**=AIpB2C3(ed}odvy2j0`^x((dIK)g}sR^Rlf@%SF$P}sk&eGi@9}N zaqBqg3^yNVZgnaKI+B5_cIO+Q&_7K-I72Xe>B{DlnZEKF?1bynG2!o5Srx6f^IQ3L=k9u z6Zb-;D!K^F>fm%=E7AHT8n!CA9G&FrC+w>dHu(e-L7^r4VuXK1!h=oUi0P9G+a%Hq z!_&iaWxvsmb>*WeER-Higg+elVBHxV*QGMAQVIUFSqt>h(b0%o+otLu*>%4;&weT* zLsD>@m=+a&kv3~Q3Czn70_?{qxphJUQ?FPcM93O_W6fs9_Q2fhBEJJPlP`G)B z3y!mP4^mY`D)eZ9Lmsh)(a}F+;pUh`-v>HVrH!}^=<01#utHKBZgY!~3NgYDCoFfi z@mC}6tKrkBZSH|rv~-O*eT9qg12{txO7)d(?vY5vNVtFtEJsfcH|e-QeuQwt31#}W zia4Eed0b!?4!Ot_M@1}{2uw7U9e!x~@#vqy9X@SsaU@oHr+5hnk9JVPg%meW$VzKIQSyCd}{)zwSZzuY_Ax= zs)!znOn7Ym8=;*;PAKt-RFci_%N|K9l{h#DBpkyYaiKzCnFuBhCV+w+qeb1q&7on4 zlS4ysyhJL7%6&Srd;AJ1#Ma`e;bO5zu)Mf_VAalGg|N&av5mhNabLuliEt<`Y(Fiw zAGBVjDQUf`$4i92+Q#=IL5tn}s0ZtXO>GtDPtu{dsg3_bdwUOBV4J(I7KJwuK9f=2 z&Y$;)8&auA2X4vfWg}c8wU@I4TrqmfWsf9{6MeYc=Oo9kzekRP7LRDzaXw5piHc!% z13){8JNS#qyURyMM+J6(FUZvfaZT`u`)3HD1vRP@!r+2mTc)C_Sy_%pF@+!0aJV4M z!Bs1=Tu0>Qqjv8zceL%T6K+)Nd~6fn6mrb5SUQ%6_fQjoZ9R&64+ z2_Np(dU3iWewNqax{6dG)w;G4-A!AOG7Viw!rgk0_=|THTbV*fhPG&o6)RcE=Ymsj zT!wO?4@(%GS*ro9Ow2{hm`i?>6)k+8iIPtG&evVO69TZycA{=EAICmo6);SmYseWmb#MTdsiR>Id{T>!G-lmy=`ZmHuV#q``Sc|D+rAfCklth_Wc%5`% zSF-|5-hCIyzD}fu0uf99%q-z%vaT2HqnCegw3ok9(aXP&_p3v_{0De`i1|mlUS7oe zpu3mUDks3N$k)L?@3^bpOD|QDjB2&JTWD>hxy!4J@aziiQ z2bHhEU*&Rdil3Ny9D!ZCdkxROK>z0mZ+KlTHe(!n z1%3K)$p$mwj)zZ=FB3ohyAmB5EDj+dan5Ws$$N$3shSj&nE_=?HH<`jaG6-?V`GNU zovsZ@Jqo2Y%)T^+H@k;-;7ABNC*bH_iPhQ65AR6e(Oj|dmF}i#p1IjQl1jDnO?pwsI!3E+CdQy42!pz5P)`d`p=&GCLf}6@MgidiiEGD)S#2?z3mu}Ga&f{%@{7L9=Cb^0s7%sP zF~0NEGd+x~!?C-S>1Vr5eClsP!{VHbWzt6<$x$cO(%Ft;^xV|G<4xgz7MBUeY}YtV z5bExJQ>f0qcg7(Uii6iO*XgE=Fxw^_S|mr41ll&$i9FfmTOF zI^wt8E|Eue2zM`eD5R4QMtPgi*Sowa!p*~6umC-kZLB?9U@1u0krBhra-V5CoLcb) zr9gWVEk4gM^vxT6naNM_l@RB{5*}BayJ*>Q#VQluL4}5$)a-H2blAc5c0bd{z9Q&@ zD>xgM00Rmt+2gb^u5R0WYl)2RBbvqt`bf}E6zWM+6v3ULo02YH6H|ocAlI|6lD6IR z7lazq0`<=>2#=YZ=*3Cs#Yq!4;n!T)f*x_5Rw$dAGI5nMcS-Zxl=&QKo}|pLk7N8F zvepUtccgi*GXK>$;TrTEQRdG`^hcC=^EkmgEoI^+Wxh_LuTthNX?~kBpCiqal=<}} z$v%D&$NYasn)fR6UrF;L%KVw6t|^9>G$j!p>$m1hN|)T4q$*06P|*d^^swtN_97RU zPP|fU)8ea&kWo!kZSAISD&eP^6kLK5*XX%mQl~0f5?#eqi(iaUjnf_l`hnZMIZQw&i(`Sps27n=By@tcnd)px8_C3c;@;{~-4uPICC?^(9IlwV)Yue`II zU$F+3IFn=5veJ8b$33eZt5>ZOme;Ndx$kr@D=l5UV!dbCniVDM?{s@sm#?{(>JP_y*` 
zqrWP!(MK;Ef&nAA8okCUFx5P;wZ6eyZOp0;dX0g4VAsj1|A(k)(%f3{-8c zu4Spg+P<-=!Ry5zE}jf+ z4ZtNiqvRMxRkO8*ysvqviULET>#KuxUMzyKzMUZIKTA2rq1(($)99Y?X==zGyJs?%i6op^p5$k@;JSiq^#6;N9lk z8W0vXF1xM4+gM+>4ePeWj|3a%XEvs1HqJNJY$a!_0>(@~NeJI<^p!Vyy;~Y-wgK!) zfLO1pO#yEM*e4i`S*VCWfickVAo0PC&C+TvR4Q*t{Wg?QO|^GvUCoAt!t{Vq$P47H z{g+iHxkusrzsn4ZbWGcNfYToW>`zbCm{O7)fasBtF$H-M zh?tDj7s<|Re#q&K02Zc#v;pCfksbx<17eqq^ef0iK%A11`xN9wAO-+PSwK64mtkoV zoc1KA*8&UxsziS=WAYiYfG`2_WP(W{lmOuYtd$A-6oMa!9e`Gu(4r6y0dW{`TqbnL z1Z@ux{Q%~bh?fy%qC3dx(*TT;g_k^eVpxfnHGoRMEe;2OXaTg!gc}YY1mY;*6rc|Y z;g-oE_5*PhFaqGM$;e76fpz+eK!`H3LqW!Y&>oWXwkXI{ATnfRw~RE{fG7rdhyY(J0c6bJ1ELAA z7cc&;bc0d4_HvE&_%D9CBq;(qi~A>IuNqNJ*?Scz~z_>;-s$G%1FU1JMr{0knWn zED^4D`R_WV36%%x?yY&qTjH(qbL+R%tRFicdzb*f3J2O)z8mwe@s)d*(H){HAaQew z-haSDE=f8WyB4kfR$|w}Rf|+FHLDT`x{8I@uPqwb{e=@3il+|C{{@`-}op z!Se-AI|HtVTpJ6|7OIO%i=HWZy$DK;97mU`EYlOFBc_-s-yAl-YW|~nrsbQK`z^JW z?^&8GAvg*k8M zoXvL>=$#9kPUkGww_O`tn_Y>8lMCk;awh!02pDTHy>G6toVPq~J7UYWziS_|YjcWo z^735yPv#%Xzg&>ySnRml(d&5IG2j?=7@RYmp**MCS?>J4^GD9-of}-}U0enfHQCy2 zAKLzIyJY{|UYb*$b6?JPbE35!Y_Bn@~C9YDJ&(-5P7jj*76&J27 ztSmfL*j;$JFt5m2bYIc&qLY-u764Y*WU`r#nL16!O{Yv*=4`XcoQDNFYCdK@ZZ=vn zELj$l<&dSra@carl4?!E(jBzwZ3f$3+diAtuCwdysrFs=R{LK20XvtY$i z&0-DY53p!s`D_YdiD_`%Ovz3qj0OTmS$7 delta 6377 zcma)Ae_T}8m49y>^nn9BU@!=%gJ9GM$iNIU%r9u_+p>wXeV}7Zqb)>82E>HwU`Zki zWUv87TFmv+X1g{^8X8S&S`w@=E$NPiMRu2LNn;F6G$mbAA40a%E}KrWld#`=XC5*9 zwey*C-#zDh&yRcVz31JTjAk9Kn$zU0hc{KoXEC@L56+$K7rg6ACC<2h+Mndgo`jheBN3z~c_ zM##r=vQi6u^{qYt`7Sincm;Wf&oxqa)W|*QTFB1+3z`~GKIkq|s96^e zqnsApVX_MxoHHAblE2qvCf!&D0Aa8J9mCIQYWVh50>{m~s7I5ZiRSG@G>^mW%wCL} z3CE%F$D=7+hvwdFG3(XH;fxM{w0g>bM*)^P~jKZ#eRM(iG`~rt&WI7{VF)~zkl_>CkqS6Sv07s zfJax2Y0AphPliniPw*P03y`@K2LMJYPdjic>3T(6KckRqMY&g4%;bPSpdg zPjw|V^2(|NwwAJzm_Xh)rmy@68w_#E%7^iczoPQ5NBMIq|3Z}ip33({`NJxIAf9v< zr?1?l^6n^Js`3T#JN#$0r~CA*z5^3%>8@na>s$WN)M$TiriP{v)+e zAC+WuKvFqN7e;q9NZweNMsm|u-$ucszx8FPiY7YjWa<}KxOVUs`uuIa}r`Y5C5POV_ zvPW==Jw_28*C!3h-i}7t>5bayktY3Zlbr7%ZwR9T?#U`8D~ddOdIK-|owVhA`s8z- zPmviN+u%8DgcgSG=S1wc&y?5>pYXR$a4qp;af8s}XEVLpPPlr3!CGLNmK9hj8c6cF zL^|f^zxr0GJ`(8xT1aPZN3{WIJn+HZi#m=RPq`Qew|ieBO2+!!{u>Jm0k_b@4^aKR zuT8KIN)?+{Bd2`-1}Rvb8BZsP(0n(Dy!xIfr&07b$k6ImzKCqjH1nzC8<|JqJ2qn+ zr#zlYu4g{Wr;zTf+tL0!YYW2$^^tnS- zeV%Z2BiG0+r0a@^bFDSaaY>q>vkXW&D2!Gq1{B7%-`2$aCy8J81P*LGncOJsi_kHB zQUv8ds9H$RF5<-=Y0lp^#~oQtYekE;I5_J>G4Nj$dpu!ZDoTgHRm=I}FG=%syUik< z^VEpHZ60~QB!wtLr6CIUN+HzI4Nn?U3O+a87B}-EFz@ISSQ_yUbmdUNls?ZiN{%Aa zFo?_W78zcz7o`8i@lk3M$#2#-@V_Qyx&K_0f+YA`1oTT*&Fk$@EGk-~6- zWGk)(wq<|G4(&yb4oex+iZe9BN*p_d7vATnyVM?S3bL3-LUD2J`E(L3D@uYh4?q)c zWM%NvSPEHrPd<7V0I$$Z50kD+`Scp|gW|q-Ou8NzlRj7&lm6%#lWw4|%#BGic-{m* zs~eN%(ErdqCjFN%CjDt|qYmz?`=sl#k`kTa(e_|VYAnl2!J^-pDY~8_b&32bfkl`1 zk4cwd@-4W$0vB(iKLwY+1OHCTnDnkVCS8T>8nE}^{`crl!~Oe0V-lTOpZju^@_l9v zMG~Gp?SlVeQnfN1QKvjx>&>QqvCkF#qTlUfQV6#G3_pK??f*u99)AA^&yQgLub5tV zZ(}@Nh(1rKS~1@+{n_|{G>=;Z-opFc z^K}7_=yxP=$z9(;Qua&pb;~3is&MRHgHxq#TfN^M(FZ(X|B(be=UWK}Og%FDK7)4z z++nu6bnILze}{_|Hgb;?^0$RJpJ~4b+ETbU;12a{7h#CLtHXgxy7NPHQ$mdz@H%?1 zk^kXcnyZYN-PmyCn`_DtdYY#0UGj^~cO2-V14K4J@b-g2D6*0f6(pYG|Y4r14b0Ny5ujpctC)6+)v(wW{6)r*hXBQeU!edu)$~& zp8drgeEY{NUzm?u=-bKbqX&i_)fGeh%vFE^+)Jvoc8`up{tXfwTdrj_%^wOv3pPA1 zF1ns7EfjtV_>vT+H|0LucN#0`xr-nx%G?wUnO=9+(4`e(^fQln zJqcP>O8&>hlV6&9`QumyBCHx{Y28KY49Z@)3N469` z+Ad&vFoapMGETQ)Y#-6VEw*Sj{ElAES4b!7)&=yP)n02ag37&e%S5PHX<-r+;_&Rt zOh3gd(~&!(a@?=C#&%Yf@+vBff88i8hNzY%jQZ7E%wF=jAks=NT^SVIQ3 zCboA*arFv{`WTt{0I_?Iv7#YXD|OKkP|x$8gwT7MBJ^0Jd|>>3)N$Rj`<0G{?d0AeJfIG1%C`Q0P^5E;sCXR+CUAUi;!Oi{SovDD6x9@Y4seEFqL<>~id-}ox2`|2KEJDk&5u6eS$dAR%Vh<145v7K=l%{83Z1%j`| 
z&%j#z(7T}7p3x6rU$-rzz!9h10dj*yqG(522?N`fOQEN zR6!X)(-Lr61-k$YE&(}1be=*Gz&Hp?v`4s%&ea&BSI`N8A|M*kbW}ywi~Be+17rYE zU_b?mAh3eA#Q-jc*_HLXA*cg2$AFNk(FMUV(8(Ae#x#U85R8CWSd-{XtIRw)+PWAb zqGO0L`h0X8pi0me0^S3G7t|aB76a~ppdT~{sz6N5YQQ59On|0A%>V{eAPj-Fo)h&T z0{~eKI3EHBhz6{e(WwKqf@mZad9g&CfM5`GCI&15T+91!dTF@H_ybb(HSsJ|N3Ul4+;pl}Rmjsar!gPgbtL`P3X=de?>LQokKitCx} zo#Gw{yfI-&9oY&&S4>zG8>v45!2oCo)C}Qd6p;1f5KMvQKr)0wDxmdZRv-hYAHq-! z5KRzNf@pX}m^Nm)Q(pr?E9eABhOk4OEL6ab=v6$6q2e<&DB-p!)iO}-yxAd@30gd ztgmZ0SijHL@bE*12WlId>gx;z1%{SG4G%qNI9UH+OH=JZ!-3kSrib@!ec)hyeIaUV z|8k;ubi=K?@_BOlo;686Z^I#sdBm`mvQS}QR1a}sjA@{Z@E|9@16$9p@M#eIqA z+W6rMn)Uo}^LI~Z+hrL0va!q9Ydmgz);MHLELm5wu_V95RN^SPv*c$b>88yluj!kn zM@>DZ@0y-A#hFvhS>_z`9poO6j%I>!tIh36?BNj>T!&W%;`0QA?NQc)R6GOPsOWBzHLf2XPd3&X0yk#*ZQ(`!Wt>N-FCNa%=WHr+O}ZR z+gIDIcDKFS{+RtA?e{w-9KUr;Io@+zcieP@93MI8pT+oc9kwpp3EOp>Vhh_U?d{v` zd+cZIL-wFubm$#<4xgjd(GA6`jyZ?JS>cqO1I|I`S*O+|xH4Q`SCi|AYr-|@nsybH zo65J9pD2H-{B(J^JW|dz)5YL8UD2MRnxbIQXi=VVlQG|DGIkq}8T*Y-8Rv{4K3jagy?CT}v^b+AyW~hoN6B=_^^$F-O4Di68PkyIyh&#k&3eqb+3Yj7 znva+}%$Lnm=Bwsu^L4Ye)KOYdx~;Ub^knHkDQD4I1dC{?vv@5{7N2F@GGUptOj+`+ zMOKs5YIRuqttYJW*042V)t2op+f!Ck<}Dj3YY&!B051_lu3DuUv7K>FRpoX*M7jOvq^`6nAQYD|95#5S3qnQQVfE`A`* zK3STPbMjP1fyt8?yY#b%7 delta 121 zcmew$_Cahym>AbP1_m%L0@6Ut`2~b0=5$Ve&%`#FpP6g&F)n@}%|2O}(P6R&v*=_l zMxn`583iU!V(ij?$Hl-<1jJuB85k;n_zwpI11|#u10w?y0|x^;0~-S?0}Fz+*_%m~ F698;87}@{; From 9a5189aa834a62a8ee96867693d622ab0d6d158e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:42:22 -0800 Subject: [PATCH 32/41] add vs2019 project filters --- ide/vs2019/mimalloc-override.vcxproj.filters | 72 +++++++++++++++++++ ide/vs2019/mimalloc.vcxproj.filters | 75 ++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 ide/vs2019/mimalloc-override.vcxproj.filters create mode 100644 ide/vs2019/mimalloc.vcxproj.filters diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters new file mode 100644 index 00000000..bc1e4c60 --- /dev/null +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + Header Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {f1fccf27-17b9-42dd-ba51-6070baff85c6} + + + {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} + + + \ No newline at end of file diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters new file mode 100644 index 00000000..b2282df3 --- /dev/null +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -0,0 +1,75 @@ + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {2b556b10-f559-4b2d-896e-142652adbf0c} + + + {852a14ae-6dde-4e95-8077-ca705e97e5af} + + + \ No newline at end of file From 5c8721f0b80f6f5218f37b102f50c31ddbdda7a7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:59:54 -0800 Subject: [PATCH 33/41] update documentation --- readme.md | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 
17 deletions(-) diff --git a/readme.md b/readme.md index 0d11db16..44f62230 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,7 @@ Enjoy! ### Releases +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. @@ -138,6 +139,10 @@ target_link_libraries(myapp PUBLIC mimalloc-static) ``` to link with the static library. See `test\CMakeLists.txt` for an example. +For best performance in C++ programs, it is also recommended to override the +global `new` and `delete` operators. For convenience, mimalloc provides +[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in +a single(!) source file in your project. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -188,18 +193,18 @@ or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly +- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory. - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions - show in the working set even though usually just a small part is committed to physical memory. This is why it - turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better + show in the working set even though usually just a small part is committed to physical memory. This is why it + is turned off by default on Windows, as it does not look good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks. - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at - startup and can give quite a performance improvement on long running workloads. Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented. Still experimental.
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 @@ -211,7 +216,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically ## Dynamic override -This is the recommended way to override the standard malloc interface. +This is the recommended way to override the standard malloc interface. ### Linux, BSD @@ -244,29 +249,29 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this +Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). ### Windows On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). -Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available -in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). -The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc. +DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available +in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to +mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some -call to the mimalloc API in the `main` function, like `mi_version()` +call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic -overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected. +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. -(Note: in principle, it should be possible to patch existing executables -that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into -the import table (and putting `mimalloc-redirect.dll` in the same folder) -Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). +(Note: in principle, it is possible to patch existing executables +that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder) +Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). ## Static override @@ -282,6 +287,12 @@ object file. For example: > gcc -o myprogram mimalloc-override.o myfile1.c ... 
``` +Another way to override statically that works on all platforms is to +link statically to mimalloc (as shown in the introduction) and include a +header file in each source file that re-defines `malloc` etc. to `mi_malloc`. +This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably, though, if all sources are +under your control; otherwise mixing of pointers from different heaps may occur! + # Performance From b820009df733afdd933cc70d29392593da837466 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:09:34 -0800 Subject: [PATCH 34/41] update documentation --- test/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/readme.md b/test/readme.md index b74364ff..db3524cd 100644 --- a/test/readme.md +++ b/test/readme.md @@ -1,7 +1,7 @@ Testing allocators is difficult as bugs may only surface after particular allocation patterns. The main approach to testing _mimalloc_ is therefore to have extensive internal invariant checking (see `page_is_valid` in `page.c` -for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`. +for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`. The main testing strategy is then to run [`mimalloc-bench`][bench] using full invariant checking to catch any potential problems over a wide range of intensive allocation benchmarks and programs. From d55ab50a84250e335337724b6e002fd349e35226 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:10:04 -0800 Subject: [PATCH 35/41] update version to 1.2 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 03316948..9d78b5a0 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 1) +set(mi_version_minor 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index bc817f54..7f26896c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license.
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 110 // major + 2 digits minor +#define MI_MALLOC_VERSION 120 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8bf36521..a80dde58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 7586225fc5c6327e4b16a0abd2b4d75c37e497f6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:10:21 -0800 Subject: [PATCH 36/41] add secure build to azure pipeline --- azure-pipelines.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 79228c41..41d67f86 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -35,22 +35,32 @@ jobs: CC: gcc CXX: g++ BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON Release: CC: gcc CXX: g++ BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + CC: gcc + CXX: g++ + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON Debug Clang: CC: clang CXX: clang++ BuildType: debug-clang - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON Release Clang: CC: clang CXX: clang++ BuildType: release-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure Clang: + CC: clang + CXX: clang++ + BuildType: secure-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 From c6c24f9c2efb793a201e531057f25ea914792d3c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:17:47 -0800 Subject: [PATCH 37/41] update documentation --- readme.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 44f62230..e5a870b8 100644 --- a/readme.md +++ b/readme.md @@ -37,7 +37,7 @@ Notable aspects of the design include: programs. - __secure__: _mimalloc_ can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various - heap vulnerabilities. The performance penalty is only around 3% on average + heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. @@ -65,7 +65,7 @@ Enjoy! ## Windows -Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build. +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -98,7 +98,7 @@ maintains detailed statistics as: This will name the shared library as `libmimalloc-debug.so`. 
Finally, you can build a _secure_ version that uses guard pages, encrypted -free lists, etc, as: +free lists, etc., as: ``` > mkdir -p out/secure > cd out/secure @@ -141,8 +141,7 @@ to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the global `new` and `delete` operators. For convience, mimalloc provides -[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in -a single(!) source file in your project. +[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -264,7 +263,9 @@ mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. +for an example on how to use this. For best performance on Windows with C++, it +is highly recommended to also override the `new`/`delete` operations (as described +in the introduction). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. From f27c87c03cac0b5344c5f715377478375e145b3f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:19:47 -0800 Subject: [PATCH 38/41] update documentation --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index e5a870b8..f68d79a1 100644 --- a/readme.md +++ b/readme.md @@ -56,7 +56,7 @@ Enjoy! ### Releases -* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode. +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. From b7d9ee8830f1e77eba002f26ac65f498e5cce0e6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:20:35 -0800 Subject: [PATCH 39/41] azure pipeline logo shows dev branch --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index f68d79a1..feee8704 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,7 @@ -[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) +[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) # mimalloc From 42a0666770688c4c39197320712e9d5c9bcc9dd7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:24:10 -0800 Subject: [PATCH 40/41] update documentation --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index feee8704..9d3974c9 100644 --- a/readme.md +++ b/readme.md @@ -56,7 +56,7 @@ Enjoy! ### Releases -* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). 
+* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. From 41caf6d0f8487ff856f6a10adf4a7fb016df9341 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:29:46 -0800 Subject: [PATCH 41/41] set secure default to 0 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9c5d3c19..96e1860f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode
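
Editor's note (not part of the patch series above): the free-list hardening introduced in PATCH 30 encodes each block's `next` pointer by XOR-ing it with a per-heap cookie, and substitutes a per-page `null` sentinel for real `NULL` links so that the raw cookie value can never appear inside a user block (since `cookie ^ 0 == cookie`). The following is a minimal, self-contained C sketch of that idea; the type and function names are simplified stand-ins and do not reproduce mimalloc's internal API.

```c
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for a free-list block whose `next` field is stored encoded. */
typedef struct block_s {
  uintptr_t next;   /* XOR-encoded link */
} block_t;

/* Encode a link: replace a real NULL by the per-page sentinel first, so the
   stored value is never equal to the bare cookie. */
static void block_set_next(const void* null, block_t* block, const block_t* next, uintptr_t cookie) {
  if (next == NULL) next = (const block_t*)null;
  block->next = (uintptr_t)next ^ cookie;
}

/* Decode a link: XOR with the cookie again and map the sentinel back to NULL. */
static block_t* block_next(const void* null, const block_t* block, uintptr_t cookie) {
  block_t* b = (block_t*)(block->next ^ cookie);
  return ((const void*)b == null) ? NULL : b;
}

int main(void) {
  int page_header;                              /* stand-in for the page used as the `null` sentinel */
  const void* null = &page_header;
  uintptr_t cookie = (uintptr_t)0xa5a5a5a5u;    /* in mimalloc this is a random per-heap value */
  block_t a, b;

  block_set_next(null, &a, &b, cookie);         /* a -> b            */
  block_set_next(null, &b, NULL, cookie);       /* b -> end of list  */

  printf("a -> b   : %d\n", block_next(null, &a, cookie) == &b);
  printf("b -> NULL: %d\n", block_next(null, &b, cookie) == NULL);
  return 0;
}
```

The `mi_block_next` shown in the patch additionally checks that the decoded pointer lies in the same page as the block (via `mi_is_in_same_page`), so most free-list corruptions are reported as an immediate fatal error rather than causing silent memory corruption.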
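
A second editorial sketch, relating to the Windows dynamic-override notes in PATCH 33 and 37 above: the readme recommends forcing the mimalloc DLL to load by calling some mimalloc API function (such as `mi_version()`) from `main`, or by passing `/INCLUDE:mi_version` to the linker. A minimal program using that pattern might look as follows, assuming the executable is linked against the mimalloc DLL and `mimalloc-redirect.dll` sits next to `mimalloc-override.dll` at runtime.

```c
#include <stdio.h>
#include <stdlib.h>
#include <mimalloc.h>

int main(void) {
  /* Any call into the mimalloc API ensures the DLL is loaded, which lets the
     redirection DLL reroute the C runtime allocation functions at startup. */
  printf("mimalloc version: %d\n", mi_version());

  void* p = malloc(64);   /* serviced by mimalloc once redirection is active */
  free(p);
  return 0;
}
```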