diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index be0304f2..450e0b4d 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -426,6 +426,7 @@ typedef struct mi_segment_s {
 
   // from here is zero initialized
   struct mi_segment_s* next;   // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
+  bool   was_reclaimed;        // true if it was reclaimed (used to limit on-free reclamation)
 
   size_t abandoned;            // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t abandoned_visits;     // count how often this segment is visited in the abandoned list (to force reclaim it it is too long)
@@ -597,7 +598,7 @@ typedef struct mi_stats_s {
   mi_stat_counter_t searches;
   mi_stat_counter_t normal_count;
   mi_stat_counter_t huge_count;
-  mi_stat_counter_t large_count;  
+  mi_stat_counter_t large_count;
   mi_stat_counter_t arena_count;
   mi_stat_counter_t arena_crossover_count;
   mi_stat_counter_t arena_rollback_count;
@@ -653,6 +654,7 @@ typedef struct mi_segments_tld_s {
   size_t peak_count;    // peak number of segments
   size_t current_size;  // current size of all segments
   size_t peak_size;     // peak size of all segments
+  size_t reclaim_count;// number of reclaimed (abandoned) segments
   mi_stats_t* stats;    // points to tld stats
   mi_os_tld_t* os;      // points to os stats
 } mi_segments_tld_t;
diff --git a/src/init.c b/src/init.c
index bff84e00..0511e723 100644
--- a/src/init.c
+++ b/src/init.c
@@ -132,7 +132,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
   0,
   false,
   NULL, NULL,
-  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
+  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
   { 0, tld_empty_stats }, // os
   { MI_STATS_NULL }       // stats
 };
@@ -149,7 +149,7 @@ extern mi_heap_t _mi_heap_main;
 static mi_tld_t tld_main = {
   0, false,
   &_mi_heap_main, & _mi_heap_main,
-  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
+  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
   { 0, &tld_main.stats }, // os
   { MI_STATS_NULL }       // stats
 };
@@ -248,7 +248,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
       is_zero = memid.initially_zero;
     }
   }
-  
+
   if (td != NULL && !is_zero) {
     _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid));
   }
@@ -427,23 +427,23 @@ void mi_thread_done(void) mi_attr_noexcept {
   _mi_thread_done(NULL);
 }
 
-void _mi_thread_done(mi_heap_t* heap) 
+void _mi_thread_done(mi_heap_t* heap)
 {
   // calling with NULL implies using the default heap
-  if (heap == NULL) { 
-    heap = mi_prim_get_default_heap(); 
+  if (heap == NULL) {
+    heap = mi_prim_get_default_heap();
     if (heap == NULL) return;
   }
 
   // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699)
   if (!mi_heap_is_initialized(heap)) {
-    return; 
+    return;
   }
 
   // adjust stats
   mi_atomic_decrement_relaxed(&thread_count);
   _mi_stat_decrease(&_mi_stats_main.threads, 1);
-  
+
   // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
   if (heap->thread_id != _mi_thread_id()) return;
 
@@ -465,7 +465,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
 
   // ensure the default heap is passed to `_mi_thread_done`
   // setting to a non-NULL value also ensures `mi_thread_done` is called.
-  _mi_prim_thread_associate_default_heap(heap);  
+  _mi_prim_thread_associate_default_heap(heap);
 }
@@ -625,7 +625,7 @@ static void mi_cdecl mi_process_done(void) {
 
   // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
   _mi_prim_thread_done_auto_done();
-  
+
   #ifndef MI_SKIP_COLLECT_ON_EXIT
     #if (MI_DEBUG || !defined(MI_SHARED_LIB))
     // free all memory if possible on process exit. This is not needed for a stand-alone process
diff --git a/src/segment.c b/src/segment.c
index 7ada9b80..edeef48f 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -380,6 +380,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   segment->thread_id = 0;
   _mi_segment_map_freed_at(segment);
   mi_segments_track_size(-((long)mi_segment_size(segment)),tld);
+  if (segment->was_reclaimed) {
+    tld->reclaim_count--;
+    segment->was_reclaimed = false;
+  }
   if (MI_SECURE>0) {
     // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
     // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted
@@ -1075,7 +1079,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   }
 
   // perform delayed decommits (forcing is much slower on mstress)
-  // Only abandoned segments in arena memory can be reclaimed without a free 
+  // Only abandoned segments in arena memory can be reclaimed without a free
   // so if a segment is not from an arena we force purge here to be conservative.
   const bool force_purge = (segment->memid.memkind != MI_MEM_ARENA) || mi_option_is_enabled(mi_option_abandoned_page_purge);
   mi_segment_try_purge(segment, force_purge, tld->stats);
@@ -1085,6 +1089,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
   segment->thread_id = 0;
   segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
+  if (segment->was_reclaimed) {
+    tld->reclaim_count--;
+    segment->was_reclaimed = false;
+  }
   _mi_arena_segment_mark_abandoned(segment);
 }
 
@@ -1171,6 +1179,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
   mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id());
   mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
   segment->abandoned_visits = 0;
+  segment->was_reclaimed = true;
+  tld->reclaim_count++;
   mi_segments_track_size((long)mi_segment_size(segment), tld);
   mi_assert_internal(segment->next == NULL);
   _mi_stat_decrease(&tld->stats->segments_abandoned, 1);
@@ -1229,6 +1239,9 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
 // attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`)
 bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
   if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
+  // don't reclaim more from a free than half the current segments
+  // this is to prevent a pure free-ing thread to start owning too many segments
+  if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
   if (_mi_arena_segment_clear_abandoned(segment)) { // atomically unabandon
     mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments);
     mi_assert_internal(res == segment);
diff --git a/test/test-stress.c b/test/test-stress.c
index e338c165..15d0e25b 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -37,11 +37,12 @@ static int ITER = 50; // N full iterations destructing and re-creating a
 
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int SCALE   = 100;  // scaling factor
 
-#define STRESS   // undefine for leak test  
+#define STRESS   // undefine for leak test
 
 static bool   allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100)
 static size_t use_one_size = 0;           // use single object size of `N * sizeof(uintptr_t)`?
+static bool   main_participates = false;  // main thread participates as a worker too
 
 // #define USE_STD_MALLOC
 #ifdef USE_STD_MALLOC
@@ -301,14 +302,15 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
   thread_entry_fun = fun;
   DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
   HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
-  for (uintptr_t i = 1; i < nthreads; i++) {
+  const size_t start = (main_participates ? 1 : 0);
+  for (size_t i = start; i < nthreads; i++) {
     thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
   }
-  fun(0); // run the main thread as well
-  for (size_t i = 1; i < nthreads; i++) {
+  if (main_participates) fun(0); // run the main thread as well
+  for (size_t i = start; i < nthreads; i++) {
     WaitForSingleObject(thandles[i], INFINITE);
   }
-  for (size_t i = 1; i < nthreads; i++) {
+  for (size_t i = start; i < nthreads; i++) {
     CloseHandle(thandles[i]);
   }
   custom_free(tids);
@@ -335,12 +337,13 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
   thread_entry_fun = fun;
   pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
   memset(threads, 0, sizeof(pthread_t) * nthreads);
+  const size_t start = (main_participates ? 1 : 0);
   //pthread_setconcurrency(nthreads);
-  for (size_t i = 1; i < nthreads; i++) {
+  for (size_t i = start; i < nthreads; i++) {
     pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
   }
-  fun(0); // run the main thread as well
-  for (size_t i = 1; i < nthreads; i++) {
+  if (main_participates) fun(0); // run the main thread as well
+  for (size_t i = start; i < nthreads; i++) {
     pthread_join(threads[i], NULL);
   }
   custom_free(threads);
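Note on the core change: `_mi_segment_attempt_reclaim` now refuses to adopt an abandoned segment from the free path once more than half of the calling thread's current segments were themselves obtained by reclaiming (`reclaim_count * 2 > count`). A minimal sketch of that condition is below; the helper name `reclaim_on_free_allowed` is illustrative only and not part of mimalloc.

  // Sketch (not mimalloc code): restates the guard added in _mi_segment_attempt_reclaim.
  // `reclaim_count` counts segments this thread obtained by reclaiming abandoned
  // segments; `count` is the total number of segments it currently owns.
  static inline bool reclaim_on_free_allowed(const mi_segments_tld_t* segments) {
    // allow on-free reclamation only while reclaimed segments are at most half of all owned segments
    return (segments->reclaim_count * 2 <= segments->count);
  }

This keeps a thread that mostly frees (for example, the consumer side of a producer/consumer workload) from owning an ever-growing set of reclaimed segments; `mi_segment_abandon` and `mi_segment_os_free` decrement `reclaim_count` again when a reclaimed segment is re-abandoned or released.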