clean up candidate search; add mi_collect_reduce

This commit is contained in:
daanx 2024-11-25 16:58:02 -08:00
parent b898dbe3e0
commit 9b7ac9a1a6
5 changed files with 58 additions and 18 deletions

View File

@ -116,7 +116,7 @@
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<ConformanceMode>Default</ConformanceMode> <ConformanceMode>Default</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=4;MI_GUARDED=1;%(PreprocessorDefinitions);</PreprocessorDefinitions> <PreprocessorDefinitions>MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs> <CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode> <SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard> <LanguageStandard>stdcpp20</LanguageStandard>

View File

@ -148,6 +148,7 @@ typedef void (mi_cdecl mi_error_fun)(int err, void* arg);
mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg);
mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexcept;
mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept;
mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept;
mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept;

View File

@ -259,6 +259,14 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
heap->page_count++; heap->page_count++;
} }
// Make `page` the first page of `queue`.
// Expects (checked by internal assertions) that `page` belongs to `heap` and is
// already contained in `queue`. Nothing happens if the page is already first.
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
  mi_assert_internal(mi_page_heap(page) == heap);
  mi_assert_internal(mi_page_queue_contains(queue, page));
  if (queue->first != page) {
    // unlink the page from its current position and push it onto the queue again
    mi_page_queue_remove(queue, page);
    mi_page_queue_push(heap, queue, page);
    mi_assert_internal(queue->first == page);
  }
}
static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) { static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) {
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
@ -335,7 +343,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
// note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`)
mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page);
} }
// Only called from `mi_heap_absorb`. // Only called from `mi_heap_absorb`.

View File

@ -471,6 +471,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
// how to check this efficiently though... // how to check this efficiently though...
// for now, we don't retire if it is the only page left of this size class. // for now, we don't retire if it is the only page left of this size class.
mi_page_queue_t* pq = mi_page_queue_of(page); mi_page_queue_t* pq = mi_page_queue_of(page);
#if MI_RETIRE_CYCLES > 0
const size_t bsize = mi_page_block_size(page); const size_t bsize = mi_page_block_size(page);
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue? if (pq->last==page && pq->first==page) { // the only page in the queue?
@ -486,7 +487,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
return; // don't free after all return; // don't free after all
} }
} }
#endif
_mi_page_free(page, pq, false); _mi_page_free(page, pq, false);
} }
@ -753,6 +754,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
size_t candidate_count = 0; // we reset this on the first candidate to limit the search size_t candidate_count = 0; // we reset this on the first candidate to limit the search
mi_page_t* page_candidate = NULL; // a page with free space mi_page_t* page_candidate = NULL; // a page with free space
mi_page_t* page = pq->first; mi_page_t* page = pq->first;
while (page != NULL) while (page != NULL)
{ {
mi_page_t* next = page->next; // remember next mi_page_t* next = page->next; // remember next
@ -764,7 +766,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// collect freed blocks by us and other threads // collect freed blocks by us and other threads
_mi_page_free_collect(page, false); _mi_page_free_collect(page, false);
#if defined(MI_MAX_CANDIDATE_SEARCH) #if MI_MAX_CANDIDATE_SEARCH > 1
// search up to N pages for a best candidate // search up to N pages for a best candidate
// is the local free list non-empty? // is the local free list non-empty?
@ -783,7 +785,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
page_candidate = page; page_candidate = page;
candidate_count = 0; candidate_count = 0;
} }
else if (!mi_page_is_expandable(page) && page->used >= page_candidate->used) { else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) {
page_candidate = page; page_candidate = page;
} }
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate // if we find a non-expandable candidate, or searched for N pages, return with the best candidate
@ -792,7 +794,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
break; break;
} }
} }
#else #else
// first-fit algorithm // first-fit algorithm
// If the page contains free blocks, we are done // If the page contains free blocks, we are done
if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) { if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) {
@ -803,7 +805,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// queue so we don't visit long-lived pages too often. // queue so we don't visit long-lived pages too often.
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page, pq); mi_page_to_full(page, pq);
#endif #endif
page = next; page = next;
} // for each page } // for each page
@ -828,10 +830,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
} }
} }
else { else {
// mi_assert(pq->first == page); // move the page to the front of the queue
mi_page_queue_move_to_front(heap, pq, page);
page->retire_expire = 0; page->retire_expire = 0;
// _mi_heap_collect_retired(heap, false); // update retire counts; note: increases rss on MemoryLoad bench so don't do this
} }
mi_assert_internal(page == NULL || mi_page_immediate_available(page)); mi_assert_internal(page == NULL || mi_page_immediate_available(page));
return page; return page;
} }
@ -839,7 +845,9 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// Find a page with free blocks of `size`. // Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_queue_t* pq = mi_page_queue(heap, size);
// check the first page: we even do this with candidate search or otherwise we re-search every time
mi_page_t* page = pq->first; mi_page_t* page = pq->first;
if (page != NULL) { if (page != NULL) {
#if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
@ -858,6 +866,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
return page; // fast path return page; // fast path
} }
} }
return mi_page_queue_find_free_ex(heap, pq, true); return mi_page_queue_find_free_ex(heap, pq, true);
} }

View File

@ -979,6 +979,13 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
_mi_arena_field_cursor_done(&current); _mi_arena_field_cursor_done(&current);
} }
// Is the segment count in `tld` still below the `mi_option_target_segments_per_thread` option?
// The option is read through `mi_option_get_clamp` with bounds 0 and 1024; a value of 0
// always yields `true`. When `ptarget` is not NULL the option value is also stored in `*ptarget`.
static bool segment_count_is_within_target(mi_segments_tld_t* tld, size_t* ptarget) {
  const size_t limit = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread, 0, 1024);
  if (ptarget != NULL) {
    *ptarget = limit;
  }
  if (limit == 0) {
    return true;
  }
  return (tld->count < limit);
}
static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) { static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) {
// limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries. // limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries.
const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100); const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100);
@ -1001,7 +1008,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
mi_segment_t* segment = NULL; mi_segment_t* segment = NULL;
mi_arena_field_cursor_t current; mi_arena_field_cursor_t current;
_mi_arena_field_cursor_init(heap, tld->subproc, false /* non-blocking */, &current); _mi_arena_field_cursor_init(heap, tld->subproc, false /* non-blocking */, &current);
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL)) while (segment_count_is_within_target(tld,NULL) && (max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
{ {
mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
segment->abandoned_visits++; segment->abandoned_visits++;
@ -1026,7 +1033,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
break; break;
} }
else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { else if (segment->abandoned_visits > 3 && is_suitable) {
// always reclaim on 3rd visit to limit the abandoned segment count. // always reclaim on 3rd visit to limit the abandoned segment count.
mi_segment_reclaim(segment, heap, 0, NULL, tld); mi_segment_reclaim(segment, heap, 0, NULL, tld);
} }
@ -1087,15 +1094,11 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t
// try abandon segments. // try abandon segments.
// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. // this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use.
static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { static void mi_segments_try_abandon_to_target(mi_heap_t* heap, size_t target, mi_segments_tld_t* tld) {
const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); if (target <= 1) return;
// we call this when we are about to add a fresh segment so we should be under our target segment count.
if (target == 0 || tld->count < target) return;
const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75%
// todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages
for (int i = 0; i < 16 && tld->count >= min_target; i++) { for (int i = 0; i < 64 && tld->count >= min_target; i++) {
mi_page_t* page = heap->pages[MI_BIN_FULL].first; mi_page_t* page = heap->pages[MI_BIN_FULL].first;
while (page != NULL && mi_page_is_huge(page)) { while (page != NULL && mi_page_is_huge(page)) {
page = page->next; page = page->next;
@ -1109,6 +1112,25 @@ static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) {
} }
} }
// Try to abandon segments once the segment count is no longer within the per-thread target.
// Meant to be called from `reclaim_or_alloc`, so we know all segments are (about) fully in use.
static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) {
  // a fresh segment is about to be added, so the count should be under the target first.
  size_t target = 0;
  const bool within_target = segment_count_is_within_target(tld, &target);
  if (!within_target) {
    mi_segments_try_abandon_to_target(heap, target, tld);
  }
}
// Force a collection (`mi_collect(true)`) and then try to abandon segments of the
// default heap down to a target segment count.
// The target is `target_size / MI_SEGMENT_SIZE`; when that is 0, the
// `mi_option_target_segments_per_thread` option (read with bounds 1 and 1024) is used instead.
void mi_collect_reduce(size_t target_size) mi_attr_noexcept {
  mi_collect(true);
  mi_heap_t* const default_heap = mi_heap_get_default();
  mi_segments_tld_t* const segments_tld = &default_heap->tld->segments;
  size_t target_count = target_size / MI_SEGMENT_SIZE;
  if (target_count == 0) {
    // no usable size given: fall back to the configured per-thread target
    target_count = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread, 1, 1024);
  }
  mi_segments_try_abandon_to_target(default_heap, target_count, segments_tld);
}
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Reclaim or allocate Reclaim or allocate