move in_full and has_aligned into the page thread id to allow a single test in mi_free

daan 2019-08-08 15:23:18 -07:00
parent 55778d2fe4
commit 6596e970a5
6 changed files with 50 additions and 35 deletions

View File

@@ -314,6 +314,10 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
   return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
 }
+static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
+  return (page->flags.padding << MI_PAGE_FLAGS_BITS);
+}
 // -------------------------------------------------------------------
 // Encoding/Decoding the free list next pointers
 // -------------------------------------------------------------------
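The new `mi_page_thread_id` recovers the owning thread id from the upper bits of the flags word: `padding` holds bits 2 and up, so shifting it back left by `MI_PAGE_FLAGS_BITS` yields the stored id with the two flag bits zeroed. A minimal standalone sketch of the same arithmetic, assuming a little-endian layout and a hypothetical thread id (simplified union, not the mimalloc types themselves):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_FLAGS_BITS 2   // low bits reserved for in_full / has_aligned

typedef union page_flags_u {          // simplified stand-in for mi_page_flags_t
  uintptr_t threadidx;                // thread id | has_aligned | in_full
  struct {
    uintptr_t in_full     : 1;
    uintptr_t has_aligned : 1;
    uintptr_t padding     : (sizeof(uintptr_t)*8 - PAGE_FLAGS_BITS);
  };
} page_flags_t;

int main(void) {
  uintptr_t tid = 0x12345670;         // hypothetical id; note the low 2 bits are zero
  page_flags_t flags;
  flags.threadidx = tid;              // store the id; both flag bits stay clear
  flags.in_full = 1;                  // later: mark the page as full

  // `padding` holds bits 2..N, so shifting left by 2 recovers the
  // thread id with the flag bits masked off, exactly like the helper above.
  uintptr_t recovered = (uintptr_t)flags.padding << PAGE_FLAGS_BITS;
  assert(recovered == tid);
  printf("threadidx=%#zx recovered=%#zx\n", (size_t)flags.threadidx, (size_t)recovered);
  return 0;
}

This only works if real thread ids have their two low bits clear; otherwise storing the id would silently set the flags.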

View File

@@ -123,14 +123,26 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;
+// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
+// This allows a single test in `mi_free` to check for unlikely cases
+// (namely, non-local free, aligned free, or freeing in a full page)
+#define MI_PAGE_FLAGS_BITS (2)
 typedef union mi_page_flags_u {
-  uint16_t value;
+  uintptr_t threadidx;
   struct {
-    bool has_aligned;
-    bool in_full;
+#ifdef MI_BIG_ENDIAN
+    uintptr_t padding     : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS);
+    uintptr_t in_full     : 1;
+    uintptr_t has_aligned : 1;
+#else
+    uintptr_t in_full     : 1;
+    uintptr_t has_aligned : 1;
+    uintptr_t padding     : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS);
+#endif
   };
 } mi_page_flags_t;
 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
 typedef uintptr_t mi_thread_free_t;
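The comment above is the heart of the commit: because the owner's thread id occupies the high bits and is stored with both flag bits zero, a single full-word comparison `tid == flags.threadidx` is false whenever the free is non-local, the page is full, or the page may contain aligned blocks. A hedged standalone illustration (illustrative names, not the mimalloc implementation):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef union page_flags_u {
  uintptr_t threadidx;                // owner id (low 2 bits zero) | flags
  struct {
    uintptr_t in_full     : 1;
    uintptr_t has_aligned : 1;
    uintptr_t padding     : (sizeof(uintptr_t)*8 - 2);
  };
} page_flags_t;

// The fast-path test: true only if the caller owns the page AND the
// page is neither full nor may hold aligned blocks.
static bool can_free_fast(page_flags_t flags, uintptr_t tid) {
  return tid == flags.threadidx;
}

int main(void) {
  uintptr_t owner = 0x1000;                  // hypothetical owner id
  page_flags_t f = { .threadidx = owner };
  printf("%d\n", can_free_fast(f, owner));   // 1: local, not full, not aligned
  printf("%d\n", can_free_fast(f, 0x2000));  // 0: non-local free
  f.in_full = 1;
  printf("%d\n", can_free_fast(f, owner));   // 0: page is full
  f.in_full = 0; f.has_aligned = 1;
  printf("%d\n", can_free_fast(f, owner));   // 0: page may hold aligned blocks
  return 0;
}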
@@ -160,16 +172,16 @@ typedef struct mi_page_s {
   bool is_reset:1;               // `true` if the page memory was reset
   bool is_committed:1;           // `true` if the page virtual memory is committed
-  // layout like this to optimize access in `mi_malloc` and `mi_free`
-  mi_page_flags_t flags;
+  // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity;             // number of blocks committed
   uint16_t reserved;             // number of blocks reserved in memory
   // 16 bits padding
   mi_block_t* free;              // list of available free blocks (`malloc` allocates from this list)
 #if MI_SECURE
   uintptr_t cookie;              // random cookie to encode the free lists
 #endif
   size_t used;                   // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  mi_page_flags_t flags;         // threadid:62 | has_aligned:1 | in_full:1
   mi_block_t* local_free;        // list of deferred free blocks by this thread (migrates to `free`)
   volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free`
@@ -182,10 +194,10 @@ typedef struct mi_page_s {
   struct mi_page_s* prev;        // previous page owned by this thread with the same `block_size`
   // improve page index calculation
-#if MI_INTPTR_SIZE==8
-  //void* padding[1];            // 12 words on 64-bit
+#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
+  void* padding[1];              // 12 words on 64-bit
 #elif MI_INTPTR_SIZE==4
-  void* padding[1];              // 12 words on 32-bit
+  // void* padding[1];           // 12 words on 32-bit
 #endif
 } mi_page_t;
@@ -215,7 +227,7 @@ typedef struct mi_segment_s {
   // layout like this to optimize access in `mi_free`
   size_t page_shift;             // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
-  uintptr_t thread_id;           // unique id of the thread owning this segment
+  volatile uintptr_t thread_id;  // unique id of the thread owning this segment
   mi_page_kind_t page_kind;      // kind of pages: small, large, or huge
   mi_page_t pages[1];            // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
 } mi_segment_t;
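`thread_id` gains a `volatile` qualifier, presumably because it is now written from other threads while `mi_free` reads it concurrently: it is cleared in `mi_segment_os_free` and re-stamped when an abandoned segment is reclaimed (see the segment changes below). As an illustration only, a stricter modern rendering of that access pattern would use C11 relaxed atomics rather than `volatile` (this is not what mimalloc does here):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct segment_s {
  _Atomic uintptr_t thread_id;    // owner id; 0 once the segment is released
} segment_t;

// Reader side: compare against the calling thread's id.
static int is_owned_by(segment_t* seg, uintptr_t tid) {
  return atomic_load_explicit(&seg->thread_id, memory_order_relaxed) == tid;
}

// Writer side: another thread releasing (or re-stamping) the segment.
static void release_segment(segment_t* seg) {
  atomic_store_explicit(&seg->thread_id, 0, memory_order_relaxed);
}

int main(void) {
  segment_t seg;
  atomic_init(&seg.thread_id, 0x1000);        // hypothetical owner id
  printf("%d\n", is_owned_by(&seg, 0x1000));  // 1
  release_segment(&seg);
  printf("%d\n", is_owned_by(&seg, 0x1000));  // 0
  return 0;
}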

View File

@@ -223,8 +223,7 @@ void mi_free(void* p) mi_attr_noexcept
     return;
   }
 #endif
-  bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance
   mi_page_t* page = _mi_segment_page_of(segment, p);
 #if (MI_STAT>1)
@@ -237,23 +236,17 @@ void mi_free(void* p) mi_attr_noexcept
 #endif
   // adjust if it might be an un-aligned block
-  if (mi_likely(page->flags.value==0)) { // not full or aligned
+  uintptr_t tid = _mi_thread_id();
+  if (mi_likely(tid == page->flags.threadidx)) { // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
-    if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
-      // owning thread can free a block directly
-      mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
-      page->local_free = block;
-      page->used--;
-      if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
-    }
-    else {
-      // use atomic operations for a multi-threaded free
-      _mi_free_block_mt(page, block);
-    }
+    mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
+    page->local_free = block;
+    page->used--;
+    if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
   }
   else {
-    // aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, local, p);
+    // non-local, aligned blocks, or a full page; use the more generic path
+    mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
   }
 }
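Restructured this way, the fast path of `mi_free` is one word compare followed by an intrusive push onto the page-local free list; every unlikely case (non-local free, full page, aligned block) falls through to the generic path, which receives the locality bit recomputed via `mi_page_thread_id`. A compilable miniature of the control flow, with simplified types and a hypothetical `free_generic` stub in place of the real slow path:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct block_s { struct block_s* next; } block_t;

typedef struct page_s {
  uintptr_t threadidx;     // owner id | has_aligned | in_full, as in mi_page_flags_t
  block_t*  local_free;    // blocks freed by the owning thread
  size_t    used;
} page_t;

// Stub for the unlikely cases (non-local free, full page, aligned block).
static void free_generic(page_t* page, void* p, int is_local) {
  (void)page;
  printf("generic path: p=%p local=%d\n", p, is_local);
}

static void my_free(page_t* page, void* p, uintptr_t tid) {
  if (tid == page->threadidx) {        // the single test
    block_t* block = (block_t*)p;
    block->next = page->local_free;    // push onto the local free list
    page->local_free = block;
    page->used--;
  }
  else {
    // mask off the two flag bits to recover the owner, as mi_page_thread_id does
    free_generic(page, p, tid == (page->threadidx & ~(uintptr_t)3));
  }
}

int main(void) {
  block_t b;
  page_t page = { .threadidx = 0x1000, .local_free = NULL, .used = 1 };
  my_free(&page, &b, 0x1000);          // fast path: owner, no flags set
  page.threadidx |= 1;                 // set the in_full bit
  my_free(&page, &b, 0x1000);          // generic path: full page, still local
  return 0;
}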

View File

@@ -12,15 +12,16 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
-  0, false, false, false, {0}, 0, 0,
-  NULL, 0, // free, used
+  0, false, false, false, 0, 0,
+  NULL, // free
 #if MI_SECURE
   0,
 #endif
+  0, {0}, // used, flags
   NULL, 0, 0,
   0, NULL, NULL, NULL
-#if (MI_INTPTR_SIZE==4)
-  , { NULL }
+#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
+  , { NULL }
 #endif
 };

View File

@@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_assert_internal(page->block_size > 0);
   mi_assert_internal(page->used <= page->capacity);
   mi_assert_internal(page->capacity <= page->reserved);
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
+  mi_assert_internal(segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
@@ -458,7 +459,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
   heap->random = _mi_random_shuffle(rnd);
 }
-static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats)
+static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
 {
   UNUSED(stats);
   mi_assert_internal(page->free == NULL);
@@ -524,7 +525,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
   // and append the extend the free list
   if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
-    mi_page_free_list_extend(heap, page, extend, stats );
+    mi_page_free_list_extend(page, extend, stats );
   }
   else {
     mi_page_free_list_extend_secure(heap, page, extend, stats);

View File

@@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
+  segment->thread_id = 0;
   mi_segments_track_size(-((long)segment_size),tld);
   if (mi_option_is_enabled(mi_option_secure)) {
     _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
@@ -412,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
   mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment));
   mi_assert(segment->next == NULL);
   mi_assert(segment->prev == NULL);
-  _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
-  segment->thread_id = 0;
+  _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
   // update reset memory statistics
   /*
@@ -618,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
   }
   else {
     // otherwise reclaim it
+    page->flags.threadidx = segment->thread_id;
     _mi_page_reclaim(heap,page);
   }
 }
@@ -648,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
   page->segment_in_use = true;
+  page->flags.threadidx = segment->thread_id;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -687,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
+  page->flags.threadidx = segment->thread_id;
   return page;
 }
@@ -698,6 +701,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
+  page->flags.threadidx = segment->thread_id;
   return page;
 }
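Each of the four sites above stamps `page->flags.threadidx` with the segment's `thread_id` at the moment a page is put into use or reclaimed; since the id is written over the whole word, this also clears `in_full` and `has_aligned`, establishing the invariant that page.c now asserts (`segment->thread_id == mi_page_thread_id(page)`). A small sketch of that invariant under reclaim, with hypothetical names:

#include <assert.h>
#include <stdint.h>

typedef struct page_s    { uintptr_t threadidx; } page_t;
typedef struct segment_s { uintptr_t thread_id; page_t page; } segment_t;

// Recover the owner id by masking off in_full/has_aligned (cf. mi_page_thread_id).
static uintptr_t page_thread_id(const page_t* page) {
  return page->threadidx & ~(uintptr_t)3;
}

// Reclaiming stamps the page with the new owner and clears both flag bits.
static void reclaim_page(segment_t* seg, uintptr_t new_owner) {
  seg->thread_id = new_owner;
  seg->page.threadidx = new_owner;
}

int main(void) {
  segment_t seg = {0};
  reclaim_page(&seg, 0x4000);          // hypothetical thread id, low bits zero
  assert(seg.thread_id == page_thread_id(&seg.page));
  return 0;
}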