small optimizations, use bitwise alignment

This commit is contained in:
daan 2019-07-22 20:51:12 -07:00
parent 7c26ce9280
commit f0530b6a83
9 changed files with 59 additions and 32 deletions

View File

@ -87,6 +87,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
# Extra compile flags that are only appended when the C compiler is GNU gcc.
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wno-invalid-memory-model)
list(APPEND mi_cflags -fvisibility=hidden)
# NOTE(review): confirm that every supported gcc release still accepts this optimization flag.
list(APPEND mi_cflags -fbranch-target-load-optimize )
endif()
endif()

View File

@ -39,7 +39,6 @@ bool _mi_preloading(); // true while the C runtime is not ready
// os.c
size_t _mi_os_page_size(void);
uintptr_t _mi_align_up(uintptr_t sz, size_t alignment);
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
@ -165,6 +164,20 @@ static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) {
#endif
}
// Align upwards
// Test whether `x` has at most one bit set.
// Yields 1 when `x` is a power of two (and also when `x == 0`), and 0 otherwise.
static inline uintptr_t _mi_is_power_of_two(uintptr_t x) {
  const uintptr_t below = x - 1;  // and-ing with `x` clears the lowest set bit of `x`
  return ((x & below) != 0 ? 0 : 1);
}
// Round `sz` up to the nearest multiple of `alignment`.
// A power-of-two `alignment` is handled with a bit mask; any other
// alignment falls back to a division and multiplication.
// If `sz + alignment - 1` wraps around, the result is 0.
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
  const uintptr_t low_bits = alignment - 1;
  const uintptr_t bumped = sz + low_bits;
  if ((alignment & low_bits) != 0) {
    // not a power of two: general rounding
    return ((bumped / alignment) * alignment);
  }
  // power of two: clear the low bits
  return (bumped & ~low_bits);
}
// Align a byte size to a size in _machine words_,
// i.e. byte size == `wsize*sizeof(void*)`.
static inline size_t _mi_wsize_from_size(size_t size) {
@ -324,12 +337,23 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl
}
// Read the successor of `block` in a free list of `page`.
// With MI_SECURE enabled the page's cookie is passed to `mi_block_nextx`;
// otherwise a zero cookie is used and `page` is not accessed.
static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) {
#if MI_SECURE
  const uintptr_t cookie = page->cookie;
#else
  const uintptr_t cookie = 0;
  UNUSED(page);
#endif
  return mi_block_nextx(cookie, block);
}
// Store `next` as the successor of `block` in a free list of `page`.
// With MI_SECURE enabled the page's cookie is passed to `mi_block_set_nextx`;
// otherwise a zero cookie is used and `page` is not accessed.
static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) {
#if MI_SECURE
  const uintptr_t cookie = page->cookie;
#else
  const uintptr_t cookie = 0;
  UNUSED(page);
#endif
  mi_block_set_nextx(cookie, block, next);
}
// -------------------------------------------------------------------
// Getting the thread id should be performant
// as it is called in the fast path of `_mi_free`,

View File

@ -132,10 +132,9 @@ typedef union mi_page_flags_u {
} mi_page_flags_t;
// Thread free list.
// We use bottom 2 bits of the pointer for mi_delayed_t flags
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;
// A page contains blocks of one specific size (`block_size`).
// Each page has three list of free blocks:
// `free` for blocks that can be allocated,
@ -165,9 +164,11 @@ typedef struct mi_page_s {
mi_page_flags_t flags;
uint16_t capacity; // number of blocks committed
uint16_t reserved; // number of blocks reserved in memory
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#if MI_SECURE
uintptr_t cookie; // random cookie to encode the free lists
#endif
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
@ -182,9 +183,9 @@ typedef struct mi_page_s {
// improve page index calculation
#if MI_INTPTR_SIZE==8
//void* padding[1]; // 10 words on 64-bit
//void* padding[1]; // 12 words on 64-bit
#elif MI_INTPTR_SIZE==4
void* padding[1]; // 12 words on 32-bit
void* padding[1]; // 12 words on 32-bit
#endif
} mi_page_t;

View File

@ -52,8 +52,8 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_attr_alloc_size2(s1,s2)
#else
#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
#endif
#else
#define mi_decl_thread __thread
@ -62,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_attr_malloc
#define mi_attr_alloc_size(s)
#define mi_attr_alloc_size2(s1,s2)
#define mi_cdecl
#define mi_cdecl
#endif
// ------------------------------------------------------

View File

@ -237,9 +237,9 @@ void mi_free(void* p) mi_attr_noexcept
#endif
// adjust if it might be an un-aligned block
if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance
if (mi_likely(page->flags.value==0)) { // not full or aligned
mi_block_t* block = (mi_block_t*)p;
if (mi_likely(local)) {
if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
// owning thread can free a block directly
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
page->local_free = block;
@ -248,7 +248,7 @@ void mi_free(void* p) mi_attr_noexcept
}
else {
// use atomic operations for a multi-threaded free
_mi_free_block_mt(page, block);
_mi_free_block_mt(page, block);
}
}
else {

View File

@ -12,9 +12,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, false, {0},
0, 0,
NULL, 0, 0, // free, used, cookie
0, false, false, false, {0}, 0, 0,
NULL, 0, // free, used
#if MI_SECURE
0,
#endif
NULL, 0, 0,
0, NULL, NULL, NULL
#if (MI_INTPTR_SIZE==4)

View File

@ -34,13 +34,6 @@ terms of the MIT license. A copy of the license can be found in the file
----------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
// Round `sz` up to the nearest multiple of `alignment`.
// Returns 0 if the rounded value does not fit in a `uintptr_t`.
uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
  // largest multiple of `alignment` that is not above `sz`
  uintptr_t rounded = sz - (sz % alignment);
  if (rounded < sz) {
    rounded += alignment;
    if (rounded < sz) {
      return 0;  // the addition wrapped around: overflow
    }
  }
  return rounded;
}
// Pointer variant of `_mi_align_up`: converts `p` to an integer address,
// aligns that address upwards to `alignment`, and converts it back to a pointer.
static void* mi_align_up_ptr(void* p, size_t alignment) {
  const uintptr_t address = (uintptr_t)p;
  const uintptr_t aligned = _mi_align_up(address, alignment);
  return (void*)aligned;
}

View File

@ -93,7 +93,9 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(mi_page_is_valid_init(page));
#if MI_SECURE
mi_assert_internal(page->cookie != 0);
#endif
if (page->heap!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
@ -119,7 +121,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) {
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
continue; // and try again
}
}
}
while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal
!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
@ -258,7 +260,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* next = mi_block_nextx(heap->cookie,block);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
// we might already start delayed freeing while another thread has not yet
// reset the delayed_freeing flag; in that case delay it further by reinserting.
mi_block_t* dfree;
do {
@ -498,7 +500,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
if (page->capacity >= page->reserved) return;
size_t page_size;
_mi_page_start(_mi_page_segment(page), page, &page_size);
_mi_page_start(_mi_page_segment(page), page, &page_size);
_mi_stat_increase(&stats->pages_extended, 1);
// calculate the extend count
@ -533,7 +535,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->block_size = block_size;
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
#if MI_SECURE
page->cookie = _mi_heap_random(heap) | 1;
#endif
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
@ -543,7 +547,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->flags.has_aligned == false);
#if MI_SECURE
mi_assert_internal(page->cookie != 0);
#endif
mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list
@ -683,7 +689,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size);
mi_heap_stat_increase( heap, huge, block_size);
}
}
return page;
}

View File

@ -235,8 +235,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
// and no more than 4.
#define MI_SEGMENT_CACHE_MAX (4)
// and no more than 2.
#define MI_SEGMENT_CACHE_MAX (2)
#define MI_SEGMENT_CACHE_FRACTION (8)
// note: returned segment may be partially reset
@ -252,7 +252,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
}
static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
return false;
}
@ -318,7 +318,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
// Try to get it from our thread local cache first
bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM);
bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM);
bool protection_still_good = false;
mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld);
if (segment != NULL) {
@ -702,10 +702,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if (block_size <= (MI_SMALL_PAGE_SIZE/16)*3) {
if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
page = mi_segment_small_page_alloc(tld,os_tld);
}
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/16)*3) {
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
page = mi_segment_medium_page_alloc(tld, os_tld);
}
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {