fix padding check for aligned allocation; improve perf for small aligned allocations

This commit is contained in:
daan 2020-01-31 13:20:02 -08:00
parent 7ff3ec2bf7
commit 4531367de2
5 changed files with 53 additions and 47 deletions

View File

@ -54,16 +54,19 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_ENCODE_FREELIST 1 #define MI_ENCODE_FREELIST 1
#endif #endif
// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`! // Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`!
// If free lists are encoded, the padding is checked if it was modified on free. // If free lists are encoded, the padding is checked if it was modified on free.
#if (!defined(MI_PADDING)) #if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1))
#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_PADDING
#define MI_PADDING MI_MAX_ALIGN_SIZE #endif
#if defined(MI_PADDING)
#define MI_PADDING_SIZE (2*sizeof(intptr_t))
#else #else
#define MI_PADDING 0 #define MI_PADDING_SIZE 0
#endif
#endif #endif
// ------------------------------------------------------ // ------------------------------------------------------
// Platform specific values // Platform specific values
// ------------------------------------------------------ // ------------------------------------------------------

View File

@ -18,20 +18,22 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
// note: we don't require `size > offset`, we just guarantee that // note: we don't require `size > offset`, we just guarantee that
// the address at offset is aligned regardless of the allocated size. // the address at offset is aligned regardless of the allocated size.
mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); mi_assert(alignment > 0 && alignment % sizeof(void*) == 0);
if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero);
if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
// try if there is a small block available with just the right alignment // try if there is a small block available with just the right alignment
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) {
mi_page_t* page = _mi_heap_get_free_small_page(heap,size); mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
if (mi_likely(page->free != NULL && is_aligned)) if (mi_likely(page->free != NULL && is_aligned))
{ {
#if MI_STAT>1 #if MI_STAT>1
mi_heap_stat_increase( heap, malloc, size); mi_heap_stat_increase( heap, malloc, size);
#endif #endif
void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc
mi_assert_internal(p != NULL); mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
if (zero) _mi_block_zero_init(page,p,size); if (zero) _mi_block_zero_init(page,p,size);

View File

@ -47,16 +47,19 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept
// Note: The spec dictates we should not modify `*p` on an error. (issue#27) // Note: The spec dictates we should not modify `*p` on an error. (issue#27)
// <http://man7.org/linux/man-pages/man3/posix_memalign.3.html> // <http://man7.org/linux/man-pages/man3/posix_memalign.3.html>
if (p == NULL) return EINVAL; if (p == NULL) return EINVAL;
if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment
if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2
void* q = mi_malloc_aligned(size, alignment); void* q = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
if (q==NULL && size != 0) return ENOMEM; if (q==NULL && size != 0) return ENOMEM;
mi_assert_internal(((uintptr_t)q % alignment) == 0);
*p = q; *p = q;
return 0; return 0;
} }
void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
return mi_malloc_aligned(size, alignment); void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
mi_assert_internal(((uintptr_t)p % alignment) == 0);
return p;
} }
void* mi_valloc(size_t size) mi_attr_noexcept { void* mi_valloc(size_t size) mi_attr_noexcept {
@ -73,7 +76,9 @@ void* mi_pvalloc(size_t size) mi_attr_noexcept {
void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL;
if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc> if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc>
return mi_malloc_aligned(size, alignment); void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
mi_assert_internal(((uintptr_t)p % alignment) == 0);
return p;
} }
void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD

View File

@ -43,9 +43,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
mi_heap_stat_increase(heap,normal[bin], 1); mi_heap_stat_increase(heap,normal[bin], 1);
} }
#endif #endif
#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0); mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0);
mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE);
mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]);
#endif #endif
return block; return block;
@ -53,39 +53,27 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
// allocate a small block // allocate a small block
extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(size <= MI_SMALL_SIZE_MAX); mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE));
mi_page_t* page = _mi_heap_get_free_small_page(heap,size); mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
return _mi_page_malloc(heap, page, size); void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE));
return p;
} }
extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept {
#if (MI_PADDING>0)
size += MI_PADDING;
#endif
return mi_heap_malloc_small(mi_get_default_heap(), size); return mi_heap_malloc_small(mi_get_default_heap(), size);
} }
// zero initialized small block
mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
if (p != NULL) { memset(p, 0, size); }
return p;
}
// The main allocation function // The main allocation function
extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(heap!=NULL); mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
#if (MI_PADDING>0)
size += MI_PADDING;
#endif
void* p; void* p;
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) {
p = mi_heap_malloc_small(heap, size); p = mi_heap_malloc_small(heap, size);
} }
else { else {
p = _mi_malloc_generic(heap, size); p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE);
} }
#if MI_STAT>1 #if MI_STAT>1
if (p != NULL) { if (p != NULL) {
@ -93,6 +81,7 @@ extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t siz
mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes
} }
#endif #endif
mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE));
return p; return p;
} }
@ -100,24 +89,34 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept {
return mi_heap_malloc(mi_get_default_heap(), size); return mi_heap_malloc(mi_get_default_heap(), size);
} }
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
// note: we need to initialize the whole block to zero, not just size // note: we need to initialize the whole block to zero, not just size
// or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
UNUSED_RELEASE(size); UNUSED_RELEASE(size);
mi_assert_internal(p != NULL); mi_assert_internal(p != NULL);
mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero
mi_assert_internal(_mi_ptr_page(p)==page); mi_assert_internal(_mi_ptr_page(p)==page);
if (page->is_zero) { if (page->is_zero) {
// already zero initialized memory? // already zero initialized memory?
((mi_block_t*)p)->next = 0; // clear the free list pointer ((mi_block_t*)p)->next = 0; // clear the free list pointer
mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING)); mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE));
} }
else { else {
// otherwise memset // otherwise memset
memset(p, 0, mi_page_block_size(page) - MI_PADDING); memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE);
} }
} }
// zero initialized small block
mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
if (p != NULL) {
_mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
}
return p;
}
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
void* p = mi_heap_malloc(heap,size); void* p = mi_heap_malloc(heap,size);
if (zero && p != NULL) { if (zero && p != NULL) {
@ -182,9 +181,9 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
} }
#endif #endif
#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE);
mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]);
if (decoded != block) { if (decoded != block) {
_mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size);
@ -285,7 +284,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{ {
#if (MI_DEBUG) #if (MI_DEBUG)
memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING); memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE);
#endif #endif
// and push it on the free list // and push it on the free list
@ -411,10 +410,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
if (p==NULL) return 0; if (p==NULL) return 0;
const mi_segment_t* segment = _mi_ptr_segment(p); const mi_segment_t* segment = _mi_ptr_segment(p);
const mi_page_t* page = _mi_segment_page_of(segment, p); const mi_page_t* page = _mi_segment_page_of(segment, p);
size_t size = mi_page_block_size(page); size_t size = mi_page_block_size(page) - MI_PADDING_SIZE;
#if defined(MI_PADDING)
size -= MI_PADDING;
#endif
if (mi_unlikely(mi_page_has_aligned(page))) { if (mi_unlikely(mi_page_has_aligned(page))) {
ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);

View File

@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose