diff --git a/CMakeLists.txt b/CMakeLists.txt index b293f8c3..782a7d55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,10 @@ add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} NO_SONAME "YES" OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) -target_include_directories(mimalloc PRIVATE include PUBLIC $) +target_include_directories(mimalloc PUBLIC + $ + $ +) target_link_libraries(mimalloc PUBLIC ${mi_libraries}) # static library @@ -132,7 +135,11 @@ else() endif() target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) -target_include_directories(mimalloc-static PRIVATE include PUBLIC $) + +target_include_directories(mimalloc-static PUBLIC + $ + $ +) target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) # install static and shared library, and the include files @@ -148,9 +155,12 @@ install(FILES "$" DESTINATION lib) # duplicate the .so in add_library(mimalloc-obj OBJECT src/static.c) target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) -target_include_directories(mimalloc-obj PRIVATE include PUBLIC $) +target_include_directories(mimalloc-obj PUBLIC + $ + $ +) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION} +install(FILES $ DESTINATION ${mi_install_dir} RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} ) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 3d2a1985..d8c44e1d 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -720,9 +720,9 @@ to link with the static library. See `test\CMakeLists.txt` for an example. 
You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) -and statistics (`MIMALLOC_STATS=1`) (in the debug version): +and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): ``` -> env MIMALLOC_STATS=1 ./cfrac 175451865205073170563711388363 +> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363 175451865205073170563711388363 = 374456281610909315237213 * 468551 @@ -791,7 +791,7 @@ env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` or run with the debug version to get detailed statistics: ``` -env MIMALLOC_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram +env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` ### Windows diff --git a/docs/using.html b/docs/using.html index 40f45522..c5d91910 100644 --- a/docs/using.html +++ b/docs/using.html @@ -103,7 +103,7 @@ $(document).ready(function(){initNavTree('using.html','');});

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

-

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_STATS=1) (in the debug version):

> env MIMALLOC_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb

The above model of using the mi_ prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

+

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb

The above model of using the mi_ prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

See Overriding Malloc for more info.

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 36b6915c..3e0df1ee 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && defined(MI_INTERPOSE) +#if defined(MI_MALLOC_OVERRIDE) && defined(__APPLE__) #define MI_TLS_RECURSE_GUARD #endif @@ -51,7 +51,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -212,7 +212,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page_size); + return _mi_segment_page_start(segment, page, page->block_size, page_size); } // Get the page containing the pointer diff --git a/readme.md b/readme.md index c957ac75..85234c24 100644 --- a/readme.md +++ b/readme.md @@ -134,9 +134,9 @@ to link with the static library. See `test\CMakeLists.txt` for an example. 
You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) -and statistics (`MIMALLOC_STATS=1`) (in the debug version): +and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): ``` -> env MIMALLOC_STATS=1 ./cfrac 175451865205073170563711388363 +> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363 175451865205073170563711388363 = 374456281610909315237213 * 468551 @@ -203,7 +203,7 @@ env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` or run with the debug version to get detailed statistics: ``` -env MIMALLOC_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram +env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` ### Windows diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index 6a48a15c..d4f8b06d 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#if defined(MI_MALLOC_OVERRIDE) +#if defined(MI_MALLOC_OVERRIDE) #if !defined(__APPLE__) #error "this file should only be included on macOS" @@ -21,6 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #include +#include // memset #if defined(MAC_OS_X_VERSION_10_6) && \ MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 @@ -65,7 +66,7 @@ static void zone_destroy(malloc_zone_t* zone) { // todo: ignore for now? 
} -static size_t zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, size_t count) { +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { size_t i; for (i = 0; i < count; i++) { ps[i] = zone_malloc(zone, size); @@ -74,7 +75,7 @@ static size_t zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, siz return i; } -static void zone_batch_free(malloc_zone_t* zone, void** ps, size_t count) { +static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { for(size_t i = 0; i < count; i++) { zone_free(zone, ps[i]); ps[i] = NULL; @@ -149,7 +150,7 @@ static malloc_zone_t* mi_get_default_zone() { // The first returned zone is the real default malloc_zone_t** zones = NULL; - size_t count = 0; + unsigned count = 0; kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); if (ret == KERN_SUCCESS && count > 0) { return zones[0]; diff --git a/src/alloc-override.c b/src/alloc-override.c index 068b1fb3..3c9a19c1 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -25,7 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) // use aliasing to alias the exported function to one of our `mi_` functions - #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))); + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))) #define MI_FORWARD1(fun,x) MI_FORWARD(fun) #define MI_FORWARD2(fun,x,y) MI_FORWARD(fun) #define MI_FORWARD0(fun,x) MI_FORWARD(fun) @@ -50,14 +50,20 @@ terms of the MIT license. 
A copy of the license can be found in the file MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), - MI_INTERPOSE_MI(free) + MI_INTERPOSE_MI(free), + MI_INTERPOSE_MI(strdup), + MI_INTERPOSE_MI(strndup), + MI_INTERPOSE_MI(realpath) }; #else // On all other systems forward to our API - void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size) - void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n) - void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize) - void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p) + void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size); + void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n); + void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize); + void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p); + char* strdup(const char* s) MI_FORWARD1(mi_strdup, s); + char* strndup(const char* s, size_t n) MI_FORWARD2(mi_strndup, s, n); + char* realpath(const char* fname, char* resolved_name) MI_FORWARD2(mi_realpath, fname, resolved_name); #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) @@ -75,14 +81,14 @@ terms of the MIT license. 
A copy of the license can be found in the file // see // ------------------------------------------------------ #include - void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p) - void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p) - void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n) - void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n) + void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p); + void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p); + void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n); + void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n); #if (__cplusplus >= 201703L) - void* operator new( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align) - void* operator new[]( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align) + void* operator new( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align); + void* operator new[]( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align); #endif #else // ------------------------------------------------------ @@ -91,16 +97,16 @@ terms of the MIT license. A copy of the license can be found in the file // used by GCC and CLang). 
// See // ------------------------------------------------------ - void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete - void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p); // delete + void _ZdaPv(void* p) MI_FORWARD0(mi_free,p); // delete[] #if (MI_INTPTR_SIZE==8) - void* _Znwm(uint64_t n) MI_FORWARD1(mi_malloc,n) // new 64-bit - void* _Znam(uint64_t n) MI_FORWARD1(mi_malloc,n) // new[] 64-bit + void* _Znwm(uint64_t n) MI_FORWARD1(mi_malloc,n); // new 64-bit + void* _Znam(uint64_t n) MI_FORWARD1(mi_malloc,n); // new[] 64-bit void* _Znwmm(uint64_t n, uint64_t align) { return mi_malloc_aligned(n,align); } // aligned new 64-bit void* _Znamm(uint64_t n, uint64_t align) { return mi_malloc_aligned(n,align); } // aligned new[] 64-bit #elif (MI_INTPTR_SIZE==4) - void* _Znwj(uint32_t n) MI_FORWARD1(mi_malloc,n) // new 32-bit - void* _Znaj(uint32_t n) MI_FORWARD1(mi_malloc,n) // new[] 32-bit + void* _Znwj(uint32_t n) MI_FORWARD1(mi_malloc,n); // new 32-bit + void* _Znaj(uint32_t n) MI_FORWARD1(mi_malloc,n); // new[] 32-bit void* _Znwjj(uint32_t n, uint32_t align) { return mi_malloc_aligned(n,align); } // aligned new 32-bit void* _Znajj(uint32_t n, uint32_t align) { return mi_malloc_aligned(n,align); } // aligned new[] 32-bit #else @@ -126,10 +132,11 @@ extern "C" { #define ENOMEM 12 #endif -void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) -size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p) -size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) -void cfree(void* p) MI_FORWARD0(mi_free, p) +void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize); +size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p); +size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p); +void cfree(void* p) MI_FORWARD0(mi_free, p); + int posix_memalign(void** p, size_t alignment, size_t size) { // TODO: the spec says we should return EINVAL also if alignment is not a power of 
2. @@ -169,12 +176,12 @@ void* reallocarray( void* p, size_t count, size_t size ) { // BSD } #if defined(__GLIBC__) && defined(__linux__) - // forward __libc interface (needed for redhat linux) - void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) - void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) - void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) - void __libc_free(void* p) MI_FORWARD0(mi_free,p) - void __libc_cfree(void* p) MI_FORWARD0(mi_free,p) + // forward __libc interface (needed for glibc-based Linux distributions) + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size); + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size); + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size); + void __libc_free(void* p) MI_FORWARD0(mi_free,p); + void __libc_cfree(void* p) MI_FORWARD0(mi_free,p); void* __libc_memalign(size_t alignment, size_t size) { return memalign(alignment,size); diff --git a/src/init.c b/src/init.c index d30684fb..9f64c439 100644 --- a/src/init.c +++ b/src/init.c @@ -103,7 +103,7 @@ mi_heap_t _mi_heap_main = { NULL, 0, 0, - 0, + 0xCDCDCDCDCDCDCDL, 0, false // can reclaim }; @@ -356,12 +356,18 @@ static void mi_process_done(void); void mi_process_init(void) mi_attr_noexcept { // ensure we are called once if (_mi_process_is_initialized) return; + // access _mi_heap_default before setting _mi_process_is_initialized to ensure + // that the TLS slot is allocated without getting into recursion on macOS + // when using dynamic linking with interpose. 
+ mi_heap_t* h = _mi_heap_default; _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id); + uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; + #ifndef __APPLE__ _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + #endif _mi_heap_main.random = _mi_random_shuffle(random); #if (MI_DEBUG) _mi_verbose_message("debug level : %d\n", MI_DEBUG); diff --git a/src/os.c b/src/os.c index e95d47c9..3d49d4a9 100644 --- a/src/os.c +++ b/src/os.c @@ -199,6 +199,11 @@ static void* mi_os_mem_alloc(void* addr, size_t size, bool commit, int extra_fla flags |= MAP_FIXED; #endif } + int pflags = (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE); + #if defined(PROT_MAX) + pflags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + if (large_os_page_size > 0 && use_large_os_page(size, 0) && ((uintptr_t)addr % large_os_page_size) == 0) { int lflags = flags; #ifdef MAP_ALIGNED_SUPER @@ -212,12 +217,12 @@ static void* mi_os_mem_alloc(void* addr, size_t size, bool commit, int extra_fla #endif if (lflags != flags) { // try large page allocation - p = mmap(addr, size, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE), lflags, -1, 0); - if (p == MAP_FAILED) p = NULL; + p = mmap(addr, size, pflags, lflags, -1, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } if (p == NULL) { - p = mmap(addr, size, (commit ? 
(PROT_READ | PROT_WRITE) : PROT_NONE), flags, -1, 0); + p = mmap(addr, size, pflags, flags, -1, 0); if (p == MAP_FAILED) p = NULL; } if (addr != NULL && p != addr) { diff --git a/src/page.c b/src/page.c index 63e0768d..fae67ef2 100644 --- a/src/page.c +++ b/src/page.c @@ -73,10 +73,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->block_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - + mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -98,7 +98,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(page->cookie != 0); if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(segment->thread_id == page->heap->thread_id); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || page->flags.in_full); @@ -172,19 +172,19 @@ void _mi_page_free_collect(mi_page_t* page) { // free the local free list if (page->local_free != NULL) { - if (mi_likely(page->free == NULL)) { + if (mi_likely(page->free == NULL)) { // usual case page->free = page->local_free; } else { mi_block_t* tail = page->free; mi_block_t* next; - while ((next = mi_block_next(page, tail)) != NULL) { - tail = next; + while ((next = mi_block_next(page, tail)) != NULL) { + tail = next; } mi_block_set_next(page, tail, page->local_free); } - page->local_free = NULL; + page->local_free = 
NULL; } // and the thread free list if (page->thread_free.head != 0) { // quick test to avoid an atomic operation @@ -380,7 +380,7 @@ void _mi_page_retire(mi_page_t* page) { /* ----------------------------------------------------------- Initialize the initial free list in a page. - In secure mode we initialize a randomized list by + In secure mode we initialize a randomized list by alternating between slices. ----------------------------------------------------------- */ @@ -393,7 +393,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e UNUSED(stats); void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; - mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); + mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { // initialize a sequential free list mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1); @@ -411,7 +411,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e // set up `slice_count` slices to alternate between size_t shift = MI_MAX_SLICE_SHIFT; while ((extend >> shift) == 0) { - shift--; + shift--; } size_t slice_count = (size_t)1U << shift; size_t slice_extend = extend / slice_count; @@ -419,12 +419,12 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) 
- // and initialize the free list by randomly threading through them + // and initialize the free list by randomly threading through them // set up first element size_t current = _mi_heap_random(heap) % slice_count; counts[current]--; @@ -436,16 +436,16 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e size_t round = i%MI_INTPTR_SIZE; if (round == 0) rnd = _mi_random_shuffle(rnd); // select a random next slice index - size_t next = ((rnd >> 8*round) & (slice_count-1)); + size_t next = ((rnd >> 8*round) & (slice_count-1)); while (counts[next]==0) { // ensure it still has space next++; if (next==slice_count) next = 0; } // and link the current block to it - counts[next]--; + counts[next]--; mi_block_t* block = blocks[current]; blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block - mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` current = next; } mi_block_set_next( page, blocks[current], NULL); // end of the list @@ -462,7 +462,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e #define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. 
#if MI_SECURE -#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many +#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else #define MI_MIN_EXTEND (1) #endif @@ -490,10 +490,10 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_stat_increase( stats->pages_extended, 1); // calculate the extend count - size_t extend = page->reserved - page->capacity; + size_t extend = page->reserved - page->capacity; size_t max_extend = MI_MAX_EXTEND_SIZE/page->block_size; if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; - + if (extend > max_extend) { // ensure we don't touch memory beyond the page to reduce page commit. // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. @@ -514,14 +514,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert(segment != NULL); + mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, &page_size); + _mi_segment_page_start(segment, page, block_size, &page_size); page->block_size = block_size; - mi_assert_internal(block_size>0); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random(heap) | 1; mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); diff --git a/src/segment.c b/src/segment.c index ac5f995e..8892662e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -108,16 +108,25 @@ static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) } } -// Start of the page available memory -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* 
_mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; if (page->segment_idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; + // the first page starts after the segment info (and possible guard page) + p += segment->segment_info_size; + psize -= segment->segment_info_size; + // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + if (block_size > 0 && segment->page_kind == MI_PAGE_SMALL) { + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); + } } long secure = mi_option_get(mi_option_secure); if (secure > 1 || (secure == 1 && page->segment_idx == segment->capacity - 1)) { @@ -125,7 +134,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); } - + if (page_size != NULL) *page_size = psize; mi_assert_internal(_mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); @@ -338,7 +347,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // Available memory in a page static size_t mi_page_size(const mi_page_t* page) { size_t psize; - _mi_segment_page_start(_mi_page_segment(page), page, &psize); + _mi_page_start(_mi_page_segment(page), page, &psize); return psize; } #endif @@ -422,7 +431,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, &psize); + uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; if (inuse > 0) { _mi_mem_reset(start, psize, stats); // TODO: just `inuse`?