diff --git a/.gitattributes b/.gitattributes index 0332e031..f083b107 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,3 +10,4 @@ *.dll binary *.lib binary *.exe binary +bin export-ignore diff --git a/CMakeLists.txt b/CMakeLists.txt index 1387e0db..0cc7e575 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clan option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF) option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF) option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF) +option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF) # deprecated options option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) @@ -128,7 +129,7 @@ endif() if(MI_SECURE) message(STATUS "Set full secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=4) + list(APPEND mi_defines MI_SECURE=4) endif() if(MI_TRACK_VALGRIND) @@ -247,7 +248,7 @@ if(MI_DEBUG_UBSAN) message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") endif() else() - message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") + message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") endif() endif() @@ -263,10 +264,18 @@ if(MI_USE_CXX) endif() endif() -if(CMAKE_SYSTEM_NAME MATCHES "Haiku") - SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) - SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers) - endif() +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") + if(MI_NO_THP) + message(STATUS "Disable transparent huge pages support (MI_NO_THP=ON)") + list(APPEND mi_defines MI_NO_THP=1) + endif() +endif() + +# On Haiku use `-DCMAKE_INSTALL_PREFIX` 
instead, issue #788 +# if(CMAKE_SYSTEM_NAME MATCHES "Haiku") +# SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) +# SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers) +# endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") @@ -468,7 +477,7 @@ if (MI_BUILD_OBJECT) set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}") set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}") add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}") - add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) + add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) endif() # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ae19cfb3..96f3922e 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -30,14 +30,17 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_weak #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_weak __attribute__((weak)) #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align +#define mi_decl_weak #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) @@ -309,6 +312,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +// Align a pointer upwards +static inline void* mi_align_up_ptr(void* p, size_t alignment) { + return (void*)_mi_align_up((uintptr_t)p, alignment); +} + + // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { mi_assert_internal(divider != 0); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 9e560696..d14b885b 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -35,10 +35,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory int _mi_prim_free(void* addr, size_t size ); - + // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. -// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) // which will later be committed explicitly using `_mi_prim_commit`. 
// `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: !commit => !allow_large @@ -82,11 +82,11 @@ mi_msecs_t _mi_prim_clock_now(void); typedef struct mi_process_info_s { mi_msecs_t elapsed; mi_msecs_t utime; - mi_msecs_t stime; - size_t current_rss; - size_t peak_rss; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; size_t current_commit; - size_t peak_commit; + size_t peak_commit; size_t page_faults; } mi_process_info_t; @@ -117,7 +117,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); //------------------------------------------------------------------- // Thread id: `_mi_prim_thread_id()` -// +// // Getting the thread id should be performant as it is called in the // fast path of `_mi_free` and we specialize for various platforms as // inlined definitions. Regular code should call `init.c:_mi_thread_id()`. @@ -125,33 +125,23 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); // for each thread (unequal to zero). //------------------------------------------------------------------- -// defined in `init.c`; do not use these directly -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? - -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; - -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on -// both the OS and libc implementation so we use specific tests for each main platform. +// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. +// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. 
// If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. -#elif defined(__GNUC__) && ( \ +// +// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays. +// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there). +#if defined(__GNUC__) && ( \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ ) +#define MI_HAS_TLS_SLOT + static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { void* res; const size_t ofs = (slot*sizeof(void*)); @@ -175,6 +165,9 @@ static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif res = tcb[slot]; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + res = pthread_getspecific(slot); #endif return res; } @@ -202,9 +195,40 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif tcb[slot] = value; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + pthread_setspecific(slot, value); #endif } +#endif + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool 
_mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +#elif defined(__has_builtin) && __has_builtin(__builtin_thread_pointer) && \ + (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms + return (uintptr_t)__builtin_thread_pointer(); +} + +#elif defined(MI_HAS_TLS_SLOT) + static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #if defined(__BIONIC__) // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id @@ -251,7 +275,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); #if defined(MI_MALLOC_OVERRIDE) #if defined(__APPLE__) // macOS #define MI_TLS_SLOT 89 // seems unused? 
- // #define MI_TLS_RECURSE_GUARD 1 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) // see #elif defined(__OpenBSD__) @@ -269,6 +292,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); #if defined(MI_TLS_SLOT) +# if !defined(MI_HAS_TLS_SLOT) +# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined" +# endif static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); diff --git a/src/alloc-override.c b/src/alloc-override.c index 873065dc..7cf0bf2c 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -259,10 +259,11 @@ extern "C" { // no forwarding here due to aliasing/name mangling issues void cfree(void* p) { mi_free(p); } void* pvalloc(size_t size) { return mi_pvalloc(size); } -void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } -int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } +void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } +// some systems define reallocarr so mark it as a weak symbol (#751) +mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } #if defined(__wasi__) // forward __libc interface (see PR #667) diff --git a/src/alloc.c b/src/alloc.c index e2273d28..8a76d3d3 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -922,9 +922,13 @@ static bool mi_try_new_handler(bool nothrow) { #endif if (h==NULL) { _mi_error_message(ENOMEM, "out of memory in 'new'"); + #if defined(_CPPUNWIND) || defined(__cpp_exceptions) // exceptions are not always enabled if (!nothrow) { 
throw std::bad_alloc(); } + #else + MI_UNUSED(nothrow); + #endif return false; } else { diff --git a/src/arena.c b/src/arena.c index e08ea22a..fc8a79c6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,6 +163,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m // success *memid = _mi_memid_create(MI_MEM_STATIC); + memid->initially_zero = true; const size_t start = _mi_align_up(oldtop, alignment); uint8_t* const p = &mi_arena_static[start]; _mi_memzero(p, size); @@ -180,8 +181,10 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st p = _mi_os_alloc(size, memid, stats); if (p == NULL) return NULL; + // zero the OS memory if needed if (!memid->initially_zero) { _mi_memzero_aligned(p, size); + memid->initially_zero = true; } return p; } @@ -480,7 +483,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t // schedule decommit mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (expire != 0) { - mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay + mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay } else { mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); @@ -524,7 +527,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (!force && expire > now) return false; // reset expire (if not already set concurrently) - mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0); + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); // potential purges scheduled, walk through the bitmap bool any_purged = false; diff --git a/src/init.c b/src/init.c index fe885a5b..fda17d70 100644 --- a/src/init.c +++ b/src/init.c @@ -425,7 +425,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { #if defined(MI_TLS_SLOT) mi_prim_tls_slot_set(MI_TLS_SLOT,heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) - 
*mi_tls_pthread_heap_slot() = heap; + *mi_prim_tls_pthread_heap_slot() = heap; #elif defined(MI_TLS_PTHREAD) // we use _mi_heap_default_key #else diff --git a/src/os.c b/src/os.c index 69ad2bf9..b98950a4 100644 --- a/src/os.c +++ b/src/os.c @@ -73,10 +73,6 @@ void _mi_os_init(void) { bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); -static void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); -} - static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c new file mode 100644 index 00000000..c0fa0f4a --- /dev/null +++ b/src/prim/emscripten/prim.c @@ -0,0 +1,251 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +// Design +// ====== +// +// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top +// of sbrk. The reason for having three layers here is that we want mimalloc to +// be able to allocate and release system memory properly, the same way it would +// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited. +// Specifically, sbrk can only go up and down, and not "skip" over regions, and +// so we end up either never freeing memory to the system, or we can get stuck +// with holes. 
+// +// At the moment wasm generally does *not* free memory back to the system: once grown, we do +// not shrink back down (https://github.com/WebAssembly/design/issues/1397). +// However, that is expected to improve +// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md) +// and so we do not want to bake those limitations in here. +// +// Even without that issue, we want our system allocator to handle holes, that +// is, it should merge freed regions and allow allocating new content there of +// the full size, etc., so that we do not waste space. That means that the +// system allocator really does need to handle the general problem of allocating +// and freeing variable-sized chunks of memory in a random order, like malloc/ +// free do. And so it makes sense to layer mimalloc on top of such an +// implementation. +// +// emmalloc makes sense for the lower level because it is small and simple while +// still fully handling merging of holes etc. It is not the most efficient +// allocator, but our assumption is that mimalloc needs to be fast while the +// system allocator underneath it is called much less frequently. +// + +//--------------------------------------------- +// init +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->must_free_whole = true; + config->has_virtual_reserve = false; +} + +extern void emmalloc_free(void*); + +int _mi_prim_free(void* addr, size_t size) { + MI_UNUSED(size); + emmalloc_free(addr); + return 0; +} + + +//--------------------------------------------- +// Allocation +//--------------------------------------------- + +extern void* emmalloc_memalign(size_t, size_t); + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit); + *is_large = false; + // TODO: Track the highest address ever seen; first uses of it are zeroes. + // That assumes no one else uses sbrk but us (they could go up, + // scribble, and then down), but we could assert on that perhaps. + *is_zero = false; + // emmalloc has some limitations on alignment size. + // TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating + // 8, which wastes quite a lot for us in wasm. If that is unavoidable, + // we may want to improve emmalloc to support such alignment. See also + // https://github.com/emscripten-core/emscripten/issues/20645 + #define MIN_EMMALLOC_ALIGN 8 + #define MAX_EMMALLOC_ALIGN (1024*1024) + if (try_alignment < MIN_EMMALLOC_ALIGN) { + try_alignment = MIN_EMMALLOC_ALIGN; + } else if (try_alignment > MAX_EMMALLOC_ALIGN) { + try_alignment = MAX_EMMALLOC_ALIGN; + } + void* p = emmalloc_memalign(try_alignment, size); + *addr = p; + if (p == 0) { + return ENOMEM; + } + return 0; +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + MI_UNUSED(addr); MI_UNUSED(size); + // See TODO above. 
+ *is_zero = false; + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + MI_UNUSED(addr); MI_UNUSED(size); + *needs_recommit = false; + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; + *addr = NULL; + return ENOSYS; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include <emscripten/html5.h> + +mi_msecs_t _mi_prim_clock_now(void) { + return emscripten_date_now(); +} + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +#include <emscripten/console.h> + +void _mi_prim_out_stderr( const char* msg) { + emscripten_console_error(msg); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // For code size reasons, do not support environ customization for now. 
+ MI_UNUSED(name); + MI_UNUSED(result); + MI_UNUSED(result_size); + return false; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + int err = getentropy(buf, buf_len); + return !err; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#ifdef __EMSCRIPTEN_SHARED_MEMORY__ + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); + +} +#endif diff --git a/src/prim/prim.c b/src/prim/prim.c index 9a597d8e..3b7d3736 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -18,6 +18,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_USE_SBRK #include "wasi/prim.c" // memory-grow or sbrk (Wasm) +#elif defined(__EMSCRIPTEN__) +#include "emscripten/prim.c" // emmalloc_*, + pthread support + #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 54bf57b2..2035e1a4 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -27,16 +27,20 @@ terms of the MIT license. A copy of the license can be found in the file #include // mmap #include // sysconf - +#include // open, close, read, access + #if defined(__linux__) #include - #include + #if defined(MI_NO_THP) + #include + #endif #if defined(__GLIBC__) #include // linux mmap flags #else #include #endif #elif defined(__APPLE__) + #include #include #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR #include @@ -50,17 +54,19 @@ terms of the MIT license. A copy of the license can be found in the file #include #endif -#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) +#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun) #define MI_HAS_SYSCALL_H #include #endif + //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. 
// and do allocation themselves; using syscalls prevents recursion when mimalloc is // still initializing (issue #713) //------------------------------------------------------------------------------------ + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) static int mi_prim_open(const char* fpath, int open_flags) { @@ -76,7 +82,7 @@ static int mi_prim_access(const char *fpath, int mode) { return syscall(SYS_access,fpath,mode); } -#elif !defined(__APPLE__) // avoid unused warnings +#elif (!defined(__APPLE__) || MAC_OS_X_VERSION_MIN_REQUIRED < 1070) && !defined(__sun) // avoid unused warnings on macOS and Solaris static int mi_prim_open(const char* fpath, int open_flags) { return open(fpath,open_flags); @@ -125,7 +131,8 @@ static bool unix_detect_overcommit(void) { return os_overcommit; } -void _mi_prim_mem_init( mi_os_mem_config_t* config ) { +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ long psize = sysconf(_SC_PAGESIZE); if (psize > 0) { config->page_size = (size_t)psize; @@ -135,6 +142,17 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->has_overcommit = unix_detect_overcommit(); config->must_free_whole = false; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) + + // disable transparent huge pages for this process? + #if defined(MI_NO_THP) && (defined(__linux__) || defined(__ANDROID__)) + int val = 0; + if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) { + // Most likely since distros often come with always/madvise settings. 
+ val = 1; + // Disabling only for mimalloc process rather than touching system wide settings + (void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0); + } + #endif } @@ -276,7 +294,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec *is_large = true; p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); @@ -310,7 +328,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #elif defined(__sun) if (allow_large && _mi_os_use_large_page(size, try_alignment)) { struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; + cmd.mha_pagesize = _mi_os_large_page_size(); cmd.mha_cmd = MHA_MAPSIZE_VA; if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { *is_large = true; @@ -731,28 +749,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { // Random //---------------------------------------------------------------- -#if defined(__APPLE__) - -#include -#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 #include #include -#endif + bool _mi_prim_random_buf(void* buf, size_t buf_len) { - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. 
See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); - #else - // fall back on older macOS - arc4random_buf(buf, buf_len); - return true; - #endif + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); } #elif defined(__ANDROID__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) + defined(__sun) || \ + (defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070) #include bool _mi_prim_random_buf(void* buf, size_t buf_len) { @@ -760,11 +770,10 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { return true; } -#elif defined(__linux__) || defined(__HAIKU__) +#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // for old apple versions < 1070 (issue #829) #include #include -#include #include bool _mi_prim_random_buf(void* buf, size_t buf_len) { @@ -835,7 +844,9 @@ void _mi_prim_thread_init_auto_done(void) { } void _mi_prim_thread_done_auto_done(void) { - // nothing to do + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809 + pthread_key_delete(_mi_heap_default_key); + } } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 50511f0b..f74acd2a 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -12,6 +12,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc/atomic.h" #include "mimalloc/prim.h" +#include <stdio.h> // fputs +#include <stdlib.h> // getenv + //--------------------------------------------- // Initialize //--------------------------------------------- @@ -40,6 +43,8 @@ int _mi_prim_free(void* addr, size_t size ) { //--------------------------------------------- #if defined(MI_USE_SBRK) + #include <unistd.h> // for sbrk + static void* mi_memory_grow( size_t size ) { void* p = sbrk(size); if (p == (void*)(-1)) return NULL; diff --git a/src/segment-map.c b/src/segment-map.c index 4c2104bd..a306ec67 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -29,6 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + // note: segment can be invalid or NULL. mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; @@ -70,8 +71,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { if (p == NULL) return NULL; - mi_segment_t* segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); + mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge diff --git a/test/main-override.cpp b/test/main-override.cpp index f9ac7327..64ea178b 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -100,7 +100,7 @@ static void various_tests() { auto tbuf = new unsigned char[sizeof(Test)]; t = new (tbuf) Test(42); t->~Test(); - delete tbuf; + delete[] tbuf; } class Static {