From a2c3b0f8af5d43cea6398be8e7162a6fbce2ec95 Mon Sep 17 00:00:00 2001 From: paulip1792 Date: Tue, 10 Aug 2021 16:30:44 +0800 Subject: [PATCH 01/11] add option to reserve huge os pages at a specific numa node. --- doc/mimalloc-doc.h | 3 +++ include/mimalloc.h | 1 + readme.md | 2 ++ src/init.c | 7 ++++++- src/options.c | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index b448f14a..e2287fe9 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -782,6 +782,7 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program. + mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N. mi_option_segment_cache, ///< The number of segments per thread to keep cached. mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. mi_option_segment_reset, ///< Experimental @@ -1053,6 +1054,8 @@ or via environment variables. `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node. + (`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)) Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. 
When any memory is now written in that area, the diff --git a/include/mimalloc.h b/include/mimalloc.h index fe5aa8f3..db5bff40 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -306,6 +306,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, + mi_option_reserve_huge_os_pages_at, mi_option_reserve_os_memory, mi_option_segment_cache, mi_option_page_reset, diff --git a/readme.md b/readme.md index cdb1b82a..2c02f665 100644 --- a/readme.md +++ b/readme.md @@ -302,6 +302,8 @@ or via environment variables: `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node. + (`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)) Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. 
When any memory is now written in that area, the diff --git a/src/init.c b/src/init.c index c0f09b5e..587e6b1c 100644 --- a/src/init.c +++ b/src/init.c @@ -494,7 +494,12 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); + if (reserve_at != -1) { + mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); + } else { + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } } if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); diff --git a/src/options.c b/src/options.c index 30025db2..aa4601fe 100644 --- a/src/options.c +++ b/src/options.c @@ -76,6 +76,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free From 5a5e4e303629d49bc09a2ef2614c1e0374c23dfb Mon Sep 17 00:00:00 2001 From: sblondon Date: Thu, 28 Oct 2021 15:01:36 +0200 Subject: [PATCH 02/11] Fix preload path in documentation example The .so files are usually in `/usr/lib`, not `/usr/bin`. The updated path is the same as below in the text. --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index cdb1b82a..cf984c6b 100644 --- a/readme.md +++ b/readme.md @@ -18,7 +18,7 @@ Latest stable tag: `v1.7.2` (2021-06-17). 
mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` -> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram +> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: From 6ca454a04a272a2a2f1309db3b8a050049734a14 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 16:41:24 -0800 Subject: [PATCH 03/11] update readme --- readme.md | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/readme.md b/readme.md index cdb1b82a..2369b9c1 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.2` (beta, 2021-06-17). -Latest stable tag: `v1.7.2` (2021-06-17). +Latest release tag: `v2.0.3` (beta, 2021-11-14). +Latest stable tag: `v1.7.3` (2021-11-14). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -77,6 +77,10 @@ Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages t and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. 
+* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. + * 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. @@ -142,7 +146,7 @@ mimalloc is used in various large scale low-latency services and programs, for e ## Windows -Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build. The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -191,6 +195,11 @@ Notes: 2. Install CCMake: `sudo apt-get install cmake-curses-gui` +## Single source + +You can also directly build the single `src/static.c` file as part of your project without +needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path. + # Using the library @@ -337,9 +346,9 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to - Corrupted free-lists and some forms of use-after-free are detected. -# Overriding Malloc +# Overriding Standard Malloc -Overriding the standard `malloc` can be done either _dynamically_ or _statically_. +Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_. ## Dynamic override @@ -370,13 +379,12 @@ On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. 
``` -> env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram +> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram ``` Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -(Note: macOS support for dynamic overriding is recent, please report any issues.) ### Override on Windows @@ -386,7 +394,7 @@ the (dynamic) C runtime allocator, including those from other DLL's or libraries The overriding on Windows requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). -Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available +Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in `mimalloc-override.dll`). From 0560fc27c08d28d523b7f741a42deb26cd01c0c6 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 16:47:50 -0800 Subject: [PATCH 04/11] update readme --- readme.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 1586bddc..dfef82d5 100644 --- a/readme.md +++ b/readme.md @@ -311,8 +311,9 @@ or via environment variables: `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset). -- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node. 
- (`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)) + The huge pages are usually allocated evenly among NUMA nodes. + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all + the huge pages at a specific numa node instead. Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the From 09c658dd401a9a4a7a2c807f6bd274d0420e109d Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Nov 2021 10:10:47 -0800 Subject: [PATCH 05/11] try to allocate within our hint space (partially addresses issue #360) --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index ee6e92c0..90773fc6 100644 --- a/src/os.c +++ b/src/os.c @@ -585,7 +585,7 @@ static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + size = _mi_align_up(size, MI_SEGMENT_SIZE); if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. #if (MI_SECURE>0) size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. 
From 1c22650719cc09cdeba10b9add7af4b1e9b2ef2d Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Nov 2021 10:52:09 -0800 Subject: [PATCH 06/11] remove no-invalid-memory-model suppression --- CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 87f4cddd..7c2c6cfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,10 +181,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) - endif() - if(CMAKE_C_COMPILER_ID MATCHES "GNU") - list(APPEND mi_cflags -Wno-invalid-memory-model) - endif() + endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline) endif() From 8b60a5ab70b342ca1ff404a4ee6c5340ccfb47a6 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Nov 2021 17:59:27 -0800 Subject: [PATCH 07/11] add mi_unsafe_free_with_threadid and mi_get_current_threadid() --- include/mimalloc-internal.h | 3 +++ include/mimalloc.h | 2 ++ src/alloc.c | 25 +++++++++++++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cf5b6783..1333c80f 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -20,14 +20,17 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #define mi_decl_noinline __declspec(noinline) +#define mi_decl_always_inline __forceinline #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_always_inline __attribute__((always_inline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #else #define mi_decl_noinline +#define mi_decl_always_inline inline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index 756e516c..f90af4a3 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,6 +271,8 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export size_t mi_get_current_threadid(void) mi_attr_noexcept; +mi_decl_export void mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/alloc.c b/src/alloc.c index d9b6dd60..4d0d9e59 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -475,14 +475,12 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms return segment; } - -// Free a block -void mi_free(void* p) mi_attr_noexcept +// Free a block with a known threadid +static mi_decl_always_inline void 
_mi_free_with_threadid(void* p, mi_threadid_t tid) mi_attr_noexcept { const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); if (mi_unlikely(segment == NULL)) return; - const mi_threadid_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_block_t*)p; @@ -507,6 +505,25 @@ void mi_free(void* p) mi_attr_noexcept } } +// Get the current thread id +size_t mi_get_current_threadid(void) mi_attr_noexcept { + return _mi_thread_id(); +} + +// Free a block passing the current thread id explicitly +void mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept +{ + mi_assert(current_tid == _mi_thread_id()); + _mi_free_with_threadid(p,current_tid); +} + + +// Free a block +void mi_free(void* p) mi_attr_noexcept { + _mi_free_with_threadid(p, _mi_thread_id()); +} + + bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page const mi_segment_t* const segment = _mi_ptr_segment(block); From 3548d8d716569f96967af4da0fdc57d2f09e7c38 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Nov 2021 18:39:03 -0800 Subject: [PATCH 08/11] add noinline to avoid warnings --- src/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 4d0d9e59..e9167848 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -511,7 +511,7 @@ size_t mi_get_current_threadid(void) mi_attr_noexcept { } // Free a block passing the current thread id explicitly -void mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept +void mi_decl_noinline mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept { mi_assert(current_tid == _mi_thread_id()); _mi_free_with_threadid(p,current_tid); @@ -519,7 +519,7 @@ void mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept // Free a block -void mi_free(void* p) mi_attr_noexcept { +void mi_decl_noinline mi_free(void* p) mi_attr_noexcept { _mi_free_with_threadid(p, 
_mi_thread_id()); } From 9183b1eec005be9863e58d51ba0f96b97721d48c Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Nov 2021 19:04:41 -0800 Subject: [PATCH 09/11] remove experiment with unsafe_free_with_threadid --- include/mimalloc-internal.h | 3 --- include/mimalloc.h | 3 --- src/alloc.c | 24 +++--------------------- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 1333c80f..cf5b6783 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -20,17 +20,14 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #define mi_decl_noinline __declspec(noinline) -#define mi_decl_always_inline __forceinline #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) -#define mi_decl_always_inline __attribute__((always_inline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #else #define mi_decl_noinline -#define mi_decl_always_inline inline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index f90af4a3..06a16703 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,9 +271,6 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; -mi_decl_export size_t mi_get_current_threadid(void) mi_attr_noexcept; -mi_decl_export void 
mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept; - // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/alloc.c b/src/alloc.c index e9167848..ca32caba 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -475,12 +475,13 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms return segment; } -// Free a block with a known threadid -static mi_decl_always_inline void _mi_free_with_threadid(void* p, mi_threadid_t tid) mi_attr_noexcept +// Free a block +void mi_free(void* p) mi_attr_noexcept { const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); if (mi_unlikely(segment == NULL)) return; + mi_threadid_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_block_t*)p; @@ -505,25 +506,6 @@ static mi_decl_always_inline void _mi_free_with_threadid(void* p, mi_threadid_t } } -// Get the current thread id -size_t mi_get_current_threadid(void) mi_attr_noexcept { - return _mi_thread_id(); -} - -// Free a block passing the current thread id explicitly -void mi_decl_noinline mi_unsafe_free_with_threadid(void* p, size_t current_tid ) mi_attr_noexcept -{ - mi_assert(current_tid == _mi_thread_id()); - _mi_free_with_threadid(p,current_tid); -} - - -// Free a block -void mi_decl_noinline mi_free(void* p) mi_attr_noexcept { - _mi_free_with_threadid(p, _mi_thread_id()); -} - - bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page const mi_segment_t* const segment = _mi_ptr_segment(block); From 0be71a2cac17062bd8913cbd272c472a44331b7f Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 24 Nov 2021 12:54:54 -0800 Subject: [PATCH 10/11] fix prefix --- src/os.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/os.c b/src/os.c index 90773fc6..603ec04a 100644 --- a/src/os.c +++ b/src/os.c @@ -578,29 +578,29 @@ static 
mi_decl_cache_align _Atomic(uintptr_t) aligned_base; // (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // in the middle of the 2TiB - 6TiB address range (see issue #372)) -#define KK_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start -#define KK_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) -#define KK_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) +#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start +#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) +#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE); - if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. + if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. #if (MI_SECURE>0) size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. 
#endif uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); - if (hint == 0 || hint > KK_HINT_MAX) { // wrap or initialize - uintptr_t init = KK_HINT_BASE; + if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize + uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); - init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % KK_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB + init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); - hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > KK_HINT_MAX but that is ok, it is a hint after all + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; @@ -638,11 +638,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (commit) flags |= MEM_COMMIT; p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); #elif defined(MI_USE_SBRK) - KK_UNUSED(allow_large); + MI_UNUSED(allow_large); *is_large = false; p = mi_sbrk_heap_grow(size, try_alignment); #elif defined(__wasi__) - KK_UNUSED(allow_large); + MI_UNUSED(allow_large); *is_large = false; p = mi_wasm_heap_grow(size, try_alignment); #else From 36edfbc70ae4610d06b9fce245f35d92e6be92a5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 9 Dec 2021 16:18:17 -0800 Subject: [PATCH 11/11] use rtlgenrandom by default on windows --- src/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/random.c b/src/random.c index 05c5c99c..1220401a 100644 --- a/src/random.c +++ b/src/random.c @@ -167,8 +167,9 @@ If 
we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) -#if !defined(MI_USE_RTLGENRANDOM) -// We prefer BCryptGenRandom over RtlGenRandom +#if defined(MI_USE_BCRYPTGENRANDOM) +// We would like to use BCryptGenRandom instead of RtlGenRandom but it can lead to a deadlock +// under the VS debugger when using dynamic overriding. #pragma comment (lib,"bcrypt.lib") #include <bcrypt.h> static bool os_random_buf(void* buf, size_t buf_len) {