mirror of
https://github.com/microsoft/mimalloc.git
synced 2024-12-26 21:04:27 +08:00
Merge branch 'dev' of https://github.com/microsoft/mimalloc into dev
This commit is contained in:
commit
c858690dea
@ -181,10 +181,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
|
||||
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
|
||||
if(NOT MI_USE_CXX)
|
||||
list(APPEND mi_cflags -Wstrict-prototypes)
|
||||
endif()
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
|
||||
list(APPEND mi_cflags -Wno-invalid-memory-model)
|
||||
endif()
|
||||
endif()
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
|
||||
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
|
||||
endif()
|
||||
|
@ -782,6 +782,7 @@ typedef enum mi_option_e {
|
||||
mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
|
||||
mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible
|
||||
mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program.
|
||||
mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N.
|
||||
mi_option_segment_cache, ///< The number of segments per thread to keep cached.
|
||||
mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
|
||||
mi_option_segment_reset, ///< Experimental
|
||||
@ -1053,6 +1054,8 @@ or via environment variables.
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
|
||||
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
|
||||
and allocate just a little from taking up space in the huge OS page area (which cannot be reset).
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where `N` is the NUMA node. This reserves the huge pages at a specific NUMA node.
|
||||
(`N` is -1 by default, which reserves the huge pages evenly among the given number of NUMA nodes, or among the available ones as detected.)
|
||||
|
||||
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
|
||||
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
|
||||
|
@ -271,7 +271,6 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size
|
||||
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
|
||||
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
|
||||
|
||||
|
||||
// deprecated
|
||||
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
|
||||
|
||||
@ -310,6 +309,7 @@ typedef enum mi_option_e {
|
||||
mi_option_reset_decommits,
|
||||
mi_option_large_os_pages, // implies eager commit
|
||||
mi_option_reserve_huge_os_pages,
|
||||
mi_option_reserve_huge_os_pages_at,
|
||||
mi_option_reserve_os_memory,
|
||||
mi_option_segment_cache,
|
||||
mi_option_page_reset,
|
||||
|
29
readme.md
29
readme.md
@ -12,13 +12,13 @@ is a general purpose allocator with excellent [performance](#performance) charac
|
||||
Initially developed by Daan Leijen for the run-time systems of the
|
||||
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
|
||||
|
||||
Latest release tag: `v2.0.2` (beta, 2021-06-17).
|
||||
Latest stable tag: `v1.7.2` (2021-06-17).
|
||||
Latest release tag: `v2.0.3` (beta, 2021-11-14).
|
||||
Latest stable tag: `v1.7.3` (2021-11-14).
|
||||
|
||||
mimalloc is a drop-in replacement for `malloc` and can be used in other programs
|
||||
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
|
||||
```
|
||||
> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram
|
||||
> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
|
||||
```
|
||||
It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include:
|
||||
|
||||
@ -77,6 +77,10 @@ Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages t
|
||||
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
|
||||
(see [below](#performance)); please report if you observe any significant performance regression.
|
||||
|
||||
* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
|
||||
M1), improved performance for v2 for large objects, Python integration improvements, more standard
|
||||
installation directories, various small fixes.
|
||||
|
||||
* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
|
||||
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
|
||||
|
||||
@ -142,7 +146,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
|
||||
|
||||
## Windows
|
||||
|
||||
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`).
|
||||
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
|
||||
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
|
||||
`mimalloc-override` project builds a DLL for overriding malloc
|
||||
in the entire program.
|
||||
@ -191,6 +195,11 @@ Notes:
|
||||
2. Install CCMake: `sudo apt-get install cmake-curses-gui`
|
||||
|
||||
|
||||
## Single source
|
||||
|
||||
You can also directly build the single `src/static.c` file as part of your project without
|
||||
needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.
|
||||
|
||||
|
||||
# Using the library
|
||||
|
||||
@ -302,6 +311,9 @@ or via environment variables:
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
|
||||
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
|
||||
and allocate just a little from taking up space in the huge OS page area (which cannot be reset).
|
||||
The huge pages are usually allocated evenly among NUMA nodes.
|
||||
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the NUMA node (starting at 0) to allocate all
|
||||
the huge pages at a specific NUMA node instead.
|
||||
|
||||
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
|
||||
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
|
||||
@ -337,9 +349,9 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to
|
||||
- Corrupted free-lists and some forms of use-after-free are detected.
|
||||
|
||||
|
||||
# Overriding Malloc
|
||||
# Overriding Standard Malloc
|
||||
|
||||
Overriding the standard `malloc` can be done either _dynamically_ or _statically_.
|
||||
Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_.
|
||||
|
||||
## Dynamic override
|
||||
|
||||
@ -370,13 +382,12 @@ On macOS we can also preload the mimalloc shared
|
||||
library so all calls to the standard `malloc` interface are
|
||||
resolved to the _mimalloc_ library.
|
||||
```
|
||||
> env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
|
||||
> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
|
||||
```
|
||||
|
||||
Note that certain security restrictions may apply when doing this from
|
||||
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
|
||||
|
||||
(Note: macOS support for dynamic overriding is recent; please report any issues.)
|
||||
|
||||
### Override on Windows
|
||||
|
||||
@ -386,7 +397,7 @@ the (dynamic) C runtime allocator, including those from other DLL's or libraries
|
||||
|
||||
The overriding on Windows requires that you link your program explicitly with
|
||||
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
|
||||
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available
|
||||
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
|
||||
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
|
||||
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
|
||||
mimalloc (in `mimalloc-override.dll`).
|
||||
|
@ -475,14 +475,13 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
|
||||
return segment;
|
||||
}
|
||||
|
||||
|
||||
// Free a block
|
||||
// Free a block
|
||||
void mi_free(void* p) mi_attr_noexcept
|
||||
{
|
||||
const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
|
||||
if (mi_unlikely(segment == NULL)) return;
|
||||
|
||||
const mi_threadid_t tid = _mi_thread_id();
|
||||
mi_threadid_t tid = _mi_thread_id();
|
||||
mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
mi_block_t* const block = (mi_block_t*)p;
|
||||
|
||||
|
@ -504,7 +504,12 @@ void mi_process_init(void) mi_attr_noexcept {
|
||||
|
||||
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
|
||||
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
|
||||
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
|
||||
long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at);
|
||||
if (reserve_at != -1) {
|
||||
mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
|
||||
} else {
|
||||
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
|
||||
}
|
||||
}
|
||||
if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
|
||||
long ksize = mi_option_get(mi_option_reserve_os_memory);
|
||||
|
@ -76,6 +76,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
#endif
|
||||
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
|
||||
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
|
||||
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
|
||||
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
|
||||
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
|
||||
|
22
src/os.c
22
src/os.c
@ -578,29 +578,29 @@ static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
|
||||
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
|
||||
// in the middle of the 2TiB - 6TiB address range (see issue #372))
|
||||
|
||||
#define KK_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
|
||||
#define KK_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
|
||||
#define KK_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
|
||||
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
|
||||
#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
|
||||
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
|
||||
|
||||
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
|
||||
{
|
||||
if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
|
||||
if ((size%MI_SEGMENT_SIZE) != 0) return NULL;
|
||||
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096.
|
||||
size = _mi_align_up(size, MI_SEGMENT_SIZE);
|
||||
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
|
||||
#if (MI_SECURE>0)
|
||||
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
|
||||
#endif
|
||||
|
||||
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
|
||||
if (hint == 0 || hint > KK_HINT_MAX) { // wrap or initialize
|
||||
uintptr_t init = KK_HINT_BASE;
|
||||
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
|
||||
uintptr_t init = MI_HINT_BASE;
|
||||
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
|
||||
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
|
||||
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % KK_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
|
||||
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
|
||||
#endif
|
||||
uintptr_t expected = hint + size;
|
||||
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
|
||||
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > KK_HINT_MAX but that is ok, it is a hint after all
|
||||
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
|
||||
}
|
||||
if (hint%try_alignment != 0) return NULL;
|
||||
return (void*)hint;
|
||||
@ -638,11 +638,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
|
||||
if (commit) flags |= MEM_COMMIT;
|
||||
p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
|
||||
#elif defined(MI_USE_SBRK)
|
||||
KK_UNUSED(allow_large);
|
||||
MI_UNUSED(allow_large);
|
||||
*is_large = false;
|
||||
p = mi_sbrk_heap_grow(size, try_alignment);
|
||||
#elif defined(__wasi__)
|
||||
KK_UNUSED(allow_large);
|
||||
MI_UNUSED(allow_large);
|
||||
*is_large = false;
|
||||
p = mi_wasm_heap_grow(size, try_alignment);
|
||||
#else
|
||||
|
@ -167,8 +167,9 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if !defined(MI_USE_RTLGENRANDOM)
|
||||
// We prefer BCryptGenRandom over RtlGenRandom
|
||||
#if defined(MI_USE_BCRYPTGENRANDOM)
|
||||
// We would like to use BCryptGenRandom instead of RtlGenRandom but it can lead to a deadlock
|
||||
// under the VS debugger when using dynamic overriding.
|
||||
#pragma comment (lib,"bcrypt.lib")
|
||||
#include <bcrypt.h>
|
||||
static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user