merge from dev

This commit is contained in:
Daan 2024-04-22 11:05:01 -07:00
commit 6ba166f528
4 changed files with 78 additions and 55 deletions

View File

@ -318,41 +318,43 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size
typedef enum mi_option_e {
// stable options
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// the following options are experimental (see src/options.h)
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1)
mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// advanced options
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit)
mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process.
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_deprecated_segment_cache,
mi_option_deprecated_page_reset,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay,
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all.
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim,
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe.
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit)
mi_option_arena_purge_mult,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10)
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%)
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (= 1 GiB on 64-bit)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_abandoned_reclaim_on_free, // reclaim abandoned segments on a free
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
mi_option_eager_region_commit = mi_option_arena_eager_commit,
mi_option_reset_decommits = mi_option_purge_decommits,
mi_option_reset_delay = mi_option_purge_delay,
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge,
mi_option_limit_os_alloc = mi_option_disallow_os_alloc
} mi_option_t;

View File

@ -80,6 +80,16 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression.
* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
directly available (and new `block_size_shift` to improve aligned block free-ing).
New approach to collection of abandoned segments: When
a thread terminates the segments it owns are abandoned (containing still live objects) and these can be
reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's
which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in
an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim`
gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%).
* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory
usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.
@ -144,7 +154,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
## Windows
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc
in the entire program.
@ -280,17 +290,23 @@ You can set further options either programmatically (using [`mi_option_set`](htt
Advanced options:
- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
allocates segments and pages. Set this to 2 (default) to
only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems
as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once).
Note that eager commit only increases the commit but not the actual the peak resident set
(rss) so it is generally ok to enable this.
- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge
OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which
can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when
a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher
value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times.
Setting it to `-1` disables purging completely.
- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
allocates segments and pages. This is by default
only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS)
may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set
(rss) so it is generally ok to enable this.
Setting it to `-1` disables purging completely.
- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows,
`MADV_DONTNEED` (which decresease rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused
memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems).
Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual
address ranges and decommits within those ranges (to make the underlying physical memory available to other processes).
Further options for large workloads and services:
@ -298,9 +314,10 @@ Further options for large workloads and services:
at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly
improve performance. When this option is disabled, it also disables transparent huge pages (THP) for the process
(on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
@ -309,11 +326,12 @@ Further options for large workloads and services:
OS pages, use with care as reserving
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
startup only once).
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
With huge OS pages, it may be beneficial to set the setting
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be purged).
and allocate just a little to take up space in the huge OS page area (which cannot be purged as huge OS pages are pinned
to physical memory).
The huge pages are usually allocated evenly among NUMA nodes.
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
the huge pages at a specific numa node instead.

View File

@ -356,7 +356,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
if (arena_reserve == 0) return false;
if (!_mi_os_has_virtual_reserve()) {
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example)
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
}
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
if (arena_count >= 8 && arena_count <= 128) {
@ -369,7 +369,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0);
}
@ -383,24 +383,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
const int numa_node = _mi_os_numa_node(tld); // current numa node
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
// otherwise, try to first eagerly reserve a new arena
if (req_arena_id == _mi_arena_id_none()) {
mi_arena_id_t arena_id = 0;
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
// and try allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
// otherwise, try to first eagerly reserve a new arena
if (req_arena_id == _mi_arena_id_none()) {
mi_arena_id_t arena_id = 0;
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
// and try allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
}
}
}
}
// if we cannot use OS allocation, return NULL
if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) {
errno = ENOMEM;
return NULL;
}

View File

@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve OS memory in advance
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates
@ -77,7 +77,7 @@ static mi_option_desc_t options[_mi_option_last] =
#endif
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
@ -90,7 +90,8 @@ static mi_option_desc_t options[_mi_option_last] =
#endif
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) }, // reclaim an abandoned segment on a free
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
};
static void mi_option_init(mi_option_desc_t* desc);