mirror of https://github.com/microsoft/mimalloc.git
synced 2024-12-25 20:14:12 +08:00

merge from dev-slice v2.1.4
commit 229ec9cbdc

.gitattributes (vendored) | 1
@@ -10,3 +10,4 @@
*.dll binary
*.lib binary
*.exe binary
bin export-ignore
@@ -19,6 +19,7 @@ option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS"
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
option(MI_BUILD_STATIC "Build static library" ON)
option(MI_BUILD_OBJECT "Build object library" ON)

@@ -27,10 +28,11 @@ option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF)

# deprecated options
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version (deprecated)" OFF)
option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF)

include(CheckIncludeFiles)

@@ -45,6 +47,7 @@ set(mi_sources
src/bitmap.c
src/heap.c
src/init.c
src/libc.c
src/options.c
src/os.c
src/page.c
@@ -82,6 +85,17 @@ endif()
# Process options
# -----------------------------------------------------------------------------

# put -Wall early so other warnings can be disabled selectively
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wall -Wextra -Wpedantic)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wall -Wextra)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall)
endif()

if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
set(MI_USE_CXX "ON")
endif()

@@ -127,7 +141,7 @@ endif()

if(MI_SECURE)
message(STATUS "Set full secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=4)
endif()

if(MI_TRACK_VALGRIND)

@@ -184,6 +198,10 @@ endif()
if(MI_SEE_ASM)
message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
list(APPEND mi_cflags -save-temps)
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
message(STATUS "No GNU Line marker")
list(APPEND mi_cflags -Wno-gnu-line-marker)
endif()
endif()

if(MI_CHECK_FULL)

@@ -246,7 +264,7 @@ if(MI_DEBUG_UBSAN)
message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)")
endif()
else()
message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
endif()
endif()
@@ -262,24 +280,38 @@ if(MI_USE_CXX)
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Haiku")
SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib)
SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
if(MI_NO_THP)
message(STATUS "Disable transparent huge pages support (MI_NO_THP=ON)")
list(APPEND mi_defines MI_NO_THP=1)
endif()
endif()

if(MI_LIBC_MUSL)
message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON) (this implies MI_LOCAL_DYNAMIC_TLS=ON)")
set(MI_LOCAL_DYNAMIC_TLS "ON")
list(APPEND mi_defines MI_LIBC_MUSL=1)
endif()

# On Haiku use `-DCMAKE_INSTALL_PREFIX` instead, issue #788
# if(CMAKE_SYSTEM_NAME MATCHES "Haiku")
# SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib)
# SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers)
# endif()

# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
list(APPEND mi_cflags -Wno-unknown-pragmas -fvisibility=hidden)
if(NOT MI_USE_CXX)
list(APPEND mi_cflags -Wstrict-prototypes)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
list(APPEND mi_cflags -Wno-static-in-inline)
endif()
endif()

if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall -fvisibility=hidden)
list(APPEND mi_cflags -fvisibility=hidden)
endif()

if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")

@@ -467,7 +499,7 @@ if (MI_BUILD_OBJECT)
set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}")
set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}")
add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}")
add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out})
endif()

# the following seems to lead to cmake warnings/errors on some systems, disable for now :-(
SECURITY.md | 18
@@ -1,20 +1,20 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

@@ -28,7 +28,11 @@ Please include the requested information listed below (as much as you can provid

This information will help us triage your report more quickly.

<<<<<<< HEAD
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
=======
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
>>>>>>> dev-slice

## Preferred Languages

@@ -36,6 +40,6 @@ We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->
bin/minject.exe | BIN
Binary file not shown.
bin/readme.md (new file) | 71
@@ -0,0 +1,71 @@
# Windows Override

<span id="override_on_windows">Dynamically overriding with mimalloc on Windows</span>
is robust and has the particular advantage to be able to redirect all malloc/free calls that go through
the (dynamic) C runtime allocator, including those from other DLL's or libraries.
As it intercepts all allocation calls on a low level, it can be used reliably
on large programs that include other 3rd party components.
There are four requirements to make the overriding work robustly:

1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).

2. Link your program explicitly with the `mimalloc-override.dll` library.
   To ensure the `mimalloc-override.dll` is loaded at run-time it is easiest to insert some
   call to the mimalloc API in the `main` function, like `mi_version()`
   (or use the `/INCLUDE:mi_version` switch on the linker); see the sketch below and the `mimalloc-override-test` project
   for an example on how to use this.

3. The `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
   in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency of that DLL).
   The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
   mimalloc functions (which reside in `mimalloc-override.dll`).

4. Ensure the `mimalloc-override.dll` comes as early as possible in the import
   list of the final executable (so it can intercept all potential allocations).

For best performance on Windows with C++, it
is also recommended to override the `new`/`delete` operations (by including
[`mimalloc-new-delete.h`](../include/mimalloc-new-delete.h)
in a single(!) source file in your project).

The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
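A minimal sketch of requirement 2 above (not part of the upstream readme): call `mi_version()` from `main` so the linker keeps the dependency on `mimalloc-override.dll` and the DLL loads at program start. It assumes the program is compiled with `/MD` and linked against the mimalloc override library.

```
#include <stdio.h>
#include <stdlib.h>
#include <mimalloc.h>

int main(void) {
  // Referencing the mimalloc API keeps the import of mimalloc-override.dll,
  // so it is loaded (and can redirect malloc/free) right at program start.
  printf("using mimalloc version %d\n", mi_version());
  void* p = malloc(64);   // now served by mimalloc through the redirection DLL
  free(p);
  return 0;
}
```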
## Minject

We cannot always re-link an executable with `mimalloc-override.dll`, and similarly, we cannot always
ensure that the DLL comes first in the import table of the final executable.
In many cases though we can patch existing executables without any recompilation
if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
into the import table (and put `mimalloc-redirect.dll` in the same folder).
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388).

The `minject` program can also do this from the command line, use `minject --help` for options:

```
> minject --help

minject:
  Injects the mimalloc dll into the import table of a 64-bit executable,
  and/or ensures that it comes first in the import table.

usage:
  > minject [options] <exe>

options:
  -h --help        show this help
  -v --verbose     be verbose
  -l --list        only list imported modules
  -i --inplace     update the exe in-place (make sure there is a backup!)
  -f --force       always overwrite without prompting
  --postfix=<p>    use <p> as a postfix to the mimalloc dll (default is 'override')
                   e.g. use --postfix=override-debug to link with mimalloc-override-debug.dll

notes:
  Without '--inplace' an injected <exe> is generated with the same name ending in '-mi'.
  Ensure 'mimalloc-redirect.dll' is in the same folder as the mimalloc dll.

examples:
  > minject --list myprogram.exe
  > minject --force --inplace myprogram.exe
```
@@ -1,6 +1,6 @@
set(mi_version_major 2)
set(mi_version_minor 1)
set(mi_version_patch 2)
set(mi_version_patch 4)
set(mi_version ${mi_version_major}.${mi_version_minor})

set(PACKAGE_VERSION ${mi_version})
@@ -466,7 +466,7 @@ LOOKUP_CACHE_SIZE = 0
# than 0 to get more control over the balance between CPU load and processing
# speed. At this moment only the input processing can be done using multiple
# threads. Since this is still an experimental feature the default is set to 1,
# which efficively disables parallel processing. Please report any issues you
# which effectively disables parallel processing. Please report any issues you
# encounter. Generating dot graphs in parallel is controlled by the
# DOT_NUM_THREADS setting.
# Minimum value: 0, maximum value: 32, default value: 1.
@@ -168,7 +168,7 @@ void* mi_expand(void* p, size_t newsize);
/// @returns A pointer to a block of \a count * \a size bytes, or \a NULL
/// if out of memory or if \a count * \a size overflows.
///
/// If there is no overflow, it behaves exactly like `mi_malloc(p,count*size)`.
/// If there is no overflow, it behaves exactly like `mi_malloc(count*size)`.
/// @see mi_calloc()
/// @see mi_zallocn()
void* mi_mallocn(size_t count, size_t size);
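A brief illustrative sketch (not part of the documented header) of the overflow-checked count/size allocation described above:

```
#include <string.h>
#include <mimalloc.h>

int* make_table(size_t n) {
  // Like mi_malloc(n*sizeof(int)), but returns NULL if n*sizeof(int) would overflow.
  int* xs = (int*)mi_mallocn(n, sizeof(int));
  if (xs != NULL) memset(xs, 0, n * sizeof(int));  // mi_calloc(n, sizeof(int)) would zero for us
  return xs;                                       // release later with mi_free()
}
```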
@@ -441,7 +441,7 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la
/// @param pages The number of 1GiB pages to reserve.
/// @param numa_nodes The number of nodes do evenly divide the pages over, or 0 for using the actual number of NUMA nodes.
/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout.
/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
///
/// The reserved memory is used by mimalloc to satisfy allocations.
/// May quit before \a timeout_msecs are expired if it estimates it will take more than

@@ -455,7 +455,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
/// @param pages The number of 1GiB pages to reserve.
/// @param numa_node The NUMA node where the memory is reserved (start at 0).
/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout.
/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
///
/// The reserved memory is used by mimalloc to satisfy allocations.
/// May quit before \a timeout_msecs are expired if it estimates it will take more than
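An illustrative sketch of reserving huge OS pages at startup with the functions documented above (passing 0 for `numa_nodes` spreads the pages over all available NUMA nodes; the timeout is in milliseconds):

```
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // Try to reserve 4 x 1GiB huge pages, interleaved over all NUMA nodes,
  // giving up after at most 2000 msecs.
  int err = mi_reserve_huge_os_pages_interleave(4, 0, 2000);
  if (err != 0) fprintf(stderr, "huge page reservation failed (error %d)\n", err);
  // subsequent allocations can now be satisfied from the reserved pages
  return 0;
}
```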
@@ -468,7 +468,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
/// Is the C runtime \a malloc API redirected?
/// @returns \a true if all malloc API calls are redirected to mimalloc.
///
/// Currenty only used on Windows.
/// Currently only used on Windows.
bool mi_is_redirected();

/// Return process information (time and memory usage).

@@ -499,11 +499,11 @@ void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_m
/// \{

/// The maximum supported alignment size (currently 1MiB).
#define MI_ALIGNMENT_MAX (1024*1024UL)
#define MI_BLOCK_ALIGNMENT_MAX (1024*1024UL)

/// Allocate \a size bytes aligned by \a alignment.
/// @param size number of bytes to allocate.
/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_ALIGNMENT_MAX.
/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_BLOCK_ALIGNMENT_MAX.
/// @returns pointer to the allocated memory or \a NULL if out of memory.
/// The returned pointer is aligned by \a alignment, i.e.
/// `(uintptr_t)p % alignment == 0`.
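A small sketch of the aligned allocation API documented above; the alignment must be a power of two and stay below `MI_BLOCK_ALIGNMENT_MAX`:

```
#include <assert.h>
#include <stdint.h>
#include <mimalloc.h>

void aligned_demo(void) {
  void* p = mi_malloc_aligned(1000, 64);          // 64-byte aligned block of 1000 bytes
  assert(p == NULL || ((uintptr_t)p % 64) == 0);  // guaranteed by the API
  mi_free(p);
}
```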
@@ -558,7 +558,7 @@ mi_heap_t* mi_heap_new();

/// Delete a previously allocated heap.
/// This will release resources and migrate any
/// still allocated blocks in this heap (efficienty)
/// still allocated blocks in this heap (efficiently)
/// to the default heap.
///
/// If \a heap is the default heap, the default
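A short sketch of the first-class heap lifecycle described above: blocks that are still live when the heap is deleted migrate to the default heap, so they stay usable.

```
#include <mimalloc.h>

char* make_name(void) {
  mi_heap_t* heap = mi_heap_new();            // fresh heap
  char* s = (char*)mi_heap_malloc(heap, 16);  // allocate in that heap
  if (s != NULL) { s[0] = 'm'; s[1] = 'i'; s[2] = '\0'; }
  mi_heap_delete(heap);   // releases heap resources; `s` migrates to the default heap
  return s;               // still valid; release later with mi_free(s)
}
```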
@@ -888,7 +888,7 @@ void mi_free_aligned(void* p, size_t alignment);
///
/// Note: use the `mimalloc-new-delete.h` header to override the \a new
/// and \a delete operators globally. The wrappers here are mostly
/// for convience for library writers that need to interface with
/// for convenience for library writers that need to interface with
/// mimalloc from C++.
///
/// \{
@@ -100,7 +100,7 @@ $(document).ready(function(){initNavTree('bench.html',''); initResizable(); });
<div class="contents">
<div class="textblock"><p>We tested <em>mimalloc</em> against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more extreme circumstances.</p>
<p>In our benchmarks, <em>mimalloc</em> always outperforms all other leading allocators (<em>jemalloc</em>, <em>tcmalloc</em>, <em>Hoard</em>, etc) (Jan 2021), and usually uses less memory (up to 25% more in the worst case). A nice property is that it does <em>consistently</em> well over the wide range of benchmarks.</p>
<p>See the <a href="https://github.com/microsoft/mimalloc#Performance">Performance</a> section in the <em>mimalloc</em> repository for benchmark results, or the the technical report for detailed benchmark results. </p>
<p>See the <a href="https://github.com/microsoft/mimalloc#Performance">Performance</a> section in the <em>mimalloc</em> repository for benchmark results, or the technical report for detailed benchmark results. </p>
</div></div><!-- contents -->
</div><!-- PageDoc -->
</div><!-- doc-content -->
@@ -238,6 +238,7 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\os.c" />

@@ -94,5 +94,8 @@
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

@@ -227,6 +227,7 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\page-queue.c">

@@ -65,6 +65,9 @@
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@@ -238,6 +238,7 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\options.c" />
<ClCompile Include="..\..\src\os.c" />

@@ -55,6 +55,9 @@
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

@@ -219,6 +219,7 @@
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>

@@ -58,6 +58,9 @@
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@@ -240,6 +240,7 @@
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>

@@ -55,6 +55,9 @@
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\include\mimalloc\atomic.h">

@@ -217,8 +217,15 @@
<ClCompile Include="..\..\src\bitmap.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>

@@ -55,6 +55,12 @@
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\libc.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\bitmap.h">
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H

#define MI_MALLOC_VERSION 212 // major + 2 digits minor
#define MI_MALLOC_VERSION 214 // major + 2 digits minor

// ------------------------------------------------------
// Compiler specific attributes

@@ -275,7 +275,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;

mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept;

// Experimental: heaps associated with specific memory arena's
typedef int mi_arena_id_t;
@@ -318,40 +318,43 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size

typedef enum mi_option_e {
// stable options
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// the following options are experimental (see src/options.h)
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1)
mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// advanced options
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit)
mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process.
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_deprecated_segment_cache,
mi_option_deprecated_page_reset,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay,
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all.
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim,
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe.
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit)
mi_option_arena_purge_mult,
mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10)
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%)
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (= 1 GiB on 64-bit)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
mi_option_eager_region_commit = mi_option_arena_eager_commit,
mi_option_reset_decommits = mi_option_purge_decommits,
mi_option_reset_delay = mi_option_purge_delay,
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge,
mi_option_limit_os_alloc = mi_option_disallow_os_alloc
} mi_option_t;
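For reference, a hedged sketch (not part of the header) of tuning the options listed above, either programmatically before the option is first used or through the corresponding `MIMALLOC_` environment variables:

```
#include <mimalloc.h>

void tune_mimalloc(void) {
  mi_option_set(mi_option_purge_delay, 0);             // purge freed memory immediately
  mi_option_enable(mi_option_show_stats);              // print statistics on termination
  mi_option_set(mi_option_reserve_huge_os_pages, 1);   // same effect as MIMALLOC_RESERVE_HUGE_OS_PAGES=1
}
```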
@@ -494,7 +497,7 @@ template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : publi
using typename _mi_stl_allocator_common<T>::value_type;
using typename _mi_stl_allocator_common<T>::pointer;

_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */
_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) {} /* will not delete nor destroy the passed in heap */

#if (__cplusplus >= 201703L) // C++17
mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }

@@ -513,7 +516,7 @@ template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : publi
protected:
std::shared_ptr<mi_heap_t> heap;
template<class U, bool D> friend struct _mi_heap_stl_allocator_common;

_mi_heap_stl_allocator_common() {
mi_heap_t* hp = mi_heap_new();
this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */

@@ -530,7 +533,7 @@ private:
template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
using typename _mi_heap_stl_allocator_common<T, false>::size_type;
mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called
mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap
template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { }

mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }

@@ -547,7 +550,7 @@ template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x,
template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
using typename _mi_heap_stl_allocator_common<T, true>::size_type;
mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called
mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap
template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }

mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
@@ -23,8 +23,10 @@ terms of the MIT license. A copy of the license can be found in the file
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#if (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif

@@ -39,7 +41,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#if (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)

@@ -129,7 +133,9 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
#elif defined(_MSC_VER)

// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64

@@ -323,7 +329,9 @@ static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();
@@ -30,14 +30,17 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#define mi_decl_weak
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#define mi_decl_weak __attribute__((weak))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#define mi_decl_weak
#endif

#if defined(__EMSCRIPTEN__) && !defined(__wasi__)

@@ -88,7 +91,7 @@ void _mi_thread_data_collect(void);

// os.c
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);

@@ -122,9 +125,21 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id);
bool _mi_arena_contains(const void* p);
void _mi_arena_collect(bool force_purge, mi_stats_t* stats);
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);

bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
size_t _mi_arena_segment_abandoned_count(void);

typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
size_t bitmap_idx;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);

// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);

@@ -134,7 +149,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
void _mi_segment_collect(mi_segment_t* segment, bool force, mi_segments_tld_t* tld);

#if MI_HUGE_PAGE_ABANDON
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);

@@ -146,6 +161,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t*
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);

// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;

@@ -187,19 +203,22 @@ void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);

// option.c, c primitives
// "libc.c"
#include <stdarg.h>
void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args);
void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...);
char _mi_toupper(char c);
int _mi_strnicmp(const char* s, const char* t, size_t n);
void _mi_strlcpy(char* dest, const char* src, size_t dest_size);
void _mi_strlcat(char* dest, const char* src, size_t dest_size);
size_t _mi_strlen(const char* s);
size_t _mi_strnlen(const char* s, size_t max_len);

bool _mi_getenv(const char* name, char* result, size_t result_size);

#if MI_DEBUG>1
bool _mi_page_is_valid(mi_page_t* page);

@@ -308,6 +327,17 @@ static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
}
}

// Align a pointer upwards
static inline void* mi_align_up_ptr(void* p, size_t alignment) {
return (void*)_mi_align_up((uintptr_t)p, alignment);
}

// Align a pointer downwards
static inline void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}

// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
mi_assert_internal(divider != 0);

@@ -407,9 +437,14 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
mi_assert_internal(p != NULL);
return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
#if MI_INTPTR_SIZE <= 4
return (p==NULL ? NULL : segment);
#else
return ((intptr_t)segment <= 0 ? NULL : segment);
#endif
}

static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
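An illustrative sketch (not part of the header) of why `_mi_ptr_segment` masks one byte before `p`: an interior pointer maps to its own segment start, while a pointer that sits exactly on a segment boundary (as a large aligned block inside a huge segment can) maps to the segment before it. It assumes the 64-bit default of a 32 MiB segment size.

```
#include <assert.h>
#include <stdint.h>

int main(void) {
  const uintptr_t seg_size = 32u * 1024 * 1024;          // MI_SEGMENT_SIZE (64-bit default)
  const uintptr_t seg_mask = seg_size - 1;               // MI_SEGMENT_MASK
  const uintptr_t base     = 3 * seg_size;               // some segment start
  const uintptr_t inside   = base + 4096;                // pointer inside the segment
  const uintptr_t boundary = base + seg_size;            // block aligned exactly at the next boundary
  assert(((inside   - 1) & ~seg_mask) == base);          // interior pointer -> its segment
  assert(((boundary - 1) & ~seg_mask) == base);          // boundary pointer -> the segment before it
  assert(( boundary      & ~seg_mask) != base);          // plain masking would pick the wrong segment
  return 0;
}
```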
@@ -424,7 +459,8 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {

// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(page!=NULL);
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
return segment;
}

@@ -452,31 +488,28 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
}

// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
return _mi_segment_page_start(segment, page, page_size);
static inline uint8_t* mi_page_start(const mi_page_t* page) {
mi_assert_internal(page->page_start != NULL);
mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
return page->page_start;
}

// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
mi_assert_internal(p!=NULL);
return _mi_segment_page_of(_mi_ptr_segment(p), p);
}

// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0);
if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
return bsize;
}
else {
size_t psize;
_mi_segment_page_start(_mi_page_segment(page), page, &psize);
return psize;
}
mi_assert_internal(page->block_size > 0);
return page->block_size;
}

static inline bool mi_page_is_huge(const mi_page_t* page) {
return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
mi_assert_internal((page->is_huge && _mi_page_segment(page)->kind == MI_SEGMENT_HUGE) ||
(!page->is_huge && _mi_page_segment(page)->kind != MI_SEGMENT_HUGE));
return page->is_huge;
}

// Get the usable block size of a page without fixed padding.

@@ -726,12 +759,12 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);

#define mi_commit_mask_foreach(cm,idx,count) \
idx = 0; \
while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {

#define mi_commit_mask_foreach_end() \
idx += count; \
}

/* -----------------------------------------------------------
@@ -35,10 +35,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config );

// Free OS memory
int _mi_prim_free(void* addr, size_t size );

// Allocate OS memory. Return NULL on error.
// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// which will later be committed explicitly using `_mi_prim_commit`.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// pre: !commit => !allow_large

@@ -82,11 +82,11 @@ mi_msecs_t _mi_prim_clock_now(void);
typedef struct mi_process_info_s {
mi_msecs_t elapsed;
mi_msecs_t utime;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t page_faults;
} mi_process_info_t;

@@ -117,7 +117,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);

//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
//
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms as
// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.

@@ -125,33 +125,23 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// for each thread (unequal to zero).
//-------------------------------------------------------------------

// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?

static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;

#if defined(_WIN32)

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}

// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
#elif defined(__GNUC__) && ( \
//
// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays.
// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there).
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)

#define MI_HAS_TLS_SLOT

static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
const size_t ofs = (slot*sizeof(void*));

@@ -175,6 +165,9 @@ static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
res = tcb[slot];
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
res = pthread_getspecific(slot);
#endif
return res;
}

@@ -202,9 +195,53 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
tcb[slot] = value;
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
pthread_setspecific(slot, value);
#endif
}

#endif

// Do we have __builtin_thread_pointer? (do not make this a compound test as it fails on older gcc's, see issue #851)
#if defined(__has_builtin)
#if __has_builtin(__builtin_thread_pointer)
#define MI_HAS_BUILTIN_THREAD_POINTER 1
#endif
#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851)
#define MI_HAS_BUILTIN_THREAD_POINTER 1
#endif

// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?

static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;

// Get a unique id for the current thread.
#if defined(_WIN32)

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}

#elif MI_HAS_BUILTIN_THREAD_POINTER && \
(!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
(!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)

static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms
return (uintptr_t)__builtin_thread_pointer();
}

#elif defined(MI_HAS_TLS_SLOT)

static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id

@@ -251,7 +288,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)

@@ -269,6 +305,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);

#if defined(MI_TLS_SLOT)
# if !defined(MI_HAS_TLS_SLOT)
# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined"
# endif

static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
|
||||
#define MI_TRACK_HEAP_DESTROY 1
|
||||
#define MI_TRACK_TOOL "ETW"
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include "../src/prim/windows/etw.h"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
@ -13,9 +13,12 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
// mi_heap_t : all data for a thread-local heap, contains
|
||||
// lists of all managed heap pages.
|
||||
// mi_segment_t : a larger chunk of memory (32MiB) from where pages
|
||||
// are allocated.
|
||||
// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
|
||||
// are allocated. A segment is divided in slices (64KiB) from
|
||||
// which pages are allocated.
|
||||
// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from
|
||||
// where objects are allocated.
|
||||
// Note: we write "OS page" for OS memory pages while
|
||||
// using plain "page" for mimalloc pages (`mi_page_t`).
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -89,10 +92,11 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#endif
|
||||
|
||||
|
||||
// We used to abandon huge pages but to eagerly deallocate if freed from another thread,
|
||||
// but that makes it not possible to visit them during a heap walk or include them in a
|
||||
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
|
||||
// another thread so most memory is available until it gets properly freed by the owning thread.
|
||||
// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
|
||||
// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a
|
||||
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
|
||||
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
|
||||
// the owning thread).
|
||||
// #define MI_HUGE_PAGE_ABANDON 1
|
||||
|
||||
|
||||
@ -157,17 +161,24 @@ typedef int32_t mi_ssize_t;

// Main tuning parameters for segment and page sizes
// Sizes for 64-bit (usually divide by two for 32-bit)
#ifndef MI_SEGMENT_SLICE_SHIFT
#define MI_SEGMENT_SLICE_SHIFT        (13 + MI_INTPTR_SHIFT)         // 64KiB  (32KiB on 32-bit)
#endif

#ifndef MI_SEGMENT_SHIFT
#if MI_INTPTR_SIZE > 4
#define MI_SEGMENT_SHIFT              ( 9 + MI_SEGMENT_SLICE_SHIFT)  // 32MiB
#else
#define MI_SEGMENT_SHIFT              ( 7 + MI_SEGMENT_SLICE_SHIFT)  // 4MiB on 32-bit
#endif
#endif

#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT           (MI_SEGMENT_SLICE_SHIFT)       // 64KiB
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT          ( 3 + MI_SMALL_PAGE_SHIFT)     // 512KiB
#endif

// Derived constants
#define MI_SEGMENT_SIZE               (MI_ZU(1)<<MI_SEGMENT_SHIFT)
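
To make the arithmetic concrete, the sketch below spells out the derived sizes on a typical 64-bit target where `MI_INTPTR_SHIFT == 3` (illustrative only, not part of the header; the numbers match the comments above):

```c
// Illustrative only: derived sizes on a 64-bit target (MI_INTPTR_SHIFT == 3).
#include <assert.h>

#define SLICE_SHIFT      (13 + 3)                 // MI_SEGMENT_SLICE_SHIFT -> 16
#define SEGMENT_SHIFT    ( 9 + SLICE_SHIFT)       // MI_SEGMENT_SHIFT       -> 25
#define SLICE_SIZE       (1ULL << SLICE_SHIFT)    // 64 KiB per slice
#define SEGMENT_SIZE     (1ULL << SEGMENT_SHIFT)  // 32 MiB per segment (512 slices)
#define MEDIUM_PAGE_SIZE (1ULL << (3 + SLICE_SHIFT))  // medium page = 8 slices = 512 KiB

static_assert(SLICE_SIZE       ==       64*1024, "one slice is 64KiB (also the small page size)");
static_assert(SEGMENT_SIZE     == 32*1024*1024,  "a segment is 32MiB, i.e. 512 slices");
static_assert(MEDIUM_PAGE_SIZE ==      512*1024, "a medium page spans 8 slices");
```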
|
||||
@ -181,7 +192,7 @@ typedef int32_t mi_ssize_t;
|
||||
|
||||
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit
|
||||
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit
|
||||
#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
|
||||
#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
|
||||
#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit
|
||||
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
|
||||
|
||||
@ -192,17 +203,22 @@ typedef int32_t mi_ssize_t;
|
||||
#error "mimalloc internal: define more bins"
|
||||
#endif
|
||||
|
||||
// Maximum slice offset (15)
|
||||
#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
|
||||
|
||||
// Used as a special value to encode block sizes in 32 bits.
|
||||
#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
|
||||
|
||||
// blocks up to this size are always allocated aligned
|
||||
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
|
||||
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
|
||||
|
||||
// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
|
||||
#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
|
||||
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
|
||||
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
|
||||
|
||||
// Maximum slice count (255) for which we can find the page for interior pointers
|
||||
#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
|
||||
|
||||
// we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
// on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877)
#if PTRDIFF_MAX >= (MI_SEGMENT_SLICE_SIZE * UINT32_MAX)
#define MI_MAX_ALLOC_SIZE   (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1))
#else
#define MI_MAX_ALLOC_SIZE   PTRDIFF_MAX
#endif
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
@ -227,7 +243,7 @@ typedef enum mi_delayed_e {
|
||||
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
|
||||
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
|
||||
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
|
||||
MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim
|
||||
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
|
||||
} mi_delayed_t;
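
These delayed states are not stored as a separate field: mimalloc packs them into the low two bits of the per-page atomic `thread_free` pointer (the `mi_thread_free_t` word described below). A minimal sketch of that packing, in the spirit of the `mi_tf_*` helpers in `internal.h` (names and exact signatures here are illustrative):

```c
// Sketch: `mi_thread_free_t` is a plain uintptr_t that packs a block pointer
// with a 2-bit mi_delayed_t. Blocks are pointer-aligned, so the low bits are free.
static inline mi_delayed_t     tf_delayed(mi_thread_free_t tf) { return (mi_delayed_t)(tf & 0x03); }
static inline mi_block_t*      tf_block(mi_thread_free_t tf)   { return (mi_block_t*)(tf & ~(uintptr_t)0x03); }
static inline mi_thread_free_t tf_make(mi_block_t* block, mi_delayed_t delayed) {
  return ((mi_thread_free_t)block | (mi_thread_free_t)delayed);
}
```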
|
||||
|
||||
|
||||
@ -266,7 +282,6 @@ typedef uintptr_t mi_thread_free_t;
|
||||
// implement a monotonic heartbeat. The `thread_free` list is needed for
|
||||
// avoiding atomic operations in the common case.
|
||||
//
|
||||
//
|
||||
// `used - |thread_free|` == actual blocks that are in use (alive)
|
||||
// `used - |thread_free| + |free| + |local_free| == capacity`
|
||||
//
|
||||
@ -274,16 +289,13 @@ typedef uintptr_t mi_thread_free_t;
|
||||
// the number of memory accesses in the `mi_page_all_free` function(s).
|
||||
//
|
||||
// Notes:
|
||||
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
|
||||
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
|
||||
// - Using `uint16_t` does not seem to slow things down
|
||||
// - The size is 8 words on 64-bit which helps the page index calculations
|
||||
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
|
||||
// and 12 are still good for address calculation)
|
||||
// - To limit the structure size, the `xblock_size` is 32-bits only; for
|
||||
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
|
||||
// - `thread_free` uses the bottom bits as delayed-free flags to optimize
// - The size is 12 words on 64-bit which helps the page index calculations
// (and 14 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as delayed-free flags to optimize
|
||||
// concurrent frees where only the first concurrent free adds to the owning
|
||||
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
|
||||
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
|
||||
// The invariant is that no-delayed-free is only set if there is
|
||||
// at least one block that will be added, or has already been added, to
|
||||
// the owning heap `thread_delayed_free` list. This guarantees that pages
|
||||
@ -291,21 +303,25 @@ typedef uintptr_t mi_thread_free_t;
|
||||
typedef struct mi_page_s {
|
||||
// "owned" by the segment
|
||||
uint32_t slice_count; // slices in this page (0 if not a page)
|
||||
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
|
||||
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
|
||||
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized
|
||||
|
||||
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
|
||||
uint8_t is_committed:1; // `true` if the page virtual memory is committed
|
||||
uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
|
||||
uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`)
|
||||
// padding
|
||||
// layout like this to optimize access in `mi_malloc` and `mi_free`
|
||||
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
|
||||
uint16_t reserved; // number of blocks reserved in memory
|
||||
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
|
||||
uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized
|
||||
uint8_t retire_expire : 7; // expiration count for retired blocks
|
||||
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
|
||||
uint8_t retire_expire:7; // expiration count for retired blocks
|
||||
|
||||
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
|
||||
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
|
||||
uint32_t xblock_size; // size available in each block (always `>0`)
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
|
||||
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
|
||||
// padding
|
||||
size_t block_size; // size available in each block (always `>0`)
|
||||
uint8_t* page_start; // start of the page area containing the blocks
|
||||
|
||||
#if (MI_ENCODE_FREELIST || MI_PADDING)
|
||||
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
|
||||
@ -317,10 +333,8 @@ typedef struct mi_page_s {
|
||||
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
|
||||
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
|
||||
#if MI_INTPTR_SIZE==8
|
||||
uintptr_t padding[1];
|
||||
#endif
|
||||
// 64-bit 11 words, 32-bit 13 words, (+2 for secure)
|
||||
void* padding[1];
|
||||
} mi_page_t;
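
Note the `block_size_shift` field: when the block size is a power of two, finding the start of the block that contains an interior pointer needs only a mask instead of a division. A simplified sketch of how `free.c:_mi_page_ptr_unalign` can use it (field names as in the struct above, otherwise illustrative):

```c
// Sketch: given an interior pointer `p` into `page`, find the start of its block.
static mi_block_t* page_ptr_unalign(const mi_page_t* page, const void* p) {
  const size_t diff = (size_t)((const uint8_t*)p - page->page_start);
  const size_t adjust = (page->block_size_shift != 0)
      ? (diff & (((size_t)1 << page->block_size_shift) - 1))   // power-of-two block size: mask only
      : (diff % page->block_size);                             // general case: one division
  return (mi_block_t*)((const uint8_t*)p - adjust);
}
```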
|
||||
|
||||
|
||||
@ -331,21 +345,22 @@ typedef struct mi_page_s {
|
||||
|
||||
typedef enum mi_page_kind_e {
|
||||
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
|
||||
MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
|
||||
MI_PAGE_LARGE, // larger blocks go into a page of just one block
|
||||
MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment.
|
||||
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
|
||||
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
|
||||
MI_PAGE_HUGE // a huge page is a single page in a segment of variable size
|
||||
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
|
||||
} mi_page_kind_t;
|
||||
|
||||
typedef enum mi_segment_kind_e {
|
||||
MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
|
||||
MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside.
|
||||
MI_SEGMENT_HUGE, // segment with just one huge page inside.
|
||||
} mi_segment_kind_t;
|
||||
|
||||
// ------------------------------------------------------
|
||||
// A segment holds a commit mask where a bit is set if
|
||||
// the corresponding MI_COMMIT_SIZE area is committed.
|
||||
// The MI_COMMIT_SIZE must be a multiple of the slice
|
||||
// size. If it is equal we have the most fine grained
|
||||
// size. If it is equal we have the most fine grained
|
||||
// decommit (but setting it higher can be more efficient).
|
||||
// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will
|
||||
// be committed in one go which can be set higher than
|
||||
@ -353,9 +368,9 @@ typedef enum mi_segment_kind_e {
|
||||
// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
|
||||
// ------------------------------------------------------
|
||||
|
||||
#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE)
|
||||
#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE)
|
||||
#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
|
||||
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
|
||||
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
|
||||
#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
|
||||
#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
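
Concretely, a commit mask is just `MI_COMMIT_MASK_FIELD_COUNT` machine words of bits, one bit per `MI_COMMIT_SIZE` chunk of a segment. A minimal sketch of the representation and of marking a range of chunks as committed (helper name illustrative; the real `mi_commit_mask_*` operations are more optimized):

```c
// Sketch: one bit per MI_COMMIT_SIZE chunk, stored in an array of size_t fields.
typedef struct commit_mask_s {
  size_t mask[MI_COMMIT_MASK_FIELD_COUNT];
} commit_mask_t;

// Mark chunks [idx, idx+count) as committed.
static void commit_mask_set_range(commit_mask_t* cm, size_t idx, size_t count) {
  while (count > 0) {
    const size_t field = idx / MI_COMMIT_MASK_FIELD_BITS;
    const size_t bit   = idx % MI_COMMIT_MASK_FIELD_BITS;
    cm->mask[field] |= ((size_t)1 << bit);
    idx++; count--;
  }
}
```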
|
||||
|
||||
@ -371,13 +386,17 @@ typedef mi_page_t mi_slice_t;
|
||||
typedef int64_t mi_msecs_t;
|
||||
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// a memory id tracks the provenance of arena/OS allocated memory
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// Memory can reside in arenas, be allocated directly from the OS, or be statically allocated. The memid keeps track of this.
|
||||
typedef enum mi_memkind_e {
|
||||
MI_MEM_NONE, // not allocated
|
||||
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
|
||||
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
|
||||
MI_MEM_OS, // allocated from the OS
|
||||
MI_MEM_OS_HUGE, // allocated as huge os pages
|
||||
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
|
||||
MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
|
||||
MI_MEM_ARENA // allocated from an arena (the usual case)
|
||||
} mi_memkind_t;
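
A small predicate in this style is convenient for testing whether memory ultimately came from the OS (the actual header provides a similar helper, `mi_memkind_is_os`, if I recall correctly; treat the exact form below as a sketch):

```c
// Sketch: the OS-backed kinds are grouped together in the enum.
static inline bool memkind_is_os(mi_memkind_t memkind) {
  return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
```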
|
||||
@ -394,7 +413,7 @@ typedef struct mi_memid_os_info {
|
||||
typedef struct mi_memid_arena_info {
|
||||
size_t block_index; // index in the arena
|
||||
mi_arena_id_t id; // arena id (>= 1)
|
||||
bool is_exclusive; // the arena can only be used for specific arena allocations
|
||||
bool is_exclusive; // this arena can only be used for specific arena allocations
|
||||
} mi_memid_arena_info_t;
|
||||
|
||||
typedef struct mi_memid_s {
|
||||
@ -402,47 +421,56 @@ typedef struct mi_memid_s {
|
||||
mi_memid_os_info_t os; // only used for MI_MEM_OS
|
||||
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
|
||||
} mem;
|
||||
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
|
||||
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
|
||||
bool initially_committed;// `true` if the memory was originally allocated as committed
|
||||
bool initially_zero; // `true` if the memory was originally zero initialized
|
||||
mi_memkind_t memkind;
|
||||
} mi_memid_t;
|
||||
|
||||
|
||||
// Segments are large allocated memory blocks (8mb on 64 bit) from
// the OS. Inside segments we allocate fixed-size _pages_ that
// contain blocks.
// -----------------------------------------------------------------------------------------
// Segments are large allocated memory blocks (32MiB on 64-bit) from arenas or the OS.
//
// Inside segments we allocate fixed-size mimalloc pages (`mi_page_t`) that contain blocks.
// The start of a segment is this structure with a fixed number of slice entries (`slices`),
// usually followed by a guard OS page and the actual allocation area with pages.
// While a page is not allocated, we view its data as a `mi_slice_t` (instead of a `mi_page_t`).
// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent
// slices that are part of the area, the `slice_offset` is the byte offset back to the first slice
// (so we can quickly find the page info on a free, see `internal.h:_mi_segment_page_of`).
// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`).
// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while
// large and huge pages span a variable amount of slices.
|
||||
typedef struct mi_segment_s {
|
||||
// constant fields
|
||||
mi_memid_t memid; // memory id for arena allocation
|
||||
bool allow_decommit;
|
||||
bool allow_purge;
|
||||
mi_memid_t memid; // memory id for arena/OS allocation
|
||||
bool allow_decommit; // can we decommit the memory
|
||||
bool allow_purge; // can we purge the memory (reset or decommit)
|
||||
size_t segment_size;
|
||||
|
||||
// segment fields
|
||||
mi_msecs_t purge_expire;
|
||||
mi_commit_mask_t purge_mask;
|
||||
mi_commit_mask_t commit_mask;
|
||||
|
||||
_Atomic(struct mi_segment_s*) abandoned_next;
|
||||
mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time
|
||||
mi_commit_mask_t purge_mask; // slices that can be purged
|
||||
mi_commit_mask_t commit_mask; // slices that are currently committed
|
||||
|
||||
// from here is zero initialized
|
||||
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
|
||||
|
||||
bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation)
|
||||
|
||||
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
|
||||
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is there too long)
size_t abandoned_visits; // count how often this segment is visited during abandoned reclamation (to force reclaim if it takes too long)
|
||||
size_t used; // count of pages in use
|
||||
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
|
||||
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
|
||||
|
||||
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
|
||||
size_t segment_info_slices; // initial slices we are using segment info and possible guard pages.
|
||||
size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages.
|
||||
|
||||
// layout like this to optimize access in `mi_free`
|
||||
mi_segment_kind_t kind;
|
||||
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
|
||||
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
|
||||
|
||||
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
|
||||
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment
|
||||
} mi_segment_t;
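
Because segments are allocated at `MI_SEGMENT_SIZE`-aligned addresses, the owning segment of any block is found by masking the pointer, and the containing page follows from the slice index and its `slice_offset` back-pointer. A simplified sketch of this lookup (the real versions are `_mi_ptr_segment` and `_mi_segment_page_of` in `internal.h`):

```c
// Sketch: find the segment and the mimalloc page that contain pointer `p`.
static mi_segment_t* ptr_segment(const void* p) {
  // segments are MI_SEGMENT_SIZE aligned, so masking the low bits finds the segment start
  return (mi_segment_t*)((uintptr_t)p & ~((uintptr_t)MI_SEGMENT_SIZE - 1));
}

static mi_page_t* segment_page_of(mi_segment_t* segment, const void* p) {
  const size_t diff = (size_t)((const uint8_t*)p - (const uint8_t*)segment);
  const size_t idx  = diff >> MI_SEGMENT_SLICE_SHIFT;           // slice index of `p` within the segment
  mi_slice_t* slice = &segment->slices[idx];
  slice = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); // walk back to the first slice of the page
  return (mi_page_t*)slice;                                     // the first slice doubles as the page info
}
```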
|
||||
|
||||
|
||||
@ -499,11 +527,9 @@ typedef struct mi_padding_s {
|
||||
// A heap owns a set of pages.
|
||||
struct mi_heap_s {
|
||||
mi_tld_t* tld;
|
||||
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
|
||||
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
|
||||
_Atomic(mi_block_t*) thread_delayed_free;
|
||||
mi_threadid_t thread_id; // thread this heap belongs to
|
||||
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
|
||||
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
|
||||
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
|
||||
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
|
||||
mi_random_ctx_t random; // random number context used for secure allocation
|
||||
@ -512,6 +538,8 @@ struct mi_heap_s {
|
||||
size_t page_retired_max; // largest retired index into the `pages` array.
|
||||
mi_heap_t* next; // list of heaps per thread
|
||||
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
|
||||
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
|
||||
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
|
||||
};
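
The `pages_free_direct` array is what keeps the small-object `malloc` path short: the requested size, rounded up to words, indexes directly to a page that likely has a free block, so no bin search is needed. A sketch of that lookup (illustrative; the real helper is `_mi_heap_get_free_small_page`):

```c
// Sketch: O(1) lookup of a candidate page for a small allocation of `size` bytes.
static inline mi_page_t* heap_get_free_small_page(mi_heap_t* heap, size_t size) {
  const size_t wsize = (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);  // round up to words
  // the caller guarantees wsize < MI_PAGES_DIRECT for the small-object path
  return heap->pages_free_direct[wsize];
}
```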
|
||||
|
||||
|
||||
@ -600,6 +628,9 @@ typedef struct mi_stats_s {
|
||||
mi_stat_counter_t normal_count;
|
||||
mi_stat_counter_t huge_count;
|
||||
mi_stat_counter_t large_count;
|
||||
mi_stat_counter_t arena_count;
|
||||
mi_stat_counter_t arena_crossover_count;
|
||||
mi_stat_counter_t arena_rollback_count;
|
||||
#if MI_STAT>1
|
||||
mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
|
||||
#endif
|
||||
@ -624,6 +655,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
|
||||
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
|
||||
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Thread Local data
|
||||
// ------------------------------------------------------
|
||||
@ -652,6 +684,7 @@ typedef struct mi_segments_tld_s {
|
||||
size_t peak_count; // peak number of segments
|
||||
size_t current_size; // current size of all segments
|
||||
size_t peak_size; // peak size of all segments
|
||||
size_t reclaim_count;// number of reclaimed (abandoned) segments
|
||||
mi_stats_t* stats; // points to tld stats
|
||||
mi_os_tld_t* os; // points to os stats
|
||||
} mi_segments_tld_t;
|
||||
|
101
readme.md
@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
|
||||
Initially developed by Daan Leijen for the runtime systems of the
|
||||
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
|
||||
|
||||
Latest release tag: `v2.1.2` (2023-04-24).
|
||||
Latest stable tag: `v1.8.2` (2023-04-24).
|
||||
Latest release tag: `v2.1.4` (2024-04-22).
|
||||
Latest stable tag: `v1.8.4` (2024-04-22).
|
||||
|
||||
mimalloc is a drop-in replacement for `malloc` and can be used in other programs
|
||||
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
|
||||
@ -29,6 +29,8 @@ It also includes a robust way to override the default allocator in [Windows](#ov
|
||||
bounded worst-case times with reference counting).
|
||||
Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS,
|
||||
Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding.
|
||||
At the same time, it is an industrial strength allocator that runs (very) large scale
|
||||
distributed services on thousands of machines with excellent worst case latencies.
|
||||
- __free list sharding__: instead of one big free list (per size class) we have
|
||||
many smaller lists per "mimalloc page" which reduces fragmentation and
|
||||
increases locality --
|
||||
@ -78,6 +80,16 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
|
||||
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
|
||||
(see [below](#performance)); please report if you observe any significant performance regression.
|
||||
|
||||
* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
|
||||
Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
|
||||
directly available (and new `block_size_shift` to improve aligned block free-ing).
|
||||
New approach to collection of abandoned segments: When
|
||||
a thread terminates the segments it owns are abandoned (containing still live objects) and these can be
|
||||
reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arenas,
|
||||
which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in
|
||||
an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim`
|
||||
gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%).
|
||||
|
||||
* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
|
||||
by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory
|
||||
usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.
|
||||
@ -89,7 +101,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
|
||||
abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes.
|
||||
|
||||
* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support.
|
||||
Support abitrary large alignments (in particular for `std::pmr` pools).
|
||||
Support arbitrary large alignments (in particular for `std::pmr` pools).
|
||||
Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
|
||||
Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). Various small bug fixes.
|
||||
|
||||
@ -141,7 +153,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
|
||||
|
||||
## Windows
|
||||
|
||||
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
|
||||
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
|
||||
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
|
||||
`mimalloc-override` project builds a DLL for overriding malloc
|
||||
in the entire program.
|
||||
@ -221,7 +233,7 @@ target_link_libraries(myapp PUBLIC mimalloc-static)
|
||||
to link with the static library. See `test\CMakeLists.txt` for an example.
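
Once linked, the `mi_` prefixed API can also be called directly without any overriding; a tiny sketch:

```c
#include <mimalloc.h>

int main(void) {
  void*  p      = mi_malloc(64);        // mimalloc's malloc
  p             = mi_realloc(p, 128);
  size_t usable = mi_usable_size(p);    // at least 128
  (void)usable;
  mi_free(p);
  return 0;
}
```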
|
||||
|
||||
For best performance in C++ programs, it is also recommended to override the
|
||||
global `new` and `delete` operators. For convience, mimalloc provides
|
||||
global `new` and `delete` operators. For convenience, mimalloc provides
|
||||
[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
|
||||
In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
|
||||
interface.
|
||||
@ -277,17 +289,23 @@ You can set further options either programmatically (using [`mi_option_set`](htt
|
||||
|
||||
Advanced options:
|
||||
|
||||
- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
|
||||
allocates segments and pages. Set this to 2 (default) to
|
||||
only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems
|
||||
as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once).
|
||||
Note that eager commit only increases the commit but not the actual peak resident set
|
||||
(rss) so it is generally ok to enable this.
|
||||
- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge
|
||||
OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which
|
||||
can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when
|
||||
a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher
|
||||
value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times.
|
||||
Setting it to `-1` disables purging completely.
|
||||
- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
|
||||
allocates segments and pages. This is by default
|
||||
only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS)
|
||||
may improve performance. Note that eager commit only increases the commit but not the actual peak resident set
|
||||
(rss) so it is generally ok to enable this.
|
||||
Setting it to `-1` disables purging completely.
|
||||
- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows,
|
||||
`MADV_DONTNEED` (which decreases rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused
|
||||
memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems).
|
||||
Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual
|
||||
address ranges and decommits within those ranges (to make the underlying physical memory available to other processes).
|
||||
|
||||
Further options for large workloads and services:
|
||||
|
||||
@ -295,9 +313,10 @@ Further options for large workloads and services:
|
||||
at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
|
||||
the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
|
||||
nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
|
||||
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
|
||||
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
|
||||
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
|
||||
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly
|
||||
improve performance. When this option is disabled, it also disables transparent huge pages (THP) for the process
|
||||
(on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
|
||||
to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
|
||||
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
|
||||
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
|
||||
@ -306,11 +325,12 @@ Further options for large workloads and services:
|
||||
OS pages, use with care as reserving
|
||||
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
|
||||
startup only once).
|
||||
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
|
||||
Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
|
||||
With huge OS pages, it may be beneficial to set the setting
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
|
||||
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
|
||||
and allocate just a little to take up space in the huge OS page area (which cannot be purged).
|
||||
and allocate just a little to take up space in the huge OS page area (which cannot be purged as huge OS pages are pinned
|
||||
to physical memory).
|
||||
The huge pages are usually allocated evenly among NUMA nodes.
|
||||
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
|
||||
the huge pages at a specific numa node instead.
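
These variables can also be set programmatically before the first allocation via the options API in `mimalloc.h`; a sketch (the option names below are my reading of the settings described here, check the `mi_option_t` enum of your mimalloc version):

```c
#include <mimalloc.h>

int main(void) {
  // Equivalent in spirit to the environment variables above (set before allocating).
  mi_option_set(mi_option_purge_delay, 100);           // MIMALLOC_PURGE_DELAY=100
  mi_option_enable(mi_option_allow_large_os_pages);    // MIMALLOC_ALLOW_LARGE_OS_PAGES=1
  mi_option_set(mi_option_reserve_huge_os_pages, 2);   // MIMALLOC_RESERVE_HUGE_OS_PAGES=2
  // ... application code ...
  return 0;
}
```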
|
||||
@ -391,32 +411,41 @@ the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-i
|
||||
|
||||
### Dynamic Override on Windows
|
||||
|
||||
<span id="override_on_windows">Overriding on Windows</span> is robust and has the
|
||||
particular advantage to be able to redirect all malloc/free calls that go through
|
||||
<span id="override_on_windows">Dynamically overriding on mimalloc on Windows</span>
|
||||
is robust and has the particular advantage to be able to redirect all malloc/free calls that go through
|
||||
the (dynamic) C runtime allocator, including those from other DLL's or libraries.
|
||||
As it intercepts all allocation calls on a low level, it can be used reliably
|
||||
on large programs that include other 3rd party components.
|
||||
There are four requirements to make the overriding work robustly:
|
||||
|
||||
The overriding on Windows requires that you link your program explicitly with
|
||||
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
|
||||
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
|
||||
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
|
||||
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
|
||||
mimalloc (in `mimalloc-override.dll`).
|
||||
1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
|
||||
2. Link your program explicitly with `mimalloc-override.dll` library.
|
||||
To ensure the `mimalloc-override.dll` is loaded at run-time it is easiest to insert some
|
||||
call to the mimalloc API in the `main` function, like `mi_version()`
|
||||
(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
|
||||
for an example on how to use this.
|
||||
3. The [`mimalloc-redirect.dll`](bin) (or `mimalloc-redirect32.dll`) must be put
|
||||
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency of that DLL).
|
||||
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
|
||||
mimalloc functions (which reside in `mimalloc-override.dll`).
|
||||
4. Ensure the `mimalloc-override.dll` comes as early as possible in the import
|
||||
list of the final executable (so it can intercept all potential allocations).
|
||||
|
||||
To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
|
||||
call to the mimalloc API in the `main` function, like `mi_version()`
|
||||
(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
|
||||
for an example on how to use this. For best performance on Windows with C++, it
|
||||
For best performance on Windows with C++, it
|
||||
is also recommended to also override the `new`/`delete` operations (by including
|
||||
[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project).
|
||||
[`mimalloc-new-delete.h`](include/mimalloc-new-delete.h)
|
||||
a single(!) source file in your project).
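
For example, a minimal `main` that forces the mimalloc DLL to be loaded could look like this (sketch):

```c
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Any mimalloc call works; mi_version() is cheap and keeps the DLL from being optimized away.
  printf("mimalloc version: %d\n", mi_version());
  // ... the rest of the program now uses the redirected malloc/free ...
  return 0;
}
```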
|
||||
|
||||
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
|
||||
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
|
||||
|
||||
(Note: in principle, it is possible to even patch existing executables without any recompilation
|
||||
We cannot always re-link an executable with `mimalloc-override.dll`, and similarly, we cannot always
|
||||
ensure that the DLL comes first in the import table of the final executable.
|
||||
In many cases though we can patch existing executables without any recompilation
|
||||
if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
|
||||
into the import table (and put `mimalloc-redirect.dll` in the same folder)
|
||||
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
|
||||
|
||||
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388) or
|
||||
the [`minject`](bin) program.
|
||||
|
||||
## Static override
|
||||
|
||||
@ -438,7 +467,7 @@ This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimallo
|
||||
under your control or otherwise mixing of pointers from different heaps may occur!
|
||||
|
||||
|
||||
## Tools
|
||||
# Tools
|
||||
|
||||
Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc
|
||||
can also be built to support the [address sanitizer][asan] or the excellent [Valgrind] tool.
|
||||
@ -446,7 +475,7 @@ Moreover, it can be built to support Windows event tracing ([ETW]).
|
||||
This has a small performance overhead but does allow detecting memory leaks and byte-precise
|
||||
buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools.
|
||||
|
||||
### Valgrind
|
||||
## Valgrind
|
||||
|
||||
To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option:
|
||||
|
||||
@ -480,7 +509,7 @@ Valgrind support is in its initial development -- please report any issues.
|
||||
[Valgrind]: https://valgrind.org/
|
||||
[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms
|
||||
|
||||
### ASAN
|
||||
## ASAN
|
||||
|
||||
To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option:
|
||||
|
||||
@ -509,7 +538,7 @@ Address sanitizer support is in its initial development -- please report any issues
|
||||
|
||||
[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer
|
||||
|
||||
### ETW
|
||||
## ETW
|
||||
|
||||
Event tracing for Windows ([ETW]) provides a high-performance way to capture all allocations through
|
||||
mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option.
|
||||
|
@ -33,7 +33,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
||||
|
||||
void* p;
|
||||
size_t oversize;
|
||||
if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
|
||||
if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
|
||||
// use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
|
||||
// This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
|
||||
// first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
|
||||
@ -47,7 +47,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
||||
oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
|
||||
p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
|
||||
// zero afterwards as only the area from the aligned_p may be committed!
|
||||
if (p == NULL) return NULL;
|
||||
if (p == NULL) return NULL;
|
||||
}
|
||||
else {
|
||||
// otherwise over-allocate
|
||||
@ -69,13 +69,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
||||
// todo: expand padding if overallocated ?
|
||||
|
||||
mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
|
||||
mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
|
||||
mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p));
|
||||
mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
|
||||
mi_assert_internal(mi_usable_size(aligned_p)>=size);
|
||||
mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
|
||||
|
||||
|
||||
// now zero the block if needed
|
||||
if (alignment > MI_ALIGNMENT_MAX) {
|
||||
if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
|
||||
// for the tracker, on huge aligned allocations only from the start of the large block is defined
|
||||
mi_track_mem_undefined(aligned_p, size);
|
||||
if (zero) {
|
||||
@ -85,7 +85,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
||||
|
||||
if (p != aligned_p) {
|
||||
mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
|
||||
}
|
||||
}
|
||||
return aligned_p;
|
||||
}
|
||||
|
||||
@ -159,7 +159,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap,
|
||||
|
||||
// ensure a definition is emitted
|
||||
#if defined(__cplusplus)
|
||||
static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
|
||||
void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------
|
||||
|
@ -23,7 +23,7 @@ mi_decl_externc size_t malloc_good_size(size_t size);
|
||||
#endif
|
||||
|
||||
// helper definition for C override of C++ new
|
||||
typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
|
||||
typedef void* mi_nothrow_t;
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Override system malloc
|
||||
@ -77,7 +77,9 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
|
||||
MI_INTERPOSE_MI(calloc),
|
||||
MI_INTERPOSE_MI(realloc),
|
||||
MI_INTERPOSE_MI(strdup),
|
||||
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
|
||||
MI_INTERPOSE_MI(strndup),
|
||||
#endif
|
||||
MI_INTERPOSE_MI(realpath),
|
||||
MI_INTERPOSE_MI(posix_memalign),
|
||||
MI_INTERPOSE_MI(reallocf),
|
||||
@ -128,11 +130,16 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
|
||||
// cannot override malloc unless using a dll.
|
||||
// we just override new/delete which does work in a static library.
|
||||
#else
|
||||
// On all other systems forward to our API
|
||||
// On all other systems forward allocation primitives to our API
|
||||
mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
|
||||
mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
|
||||
mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
|
||||
mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
|
||||
mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
|
||||
// In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call)
|
||||
mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str)
|
||||
#if !defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
|
||||
mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
|
||||
@ -192,11 +199,17 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
|
||||
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
|
||||
void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
|
||||
void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
|
||||
|
||||
void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
|
||||
void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
|
||||
void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
|
||||
void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
|
||||
|
||||
void _ZdlPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete(void*, std::nothrow_t const&)
|
||||
void _ZdaPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete[](void*, std::nothrow_t const&)
|
||||
void _ZdlPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete(void*, std::align_val_t, std::nothrow_t const&)
|
||||
void _ZdaPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete[](void*, std::align_val_t, std::nothrow_t const&)
|
||||
|
||||
#if (MI_INTPTR_SIZE==8)
|
||||
void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
|
||||
void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
|
||||
@ -259,10 +272,11 @@ extern "C" {
|
||||
// no forwarding here due to aliasing/name mangling issues
|
||||
void cfree(void* p) { mi_free(p); }
|
||||
void* pvalloc(size_t size) { return mi_pvalloc(size); }
|
||||
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
|
||||
int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
|
||||
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
|
||||
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
|
||||
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
|
||||
// some systems define reallocarr so mark it as a weak symbol (#751)
|
||||
mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
|
||||
|
||||
#if defined(__wasi__)
|
||||
// forward __libc interface (see PR #667)
|
||||
|
546
src/alloc.c
@ -1,5 +1,5 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
@ -18,6 +18,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
#define MI_IN_ALLOC_C
|
||||
#include "alloc-override.c"
|
||||
#include "free.c"
|
||||
#undef MI_IN_ALLOC_C
|
||||
|
||||
// ------------------------------------------------------
|
||||
@ -26,16 +27,18 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
// Fast allocation in a page: just pop from the free list.
|
||||
// Fall back to generic allocation only if the list is empty.
|
||||
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
|
||||
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
|
||||
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
|
||||
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
|
||||
{
|
||||
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
|
||||
mi_block_t* const block = page->free;
|
||||
if mi_unlikely(block == NULL) {
|
||||
return _mi_malloc_generic(heap, size, zero, 0);
|
||||
}
|
||||
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
|
||||
// pop from the free list
|
||||
page->used++;
|
||||
page->free = mi_block_next(page, block);
|
||||
page->used++;
|
||||
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
|
||||
#if MI_DEBUG>3
|
||||
if (page->free_is_zero) {
|
||||
@ -50,54 +53,54 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
|
||||
|
||||
// zero the block? note: we need to zero the full block size (issue #63)
|
||||
if mi_unlikely(zero) {
|
||||
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
|
||||
mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE);
|
||||
mi_assert_internal(page->block_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
|
||||
mi_assert_internal(page->block_size >= MI_PADDING_SIZE);
|
||||
if (page->free_is_zero) {
|
||||
block->next = 0;
|
||||
mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE);
|
||||
mi_track_mem_defined(block, page->block_size - MI_PADDING_SIZE);
|
||||
}
|
||||
else {
|
||||
_mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
|
||||
}
|
||||
_mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
|
||||
if (!zero && !mi_page_is_huge(page)) {
|
||||
memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
|
||||
}
|
||||
#elif (MI_SECURE!=0)
|
||||
#elif (MI_SECURE!=0)
|
||||
if (!zero) { block->next = 0; } // don't leak internal data
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (MI_STAT>0)
|
||||
#if (MI_STAT>0)
|
||||
const size_t bsize = mi_page_usable_block_size(page);
|
||||
if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
|
||||
mi_heap_stat_increase(heap, normal, bsize);
|
||||
mi_heap_stat_counter_increase(heap, normal_count, 1);
|
||||
#if (MI_STAT>1)
|
||||
#if (MI_STAT>1)
|
||||
const size_t bin = _mi_bin(bsize);
|
||||
mi_heap_stat_increase(heap, normal_bins[bin], 1);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if MI_PADDING // && !MI_TRACK_ENABLED
|
||||
#if MI_PADDING // && !MI_TRACK_ENABLED
|
||||
mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
|
||||
ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
|
||||
#if (MI_DEBUG>=2)
|
||||
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
|
||||
#endif
|
||||
#if (MI_DEBUG>=2)
|
||||
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
|
||||
#endif
|
||||
mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess
|
||||
padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
|
||||
padding->delta = (uint32_t)(delta);
|
||||
#if MI_PADDING_CHECK
|
||||
if (!mi_page_is_huge(page)) {
|
||||
uint8_t* fill = (uint8_t*)padding - delta;
|
||||
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
|
||||
for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
|
||||
}
|
||||
#if MI_PADDING_CHECK
|
||||
if (!mi_page_is_huge(page)) {
|
||||
uint8_t* fill = (uint8_t*)padding - delta;
|
||||
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
|
||||
for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return block;
|
||||
}
|
||||
@ -112,9 +115,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
|
||||
#if (MI_PADDING)
|
||||
if (size == 0) { size = sizeof(void*); }
|
||||
#endif
|
||||
|
||||
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
|
||||
void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
|
||||
void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
|
||||
mi_track_malloc(p,size,zero);
|
||||
|
||||
#if MI_STAT>1
|
||||
if (p != NULL) {
|
||||
if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
|
||||
@ -190,484 +195,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Check for double free in secure and debug mode
|
||||
// This is somewhat expensive so only enabled for secure mode 4
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0))
|
||||
// linear check if the free list contains a specific element
|
||||
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
|
||||
while (list != NULL) {
|
||||
if (elem==list) return true;
|
||||
list = mi_block_next(page, list);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
|
||||
// The decoded value is in the same page (or NULL).
|
||||
// Walk the free lists to verify positively if it is already freed
|
||||
if (mi_list_contains(page, page->free, block) ||
|
||||
mi_list_contains(page, page->local_free, block) ||
|
||||
mi_list_contains(page, mi_page_thread_free(page), block))
|
||||
{
|
||||
_mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); }
|
||||
|
||||
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
bool is_double_free = false;
|
||||
mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field
|
||||
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
|
||||
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
|
||||
{
|
||||
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
|
||||
// (continue in separate function to improve code generation)
|
||||
is_double_free = mi_check_is_double_freex(page, block);
|
||||
}
|
||||
return is_double_free;
|
||||
}
|
||||
#else
|
||||
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
MI_UNUSED(page);
|
||||
MI_UNUSED(block);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Check for heap block overflow by setting up padding at the end of the block
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if MI_PADDING // && !MI_TRACK_ENABLED
|
||||
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
|
||||
*bsize = mi_page_usable_block_size(page);
|
||||
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
|
||||
mi_track_mem_defined(padding,sizeof(mi_padding_t));
|
||||
*delta = padding->delta;
|
||||
uint32_t canary = padding->canary;
|
||||
uintptr_t keys[2];
|
||||
keys[0] = page->keys[0];
|
||||
keys[1] = page->keys[1];
|
||||
bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize);
|
||||
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
|
||||
return ok;
|
||||
}
|
||||
|
||||
// Return the exact usable size of a block.
|
||||
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
|
||||
size_t bsize;
|
||||
size_t delta;
|
||||
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
|
||||
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
|
||||
return (ok ? bsize - delta : 0);
|
||||
}

// When a non-thread-local block is freed, it becomes part of the thread delayed free
// list that is freed later by the owning heap. If the exact usable size is too small to
// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
// so that it will not trigger an overflow error later in `mi_free_block`.
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
  size_t bsize;
  size_t delta;
  bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
  mi_assert_internal(ok);
  if (!ok || (bsize - delta) >= min_size) return;  // usually there is already enough space
  mi_assert_internal(bsize >= min_size);
  if (bsize < min_size) return;  // should never happen
  size_t new_delta = (bsize - min_size);
  mi_assert_internal(new_delta < bsize);
  mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
  mi_track_mem_defined(padding,sizeof(mi_padding_t));
  padding->delta = (uint32_t)new_delta;
  mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
}
#else
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
  MI_UNUSED(block);
  return mi_page_usable_block_size(page);
}

void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
  MI_UNUSED(page);
  MI_UNUSED(block);
  MI_UNUSED(min_size);
}
#endif

#if MI_PADDING && MI_PADDING_CHECK

static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
  size_t bsize;
  size_t delta;
  bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
  *size = *wrong = bsize;
  if (!ok) return false;
  mi_assert_internal(bsize >= delta);
  *size = bsize - delta;
  if (!mi_page_is_huge(page)) {
    uint8_t* fill = (uint8_t*)block + bsize - delta;
    const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta);  // check at most the first N padding bytes
    mi_track_mem_defined(fill, maxpad);
    for (size_t i = 0; i < maxpad; i++) {
      if (fill[i] != MI_DEBUG_PADDING) {
        *wrong = bsize - delta + i;
        ok = false;
        break;
      }
    }
    mi_track_mem_noaccess(fill, maxpad);
  }
  return ok;
}

static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
  size_t size;
  size_t wrong;
  if (!mi_verify_padding(page,block,&size,&wrong)) {
    _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
  }
}

#else

static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
  MI_UNUSED(page);
  MI_UNUSED(block);
}

#endif
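
// Illustrative usage sketch (not part of this diff): with MI_PADDING and MI_PADDING_CHECK
// enabled, writing past the requested size clobbers the fill bytes (or the canary) and the
// error above is raised when the block is freed.
#include <mimalloc.h>
#include <string.h>

int main(void) {
  char* s = (char*)mi_malloc(8);
  memset(s, 'x', 10);   // two bytes past the requested size
  mi_free(s);           // likely reports: buffer overflow ... write after 8 bytes
  return 0;
}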

// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
  #if (MI_STAT < 2)
  MI_UNUSED(block);
  #endif
  mi_heap_t* const heap = mi_heap_get_default();
  const size_t bsize = mi_page_usable_block_size(page);
  #if (MI_STAT>1)
  const size_t usize = mi_page_usable_size_of(page, block);
  mi_heap_stat_decrease(heap, malloc, usize);
  #endif
  if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
    mi_heap_stat_decrease(heap, normal, bsize);
    #if (MI_STAT > 1)
    mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
    #endif
  }
  else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
    mi_heap_stat_decrease(heap, large, bsize);
  }
  else {
    mi_heap_stat_decrease(heap, huge, bsize);
  }
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
  MI_UNUSED(page); MI_UNUSED(block);
}
#endif

#if MI_HUGE_PAGE_ABANDON
#if (MI_STAT>0)
// maintain stats for huge objects
static void mi_stat_huge_free(const mi_page_t* page) {
  mi_heap_t* const heap = mi_heap_get_default();
  const size_t bsize = mi_page_block_size(page);  // to match stats in `page.c:mi_page_huge_alloc`
  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
    mi_heap_stat_decrease(heap, large, bsize);
  }
  else {
    mi_heap_stat_decrease(heap, huge, bsize);
  }
}
#else
static void mi_stat_huge_free(const mi_page_t* page) {
  MI_UNUSED(page);
}
#endif
#endif

// ------------------------------------------------------
// Free
// ------------------------------------------------------

// multi-threaded free (or free in a huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
  // The padding check may access the non-thread-owned page for the key values.
  // That is safe as these are constant and the page won't be freed (as the block is not freed yet).
  mi_check_padding(page, block);
  _mi_padding_shrink(page, block, sizeof(mi_block_t));  // for small sizes, ensure we can fit the delayed thread pointers without triggering overflow detection

  // huge page segments are always abandoned and can be freed immediately
  mi_segment_t* segment = _mi_page_segment(page);
  if (segment->kind == MI_SEGMENT_HUGE) {
    #if MI_HUGE_PAGE_ABANDON
    // huge page segments are always abandoned and can be freed immediately
    mi_stat_huge_free(page);
    _mi_segment_huge_page_free(segment, page, block);
    return;
    #else
    // huge pages are special as they occupy the entire segment;
    // as these are large we reset the memory occupied by the page so it is available to other threads
    // (as the owning thread needs to actually free the memory later).
    _mi_segment_huge_page_reset(segment, page, block);
    #endif
  }

  #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN   // note: when tracking, cannot use mi_usable_size with multi-threading
  if (segment->kind != MI_SEGMENT_HUGE) {             // not for huge segments as we just reset the content
    memset(block, MI_DEBUG_FREED, mi_usable_size(block));
  }
  #endif

  // Try to put the block on either the page-local thread free list, or the heap delayed free list.
  mi_thread_free_t tfreex;
  bool use_delayed;
  mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
  do {
    use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
    if mi_unlikely(use_delayed) {
      // unlikely: this only happens on the first concurrent free in a page that is in the full list
      tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
    }
    else {
      // usual: directly add to the page thread_free list
      mi_block_set_next(page, block, mi_tf_block(tfree));
      tfreex = mi_tf_set_block(tfree,block);
    }
  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));

  if mi_unlikely(use_delayed) {
    // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
    mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
    mi_assert_internal(heap != NULL);
    if (heap != NULL) {
      // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
      mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
      do {
        mi_block_set_nextx(heap,block,dfree, heap->keys);
      } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
    }

    // and reset the MI_DELAYED_FREEING flag
    tfree = mi_atomic_load_relaxed(&page->xthread_free);
    do {
      tfreex = tfree;
      mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
      tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
    } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
  }
}
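
// The two retry loops above follow the usual compare-and-swap publish pattern. A generic,
// self-contained sketch of the same idea with C11 atomics (illustrative only; mimalloc's
// real lists additionally encode the `next` pointers and keep the 2-bit delayed-free state
// in the low bits of `xthread_free`):
#include <stdatomic.h>

typedef struct node_s { struct node_s* next; } node_t;

static void atomic_push(_Atomic(node_t*)* list, node_t* n) {
  node_t* head = atomic_load_explicit(list, memory_order_relaxed);
  do {
    n->next = head;   // link to the current head; `head` is refreshed on a failed CAS
  } while (!atomic_compare_exchange_weak_explicit(list, &head, n,
             memory_order_release, memory_order_relaxed));
}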

// regular free
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{
  // and push it on the free list
  //const size_t bsize = mi_page_block_size(page);
  if mi_likely(local) {
    // owning thread can free a block directly
    if mi_unlikely(mi_check_is_double_free(page, block)) return;
    mi_check_padding(page, block);
    #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
    if (!mi_page_is_huge(page)) {   // huge page content may be already decommitted
      memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
    }
    #endif
    mi_block_set_next(page, block, page->local_free);
    page->local_free = block;
    page->used--;
    if mi_unlikely(mi_page_all_free(page)) {
      _mi_page_retire(page);
    }
    else if mi_unlikely(mi_page_is_in_full(page)) {
      _mi_page_unfull(page);
    }
  }
  else {
    _mi_free_block_mt(page,block);
  }
}


// Adjust a block that was allocated aligned to the actual start of the block in the page.
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
  mi_assert_internal(page!=NULL && p!=NULL);
  const size_t diff   = (uint8_t*)p - _mi_page_start(segment, page, NULL);
  const size_t adjust = (diff % mi_page_block_size(page));
  return (mi_block_t*)((uintptr_t)p - adjust);
}
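
// Worked example (illustrative): if the page has 64-byte blocks and its data starts at
// `start`, then for an interior pointer `p = start + 200` we get `diff = 200`,
// `adjust = 200 % 64 = 8`, and the returned block start is `p - 8 = start + 192`,
// i.e. the fourth block of the page.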


void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
  mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
  mi_stat_free(page, block);    // stat_free may access the padding
  mi_track_free_size(block, mi_page_usable_size_of(page,block));
  _mi_free_block(page, is_local, block);
}

// Get the segment data belonging to a pointer.
// This is just a single `and` in assembly but does further checks in debug mode
// (and in secure mode) to verify that this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
  MI_UNUSED(msg);
  mi_assert(p != NULL);

  #if (MI_DEBUG>0)
  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
    _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
    return NULL;
  }
  #endif

  mi_segment_t* const segment = _mi_ptr_segment(p);
  mi_assert_internal(segment != NULL);

  #if (MI_DEBUG>0)
  if mi_unlikely(!mi_is_in_heap_region(p)) {
    #if (MI_INTPTR_SIZE == 8 && defined(__linux__))
    if (((uintptr_t)p >> 40) != 0x7F) {   // linux tends to align large blocks above 0x7F000000000 (issue #640)
    #else
    {
    #endif
      _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
        "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
      if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
        _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
      }
    }
  }
  #endif
  #if (MI_DEBUG>0 || MI_SECURE>=4)
  if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
    _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
    return NULL;
  }
  #endif

  return segment;
}
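
// Sketch of the "single `and`" mentioned above (assuming the usual layout where every
// segment is MI_SEGMENT_SIZE-aligned and its meta data sits at the segment start); the
// helper name here is hypothetical, the real code uses `_mi_ptr_segment`:
static inline mi_segment_t* ptr_segment_sketch(const void* p) {
  return (mi_segment_t*)((uintptr_t)p & ~((uintptr_t)MI_SEGMENT_SIZE - 1));
}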

// Free a block
// (the fast path is written carefully to prevent spilling onto the stack)
void mi_free(void* p) mi_attr_noexcept
{
  if mi_unlikely(p == NULL) return;
  mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
  const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
  mi_page_t* const page = _mi_segment_page_of(segment, p);

  if mi_likely(is_local) {                       // thread-local free?
    if mi_likely(page->flags.full_aligned == 0)  // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
    {
      mi_block_t* const block = (mi_block_t*)p;
      if mi_unlikely(mi_check_is_double_free(page, block)) return;
      mi_check_padding(page, block);
      mi_stat_free(page, block);
      #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
      memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
      #endif
      mi_track_free_size(p, mi_page_usable_size_of(page,block));  // faster than mi_usable_size as we already know the page and that p is unaligned
      mi_block_set_next(page, block, page->local_free);
      page->local_free = block;
      if mi_unlikely(--page->used == 0) {   // using this expression generates better code than: page->used--; if (mi_page_all_free(page))
        _mi_page_retire(page);
      }
    }
    else {
      // page is full or contains (inner) aligned blocks; use the generic path
      _mi_free_generic(segment, page, true, p);
    }
  }
  else {
    // not thread-local; use the generic path
    _mi_free_generic(segment, page, false, p);
  }
}
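
// Illustrative usage sketch (not part of this diff): a small block freed on its owning
// thread takes the fast path above, while a page that carries (inner) aligned blocks is
// flagged `has_aligned` and such frees go through `_mi_free_generic` instead.
#include <mimalloc.h>

int main(void) {
  void* a = mi_malloc(64);                 // small block: typically the fast local-free path
  void* b = mi_malloc_aligned(200, 128);   // may set the aligned flag on its page
  mi_free(a);
  mi_free(b);                              // then freed via the generic path
  return 0;
}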

// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
  // get segment and page
  const mi_segment_t* const segment = _mi_ptr_segment(block);
  mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
  mi_assert_internal(_mi_thread_id() == segment->thread_id);
  mi_page_t* const page = _mi_segment_page_of(segment, block);

  // Clear the no-delayed flag so delayed freeing is used again for this page.
  // This must be done before collecting the free lists on this page -- otherwise
  // some blocks may end up in the page `thread_free` list with no blocks in the
  // heap `thread_delayed_free` list, which may cause the page to never be freed!
  // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
  if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* don't overwrite never-delayed */)) {
    return false;
  }

  // collect all other non-local frees to ensure an up-to-date `used` count
  _mi_page_free_collect(page, false);

  // and free the block (possibly freeing the page as well since `used` is updated)
  _mi_free_block(page, true, block);
  return true;
}

// Bytes available in a block
mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
  const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
  const size_t size = mi_page_usable_size_of(page, block);
  const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
  mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
  return (size - adjust);
}

static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
  if (p == NULL) return 0;
  const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
  const mi_page_t* const page = _mi_segment_page_of(segment, p);
  if mi_likely(!mi_page_has_aligned(page)) {
    const mi_block_t* block = (const mi_block_t*)p;
    return mi_page_usable_size_of(page, block);
  }
  else {
    // split out to a separate routine for improved code generation
    return mi_page_usable_aligned_size_of(segment, page, p);
  }
}

mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
  return _mi_usable_size(p, "mi_usable_size");
}
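
// Illustrative usage sketch: the usable size is at least the requested size, and the extra
// bytes can be used without reallocating.
#include <mimalloc.h>
#include <assert.h>

int main(void) {
  void* p = mi_malloc(100);
  size_t n = mi_usable_size(p);   // >= 100; usually the full bin size (minus any padding)
  assert(n >= 100);
  mi_free(p);
  return 0;
}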


// ------------------------------------------------------
// Allocation extensions
// ------------------------------------------------------

void mi_free_size(void* p, size_t size) mi_attr_noexcept {
  MI_UNUSED_RELEASE(size);
  mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
  mi_free(p);
}

void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
  MI_UNUSED_RELEASE(alignment);
  mi_assert(((uintptr_t)p % alignment) == 0);
  mi_free_size(p,size);
}

void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
  MI_UNUSED_RELEASE(alignment);
  mi_assert(((uintptr_t)p % alignment) == 0);
  mi_free(p);
}

mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
  size_t total;
  if (mi_count_size_overflow(count,size,&total)) return NULL;
@ -869,7 +396,8 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name)
  char* rname = realpath(fname, NULL);
  if (rname == NULL) return NULL;
  char* result = mi_heap_strdup(heap, rname);
  free(rname);      // use regular free! (which may be redirected to our free but that's ok)
  mi_cfree(rname);  // use checked free (which may be redirected to our free but that's ok)
  // note: with ASAN realpath is intercepted and mi_cfree may leak the returned pointer :-(
  return result;
}
/*
@ -913,9 +441,13 @@ static bool mi_try_new_handler(bool nothrow) {
  #endif
  if (h==NULL) {
    _mi_error_message(ENOMEM, "out of memory in 'new'");
    #if defined(_CPPUNWIND) || defined(__cpp_exceptions)  // exceptions are not always enabled
    if (!nothrow) {
      throw std::bad_alloc();
    }
    #else
    MI_UNUSED(nothrow);
    #endif
    return false;
  }
  else {

354
src/arena.c
@ -13,7 +13,7 @@ threads and need to be accessed using atomic operations.
|
||||
|
||||
Arenas are used for huge OS page (1GiB) reservations or for reserving
OS memory upfront which can improve performance or is sometimes needed
on embedded devices. We can also employ this with WASI or `sbrk` systems
|
||||
to reserve large arenas upfront and be able to reuse the memory more effectively.
|
||||
|
||||
The arena allocation needs to be thread safe and we use an atomic bitmap to allocate.
|
||||
@ -48,13 +48,14 @@ typedef struct mi_arena_s {
|
||||
size_t meta_size; // size of the arena structure itself (including its bitmaps)
|
||||
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
|
||||
int numa_node; // associated NUMA node
|
||||
bool exclusive; // only allow allocations if specifically for this arena
|
||||
bool exclusive; // only allow allocations if specifically for this arena
|
||||
bool is_large; // memory area consists of large- or huge OS pages (always committed)
|
||||
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
|
||||
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
|
||||
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
|
||||
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
|
||||
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
|
||||
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
|
||||
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
|
||||
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
|
||||
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
|
||||
} mi_arena_t;
|
||||
|
||||
@ -94,7 +95,7 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i
|
||||
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
|
||||
}
|
||||
else {
|
||||
return mi_arena_id_is_suitable(0, false, request_arena_id);
|
||||
return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,7 +104,7 @@ bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) {
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Arena allocations get a (currently) 16-bit memory id where the
|
||||
Arena allocations get a (currently) 16-bit memory id where the
|
||||
lower 8 bits are the arena id, and the upper bits the block index.
|
||||
----------------------------------------------------------- */
|
||||
|
||||
@ -165,6 +166,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
|
||||
|
||||
// success
|
||||
*memid = _mi_memid_create(MI_MEM_STATIC);
|
||||
memid->initially_zero = true;
|
||||
const size_t start = _mi_align_up(oldtop, alignment);
|
||||
uint8_t* const p = &mi_arena_static[start];
|
||||
_mi_memzero(p, size);
|
||||
@ -175,11 +177,19 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
|
||||
*memid = _mi_memid_none();
|
||||
|
||||
// try static
|
||||
void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid);
|
||||
void* p = mi_arena_static_zalloc(size, MI_MAX_ALIGN_SIZE, memid);
|
||||
if (p != NULL) return p;
|
||||
|
||||
// or fall back to the OS
|
||||
return _mi_os_alloc(size, memid, stats);
|
||||
p = _mi_os_alloc(size, memid, stats);
|
||||
if (p == NULL) return NULL;
|
||||
|
||||
// zero the OS memory if needed
|
||||
if (!memid->initially_zero) {
|
||||
_mi_memzero_aligned(p, size);
|
||||
memid->initially_zero = true;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
|
||||
@ -201,11 +211,11 @@ static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// claim the `blocks_inuse` bits
|
||||
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
|
||||
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
|
||||
{
|
||||
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
|
||||
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
|
||||
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
|
||||
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) {
|
||||
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
|
||||
return true;
|
||||
};
|
||||
return false;
|
||||
@ -223,9 +233,9 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
|
||||
mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);
|
||||
|
||||
mi_bitmap_index_t bitmap_index;
|
||||
if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;
|
||||
if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index, tld->stats)) return NULL;
|
||||
|
||||
// claimed it!
|
||||
// claimed it!
|
||||
void* p = mi_arena_block_start(arena, bitmap_index);
|
||||
*memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
|
||||
memid->is_pinned = arena->memid.is_pinned;
|
||||
@ -265,21 +275,21 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
|
||||
// no need to commit, but check if already fully committed
|
||||
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
|
||||
}
|
||||
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
// allocate in a specific arena
|
||||
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
|
||||
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
|
||||
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
|
||||
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
|
||||
{
|
||||
MI_UNUSED_RELEASE(alignment);
|
||||
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
|
||||
const size_t bcount = mi_block_count_of_size(size);
|
||||
const size_t bcount = mi_block_count_of_size(size);
|
||||
const size_t arena_index = mi_arena_id_index(arena_id);
|
||||
mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
|
||||
mi_assert_internal(size <= mi_arena_block_size(bcount));
|
||||
|
||||
|
||||
// Check arena suitability
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
|
||||
if (arena == NULL) return NULL;
|
||||
@ -299,7 +309,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
|
||||
|
||||
|
||||
// allocate from an arena with fallback to the OS
|
||||
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
|
||||
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
|
||||
bool commit, bool allow_large,
|
||||
mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
|
||||
{
|
||||
@ -307,9 +317,9 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
|
||||
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
if mi_likely(max_arena == 0) return NULL;
|
||||
|
||||
|
||||
if (req_arena_id != _mi_arena_id_none()) {
|
||||
// try a specific arena if requested
|
||||
// try a specific arena if requested
|
||||
if (mi_arena_id_index(req_arena_id) < max_arena) {
|
||||
void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
@ -317,7 +327,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
|
||||
}
|
||||
else {
|
||||
// try numa affine allocation
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
@ -345,22 +355,22 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
|
||||
size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
|
||||
if (arena_reserve == 0) return false;
|
||||
|
||||
if (!_mi_os_has_virtual_reserve()) {
|
||||
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example)
|
||||
if (!_mi_os_has_virtual_reserve()) {
|
||||
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
|
||||
}
|
||||
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
|
||||
if (arena_count >= 8 && arena_count <= 128) {
|
||||
arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
|
||||
}
|
||||
}
|
||||
if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
|
||||
|
||||
|
||||
// commit eagerly?
|
||||
bool arena_commit = false;
|
||||
if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
|
||||
else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
|
||||
|
||||
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
|
||||
}
|
||||
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0);
|
||||
}
|
||||
|
||||
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
|
||||
@ -373,35 +383,37 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
|
||||
const int numa_node = _mi_os_numa_node(tld); // current numa node
|
||||
|
||||
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
|
||||
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
|
||||
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
|
||||
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
|
||||
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
|
||||
// otherwise, try to first eagerly reserve a new arena
|
||||
if (req_arena_id == _mi_arena_id_none()) {
|
||||
mi_arena_id_t arena_id = 0;
|
||||
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
|
||||
// and try allocate in there
|
||||
mi_assert_internal(req_arena_id == _mi_arena_id_none());
|
||||
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
// otherwise, try to first eagerly reserve a new arena
|
||||
if (req_arena_id == _mi_arena_id_none()) {
|
||||
mi_arena_id_t arena_id = 0;
|
||||
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
|
||||
// and try allocate in there
|
||||
mi_assert_internal(req_arena_id == _mi_arena_id_none());
|
||||
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we cannot use OS allocation, return NULL
|
||||
if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
|
||||
if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) {
|
||||
errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
// finally, fall back to the OS
|
||||
if (align_offset > 0) {
|
||||
return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
|
||||
}
|
||||
else {
|
||||
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
|
||||
@ -437,22 +449,22 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks,
|
||||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
mi_assert_internal(!arena->memid.is_pinned);
|
||||
const size_t size = mi_arena_block_size(blocks);
|
||||
void* const p = mi_arena_block_start(arena, bitmap_idx);
|
||||
void* const p = mi_arena_block_start(arena, bitmap_idx);
|
||||
bool needs_recommit;
|
||||
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
|
||||
// all blocks are committed, we can purge freely
|
||||
needs_recommit = _mi_os_purge(p, size, stats);
|
||||
}
|
||||
else {
|
||||
// some blocks are not committed -- this can happen when a partially committed block is freed
|
||||
// some blocks are not committed -- this can happen when a partially committed block is freed
|
||||
// in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
|
||||
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
|
||||
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
|
||||
// and also undo the decommit stats (as it was already adjusted)
|
||||
mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
|
||||
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
|
||||
_mi_stat_increase(&stats->committed, size);
|
||||
if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
|
||||
}
|
||||
|
||||
|
||||
// clear the purged blocks
|
||||
_mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);
|
||||
// update committed bitmap
|
||||
@ -470,13 +482,13 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
|
||||
|
||||
if (_mi_preloading() || delay == 0) {
|
||||
// decommit directly
|
||||
mi_arena_purge(arena, bitmap_idx, blocks, stats);
|
||||
mi_arena_purge(arena, bitmap_idx, blocks, stats);
|
||||
}
|
||||
else {
|
||||
// schedule decommit
|
||||
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
|
||||
if (expire != 0) {
|
||||
mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay
|
||||
mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay
|
||||
}
|
||||
else {
|
||||
mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
|
||||
@ -512,7 +524,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
|
||||
}
|
||||
|
||||
// returns true if anything was purged
|
||||
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
|
||||
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
|
||||
{
|
||||
if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
|
||||
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
|
||||
@ -520,11 +532,11 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
|
||||
if (!force && expire > now) return false;
|
||||
|
||||
// reset expire (if not already set concurrently)
|
||||
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0);
|
||||
|
||||
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
|
||||
|
||||
// potential purges scheduled, walk through the bitmap
|
||||
bool any_purged = false;
|
||||
bool full_purge = true;
|
||||
bool full_purge = true;
|
||||
for (size_t i = 0; i < arena->field_count; i++) {
|
||||
size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
|
||||
if (purge != 0) {
|
||||
@ -575,7 +587,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats )
|
||||
|
||||
// allow only one thread to purge at a time
|
||||
static mi_atomic_guard_t purge_guard;
|
||||
mi_atomic_guard(&purge_guard)
|
||||
mi_atomic_guard(&purge_guard)
|
||||
{
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
size_t max_purge_count = (visit_all ? max_arena : 1);
|
||||
@ -588,7 +600,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats )
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -602,12 +614,12 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
||||
if (p==NULL) return;
|
||||
if (size==0) return;
|
||||
const bool all_committed = (committed_size == size);
|
||||
|
||||
|
||||
if (mi_memkind_is_os(memid.memkind)) {
|
||||
// was a direct OS allocation, pass through
|
||||
if (!all_committed && committed_size > 0) {
|
||||
// if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
|
||||
_mi_stat_decrease(&stats->committed, committed_size);
|
||||
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
|
||||
}
|
||||
_mi_os_free(p, size, memid, stats);
|
||||
}
|
||||
@ -620,15 +632,15 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]);
|
||||
mi_assert_internal(arena != NULL);
|
||||
const size_t blocks = mi_block_count_of_size(size);
|
||||
|
||||
|
||||
// checks
|
||||
if (arena == NULL) {
|
||||
_mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
_mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
return;
|
||||
}
|
||||
mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
|
||||
if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
|
||||
_mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
_mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -642,7 +654,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
||||
else {
|
||||
mi_assert_internal(arena->blocks_committed != NULL);
|
||||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
|
||||
|
||||
if (!all_committed) {
|
||||
// mark the entire range as no longer committed (so we recommit the full range when re-using)
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
|
||||
@ -650,16 +662,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
||||
if (committed_size > 0) {
|
||||
// if partially committed, adjust the committed stats (is it will be recommitted when re-using)
|
||||
// in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
|
||||
_mi_stat_decrease(&stats->committed, committed_size);
|
||||
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
|
||||
}
|
||||
// note: if not all committed, it may be that the purge will reset/decommit the entire range
|
||||
// that contains already decommitted parts. Since purge consistently uses reset or decommit that
|
||||
// works (as we should never reset decommitted parts).
|
||||
}
|
||||
// (delay) purge the entire range
|
||||
mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
|
||||
mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
|
||||
}
|
||||
|
||||
|
||||
// and make it available to others again
|
||||
bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
|
||||
if (!all_inuse) {
|
||||
@ -684,9 +696,9 @@ static void mi_arenas_unsafe_destroy(void) {
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena != NULL) {
|
||||
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
|
||||
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
|
||||
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
|
||||
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
|
||||
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
|
||||
}
|
||||
else {
|
||||
new_max_arena = i;
|
||||
@ -701,15 +713,15 @@ static void mi_arenas_unsafe_destroy(void) {
|
||||
}
|
||||
|
||||
// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
|
||||
void _mi_arena_collect(bool force_purge, mi_stats_t* stats) {
|
||||
mi_arenas_try_purge(force_purge, true /* visit all */, stats);
|
||||
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) {
|
||||
mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats);
|
||||
}
|
||||
|
||||
// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
|
||||
// for dynamic libraries that are unloaded and need to release all their allocated memory.
|
||||
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
|
||||
mi_arenas_unsafe_destroy();
|
||||
_mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas
|
||||
_mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas
|
||||
}
|
||||
|
||||
// Is a pointer inside any of our arenas?
|
||||
@ -717,19 +729,151 @@ bool _mi_arena_contains(const void* p) {
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
|
||||
return true;
|
||||
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Abandoned blocks/segments.
|
||||
This is used to atomically abandon/reclaim segments
|
||||
(and crosses the arena API but it is convenient to have here).
|
||||
Abandoned segments still have live blocks; they get reclaimed
|
||||
when a thread frees a block in it, or when a thread needs a fresh
|
||||
segment; these threads scan the abandoned segments through
|
||||
the arena bitmaps.
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// Maintain a count of all abandoned segments
|
||||
static mi_decl_cache_align _Atomic(size_t)abandoned_count;
|
||||
|
||||
size_t _mi_arena_segment_abandoned_count(void) {
|
||||
return mi_atomic_load_relaxed(&abandoned_count);
|
||||
}
|
||||
|
||||
// reclaim a specific abandoned segment; `true` on success.
|
||||
// sets the thread_id.
|
||||
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
|
||||
{
|
||||
if (segment->memid.memkind != MI_MEM_ARENA) {
|
||||
// not in an arena, consider it un-abandoned now.
|
||||
// but we need to still claim it atomically -- we use the thread_id for that.
|
||||
size_t expected = 0;
|
||||
if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected, _mi_thread_id())) {
|
||||
mi_atomic_decrement_relaxed(&abandoned_count);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// arena segment: use the blocks_abandoned bitmap.
|
||||
size_t arena_idx;
|
||||
size_t bitmap_idx;
|
||||
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
|
||||
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
|
||||
mi_assert_internal(arena != NULL);
|
||||
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
|
||||
if (was_marked) {
|
||||
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
|
||||
mi_atomic_decrement_relaxed(&abandoned_count);
|
||||
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
|
||||
}
|
||||
// mi_assert_internal(was_marked);
|
||||
mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
|
||||
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
|
||||
return was_marked;
|
||||
}
|
||||
|
||||
// mark a specific segment as abandoned
|
||||
// clears the thread_id.
|
||||
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
|
||||
{
|
||||
mi_atomic_store_release(&segment->thread_id, 0);
|
||||
mi_assert_internal(segment->used == segment->abandoned);
|
||||
if (segment->memid.memkind != MI_MEM_ARENA) {
|
||||
// not in an arena; count it as abandoned and return
|
||||
mi_atomic_increment_relaxed(&abandoned_count);
|
||||
return;
|
||||
}
|
||||
size_t arena_idx;
|
||||
size_t bitmap_idx;
|
||||
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
|
||||
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
|
||||
mi_assert_internal(arena != NULL);
|
||||
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
|
||||
if (was_unmarked) { mi_atomic_increment_relaxed(&abandoned_count); }
|
||||
mi_assert_internal(was_unmarked);
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
|
||||
}
|
||||
|
||||
// start a cursor at a randomized arena
|
||||
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
|
||||
current->count = 0;
|
||||
current->bitmap_idx = 0;
|
||||
}
|
||||
|
||||
// reclaim abandoned segments
|
||||
// this does not set the thread id (so it appears as still abandoned)
|
||||
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous )
|
||||
{
|
||||
const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
|
||||
if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL;
|
||||
|
||||
int count = previous->count;
|
||||
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
|
||||
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
|
||||
// visit arena's (from previous)
|
||||
for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
|
||||
mi_arena_id_t arena_idx = previous->start + count;
|
||||
if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
|
||||
if (arena != NULL) {
|
||||
// visit the abandoned fields (starting at previous_idx)
|
||||
for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
|
||||
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
|
||||
if mi_unlikely(field != 0) { // skip zero fields quickly
|
||||
// visit each set bit in the field (todo: maybe use `ctz` here?)
|
||||
for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
|
||||
// pre-check if the bit is set
|
||||
size_t mask = ((size_t)1 << bit_idx);
|
||||
if mi_unlikely((field & mask) == mask) {
|
||||
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
|
||||
// try to reclaim it atomically
|
||||
if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
|
||||
mi_atomic_decrement_relaxed(&abandoned_count);
|
||||
previous->bitmap_idx = bitmap_idx;
|
||||
previous->count = count;
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
|
||||
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
|
||||
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
|
||||
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
|
||||
return segment;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// no more found
|
||||
previous->bitmap_idx = 0;
|
||||
previous->count = 0;
|
||||
return NULL;
|
||||
}
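
// Sketch of how a caller iterates the abandoned segments with the cursor API above
// (illustrative only; the function name is hypothetical and the real reclaim logic lives
// in `segment.c`):
static void reclaim_abandoned_sketch(mi_heap_t* heap) {
  mi_arena_field_cursor_t cursor;
  _mi_arena_field_cursor_init(heap, &cursor);     // start at a randomized arena
  mi_segment_t* segment;
  while ((segment = _mi_arena_segment_clear_abandoned_next(&cursor)) != NULL) {
    // try to reclaim `segment` for `heap`; if it is not suitable, mark it abandoned
    // again with `_mi_arena_segment_mark_abandoned(segment)`.
  }
}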
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Add an arena.
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
|
||||
static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) {
|
||||
mi_assert_internal(arena != NULL);
|
||||
mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
|
||||
mi_assert_internal(arena->block_count > 0);
|
||||
@ -740,6 +884,7 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
|
||||
mi_atomic_decrement_acq_rel(&mi_arena_count);
|
||||
return false;
|
||||
}
|
||||
_mi_stat_counter_increase(&stats->arena_count,1);
|
||||
arena->id = mi_arena_id_create(i);
|
||||
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
|
||||
if (arena_id != NULL) { *arena_id = arena->id; }
|
||||
@ -757,13 +902,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
|
||||
|
||||
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
|
||||
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
|
||||
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
|
||||
const size_t bitmaps = (memid.is_pinned ? 3 : 5);
|
||||
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
|
||||
mi_memid_t meta_memid;
|
||||
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
|
||||
if (arena == NULL) return false;
|
||||
|
||||
// already zero'd due to os_alloc
|
||||
|
||||
// already zero'd due to zalloc
|
||||
// _mi_memzero(arena, asize);
|
||||
arena->id = _mi_arena_id_none();
|
||||
arena->memid = memid;
|
||||
@ -777,14 +922,16 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
|
||||
arena->is_large = is_large;
|
||||
arena->purge_expire = 0;
|
||||
arena->search_idx = 0;
|
||||
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
|
||||
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
|
||||
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
|
||||
// consecutive bitmaps
|
||||
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
|
||||
arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
|
||||
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap
|
||||
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap
|
||||
// initialize committed bitmap?
|
||||
if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
|
||||
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
|
||||
}
|
||||
|
||||
|
||||
// and claim leftover blocks if needed (so we never allocate there)
|
||||
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
|
||||
mi_assert_internal(post >= 0);
|
||||
@ -793,7 +940,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
|
||||
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
|
||||
_mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
|
||||
}
|
||||
return mi_arena_add(arena, arena_id);
|
||||
return mi_arena_add(arena, arena_id, &_mi_stats_main);
|
||||
|
||||
}
|
||||
|
||||
@ -815,7 +962,7 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
|
||||
const bool is_large = memid.is_pinned; // todo: use separate is_large field?
|
||||
if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
|
||||
_mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
|
||||
_mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024));
|
||||
_mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024));
|
||||
return ENOMEM;
|
||||
}
|
||||
_mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
|
||||
@ -838,32 +985,55 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe
|
||||
Debugging
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) {
|
||||
static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_field_t* fields, size_t field_count ) {
|
||||
_mi_verbose_message("%s%s:\n", prefix, header);
|
||||
size_t bcount = 0;
|
||||
size_t inuse_count = 0;
|
||||
for (size_t i = 0; i < field_count; i++) {
|
||||
char buf[MI_BITMAP_FIELD_BITS + 1];
|
||||
uintptr_t field = mi_atomic_load_relaxed(&fields[i]);
|
||||
for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) {
|
||||
bool inuse = ((((uintptr_t)1 << bit) & field) != 0);
|
||||
if (inuse) inuse_count++;
|
||||
buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.');
|
||||
for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++, bcount++) {
|
||||
if (bcount < block_count) {
|
||||
bool inuse = ((((uintptr_t)1 << bit) & field) != 0);
|
||||
if (inuse) inuse_count++;
|
||||
buf[bit] = (inuse ? 'x' : '.');
|
||||
}
|
||||
else {
|
||||
buf[bit] = ' ';
|
||||
}
|
||||
}
|
||||
buf[MI_BITMAP_FIELD_BITS] = 0;
|
||||
_mi_verbose_message("%s%s\n", prefix, buf);
|
||||
_mi_verbose_message("%s %s\n", prefix, buf);
|
||||
}
|
||||
_mi_verbose_message("%s total ('x'): %zu\n", prefix, inuse_count);
|
||||
return inuse_count;
|
||||
}
|
||||
|
||||
void mi_debug_show_arenas(void) mi_attr_noexcept {
|
||||
void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept {
|
||||
size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
size_t inuse_total = 0;
|
||||
size_t abandoned_total = 0;
|
||||
size_t purge_total = 0;
|
||||
for (size_t i = 0; i < max_arenas; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena == NULL) break;
|
||||
size_t inuse_count = 0;
|
||||
_mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count);
|
||||
inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count);
|
||||
_mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count);
|
||||
_mi_verbose_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : ""));
|
||||
if (show_inuse) {
|
||||
inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count);
|
||||
}
|
||||
if (arena->blocks_committed != NULL) {
|
||||
mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, arena->blocks_committed, arena->field_count);
|
||||
}
|
||||
if (show_abandoned) {
|
||||
abandoned_total += mi_debug_show_bitmap(" ", "abandoned blocks", arena->block_count, arena->blocks_abandoned, arena->field_count);
|
||||
}
|
||||
if (show_purge && arena->blocks_purge != NULL) {
|
||||
purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, arena->blocks_purge, arena->field_count);
|
||||
}
|
||||
}
|
||||
if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", inuse_total);
|
||||
if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total);
|
||||
if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total);
|
||||
}
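
// Illustrative usage sketch: with the new signature above, a program can dump just the
// in-use blocks of each arena; output goes through `_mi_verbose_message`, so enable the
// verbose option first.
static void dump_arena_usage_sketch(void) {
  mi_option_set(mi_option_verbose, 1);
  mi_debug_show_arenas(true /* show_inuse */, false /* show_abandoned */, false /* show_purge */);
}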

14
src/bitmap.c
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically,
|
||||
represeted as an array of fields where each field is a machine word (`size_t`)
|
||||
represented as an array of fields where each field is a machine word (`size_t`)
|
||||
|
||||
There are two api's; the standard one cannot have sequences that cross
|
||||
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
|
||||
@ -200,7 +200,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
||||
// Try to atomically claim a sequence of `count` bits starting from the field
|
||||
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
|
||||
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
|
||||
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
|
||||
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
|
||||
{
|
||||
mi_assert_internal(bitmap_idx != NULL);
|
||||
|
||||
@ -260,6 +260,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
// claimed!
|
||||
mi_stat_counter_increase(stats->arena_crossover_count,1);
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
|
||||
return true;
|
||||
|
||||
@ -279,9 +280,10 @@ rollback:
|
||||
newmap = (map & ~initial_mask);
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
}
|
||||
mi_stat_counter_increase(stats->arena_rollback_count,1);
|
||||
// retry? (we make a recursive call instead of goto to be able to use const declarations)
|
||||
if (retries <= 2) {
|
||||
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
|
||||
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
@ -291,7 +293,7 @@ rollback:
|
||||
|
||||
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
|
||||
mi_assert_internal(count > 0);
|
||||
if (count <= 2) {
|
||||
// we don't bother with crossover fields for small counts
|
||||
@ -303,13 +305,15 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm
|
||||
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
|
||||
if (idx >= bitmap_fields) { idx = 0; } // wrap
|
||||
// first try to claim inside a field
|
||||
/*
|
||||
if (count <= MI_BITMAP_FIELD_BITS) {
|
||||
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
*/
|
||||
// if that fails, then try to claim across fields
|
||||
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
|
||||
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
|
||||
return true;
|
||||
}
|
||||
}
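// Sketch of how a caller now threads the stats through (assumed helper name,
// mirroring the arena code that uses this API): the extra mi_stats_t* lets the
// cross-field claim path bump the arena_crossover_count/arena_rollback_count counters.
static bool claim_blocks(mi_bitmap_t bitmap, size_t fields, size_t needed,
                         mi_bitmap_index_t* idx, mi_stats_t* stats) {
  // start the search at field 0 and let it wrap over all `fields` fields
  return _mi_bitmap_try_find_from_claim_across(bitmap, fields, 0, needed, idx, stats);
}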
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically,
|
||||
represeted as an array of fields where each field is a machine word (`size_t`)
|
||||
represented as an array of fields where each field is a machine word (`size_t`)
|
||||
|
||||
There are two api's; the standard one cannot have sequences that cross
|
||||
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
|
||||
@ -99,7 +99,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
||||
|
||||
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
|
530
src/free.c
Normal file
530
src/free.c
Normal file
@ -0,0 +1,530 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
#if !defined(MI_IN_ALLOC_C)
|
||||
#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)"
|
||||
// add includes to help an IDE
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/atomic.h"
|
||||
#include "mimalloc/prim.h" // _mi_prim_thread_id()
|
||||
#endif
// forward declarations
|
||||
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block);
|
||||
static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block);
|
||||
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block);
|
||||
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------
|
||||
// Free
|
||||
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
|
||||
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
|
||||
|
||||
// regular free of a (thread local) block pointer
|
||||
// fast path written carefully to prevent spilling on the stack
|
||||
static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
|
||||
{
|
||||
// checks
|
||||
if mi_unlikely(mi_check_is_double_free(page, block)) return;
|
||||
mi_check_padding(page, block);
|
||||
if (track_stats) { mi_stat_free(page, block); }
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
|
||||
if (!mi_page_is_huge(page)) { // huge page content may be already decommitted
|
||||
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
|
||||
}
|
||||
#endif
|
||||
if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster than mi_usable_size as we already know the page and that p is unaligned
// actual free: push on the local free list
|
||||
mi_block_set_next(page, block, page->local_free);
|
||||
page->local_free = block;
|
||||
if mi_unlikely(--page->used == 0) {
|
||||
_mi_page_retire(page);
|
||||
}
|
||||
else if mi_unlikely(check_full && mi_page_is_in_full(page)) {
|
||||
_mi_page_unfull(page);
|
||||
}
|
||||
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
|
||||
// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the
|
||||
// `page_start` and `block_size` fields; however these are constant and the page won't be
|
||||
// deallocated (as the block we are freeing keeps it alive) and thus safe to read concurrently.
|
||||
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
|
||||
mi_assert_internal(page!=NULL && p!=NULL);
|
||||
|
||||
size_t diff = (uint8_t*)p - page->page_start;
|
||||
size_t adjust;
|
||||
if mi_likely(page->block_size_shift != 0) {
|
||||
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
|
||||
}
|
||||
else {
|
||||
adjust = diff % mi_page_block_size(page);
|
||||
}
|
||||
|
||||
return (mi_block_t*)((uintptr_t)p - adjust);
|
||||
}
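// Editor's note: the fast path above relies on the identity (for a power-of-two block size)
//   diff % bsize == diff & (bsize - 1)
// A small self-contained check of that identity (illustration only):
#include <assert.h>
#include <stddef.h>

static size_t unalign_offset(size_t diff, size_t bsize, unsigned shift /* 0 if bsize is not a power of 2 */) {
  return (shift != 0 ? (diff & (((size_t)1 << shift) - 1)) : (diff % bsize));
}

static void check_unalign(void) {
  assert(unalign_offset(1000, 64, 6) == 1000 % 64);  // power-of-two block size
  assert(unalign_offset(1000, 48, 0) == 1000 % 48);  // generic block size
}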
// free a local pointer (page parameter comes first for better codegen)
|
||||
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
|
||||
MI_UNUSED(segment);
|
||||
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
|
||||
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
|
||||
}
|
||||
|
||||
// free a pointer owned by another thread (page parameter comes first for better codegen)
|
||||
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
|
||||
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
|
||||
mi_free_block_mt(page, segment, block);
|
||||
}
|
||||
|
||||
// generic free (for runtime integration)
|
||||
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
|
||||
if (is_local) mi_free_generic_local(page,segment,p);
|
||||
else mi_free_generic_mt(page,segment,p);
|
||||
}
|
||||
|
||||
// Get the segment data belonging to a pointer
|
||||
// This is just a single `and` in release mode but does further checks in debug mode
|
||||
// (and secure mode) to see if this was a valid pointer.
|
||||
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
|
||||
{
|
||||
MI_UNUSED(msg);
|
||||
|
||||
#if (MI_DEBUG>0)
|
||||
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
|
||||
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
mi_segment_t* const segment = _mi_ptr_segment(p);
|
||||
if mi_unlikely(segment==NULL) return segment;
|
||||
|
||||
#if (MI_DEBUG>0)
|
||||
if mi_unlikely(!mi_is_in_heap_region(p)) {
|
||||
#if (MI_INTPTR_SIZE == 8 && defined(__linux__))
|
||||
if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640)
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
|
||||
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
|
||||
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
|
||||
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if (MI_DEBUG>0 || MI_SECURE>=4)
|
||||
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
|
||||
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
return segment;
|
||||
}
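// Sketch of the underlying idea (the real _mi_ptr_segment lives in mimalloc/internal.h
// and this assumes the MI_SEGMENT_SIZE constant from mimalloc/types.h): segments are
// MI_SEGMENT_SIZE-aligned, so the segment of a pointer is found by masking the low bits.
static inline mi_segment_t* ptr_segment_sketch(const void* p) {
  return (mi_segment_t*)((uintptr_t)p & ~((uintptr_t)MI_SEGMENT_SIZE - 1));
}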
// Free a block
|
||||
// Fast path written carefully to prevent register spilling on the stack
|
||||
void mi_free(void* p) mi_attr_noexcept
|
||||
{
|
||||
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
|
||||
if mi_unlikely(segment==NULL) return;
|
||||
|
||||
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
|
||||
mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
|
||||
if mi_likely(is_local) { // thread-local free?
|
||||
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
|
||||
// thread-local, aligned, and not a full page
|
||||
mi_block_t* const block = (mi_block_t*)p;
|
||||
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
|
||||
}
|
||||
else {
|
||||
// page is full or contains (inner) aligned blocks; use generic path
|
||||
mi_free_generic_local(page, segment, p);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// not thread-local; use generic path
|
||||
mi_free_generic_mt(page, segment, p);
|
||||
}
|
||||
}
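// Minimal usage sketch: mi_free accepts NULL and pointers from any mi_* allocation,
// including blocks freed from a different thread than the one that allocated them.
#include <mimalloc.h>

static void free_example(void) {
  void* p = mi_malloc(42);
  mi_free(p);     // thread-local fast path when freed on the allocating thread
  mi_free(NULL);  // no-op: mi_checked_ptr_segment returns NULL for NULL
}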
// return true if successful
|
||||
bool _mi_free_delayed_block(mi_block_t* block) {
|
||||
// get segment and page
|
||||
mi_assert_internal(block!=NULL);
|
||||
const mi_segment_t* const segment = _mi_ptr_segment(block);
|
||||
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
|
||||
mi_assert_internal(_mi_thread_id() == segment->thread_id);
|
||||
mi_page_t* const page = _mi_segment_page_of(segment, block);
|
||||
|
||||
// Clear the no-delayed flag so delayed freeing is used again for this page.
|
||||
// This must be done before collecting the free lists on this page -- otherwise
|
||||
// some blocks may end up in the page `thread_free` list with no blocks in the
|
||||
// heap `thread_delayed_free` list which may cause the page to be never freed!
|
||||
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
|
||||
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
|
||||
_mi_page_free_collect(page, false);
|
||||
|
||||
// and free the block (possibly freeing the page as well since `used` is updated)
|
||||
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
|
||||
return true;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Multi-threaded Free (`_mt`)
|
||||
// ------------------------------------------------------
|
||||
|
||||
// Push a block that is owned by another thread on its page-local thread free
|
||||
// list or its heap delayed free list. Such blocks are later collected by
|
||||
// the owning thread in `_mi_free_delayed_block`.
|
||||
static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block )
|
||||
{
|
||||
// Try to put the block on either the page-local thread free list,
|
||||
// or the heap delayed free list (if this is the first non-local free in that page)
|
||||
mi_thread_free_t tfreex;
|
||||
bool use_delayed;
|
||||
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
|
||||
do {
|
||||
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
|
||||
if mi_unlikely(use_delayed) {
|
||||
// unlikely: this only happens on the first concurrent free in a page that is in the full list
|
||||
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
|
||||
}
|
||||
else {
|
||||
// usual: directly add to page thread_free list
|
||||
mi_block_set_next(page, block, mi_tf_block(tfree));
|
||||
tfreex = mi_tf_set_block(tfree,block);
|
||||
}
|
||||
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
|
||||
|
||||
// If this was the first non-local free, we need to push it on the heap delayed free list instead
|
||||
if mi_unlikely(use_delayed) {
|
||||
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
|
||||
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
|
||||
mi_assert_internal(heap != NULL);
|
||||
if (heap != NULL) {
|
||||
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
|
||||
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
|
||||
do {
|
||||
mi_block_set_nextx(heap,block,dfree, heap->keys);
|
||||
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
|
||||
}
|
||||
|
||||
// and reset the MI_DELAYED_FREEING flag
|
||||
tfree = mi_atomic_load_relaxed(&page->xthread_free);
|
||||
do {
|
||||
tfreex = tfree;
|
||||
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
|
||||
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
|
||||
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
|
||||
}
|
||||
}
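// Sketch of the lock-free push pattern used above, in generic C11 form (illustration
// only; mimalloc uses its own mi_atomic_* wrappers and encoded next links):
#include <stdatomic.h>

typedef struct node_s { struct node_s* next; } node_t;

static void push(_Atomic(node_t*)* list, node_t* n) {
  node_t* head = atomic_load_explicit(list, memory_order_relaxed);
  do {
    n->next = head;  // link to the current head before publishing
  } while (!atomic_compare_exchange_weak_explicit(list, &head, n,
                                                  memory_order_release, memory_order_relaxed));
}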
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
|
||||
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
|
||||
{
|
||||
// first see if the segment was abandoned and if we can reclaim it into our thread
|
||||
if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) &&
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
segment->page_kind != MI_PAGE_HUGE &&
|
||||
#endif
|
||||
mi_atomic_load_relaxed(&segment->thread_id) == 0)
|
||||
{
|
||||
// the segment is abandoned, try to reclaim it into our heap
|
||||
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
|
||||
mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
|
||||
mi_free(block); // recursively free as now it will be a local free in our heap
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// The padding check may access the non-thread-owned page for the key values.
|
||||
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
|
||||
mi_check_padding(page, block);
|
||||
|
||||
// adjust stats (after padding check and potentially recursive `mi_free` above)
|
||||
mi_stat_free(page, block); // stat_free may access the padding
|
||||
mi_track_free_size(block, mi_page_usable_size_of(page,block));
|
||||
|
||||
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
|
||||
_mi_padding_shrink(page, block, sizeof(mi_block_t));
|
||||
|
||||
if (segment->kind == MI_SEGMENT_HUGE) {
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
// huge page segments are always abandoned and can be freed immediately
|
||||
_mi_segment_huge_page_free(segment, page, block);
|
||||
return;
|
||||
#else
|
||||
// huge pages are special as they occupy the entire segment
|
||||
// as these are large we reset the memory occupied by the page so it is available to other threads
|
||||
// (as the owning thread needs to actually free the memory later).
|
||||
_mi_segment_huge_page_reset(segment, page, block);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
|
||||
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
|
||||
#endif
|
||||
}
|
||||
|
||||
// and finally free the actual block by pushing it on the owning heap
|
||||
// thread_delayed free list (or heap delayed free list)
|
||||
mi_free_block_delayed_mt(page,block);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Usable size
|
||||
// ------------------------------------------------------
|
||||
|
||||
// Bytes available in a block
|
||||
static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept {
|
||||
const mi_block_t* block = _mi_page_ptr_unalign(page, p);
|
||||
const size_t size = mi_page_usable_size_of(page, block);
|
||||
const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
|
||||
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
|
||||
return (size - adjust);
|
||||
}
|
||||
|
||||
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
|
||||
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
|
||||
if mi_unlikely(segment==NULL) return 0;
|
||||
const mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
if mi_likely(!mi_page_has_aligned(page)) {
|
||||
const mi_block_t* block = (const mi_block_t*)p;
|
||||
return mi_page_usable_size_of(page, block);
|
||||
}
|
||||
else {
|
||||
// split out to separate routine for improved code generation
|
||||
return mi_page_usable_aligned_size_of(page, p);
|
||||
}
|
||||
}
|
||||
|
||||
mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
|
||||
return _mi_usable_size(p, "mi_usable_size");
|
||||
}
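// Usage sketch: the usable size is at least the requested size and the whole
// range can be used without reallocating.
#include <mimalloc.h>
#include <assert.h>

static void usable_size_example(void) {
  void* p = mi_malloc(30);
  size_t sz = mi_usable_size(p);   // >= 30; excludes the debug padding
  assert(sz >= 30);
  mi_free(p);
}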
// ------------------------------------------------------
|
||||
// Free variants
|
||||
// ------------------------------------------------------
|
||||
|
||||
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
|
||||
MI_UNUSED_RELEASE(size);
|
||||
mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
|
||||
mi_free(p);
|
||||
}
|
||||
|
||||
void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
|
||||
MI_UNUSED_RELEASE(alignment);
|
||||
mi_assert(((uintptr_t)p % alignment) == 0);
|
||||
mi_free_size(p,size);
|
||||
}
|
||||
|
||||
void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
|
||||
MI_UNUSED_RELEASE(alignment);
|
||||
mi_assert(((uintptr_t)p % alignment) == 0);
|
||||
mi_free(p);
|
||||
}
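// Usage sketch for the sized/aligned variants: the extra arguments are checked
// only via mi_assert in debug builds and are otherwise ignored.
#include <mimalloc.h>

static void free_variant_example(void) {
  void* p = mi_malloc_aligned(100, 64);
  mi_free_size_aligned(p, 100, 64);   // size and alignment should match the allocation
  void* q = mi_malloc(100);
  mi_free_size(q, 100);
}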
// ------------------------------------------------------
|
||||
// Check for double free in secure and debug mode
|
||||
// This is somewhat expensive so only enabled for secure mode 4
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0))
|
||||
// linear check if the free list contains a specific element
|
||||
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
|
||||
while (list != NULL) {
|
||||
if (elem==list) return true;
|
||||
list = mi_block_next(page, list);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
|
||||
// The decoded value is in the same page (or NULL).
|
||||
// Walk the free lists to verify positively if it is already freed
|
||||
if (mi_list_contains(page, page->free, block) ||
|
||||
mi_list_contains(page, page->local_free, block) ||
|
||||
mi_list_contains(page, mi_page_thread_free(page), block))
|
||||
{
|
||||
_mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); }
|
||||
|
||||
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
bool is_double_free = false;
|
||||
mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field
|
||||
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
|
||||
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
|
||||
{
|
||||
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
|
||||
// (continue in separate function to improve code generation)
|
||||
is_double_free = mi_check_is_double_freex(page, block);
|
||||
}
|
||||
return is_double_free;
|
||||
}
|
||||
#else
|
||||
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
MI_UNUSED(page);
|
||||
MI_UNUSED(block);
|
||||
return false;
|
||||
}
|
||||
#endif
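// Illustration (not part of this change): in a debug build, or MI_SECURE>=4 with an
// encoded free list, a double free is reported rather than silently corrupting the page:
//
//   void* p = mi_malloc(16);
//   mi_free(p);
//   mi_free(p);   // likely triggers: "double free detected of block ... with size ..."
//
// Detection is heuristic: it only runs when the decoded next-field of the block
// happens to point inside the same page (or is NULL).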
// ---------------------------------------------------------------------------
|
||||
// Check for heap block overflow by setting up padding at the end of the block
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if MI_PADDING // && !MI_TRACK_ENABLED
|
||||
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
|
||||
*bsize = mi_page_usable_block_size(page);
|
||||
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
|
||||
mi_track_mem_defined(padding,sizeof(mi_padding_t));
|
||||
*delta = padding->delta;
|
||||
uint32_t canary = padding->canary;
|
||||
uintptr_t keys[2];
|
||||
keys[0] = page->keys[0];
|
||||
keys[1] = page->keys[1];
|
||||
bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize);
|
||||
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
|
||||
return ok;
|
||||
}
|
||||
|
||||
// Return the exact usable size of a block.
|
||||
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
|
||||
size_t bsize;
|
||||
size_t delta;
|
||||
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
|
||||
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
|
||||
return (ok ? bsize - delta : 0);
|
||||
}
|
||||
|
||||
// When a non-thread-local block is freed, it becomes part of the thread delayed free
|
||||
// list that is freed later by the owning heap. If the exact usable size is too small to
|
||||
// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
|
||||
// so it will later not trigger an overflow error in `mi_free_block`.
|
||||
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
|
||||
size_t bsize;
|
||||
size_t delta;
|
||||
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
|
||||
mi_assert_internal(ok);
|
||||
if (!ok || (bsize - delta) >= min_size) return; // usually already enough space
|
||||
mi_assert_internal(bsize >= min_size);
|
||||
if (bsize < min_size) return; // should never happen
|
||||
size_t new_delta = (bsize - min_size);
|
||||
mi_assert_internal(new_delta < bsize);
|
||||
mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
|
||||
mi_track_mem_defined(padding,sizeof(mi_padding_t));
|
||||
padding->delta = (uint32_t)new_delta;
|
||||
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
|
||||
}
|
||||
#else
|
||||
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
|
||||
MI_UNUSED(block);
|
||||
return mi_page_usable_block_size(page);
|
||||
}
|
||||
|
||||
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
|
||||
MI_UNUSED(page);
|
||||
MI_UNUSED(block);
|
||||
MI_UNUSED(min_size);
|
||||
}
|
||||
#endif
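// Sketch of the padding layout assumed above (the real mi_padding_t is defined in
// mimalloc/types.h): the padding sits directly after the usable bytes, and `delta`
// records how many bytes at the end of the block are padding, so
//   usable size = mi_page_usable_block_size(page) - padding->delta
#include <stdint.h>

typedef struct padding_sketch_s {
  uint32_t canary;   // encoded canary used to detect an overflow into the padding
  uint32_t delta;    // padding bytes at the end of the block (bsize - usable size)
} padding_sketch_t;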
#if MI_PADDING && MI_PADDING_CHECK
|
||||
|
||||
static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
|
||||
size_t bsize;
|
||||
size_t delta;
|
||||
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
|
||||
*size = *wrong = bsize;
|
||||
if (!ok) return false;
|
||||
mi_assert_internal(bsize >= delta);
|
||||
*size = bsize - delta;
|
||||
if (!mi_page_is_huge(page)) {
|
||||
uint8_t* fill = (uint8_t*)block + bsize - delta;
|
||||
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
|
||||
mi_track_mem_defined(fill, maxpad);
|
||||
for (size_t i = 0; i < maxpad; i++) {
|
||||
if (fill[i] != MI_DEBUG_PADDING) {
|
||||
*wrong = bsize - delta + i;
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mi_track_mem_noaccess(fill, maxpad);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
|
||||
size_t size;
|
||||
size_t wrong;
|
||||
if (!mi_verify_padding(page,block,&size,&wrong)) {
|
||||
_mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
|
||||
MI_UNUSED(page);
|
||||
MI_UNUSED(block);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// only maintain stats for smaller objects if requested
|
||||
#if (MI_STAT>0)
|
||||
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
#if (MI_STAT < 2)
|
||||
MI_UNUSED(block);
|
||||
#endif
|
||||
mi_heap_t* const heap = mi_heap_get_default();
|
||||
const size_t bsize = mi_page_usable_block_size(page);
|
||||
#if (MI_STAT>1)
|
||||
const size_t usize = mi_page_usable_size_of(page, block);
|
||||
mi_heap_stat_decrease(heap, malloc, usize);
|
||||
#endif
|
||||
if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
|
||||
mi_heap_stat_decrease(heap, normal, bsize);
|
||||
#if (MI_STAT > 1)
|
||||
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
|
||||
#endif
|
||||
}
|
||||
else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
|
||||
mi_heap_stat_decrease(heap, large, bsize);
|
||||
}
|
||||
else {
|
||||
mi_heap_stat_decrease(heap, huge, bsize);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
MI_UNUSED(page); MI_UNUSED(block);
|
||||
}
|
||||
#endif
|
34
src/heap.c
34
src/heap.c
@ -32,7 +32,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
|
||||
#if MI_DEBUG>1
|
||||
size_t total = heap->page_count;
|
||||
size_t count = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
|
||||
mi_page_queue_t* pq = &heap->pages[i];
|
||||
@ -104,6 +104,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
|
||||
// still used blocks but the thread is done; abandon the page
|
||||
_mi_page_abandon(page, pq);
|
||||
}
|
||||
if (collect == MI_FORCE) {
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
_mi_segment_collect(segment, true /* force? */, &heap->tld->segments);
|
||||
}
|
||||
return true; // don't break
|
||||
}
|
||||
|
||||
@ -120,11 +124,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
|
||||
{
|
||||
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
|
||||
|
||||
const bool force = collect >= MI_FORCE;
|
||||
const bool force = (collect >= MI_FORCE);
|
||||
_mi_deferred_free(heap, force);
|
||||
|
||||
// note: never reclaim on collect but leave it to threads that need storage to reclaim
|
||||
const bool force_main =
|
||||
// note: never reclaim on collect but leave it to threads that need storage to reclaim
|
||||
const bool force_main =
|
||||
#ifdef NDEBUG
|
||||
collect == MI_FORCE
|
||||
#else
|
||||
@ -157,17 +161,14 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
|
||||
// collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)
|
||||
// note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment
|
||||
_mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments);
|
||||
|
||||
// collect segment local caches
|
||||
if (force) {
|
||||
_mi_segment_thread_collect(&heap->tld->segments);
|
||||
}
|
||||
|
||||
// collect regions on program-exit (or shared library unload)
|
||||
|
||||
// if forced, collect thread data cache on program-exit (or shared library unload)
|
||||
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
|
||||
_mi_thread_data_collect(); // collect thread data cache
|
||||
_mi_arena_collect(true /* force purge */, &heap->tld->stats);
|
||||
}
|
||||
|
||||
// collect arenas (this is program wide so don't force purges on abandonment of threads)
|
||||
_mi_arenas_collect(collect == MI_FORCE /* force purge? */, &heap->tld->stats);
|
||||
}
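// Usage sketch: a forced collect returns free pages and segments and purges arenas;
// a normal collect leaves the expensive purging to threads that actually need memory.
#include <mimalloc.h>

static void collect_example(void) {
  mi_collect(false);  // opportunistic: deferred frees and local caches
  mi_collect(true);   // forced: also thread/segment caches and arena purging
}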
void _mi_heap_collect_abandon(mi_heap_t* heap) {
|
||||
@ -425,7 +426,7 @@ void mi_heap_delete(mi_heap_t* heap)
|
||||
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
|
||||
|
||||
if (!mi_heap_is_backing(heap)) {
|
||||
// tranfer still used pages to the backing heap
|
||||
// transfer still used pages to the backing heap
|
||||
mi_heap_absorb(heap->tld->heap_backing, heap);
|
||||
}
|
||||
else {
|
||||
@ -474,8 +475,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
|
||||
MI_UNUSED(heap);
|
||||
MI_UNUSED(pq);
|
||||
bool* found = (bool*)vfound;
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
void* start = _mi_page_start(segment, page, NULL);
|
||||
void* start = mi_page_start(page);
|
||||
void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page));
|
||||
*found = (p >= start && p < end);
|
||||
return (!*found); // continue if not found
|
||||
@ -521,7 +521,7 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
|
||||
const size_t bsize = mi_page_block_size(page);
|
||||
const size_t ubsize = mi_page_usable_block_size(page); // without padding
|
||||
size_t psize;
|
||||
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
|
||||
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
|
||||
|
||||
if (page->capacity == 1) {
|
||||
// optimize page with one block
|
||||
@ -588,7 +588,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
|
||||
xarea.page = page;
|
||||
xarea.area.reserved = page->reserved * bsize;
|
||||
xarea.area.committed = page->capacity * bsize;
|
||||
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
|
||||
xarea.area.blocks = mi_page_start(page);
|
||||
xarea.area.used = page->used; // number of blocks in use (#553)
|
||||
xarea.area.block_size = ubsize;
|
||||
xarea.area.full_block_size = bsize;
|
||||
|
53
src/init.c
53
src/init.c
@ -14,16 +14,19 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
// Empty page used to initialize the small free pages array
|
||||
const mi_page_t _mi_page_empty = {
|
||||
0, false, false, false,
|
||||
0,
|
||||
false, false, false, false,
|
||||
0, // capacity
|
||||
0, // reserved capacity
|
||||
{ 0 }, // flags
|
||||
false, // is_zero
|
||||
0, // retire_expire
|
||||
NULL, // free
|
||||
0, // used
|
||||
0, // xblock_size
|
||||
NULL, // local_free
|
||||
0, // used
|
||||
0, // block size shift
|
||||
0, // block_size
|
||||
NULL, // page_start
|
||||
#if (MI_PADDING || MI_ENCODE_FREELIST)
|
||||
{ 0, 0 },
|
||||
#endif
|
||||
@ -84,7 +87,9 @@ const mi_page_t _mi_page_empty = {
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
|
||||
{ 0, 0 } \
|
||||
MI_STAT_COUNT_END_NULL()
|
||||
|
||||
|
||||
@ -110,8 +115,6 @@ const mi_page_t _mi_page_empty = {
|
||||
|
||||
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
|
||||
NULL,
|
||||
MI_SMALL_PAGES_EMPTY,
|
||||
MI_PAGE_QUEUES_EMPTY,
|
||||
MI_ATOMIC_VAR_INIT(NULL),
|
||||
0, // tid
|
||||
0, // cookie
|
||||
@ -121,7 +124,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
|
||||
0, // page count
|
||||
MI_BIN_FULL, 0, // page retired min/max
|
||||
NULL, // next
|
||||
false
|
||||
false,
|
||||
MI_SMALL_PAGES_EMPTY,
|
||||
MI_PAGE_QUEUES_EMPTY
|
||||
};
|
||||
|
||||
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
|
||||
@ -131,7 +136,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
|
||||
0,
|
||||
false,
|
||||
NULL, NULL,
|
||||
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
|
||||
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
|
||||
{ 0, tld_empty_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
@ -148,15 +153,13 @@ extern mi_heap_t _mi_heap_main;
|
||||
static mi_tld_t tld_main = {
|
||||
0, false,
|
||||
&_mi_heap_main, & _mi_heap_main,
|
||||
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
|
||||
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
|
||||
{ 0, &tld_main.stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
|
||||
mi_heap_t _mi_heap_main = {
|
||||
&tld_main,
|
||||
MI_SMALL_PAGES_EMPTY,
|
||||
MI_PAGE_QUEUES_EMPTY,
|
||||
MI_ATOMIC_VAR_INIT(NULL),
|
||||
0, // thread id
|
||||
0, // initial cookie
|
||||
@ -166,7 +169,9 @@ mi_heap_t _mi_heap_main = {
|
||||
0, // page count
|
||||
MI_BIN_FULL, 0, // page retired min/max
|
||||
NULL, // next heap
|
||||
false // can reclaim
|
||||
false, // can reclaim
|
||||
MI_SMALL_PAGES_EMPTY,
|
||||
MI_PAGE_QUEUES_EMPTY
|
||||
};
|
||||
|
||||
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
|
||||
@ -201,9 +206,9 @@ mi_heap_t* _mi_heap_main_get(void) {
|
||||
|
||||
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
|
||||
typedef struct mi_thread_data_s {
|
||||
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
||||
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
||||
mi_tld_t tld;
|
||||
mi_memid_t memid;
|
||||
mi_memid_t memid; // must come last due to zero'ing
|
||||
} mi_thread_data_t;
|
||||
|
||||
|
||||
@ -247,9 +252,9 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
|
||||
is_zero = memid.initially_zero;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (td != NULL && !is_zero) {
|
||||
_mi_memzero_aligned(td, sizeof(*td));
|
||||
_mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid));
|
||||
}
|
||||
return td;
|
||||
}
|
||||
@ -426,23 +431,23 @@ void mi_thread_done(void) mi_attr_noexcept {
|
||||
_mi_thread_done(NULL);
|
||||
}
|
||||
|
||||
void _mi_thread_done(mi_heap_t* heap)
|
||||
void _mi_thread_done(mi_heap_t* heap)
|
||||
{
|
||||
// calling with NULL implies using the default heap
|
||||
if (heap == NULL) {
|
||||
heap = mi_prim_get_default_heap();
|
||||
if (heap == NULL) {
|
||||
heap = mi_prim_get_default_heap();
|
||||
if (heap == NULL) return;
|
||||
}
|
||||
|
||||
// prevent re-entrancy through heap_done/heap_set_default_direct (issue #699)
|
||||
if (!mi_heap_is_initialized(heap)) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
// adjust stats
|
||||
mi_atomic_decrement_relaxed(&thread_count);
|
||||
_mi_stat_decrease(&_mi_stats_main.threads, 1);
|
||||
|
||||
|
||||
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
|
||||
if (heap->thread_id != _mi_thread_id()) return;
|
||||
|
||||
@ -455,7 +460,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
|
||||
#if defined(MI_TLS_SLOT)
|
||||
mi_prim_tls_slot_set(MI_TLS_SLOT,heap);
|
||||
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
|
||||
*mi_tls_pthread_heap_slot() = heap;
|
||||
*mi_prim_tls_pthread_heap_slot() = heap;
|
||||
#elif defined(MI_TLS_PTHREAD)
|
||||
// we use _mi_heap_default_key
|
||||
#else
|
||||
@ -464,7 +469,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
|
||||
|
||||
// ensure the default heap is passed to `_mi_thread_done`
|
||||
// setting to a non-NULL value also ensures `mi_thread_done` is called.
|
||||
_mi_prim_thread_associate_default_heap(heap);
|
||||
_mi_prim_thread_associate_default_heap(heap);
|
||||
}
|
||||
|
||||
|
||||
@ -624,7 +629,7 @@ static void mi_cdecl mi_process_done(void) {
|
||||
|
||||
// release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
|
||||
_mi_prim_thread_done_auto_done();
|
||||
|
||||
|
||||
#ifndef MI_SKIP_COLLECT_ON_EXIT
|
||||
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
|
||||
// free all memory if possible on process exit. This is not needed for a stand-alone process
|
||||
|
273
src/libc.c
Normal file
273
src/libc.c
Normal file
@ -0,0 +1,273 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
// --------------------------------------------------------
|
||||
// This module defines various std libc functions to reduce
|
||||
// the dependency on libc, and also prevent errors caused
|
||||
// by some libc implementations when called before `main`
|
||||
// executes (due to malloc redirection)
|
||||
// --------------------------------------------------------
|
||||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/prim.h" // mi_prim_getenv
|
||||
|
||||
char _mi_toupper(char c) {
|
||||
if (c >= 'a' && c <= 'z') return (c - 'a' + 'A');
|
||||
else return c;
|
||||
}
|
||||
|
||||
int _mi_strnicmp(const char* s, const char* t, size_t n) {
|
||||
if (n == 0) return 0;
|
||||
for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
|
||||
if (_mi_toupper(*s) != _mi_toupper(*t)) break;
|
||||
}
|
||||
return (n == 0 ? 0 : *s - *t);
|
||||
}
|
||||
|
||||
void _mi_strlcpy(char* dest, const char* src, size_t dest_size) {
|
||||
if (dest==NULL || src==NULL || dest_size == 0) return;
|
||||
// copy until end of src, or when dest is (almost) full
|
||||
while (*src != 0 && dest_size > 1) {
|
||||
*dest++ = *src++;
|
||||
dest_size--;
|
||||
}
|
||||
// always zero terminate
|
||||
*dest = 0;
|
||||
}
|
||||
|
||||
void _mi_strlcat(char* dest, const char* src, size_t dest_size) {
|
||||
if (dest==NULL || src==NULL || dest_size == 0) return;
|
||||
// find end of string in the dest buffer
|
||||
while (*dest != 0 && dest_size > 1) {
|
||||
dest++;
|
||||
dest_size--;
|
||||
}
|
||||
// and catenate
|
||||
_mi_strlcpy(dest, src, dest_size);
|
||||
}
|
||||
|
||||
size_t _mi_strlen(const char* s) {
|
||||
if (s==NULL) return 0;
|
||||
size_t len = 0;
|
||||
while(s[len] != 0) { len++; }
|
||||
return len;
|
||||
}
|
||||
|
||||
size_t _mi_strnlen(const char* s, size_t max_len) {
|
||||
if (s==NULL) return 0;
|
||||
size_t len = 0;
|
||||
while(s[len] != 0 && len < max_len) { len++; }
|
||||
return len;
|
||||
}
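// Usage sketch (mirrors how the option code later builds environment variable names):
// both functions always NUL-terminate and never write more than dest_size bytes.
static void build_env_name(char* buf, size_t bufsize, const char* option_name) {
  _mi_strlcpy(buf, "mimalloc_", bufsize);
  _mi_strlcat(buf, option_name, bufsize);
}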
#ifdef MI_NO_GETENV
|
||||
bool _mi_getenv(const char* name, char* result, size_t result_size) {
|
||||
MI_UNUSED(name);
|
||||
MI_UNUSED(result);
|
||||
MI_UNUSED(result_size);
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
bool _mi_getenv(const char* name, char* result, size_t result_size) {
|
||||
if (name==NULL || result == NULL || result_size < 64) return false;
|
||||
return _mi_prim_getenv(name,result,result_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Define our own limited `_mi_vsnprintf` and `_mi_snprintf`
|
||||
// This is mostly to avoid calling these when libc is not yet
|
||||
// initialized (and to reduce dependencies)
|
||||
//
|
||||
// format: d i, p x u, s
|
||||
// prec: z l ll L
|
||||
// width: 10
|
||||
// align-left: -
|
||||
// fill: 0
|
||||
// plus: +
|
||||
// --------------------------------------------------------
|
||||
|
||||
static void mi_outc(char c, char** out, char* end) {
|
||||
char* p = *out;
|
||||
if (p >= end) return;
|
||||
*p = c;
|
||||
*out = p + 1;
|
||||
}
|
||||
|
||||
static void mi_outs(const char* s, char** out, char* end) {
|
||||
if (s == NULL) return;
|
||||
char* p = *out;
|
||||
while (*s != 0 && p < end) {
|
||||
*p++ = *s++;
|
||||
}
|
||||
*out = p;
|
||||
}
|
||||
|
||||
static void mi_out_fill(char fill, size_t len, char** out, char* end) {
|
||||
char* p = *out;
|
||||
for (size_t i = 0; i < len && p < end; i++) {
|
||||
*p++ = fill;
|
||||
}
|
||||
*out = p;
|
||||
}
|
||||
|
||||
static void mi_out_alignright(char fill, char* start, size_t len, size_t extra, char* end) {
|
||||
if (len == 0 || extra == 0) return;
|
||||
if (start + len + extra >= end) return;
|
||||
// move `len` characters to the right (in reverse since it can overlap)
|
||||
for (size_t i = 1; i <= len; i++) {
|
||||
start[len + extra - i] = start[len - i];
|
||||
}
|
||||
// and fill the start
|
||||
for (size_t i = 0; i < extra; i++) {
|
||||
start[i] = fill;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void mi_out_num(uintptr_t x, size_t base, char prefix, char** out, char* end)
|
||||
{
|
||||
if (x == 0 || base == 0 || base > 16) {
|
||||
if (prefix != 0) { mi_outc(prefix, out, end); }
|
||||
mi_outc('0',out,end);
|
||||
}
|
||||
else {
|
||||
// output digits in reverse
|
||||
char* start = *out;
|
||||
while (x > 0) {
|
||||
char digit = (char)(x % base);
|
||||
mi_outc((digit <= 9 ? '0' + digit : 'A' + digit - 10),out,end);
|
||||
x = x / base;
|
||||
}
|
||||
if (prefix != 0) {
|
||||
mi_outc(prefix, out, end);
|
||||
}
|
||||
size_t len = *out - start;
|
||||
// and reverse in-place
|
||||
for (size_t i = 0; i < (len / 2); i++) {
|
||||
char c = start[len - i - 1];
|
||||
start[len - i - 1] = start[i];
|
||||
start[i] = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define MI_NEXTC() c = *in; if (c==0) break; in++;
|
||||
|
||||
void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
|
||||
if (buf == NULL || bufsize == 0 || fmt == NULL) return;
|
||||
buf[bufsize - 1] = 0;
|
||||
char* const end = buf + (bufsize - 1);
|
||||
const char* in = fmt;
|
||||
char* out = buf;
|
||||
while (true) {
|
||||
if (out >= end) break;
|
||||
char c;
|
||||
MI_NEXTC();
|
||||
if (c != '%') {
|
||||
if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t') { // output visible ascii or standard control only
|
||||
mi_outc(c, &out, end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
MI_NEXTC();
|
||||
char fill = ' ';
|
||||
size_t width = 0;
|
||||
char numtype = 'd';
|
||||
char numplus = 0;
|
||||
bool alignright = true;
|
||||
if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); }
|
||||
if (c == '-') { alignright = false; MI_NEXTC(); }
|
||||
if (c == '0') { fill = '0'; MI_NEXTC(); }
|
||||
if (c >= '1' && c <= '9') {
|
||||
width = (c - '0'); MI_NEXTC();
|
||||
while (c >= '0' && c <= '9') {
|
||||
width = (10 * width) + (c - '0'); MI_NEXTC();
|
||||
}
|
||||
if (c == 0) break; // extra check due to while
|
||||
}
|
||||
if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); }
|
||||
else if (c == 'l') {
|
||||
numtype = c; MI_NEXTC();
|
||||
if (c == 'l') { numtype = 'L'; MI_NEXTC(); }
|
||||
}
|
||||
|
||||
char* start = out;
|
||||
if (c == 's') {
|
||||
// string
|
||||
const char* s = va_arg(args, const char*);
|
||||
mi_outs(s, &out, end);
|
||||
}
|
||||
else if (c == 'p' || c == 'x' || c == 'u') {
|
||||
// unsigned
|
||||
uintptr_t x = 0;
|
||||
if (c == 'x' || c == 'u') {
|
||||
if (numtype == 'z') x = va_arg(args, size_t);
|
||||
else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t
|
||||
else if (numtype == 'L') x = (uintptr_t)va_arg(args, unsigned long long);
|
||||
else x = va_arg(args, unsigned long);
|
||||
}
|
||||
else if (c == 'p') {
|
||||
x = va_arg(args, uintptr_t);
|
||||
mi_outs("0x", &out, end);
|
||||
start = out;
|
||||
width = (width >= 2 ? width - 2 : 0);
|
||||
}
|
||||
if (width == 0 && (c == 'x' || c == 'p')) {
|
||||
if (c == 'p') { width = 2 * (x <= UINT32_MAX ? 4 : ((x >> 16) <= UINT32_MAX ? 6 : sizeof(void*))); }
|
||||
if (width == 0) { width = 2; }
|
||||
fill = '0';
|
||||
}
|
||||
mi_out_num(x, (c == 'x' || c == 'p' ? 16 : 10), numplus, &out, end);
|
||||
}
|
||||
else if (c == 'i' || c == 'd') {
|
||||
// signed
|
||||
intptr_t x = 0;
|
||||
if (numtype == 'z') x = va_arg(args, intptr_t );
|
||||
else if (numtype == 't') x = va_arg(args, ptrdiff_t);
|
||||
else if (numtype == 'L') x = (intptr_t)va_arg(args, long long);
|
||||
else x = va_arg(args, long);
|
||||
char pre = 0;
|
||||
if (x < 0) {
|
||||
pre = '-';
|
||||
if (x > INTPTR_MIN) { x = -x; }
|
||||
}
|
||||
else if (numplus != 0) {
|
||||
pre = numplus;
|
||||
}
|
||||
mi_out_num((uintptr_t)x, 10, pre, &out, end);
|
||||
}
|
||||
else if (c >= ' ' && c <= '~') {
|
||||
// unknown format
|
||||
mi_outc('%', &out, end);
|
||||
mi_outc(c, &out, end);
|
||||
}
|
||||
|
||||
// fill & align
|
||||
mi_assert_internal(out <= end);
|
||||
mi_assert_internal(out >= start);
|
||||
const size_t len = out - start;
|
||||
if (len < width) {
|
||||
mi_out_fill(fill, width - len, &out, end);
|
||||
if (alignright && out <= end) {
|
||||
mi_out_alignright(fill, start, len, width - len, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
mi_assert_internal(out <= end);
|
||||
*out = 0;
|
||||
}
|
||||
|
||||
void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
_mi_vsnprintf(buf, buflen, fmt, args);
|
||||
va_end(args);
|
||||
}
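// Usage sketch for the limited printf subset above (illustration only): only the
// d/i, u/x/p and s conversions are handled, and the default integer conversions
// read long/unsigned long, so arguments are cast accordingly.
static void mi_snprintf_example(void) {
  char buf[128];
  _mi_snprintf(buf, sizeof(buf), "block %p of size %zu (%s)",
               (void*)buf, (size_t)42, "example");
  _mi_snprintf(buf, sizeof(buf), "aligned: %10d|%-10d|%08x",
               (long)42, (long)42, (unsigned long)0xBEEF);
}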
|
@ -9,9 +9,9 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include "mimalloc/atomic.h"
|
||||
#include "mimalloc/prim.h" // mi_prim_out_stderr
|
||||
|
||||
#include <stdio.h> // FILE
|
||||
#include <stdio.h> // stdin/stdout
|
||||
#include <stdlib.h> // abort
|
||||
#include <stdarg.h>
|
||||
|
||||
|
||||
|
||||
static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit)
|
||||
@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
|
||||
{-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
|
||||
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
|
||||
{ 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve OS memory in advance
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates
|
||||
@ -77,11 +77,11 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
#endif
|
||||
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
|
||||
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
|
||||
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
|
||||
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
|
||||
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
|
||||
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
|
||||
{ 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try.
|
||||
{ 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments per try.
|
||||
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
|
||||
#if (MI_INTPTR_SIZE>4)
|
||||
{ 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time
|
||||
@ -90,10 +90,16 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
#endif
|
||||
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
|
||||
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
|
||||
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
|
||||
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
|
||||
};
|
||||
|
||||
static void mi_option_init(mi_option_desc_t* desc);
|
||||
|
||||
static bool mi_option_has_size_in_kib(mi_option_t option) {
|
||||
return (option == mi_option_reserve_os_memory || option == mi_option_arena_reserve);
|
||||
}
|
||||
|
||||
void _mi_options_init(void) {
|
||||
// called on process load; should not be called before the CRT is initialized!
|
||||
// (e.g. do not call this from process_init as that may run before CRT initialization)
|
||||
@ -104,7 +110,7 @@ void _mi_options_init(void) {
|
||||
// if (option != mi_option_verbose)
|
||||
{
|
||||
mi_option_desc_t* desc = &options[option];
|
||||
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
|
||||
_mi_verbose_message("option '%s': %ld %s\n", desc->name, desc->value, (mi_option_has_size_in_kib(option) ? "KiB" : ""));
|
||||
}
|
||||
}
|
||||
mi_max_error_count = mi_option_get(mi_option_max_errors);
|
||||
@ -128,7 +134,7 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma
|
||||
}
|
||||
|
||||
mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) {
|
||||
mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve);
|
||||
mi_assert_internal(mi_option_has_size_in_kib(option));
|
||||
long x = mi_option_get(option);
|
||||
return (x < 0 ? 0 : (size_t)x * MI_KiB);
|
||||
}
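// Sketch: options whose value is a size in KiB (reserve_os_memory and arena_reserve)
// are returned scaled to bytes by mi_option_get_size.
static size_t arena_reserve_bytes(void) {
  return mi_option_get_size(mi_option_arena_reserve);  // option value in KiB, scaled to bytes
}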
|
||||
@ -311,12 +317,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
|
||||
}
|
||||
|
||||
// Define our own limited `fprintf` that avoids memory allocation.
|
||||
// We do this using `snprintf` with a limited buffer.
|
||||
// We do this using `_mi_vsnprintf` with a limited buffer.
|
||||
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
|
||||
char buf[512];
|
||||
if (fmt==NULL) return;
|
||||
if (!mi_recurse_enter()) return;
|
||||
vsnprintf(buf,sizeof(buf)-1,fmt,args);
|
||||
_mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);
|
||||
mi_recurse_exit();
|
||||
_mi_fputs(out,arg,prefix,buf);
|
||||
}
|
||||
@ -331,7 +337,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
|
||||
static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) {
|
||||
if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) {
|
||||
char tprefix[64];
|
||||
snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id());
|
||||
_mi_snprintf(tprefix, sizeof(tprefix), "%sthread 0x%tx: ", prefix, (uintptr_t)_mi_thread_id());
|
||||
mi_vfprintf(out, arg, tprefix, fmt, args);
|
||||
}
|
||||
else {
|
||||
@ -434,68 +440,6 @@ void _mi_error_message(int err, const char* fmt, ...) {
// --------------------------------------------------------
// Initialize options by checking the environment
// --------------------------------------------------------
char _mi_toupper(char c) {
  if (c >= 'a' && c <= 'z') return (c - 'a' + 'A');
  else return c;
}

int _mi_strnicmp(const char* s, const char* t, size_t n) {
  if (n == 0) return 0;
  for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
    if (_mi_toupper(*s) != _mi_toupper(*t)) break;
  }
  return (n == 0 ? 0 : *s - *t);
}

void _mi_strlcpy(char* dest, const char* src, size_t dest_size) {
  if (dest==NULL || src==NULL || dest_size == 0) return;
  // copy until end of src, or when dest is (almost) full
  while (*src != 0 && dest_size > 1) {
    *dest++ = *src++;
    dest_size--;
  }
  // always zero terminate
  *dest = 0;
}

void _mi_strlcat(char* dest, const char* src, size_t dest_size) {
  if (dest==NULL || src==NULL || dest_size == 0) return;
  // find end of string in the dest buffer
  while (*dest != 0 && dest_size > 1) {
    dest++;
    dest_size--;
  }
  // and catenate
  _mi_strlcpy(dest, src, dest_size);
}

size_t _mi_strlen(const char* s) {
  if (s==NULL) return 0;
  size_t len = 0;
  while(s[len] != 0) { len++; }
  return len;
}

size_t _mi_strnlen(const char* s, size_t max_len) {
  if (s==NULL) return 0;
  size_t len = 0;
  while(s[len] != 0 && len < max_len) { len++; }
  return len;
}

#ifdef MI_NO_GETENV
static bool mi_getenv(const char* name, char* result, size_t result_size) {
  MI_UNUSED(name);
  MI_UNUSED(result);
  MI_UNUSED(result_size);
  return false;
}
#else
static bool mi_getenv(const char* name, char* result, size_t result_size) {
  if (name==NULL || result == NULL || result_size < 64) return false;
  return _mi_prim_getenv(name,result,result_size);
}
#endif

// TODO: implement ourselves to reduce dependencies on the C runtime
#include <stdlib.h> // strtol
@ -508,11 +452,11 @@ static void mi_option_init(mi_option_desc_t* desc) {
  char buf[64+1];
  _mi_strlcpy(buf, "mimalloc_", sizeof(buf));
  _mi_strlcat(buf, desc->name, sizeof(buf));
  bool found = mi_getenv(buf, s, sizeof(s));
  bool found = _mi_getenv(buf, s, sizeof(s));
  if (!found && desc->legacy_name != NULL) {
    _mi_strlcpy(buf, "mimalloc_", sizeof(buf));
    _mi_strlcat(buf, desc->legacy_name, sizeof(buf));
    found = mi_getenv(buf, s, sizeof(s));
    found = _mi_getenv(buf, s, sizeof(s));
    if (found) {
      _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name);
    }
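For illustration only, a minimal standalone sketch of how such an environment lookup composes the "mimalloc_" option name and parses its value. The helper names here are hypothetical stand-ins for the string helpers above; this is not part of the commit:

#include <stdio.h>
#include <stdlib.h>  // strtol, getenv
#include <string.h>

// Hypothetical stand-ins for the _mi_strlcpy/_mi_strlcat helpers above.
static void opt_copy(char* dst, const char* src, size_t n) { snprintf(dst, n, "%s", src); }
static void opt_cat (char* dst, const char* src, size_t n) { size_t l = strlen(dst); if (l < n) snprintf(dst + l, n - l, "%s", src); }

// Look up "mimalloc_<name>" in the environment and parse it as a long, falling back to a default.
static long option_from_env(const char* name, long defval) {
  char buf[64+1];
  opt_copy(buf, "mimalloc_", sizeof(buf));
  opt_cat (buf, name,        sizeof(buf));
  const char* s = getenv(buf);
  if (s == NULL) return defval;
  char* end = NULL;
  long v = strtol(s, &end, 10);
  return (end != NULL && end != s ? v : defval);
}

int main(void) {
  printf("reserve_huge_os_pages = %ld\n", option_from_env("reserve_huge_os_pages", 0));
  return 0;
}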
src/os.c
@ -29,7 +29,7 @@ bool _mi_os_has_overcommit(void) {
  return mi_os_mem_config.has_overcommit;
}

bool _mi_os_has_virtual_reserve(void) {
  return mi_os_mem_config.has_virtual_reserve;
}

@ -73,14 +73,6 @@ void _mi_os_init(void) {
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);

static void* mi_align_up_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_up((uintptr_t)p, alignment);
}

static void* mi_align_down_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_down((uintptr_t)p, alignment);
}

/* -----------------------------------------------------------
  aligned hinting
@ -141,13 +133,13 @@ static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);

static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
  mi_stats_t* stats = &_mi_stats_main;
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
  _mi_stat_decrease(&stats->reserved, size);
}
@ -173,7 +165,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
  }
}
else {
  // nothing to do
  mi_assert(memid.memkind < MI_MEM_OS);
}
}
@ -188,31 +180,33 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats)
-------------------------------------------------------------- */

// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(is_large != NULL);
  if (size == 0) return NULL;
  if (!commit) { allow_large = false; }
  if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning

  *is_zero = false;
  void* p = NULL;
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
  if (err != 0) {
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
  }

  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_stat_counter_increase(stats->mmap_calls, 1);
  if (p != NULL) {
    _mi_stat_increase(&stats->reserved, size);
    if (commit) {
      _mi_stat_increase(&stats->committed, size);
      // seems needed for asan (or `mimalloc-test-api` fails)
      #ifdef MI_TRACK_ASAN
      if (*is_zero) { mi_track_mem_defined(p,size); }
      else { mi_track_mem_undefined(p,size); }
      #endif
    }
  }
  return p;
}
@ -249,7 +243,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
  // over-allocate uncommitted (virtual) memory
  p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // set p to the aligned part in the full region
  // note: this is dangerous on Windows as VirtualFree needs the actual base pointer
  // this is handled though by having the `base` field in the memid's
@ -265,7 +259,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
  // overallocate...
  p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // and selectively unmap parts around the over-allocated area. (noop on sbrk)
  void* aligned_p = mi_align_up_ptr(p, alignment);
  size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
@ -276,7 +270,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
  if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
  // we can return the aligned pointer on `mmap` (and sbrk) systems
  p = aligned_p;
  *base = aligned_p; // since we freed the pre part, `*base == p`.
}
}
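As a side note, a minimal standalone sketch of the over-allocate-and-trim idea used above: allocate a larger region, round the pointer up to the requested alignment, and compute the unaligned head and tail that can then be unmapped separately. The names here are placeholders for illustration, not the commit's code:

#include <stdint.h>
#include <stddef.h>

// Round `p` up to the next multiple of `alignment` (alignment must be a power of two).
void* align_up_ptr(void* p, size_t alignment) {
  return (void*)(((uintptr_t)p + alignment - 1) & ~(uintptr_t)(alignment - 1));
}

// Given an over-allocated region [p, p+over_size), compute the aligned middle part
// and the sizes of the unaligned head and tail around it.
void split_overallocation(void* p, size_t over_size, size_t size, size_t alignment,
                          void** aligned_p, size_t* pre_size, size_t* post_size) {
  *aligned_p = align_up_ptr(p, alignment);
  *pre_size  = (size_t)((uint8_t*)*aligned_p - (uint8_t*)p);
  *post_size = over_size - *pre_size - size;   // whatever remains after the aligned block
}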
@ -289,10 +283,8 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
|
||||
OS API: alloc and alloc_aligned
|
||||
----------------------------------------------------------- */
|
||||
|
||||
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
|
||||
MI_UNUSED(tld_stats);
|
||||
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
|
||||
*memid = _mi_memid_none();
|
||||
mi_stats_t* stats = &_mi_stats_main;
|
||||
if (size == 0) return NULL;
|
||||
size = _mi_os_good_alloc_size(size);
|
||||
bool os_is_large = false;
|
||||
@ -300,23 +292,22 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
|
||||
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
|
||||
if (p != NULL) {
|
||||
*memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
|
||||
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats)
|
||||
{
|
||||
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
|
||||
MI_UNUSED(tld_stats);
|
||||
*memid = _mi_memid_none();
|
||||
if (size == 0) return NULL;
|
||||
size = _mi_os_good_alloc_size(size);
|
||||
alignment = _mi_align_up(alignment, _mi_os_page_size());
|
||||
|
||||
|
||||
bool os_is_large = false;
|
||||
bool os_is_zero = false;
|
||||
void* os_base = NULL;
|
||||
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ );
|
||||
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats );
|
||||
if (p != NULL) {
|
||||
*memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
|
||||
memid->mem.os.base = os_base;
|
||||
@ -327,13 +318,13 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo

/* -----------------------------------------------------------
  OS aligned allocation with an offset. This is used
  for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
  for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
  page where the object can be aligned at an offset from the start of the segment.
  As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
  to use the actual start of the memory region.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
@ -341,20 +332,20 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats);
    if (start == NULL) return NULL;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, tld_stats);
      _mi_os_decommit(start, extra, stats);
    }
    return p;
  }
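For intuition, a small worked check of the `extra = align_up(offset, alignment) - offset` computation used above: shifting the returned pointer by `extra` makes the address at `p + offset` land on the requested alignment. This is a standalone sketch with made-up numbers, not part of the commit:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static size_t align_up(size_t n, size_t alignment) {          // alignment is a power of two
  return (n + alignment - 1) & ~(alignment - 1);
}

int main(void) {
  // Want (p + offset) aligned to 1 MiB while the allocation itself starts at an aligned `start`.
  const size_t alignment = 1024*1024;
  const size_t offset    = 40;                                 // e.g. a header before the aligned payload
  const size_t extra     = align_up(offset, alignment) - offset;
  uintptr_t start = 16*1024*1024;                              // pretend the aligned OS allocation returned this
  uintptr_t p     = start + extra;
  assert((p + offset) % alignment == 0);                       // the payload at `p + offset` is aligned as requested
  return 0;
}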
@ -390,7 +381,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
|
||||
|
||||
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
|
||||
MI_UNUSED(tld_stats);
|
||||
mi_stats_t* stats = &_mi_stats_main;
|
||||
mi_stats_t* stats = &_mi_stats_main;
|
||||
if (is_zero != NULL) { *is_zero = false; }
|
||||
_mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit
|
||||
_mi_stat_counter_increase(&stats->commit_calls, 1);
|
||||
@ -400,21 +391,21 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats
|
||||
void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
|
||||
if (csize == 0) return true;
|
||||
|
||||
// commit
|
||||
// commit
|
||||
bool os_is_zero = false;
|
||||
int err = _mi_prim_commit(start, csize, &os_is_zero);
|
||||
int err = _mi_prim_commit(start, csize, &os_is_zero);
|
||||
if (err != 0) {
|
||||
_mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
|
||||
return false;
|
||||
}
|
||||
if (os_is_zero && is_zero != NULL) {
|
||||
if (os_is_zero && is_zero != NULL) {
|
||||
*is_zero = true;
|
||||
mi_assert_expensive(mi_mem_is_zero(start, csize));
|
||||
}
|
||||
// note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
|
||||
#ifdef MI_TRACK_ASAN
|
||||
if (os_is_zero) { mi_track_mem_defined(start,csize); }
|
||||
else { mi_track_mem_undefined(start,csize); }
|
||||
else { mi_track_mem_undefined(start,csize); }
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
@ -428,11 +419,11 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_
|
||||
// page align
|
||||
size_t csize;
|
||||
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
|
||||
if (csize == 0) return true;
|
||||
if (csize == 0) return true;
|
||||
|
||||
// decommit
|
||||
*needs_recommit = true;
|
||||
int err = _mi_prim_decommit(start,csize,needs_recommit);
|
||||
int err = _mi_prim_decommit(start,csize,needs_recommit);
|
||||
if (err != 0) {
|
||||
_mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
|
||||
}
|
||||
@ -450,7 +441,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
|
||||
// but may be used later again. This will release physical memory
|
||||
// pages and reduce swapping while keeping the memory committed.
|
||||
// We page align to a conservative area inside the range to reset.
|
||||
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
|
||||
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
|
||||
// page align conservatively within the range
|
||||
size_t csize;
|
||||
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
|
||||
@ -470,7 +461,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
|
||||
}
|
||||
|
||||
|
||||
// either resets or decommits memory, returns true if the memory needs
|
||||
// either resets or decommits memory, returns true if the memory needs
|
||||
// to be recommitted if it is to be re-used later on.
|
||||
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
|
||||
{
|
||||
@ -483,7 +474,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
|
||||
{
|
||||
bool needs_recommit = true;
|
||||
mi_os_decommit_ex(p, size, &needs_recommit, stats);
|
||||
return needs_recommit;
|
||||
return needs_recommit;
|
||||
}
|
||||
else {
|
||||
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed
|
||||
@ -493,7 +484,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
|
||||
}
|
||||
}
|
||||
|
||||
// either resets or decommits memory, returns true if the memory needs
|
||||
// either resets or decommits memory, returns true if the memory needs
|
||||
// to be recommitted if it is to be re-used later on.
|
||||
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) {
|
||||
return _mi_os_purge_ex(p, size, true, stats);
|
||||
|
src/page-queue.c
@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -11,6 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file

#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
// include to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#endif

/* -----------------------------------------------------------
@ -137,21 +141,25 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
}
#endif

static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
  mi_heap_t* heap = mi_page_heap(page);
  mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
  mi_page_queue_t* pq = &heap->pages[bin];
  mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size);
  mi_assert_expensive(mi_page_queue_contains(pq, page));
  return pq;
static inline bool mi_page_is_large_or_huge(const mi_page_t* page) {
  return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page));
}

static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
  mi_assert_internal(heap!=NULL);
  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
  mi_assert_internal(bin <= MI_BIN_FULL);
  mi_page_queue_t* pq = &heap->pages[bin];
  mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
  mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
    (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(pq)) ||
    (mi_page_is_in_full(page) && mi_page_queue_is_full(pq)));
  return pq;
}

static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
  mi_heap_t* heap = mi_page_heap(page);
  mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
  mi_assert_expensive(mi_page_queue_contains(pq, page));
  return pq;
}
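To illustrate the size-class idea behind `mi_bin` and the per-heap page queues, here is a toy sketch that maps a block size to a bin index using simple power-of-two bins. The real `mi_bin` uses finer sub-binning, so treat this purely as an illustration of the mechanism, not as the commit's implementation:

#include <stddef.h>
#include <stdio.h>

#define TOY_BIN_HUGE  20
// Toy size-class: bin i holds blocks of size <= 2^i; anything larger goes to the huge bin.
static unsigned toy_bin(size_t block_size) {
  unsigned bin = 3;                                  // start at 8 bytes
  while (((size_t)1 << bin) < block_size && bin < TOY_BIN_HUGE) bin++;
  return bin;
}

int main(void) {
  printf("64 B   -> bin %u\n", toy_bin(64));         // 6
  printf("100 B  -> bin %u\n", toy_bin(100));        // 7 (rounded up to 128)
  printf("2 MiB  -> bin %u\n", toy_bin(2u<<20));     // huge bin
  return 0;
}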
@ -206,7 +214,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
|
||||
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
|
||||
mi_assert_internal(page != NULL);
|
||||
mi_assert_expensive(mi_page_queue_contains(queue, page));
|
||||
mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
|
||||
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
|
||||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) ||
|
||||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
|
||||
mi_heap_t* heap = mi_page_heap(page);
|
||||
|
||||
if (page->prev != NULL) page->prev->next = page->next;
|
||||
@ -232,8 +242,8 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
|
||||
#endif
|
||||
mi_assert_internal(page->xblock_size == queue->block_size ||
|
||||
(page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) ||
|
||||
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
|
||||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) ||
|
||||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
|
||||
|
||||
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
|
||||
@ -259,12 +269,13 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
|
||||
mi_assert_internal(page != NULL);
|
||||
mi_assert_expensive(mi_page_queue_contains(from, page));
|
||||
mi_assert_expensive(!mi_page_queue_contains(to, page));
|
||||
|
||||
mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) ||
|
||||
(page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
|
||||
(page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||
|
||||
(page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) ||
|
||||
(page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)));
|
||||
const size_t bsize = mi_page_block_size(page);
|
||||
MI_UNUSED(bsize);
|
||||
mi_assert_internal((bsize == to->block_size && bsize == from->block_size) ||
|
||||
(bsize == to->block_size && mi_page_queue_is_full(from)) ||
|
||||
(bsize == from->block_size && mi_page_queue_is_full(to)) ||
|
||||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(to)) ||
|
||||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_full(to)));
|
||||
|
||||
mi_heap_t* heap = mi_page_heap(page);
|
||||
if (page->prev != NULL) page->prev->next = page->next;
|
||||
|
src/page.c
@ -1,5 +1,5 @@
|
||||
/*----------------------------------------------------------------------------
|
||||
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
|
||||
|
||||
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
|
||||
size_t psize;
|
||||
uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize);
|
||||
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
|
||||
mi_block_t* start = (mi_block_t*)page_area;
|
||||
mi_block_t* end = (mi_block_t*)(page_area + psize);
|
||||
while(p != NULL) {
|
||||
@ -78,14 +78,13 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
|
||||
}
|
||||
|
||||
static bool mi_page_is_valid_init(mi_page_t* page) {
|
||||
mi_assert_internal(page->xblock_size > 0);
|
||||
mi_assert_internal(mi_page_block_size(page) > 0);
|
||||
mi_assert_internal(page->used <= page->capacity);
|
||||
mi_assert_internal(page->capacity <= page->reserved);
|
||||
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
uint8_t* start = _mi_page_start(segment,page,NULL);
|
||||
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
|
||||
//const size_t bsize = mi_page_block_size(page);
|
||||
uint8_t* start = mi_page_start(page);
|
||||
mi_assert_internal(start == _mi_segment_page_start(_mi_page_segment(page), page, NULL));
|
||||
mi_assert_internal(page->is_huge == (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE));
|
||||
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
|
||||
|
||||
mi_assert_internal(mi_page_list_is_valid(page,page->free));
|
||||
@ -125,9 +124,9 @@ bool _mi_page_is_valid(mi_page_t* page) {
|
||||
|
||||
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id);
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
if (segment->kind != MI_SEGMENT_HUGE)
|
||||
if (segment->kind != MI_SEGMENT_HUGE)
|
||||
#endif
|
||||
{
|
||||
{
|
||||
mi_page_queue_t* pq = mi_page_queue_of(page);
|
||||
mi_assert_internal(mi_page_queue_contains(pq, page));
|
||||
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page));
|
||||
@ -193,8 +192,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
|
||||
if (head == NULL) return;
|
||||
|
||||
// find the tail -- also to get a proper count (without data races)
|
||||
uint32_t max_count = page->capacity; // cannot collect more than capacity
|
||||
uint32_t count = 1;
|
||||
size_t max_count = page->capacity; // cannot collect more than capacity
|
||||
size_t count = 1;
|
||||
mi_block_t* tail = head;
|
||||
mi_block_t* next;
|
||||
while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
|
||||
@ -212,7 +211,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
|
||||
page->local_free = head;
|
||||
|
||||
// update counts now
|
||||
page->used -= count;
|
||||
page->used -= (uint16_t)count;
|
||||
}
|
||||
|
||||
void _mi_page_free_collect(mi_page_t* page, bool force) {
|
||||
@ -263,7 +262,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
|
||||
#endif
|
||||
|
||||
|
||||
// TODO: push on full queue immediately if it is full?
|
||||
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
|
||||
mi_page_queue_push(heap, pq, page);
|
||||
@ -282,11 +281,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
|
||||
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
|
||||
return NULL;
|
||||
}
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
|
||||
#endif
|
||||
mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
|
||||
mi_assert_internal(pq!=NULL || page->xblock_size != 0);
|
||||
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
|
||||
// a fresh page was found, initialize it
|
||||
const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
|
||||
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
|
||||
mi_assert_internal(full_block_size >= block_size);
|
||||
mi_page_init(heap, page, full_block_size, heap->tld);
|
||||
mi_heap_stat_increase(heap, pages, 1);
|
||||
@ -427,8 +428,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
|
||||
_mi_segment_page_free(page, force, segments_tld);
|
||||
}
|
||||
|
||||
// Retire parameters
#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX)
#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX   // should be less than size for MI_BIN_HUGE
#define MI_RETIRE_CYCLES (16)

// Retire a page with no more used blocks
@ -441,7 +441,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
  mi_assert_internal(page != NULL);
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(mi_page_all_free(page));

  mi_page_set_has_aligned(page, false);

  // don't retire too often..
@ -451,10 +451,11 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
  // how to check this efficiently though...
  // for now, we don't retire if it is the only page left of this size class.
  mi_page_queue_t* pq = mi_page_queue_of(page);
  if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue?
  const size_t bsize = mi_page_block_size(page);
  if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
    if (pq->last==page && pq->first==page) { // the only page in the queue?
      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
      page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
      page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
      mi_heap_t* heap = mi_page_heap(page);
      mi_assert_internal(pq >= heap->pages);
      const size_t index = pq - heap->pages;
@ -462,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
      if (index < heap->page_retired_min) heap->page_retired_min = index;
      if (index > heap->page_retired_max) heap->page_retired_max = index;
      mi_assert_internal(mi_page_all_free(page));
      return; // dont't free after all
      return; // don't free after all
    }
  }
  _mi_page_free(page, pq, false);
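The retire logic above defers freeing the last page of a size class for a number of allocation "cycles", on the assumption that a fresh allocation of that size is likely to reuse it soon. A much-simplified toy sketch of that heuristic, with hypothetical names and none of the real bookkeeping:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { TOY_RETIRE_CYCLES = 16 };

typedef struct toy_page_s {
  bool    only_page_in_queue;   // last page of its size class?
  uint8_t retire_expire;        // countdown before the page is really freed
} toy_page_t;

// Called when a page becomes completely free: either free it now, or keep it
// around a little longer when it is the only page of its size class.
static bool toy_page_retire(toy_page_t* page) {
  if (page->only_page_in_queue) {
    page->retire_expire = TOY_RETIRE_CYCLES;   // postpone; a new allocation may reuse it soon
    return false;                              // not freed yet
  }
  return true;                                 // free right away
}

// Called from the allocation slow path: free once the countdown expires.
static bool toy_page_retire_tick(toy_page_t* page) {
  return (page->retire_expire > 0 && --page->retire_expire == 0);
}

int main(void) {
  toy_page_t page = { .only_page_in_queue = true, .retire_expire = 0 };
  printf("freed immediately: %d\n", toy_page_retire(&page));   // 0: deferred
  int ticks = 0;
  while (!toy_page_retire_tick(&page)) ticks++;
  printf("freed after %d ticks\n", ticks + 1);                 // 16
  return 0;
}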
@ -516,7 +517,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
|
||||
#endif
|
||||
mi_assert_internal(page->capacity + extend <= page->reserved);
|
||||
mi_assert_internal(bsize == mi_page_block_size(page));
|
||||
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
|
||||
void* const page_area = mi_page_start(page);
|
||||
|
||||
// initialize a randomized free list
|
||||
// set up `slice_count` slices to alternate between
|
||||
@ -574,7 +575,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
|
||||
#endif
|
||||
mi_assert_internal(page->capacity + extend <= page->reserved);
|
||||
mi_assert_internal(bsize == mi_page_block_size(page));
|
||||
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
|
||||
void* const page_area = mi_page_start(page);
|
||||
|
||||
mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity);
|
||||
|
||||
@ -608,7 +609,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
|
||||
// allocations but this did not speed up any benchmark (due to an
|
||||
// extra test in malloc? or cache effects?)
|
||||
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
|
||||
MI_UNUSED(tld);
|
||||
MI_UNUSED(tld);
|
||||
mi_assert_expensive(mi_page_is_valid_init(page));
|
||||
#if (MI_SECURE<=2)
|
||||
mi_assert(page->free == NULL);
|
||||
@ -617,16 +618,14 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
|
||||
#endif
|
||||
if (page->capacity >= page->reserved) return;
|
||||
|
||||
size_t page_size;
|
||||
_mi_page_start(_mi_page_segment(page), page, &page_size);
|
||||
mi_stat_counter_increase(tld->stats.pages_extended, 1);
|
||||
|
||||
// calculate the extend count
|
||||
const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
|
||||
const size_t bsize = mi_page_block_size(page);
|
||||
size_t extend = page->reserved - page->capacity;
|
||||
mi_assert_internal(extend > 0);
|
||||
|
||||
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
|
||||
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize);
|
||||
if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
|
||||
mi_assert_internal(max_extend > 0);
|
||||
|
||||
@ -660,11 +659,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
||||
mi_assert_internal(block_size > 0);
|
||||
// set fields
|
||||
mi_page_set_heap(page, heap);
|
||||
page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
|
||||
page->block_size = block_size;
|
||||
size_t page_size;
|
||||
const void* page_start = _mi_segment_page_start(segment, page, &page_size);
|
||||
MI_UNUSED(page_start);
|
||||
mi_track_mem_noaccess(page_start,page_size);
|
||||
page->page_start = _mi_segment_page_start(segment, page, &page_size);
|
||||
mi_track_mem_noaccess(page->page_start,page_size);
|
||||
mi_assert_internal(mi_page_block_size(page) <= page_size);
|
||||
mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
|
||||
mi_assert_internal(page_size / block_size < (1L<<16));
|
||||
@ -677,12 +675,18 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
||||
page->free_is_zero = page->is_zero_init;
|
||||
#if MI_DEBUG>2
|
||||
if (page->is_zero_init) {
|
||||
mi_track_mem_defined(page_start, page_size);
|
||||
mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
|
||||
mi_track_mem_defined(page->page_start, page_size);
|
||||
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
|
||||
}
|
||||
#endif
|
||||
|
||||
mi_assert_internal(page->is_committed);
|
||||
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
|
||||
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
|
||||
}
|
||||
else {
|
||||
page->block_size_shift = 0;
|
||||
}
|
||||
|
||||
mi_assert_internal(page->capacity == 0);
|
||||
mi_assert_internal(page->free == NULL);
|
||||
mi_assert_internal(page->used == 0);
|
||||
@ -695,6 +699,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
||||
mi_assert_internal(page->keys[0] != 0);
|
||||
mi_assert_internal(page->keys[1] != 0);
|
||||
#endif
|
||||
mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift)));
|
||||
mi_assert_expensive(mi_page_is_valid_init(page));
|
||||
|
||||
// initialize an initial free list
|
||||
@ -718,7 +723,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
|
||||
while (page != NULL)
|
||||
{
|
||||
mi_page_t* next = page->next; // remember next
|
||||
#if MI_STAT
|
||||
#if MI_STAT
|
||||
count++;
|
||||
#endif
|
||||
|
||||
@ -820,11 +825,9 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// Large and huge page allocation.
|
||||
// Huge pages are allocated directly without being in a queue.
|
||||
// Because huge pages contain just one block, and the segment contains
|
||||
// just that page, we always treat them as abandoned and any thread
|
||||
// that frees the block can free the whole page and segment directly.
|
||||
// Huge pages are also use if the requested alignment is very large (> MI_ALIGNMENT_MAX).
|
||||
// Huge pages contain just one block, and the segment contains just that page (as `MI_SEGMENT_HUGE`).
|
||||
// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX)
|
||||
// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`.
|
||||
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
|
||||
size_t block_size = _mi_os_good_alloc_size(size);
|
||||
mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
|
||||
@ -832,25 +835,26 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
|
||||
#else
|
||||
mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0
|
||||
mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_LARGE_OBJ_SIZE_MAX+1 : block_size);
|
||||
mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq));
|
||||
#endif
|
||||
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
|
||||
if (page != NULL) {
|
||||
mi_assert_internal(mi_page_immediate_available(page));
|
||||
|
||||
|
||||
if (is_huge) {
|
||||
mi_assert_internal(mi_page_is_huge(page));
|
||||
mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
|
||||
mi_assert_internal(_mi_page_segment(page)->used==1);
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
|
||||
mi_page_set_heap(page, NULL);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
|
||||
mi_assert_internal(!mi_page_is_huge(page));
|
||||
}
|
||||
|
||||
|
||||
const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding
|
||||
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
|
||||
mi_heap_stat_increase(heap, large, bsize);
|
||||
@ -869,9 +873,9 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t
|
||||
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
|
||||
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
|
||||
// huge allocation?
|
||||
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
|
||||
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
|
||||
if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
|
||||
if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
|
||||
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
|
||||
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
|
||||
return NULL;
|
||||
}
|
||||
@ -882,7 +886,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
|
||||
else {
|
||||
// otherwise find a page with free blocks in our size segregated queues
|
||||
#if MI_PADDING
|
||||
mi_assert_internal(size >= MI_PADDING_SIZE);
|
||||
mi_assert_internal(size >= MI_PADDING_SIZE);
|
||||
#endif
|
||||
return mi_find_free_page(heap, size);
|
||||
}
|
||||
@ -898,7 +902,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
|
||||
|
||||
// initialize if necessary
|
||||
if mi_unlikely(!mi_heap_is_initialized(heap)) {
|
||||
heap = mi_heap_get_default(); // calls mi_thread_init
|
||||
heap = mi_heap_get_default(); // calls mi_thread_init
|
||||
if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; }
|
||||
}
|
||||
mi_assert_internal(mi_heap_is_initialized(heap));
|
||||
@ -926,7 +930,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
|
||||
mi_assert_internal(mi_page_block_size(page) >= size);
|
||||
|
||||
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
|
||||
if mi_unlikely(zero && page->xblock_size == 0) {
|
||||
if mi_unlikely(zero && page->block_size == 0) {
|
||||
// note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
|
||||
void* p = _mi_page_malloc(heap, page, size, false);
|
||||
mi_assert_internal(p != NULL);
|
||||
|
src/prim/emscripten/prim.c (new file)
@ -0,0 +1,251 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

// This file is included in `src/prim/prim.c`

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"

// Design
// ======
//
// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top
// of sbrk. The reason for having three layers here is that we want mimalloc to
// be able to allocate and release system memory properly, the same way it would
// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited.
// Specifically, sbrk can only go up and down, and not "skip" over regions, and
// so we end up either never freeing memory to the system, or we can get stuck
// with holes.
//
// At the moment, Wasm generally does *not* free memory back to the system: once grown, we do
// not shrink back down (https://github.com/WebAssembly/design/issues/1397).
// However, that is expected to improve
// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md)
// and so we do not want to bake those limitations in here.
//
// Even without that issue, we want our system allocator to handle holes, that
// is, it should merge freed regions and allow allocating new content there of
// the full size, etc., so that we do not waste space. That means that the
// system allocator really does need to handle the general problem of allocating
// and freeing variable-sized chunks of memory in a random order, like malloc/
// free do. And so it makes sense to layer mimalloc on top of such an
// implementation.
//
// emmalloc makes sense for the lower level because it is small and simple while
// still fully handling merging of holes etc. It is not the most efficient
// allocator, but our assumption is that mimalloc needs to be fast while the
// system allocator underneath it is called much less frequently.
//

//---------------------------------------------
|
||||
// init
|
||||
//---------------------------------------------
|
||||
|
||||
void _mi_prim_mem_init( mi_os_mem_config_t* config) {
|
||||
config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
|
||||
config->alloc_granularity = 16;
|
||||
config->has_overcommit = false;
|
||||
config->must_free_whole = true;
|
||||
config->has_virtual_reserve = false;
|
||||
}
|
||||
|
||||
extern void emmalloc_free(void*);
|
||||
|
||||
int _mi_prim_free(void* addr, size_t size) {
|
||||
MI_UNUSED(size);
|
||||
emmalloc_free(addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------
|
||||
// Allocation
|
||||
//---------------------------------------------
|
||||
|
||||
extern void* emmalloc_memalign(size_t, size_t);
|
||||
|
||||
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
|
||||
MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit);
|
||||
*is_large = false;
|
||||
// TODO: Track the highest address ever seen; first uses of it are zeroes.
|
||||
// That assumes no one else uses sbrk but us (they could go up,
|
||||
// scribble, and then down), but we could assert on that perhaps.
|
||||
*is_zero = false;
|
||||
// emmalloc has some limitations on alignment size.
|
||||
// TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating
|
||||
// 8, which wastes quite a lot for us in wasm. If that is unavoidable,
|
||||
// we may want to improve emmalloc to support such alignment. See also
|
||||
// https://github.com/emscripten-core/emscripten/issues/20645
|
||||
#define MIN_EMMALLOC_ALIGN 8
|
||||
#define MAX_EMMALLOC_ALIGN (1024*1024)
|
||||
if (try_alignment < MIN_EMMALLOC_ALIGN) {
|
||||
try_alignment = MIN_EMMALLOC_ALIGN;
|
||||
} else if (try_alignment > MAX_EMMALLOC_ALIGN) {
|
||||
try_alignment = MAX_EMMALLOC_ALIGN;
|
||||
}
|
||||
void* p = emmalloc_memalign(try_alignment, size);
|
||||
*addr = p;
|
||||
if (p == 0) {
|
||||
return ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------
|
||||
// Commit/Reset
|
||||
//---------------------------------------------
|
||||
|
||||
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size);
|
||||
// See TODO above.
|
||||
*is_zero = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size);
|
||||
*needs_recommit = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _mi_prim_reset(void* addr, size_t size) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _mi_prim_protect(void* addr, size_t size, bool protect) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------
|
||||
// Huge pages and NUMA nodes
|
||||
//---------------------------------------------
|
||||
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
|
||||
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
|
||||
*is_zero = true;
|
||||
*addr = NULL;
|
||||
return ENOSYS;
|
||||
}
|
||||
|
||||
size_t _mi_prim_numa_node(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t _mi_prim_numa_node_count(void) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Clock
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#include <emscripten/html5.h>
|
||||
|
||||
mi_msecs_t _mi_prim_clock_now(void) {
|
||||
return emscripten_date_now();
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Process info
|
||||
//----------------------------------------------------------------
|
||||
|
||||
void _mi_prim_process_info(mi_process_info_t* pinfo)
|
||||
{
|
||||
// use defaults
|
||||
MI_UNUSED(pinfo);
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Output
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#include <emscripten/console.h>
|
||||
|
||||
void _mi_prim_out_stderr( const char* msg) {
|
||||
emscripten_console_error(msg);
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Environment
|
||||
//----------------------------------------------------------------
|
||||
|
||||
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
|
||||
// For code size reasons, do not support environ customization for now.
|
||||
MI_UNUSED(name);
|
||||
MI_UNUSED(result);
|
||||
MI_UNUSED(result_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Random
|
||||
//----------------------------------------------------------------
|
||||
|
||||
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
|
||||
int err = getentropy(buf, buf_len);
|
||||
return !err;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Thread init/done
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
|
||||
|
||||
// use pthread local storage keys to detect thread ending
|
||||
// (and used with MI_TLS_PTHREADS for the default heap)
|
||||
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
|
||||
|
||||
static void mi_pthread_done(void* value) {
|
||||
if (value!=NULL) {
|
||||
_mi_thread_done((mi_heap_t*)value);
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_prim_thread_init_auto_done(void) {
|
||||
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
|
||||
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
|
||||
}
|
||||
|
||||
void _mi_prim_thread_done_auto_done(void) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
|
||||
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
|
||||
pthread_setspecific(_mi_heap_default_key, heap);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void _mi_prim_thread_init_auto_done(void) {
|
||||
// nothing
|
||||
}
|
||||
|
||||
void _mi_prim_thread_done_auto_done(void) {
|
||||
// nothing
|
||||
}
|
||||
|
||||
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
|
||||
MI_UNUSED(heap);
|
||||
|
||||
}
|
||||
#endif
|
@ -225,7 +225,9 @@ static malloc_zone_t mi_malloc_zone = {
|
||||
// switch to version 9+ on OSX 10.6 to support memalign.
|
||||
.memalign = &zone_memalign,
|
||||
.free_definite_size = &zone_free_definite_size,
|
||||
#if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
|
||||
.pressure_relief = &zone_pressure_relief,
|
||||
#endif
|
||||
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
|
||||
.claimed_address = &zone_claimed_address,
|
||||
#endif
|
||||
|
src/prim/prim.c
@ -18,6 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_USE_SBRK
#include "wasi/prim.c" // memory-grow or sbrk (Wasm)

#elif defined(__EMSCRIPTEN__)
#include "emscripten/prim.c" // emmalloc_*, + pthread support

#else
#include "unix/prim.c" // mmap() (Linux, macOS, BSD, illumos, Haiku, DragonFly, etc.)

|
@ -27,19 +27,26 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
#include <sys/mman.h> // mmap
|
||||
#include <unistd.h> // sysconf
|
||||
|
||||
#include <fcntl.h> // open, close, read, access
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <features.h>
|
||||
#include <fcntl.h>
|
||||
#if defined(MI_NO_THP)
|
||||
#include <sys/prctl.h>
|
||||
#endif
|
||||
#if defined(__GLIBC__)
|
||||
#include <linux/mman.h> // linux mmap flags
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
#elif defined(__APPLE__)
|
||||
#include <AvailabilityMacros.h>
|
||||
#include <TargetConditionals.h>
|
||||
#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
|
||||
#include <mach/vm_statistics.h>
|
||||
#if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR)
|
||||
#include <mach/vm_statistics.h> // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc.
|
||||
#endif
|
||||
#if !defined(MAC_OS_X_VERSION_10_7)
|
||||
#define MAC_OS_X_VERSION_10_7 1070
|
||||
#endif
|
||||
#elif defined(__FreeBSD__) || defined(__DragonFly__)
|
||||
#include <sys/param.h>
|
||||
@ -50,17 +57,19 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__)
|
||||
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun)
|
||||
#define MI_HAS_SYSCALL_H
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------------
|
||||
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
|
||||
// and do allocation themselves; using syscalls prevents recursion when mimalloc is
|
||||
// still initializing (issue #713)
|
||||
//------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
|
||||
|
||||
static int mi_prim_open(const char* fpath, int open_flags) {
|
||||
@ -76,7 +85,8 @@ static int mi_prim_access(const char *fpath, int mode) {
|
||||
return syscall(SYS_access,fpath,mode);
|
||||
}
|
||||
|
||||
#elif !defined(__APPLE__) // avoid unused warnings
|
||||
#elif !defined(__sun) && \
|
||||
(!defined(__APPLE__) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7)) // avoid unused warnings on macOS and Solaris
|
||||
|
||||
static int mi_prim_open(const char* fpath, int open_flags) {
|
||||
return open(fpath,open_flags);
|
||||
@ -125,7 +135,8 @@ static bool unix_detect_overcommit(void) {
  return os_overcommit;
}

void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
  long psize = sysconf(_SC_PAGESIZE);
  if (psize > 0) {
    config->page_size = (size_t)psize;
@ -135,6 +146,24 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
  config->has_overcommit = unix_detect_overcommit();
  config->must_free_whole = false; // mmap can free in parts
  config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)

  // disable transparent huge pages for this process?
  #if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE)
  #if defined(MI_NO_THP)
  if (true)
  #else
  if (!mi_option_is_enabled(mi_option_allow_large_os_pages)) // disable THP also if large OS pages are not allowed in the options
  #endif
  {
    int val = 0;
    if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) {
      // Most likely since distros often come with always/madvise settings.
      val = 1;
      // Disabling only for mimalloc process rather than touching system wide settings
      (void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0);
    }
  }
  #endif
}

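For reference, a tiny standalone Linux program that applies the same per-process opt-out of transparent huge pages via prctl. This is a sketch under the assumption that PR_SET_THP_DISABLE is available on the target kernel; it is not part of the commit:

#include <stdio.h>
#include <sys/prctl.h>

int main(void) {
#ifdef PR_SET_THP_DISABLE
  // 1 = disable transparent huge pages for this process only (inherited by children).
  if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0) != 0) {
    perror("prctl(PR_SET_THP_DISABLE)");
    return 1;
  }
  int disabled = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);  // returns the current setting (or -1 on error)
  printf("THP disabled for this process: %d\n", disabled);
#else
  puts("PR_SET_THP_DISABLE not available on this system");
#endif
  return 0;
}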
@ -276,7 +305,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
|
||||
*is_large = true;
|
||||
p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
|
||||
#ifdef MAP_HUGE_1GB
|
||||
if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
|
||||
if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
|
||||
mi_huge_pages_available = false; // don't try huge 1GiB pages again
|
||||
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
|
||||
lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
|
||||
@ -310,7 +339,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
|
||||
#elif defined(__sun)
|
||||
if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
|
||||
struct memcntl_mha cmd = {0};
|
||||
cmd.mha_pagesize = large_os_page_size;
|
||||
cmd.mha_pagesize = _mi_os_large_page_size();
|
||||
cmd.mha_cmd = MHA_MAPSIZE_VA;
|
||||
if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
|
||||
*is_large = true;
|
||||
@ -472,8 +501,6 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo

#if defined(__linux__)

#include <stdio.h> // snprintf

size_t _mi_prim_numa_node(void) {
  #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu)
  unsigned long node = 0;
@ -491,7 +518,7 @@ size_t _mi_prim_numa_node_count(void) {
  unsigned node = 0;
  for(node = 0; node < 256; node++) {
    // enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
    snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
    _mi_snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
    if (mi_prim_access(buf,R_OK) != 0) break;
  }
  return (node+1);
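A standalone sketch of the same probing idea: count NUMA nodes by checking which /sys/devices/system/node/node<N> directories exist. It uses plain snprintf and access rather than the allocator's internal wrappers, so it is illustrative only:

#include <stdio.h>
#include <unistd.h>   // access

static unsigned numa_node_count(void) {
  char buf[128];
  unsigned node = 0;
  for (node = 0; node < 256; node++) {
    // stop at the first node directory that does not exist
    snprintf(buf, sizeof(buf), "/sys/devices/system/node/node%u", node + 1);
    if (access(buf, R_OK) != 0) break;
  }
  return node + 1;   // node0 always exists, so the count is at least 1
}

int main(void) {
  printf("NUMA nodes: %u\n", numa_node_count());
  return 0;
}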
@ -729,28 +756,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// Random
//----------------------------------------------------------------

#if defined(__APPLE__)

#include <AvailabilityMacros.h>
#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15)
#include <CommonCrypto/CommonCryptoError.h>
#include <CommonCrypto/CommonRandom.h>
#endif

bool _mi_prim_random_buf(void* buf, size_t buf_len) {
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
  // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
  // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
  return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
#else
  // fall back on older macOS
  arc4random_buf(buf, buf_len);
  return true;
#endif
  // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
  // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
  return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
}

#elif defined(__ANDROID__) || defined(__DragonFly__) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
      defined(__sun)
      defined(__sun) || \
      (defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))

#include <stdlib.h>
|
||||
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
|
||||
@ -758,11 +777,10 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#elif defined(__linux__) || defined(__HAIKU__)
|
||||
#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829)
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
|
||||
@ -833,7 +851,9 @@ void _mi_prim_thread_init_auto_done(void) {
|
||||
}
|
||||
|
||||
void _mi_prim_thread_done_auto_done(void) {
|
||||
// nothing to do
|
||||
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
|
||||
pthread_key_delete(_mi_heap_default_key);
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
|
||||
|
@ -12,6 +12,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"

#include <stdio.h>   // fputs
#include <stdlib.h>  // getenv

//---------------------------------------------
// Initialize
//---------------------------------------------
@ -40,6 +43,8 @@ int _mi_prim_free(void* addr, size_t size ) {
//---------------------------------------------

#if defined(MI_USE_SBRK)
  #include <unistd.h>  // for sbrk

  static void* mi_memory_grow( size_t size ) {
    void* p = sbrk(size);
    if (p == (void*)(-1)) return NULL;
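As a reminder of how the sbrk path works: sbrk(n) moves the program break by n bytes and returns the previous break, or (void*)(-1) on failure. A toy bump-allocation sketch in that style, illustrative only and not the WASI backend's implementation:

  #include <unistd.h>   // sbrk
  #include <cstdint>
  #include <cstddef>

  static void* bump_alloc(size_t size) {
    size = (size + 15) & ~(size_t)15;        // keep returned blocks 16-byte aligned
    void* prev = sbrk((intptr_t)size);       // previous break on success
    return (prev == (void*)(-1)) ? nullptr : prev;
  }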
@ -29,6 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1];  // 2KiB per TB with 64MiB segments

static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
  // note: segment can be invalid or NULL.
  mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
  if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
    *bitidx = 0;
@ -70,8 +71,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) {
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
  if (p == NULL) return NULL;
  mi_segment_t* segment = _mi_ptr_segment(p);
  mi_assert_internal(segment != NULL);
  mi_segment_t* segment = _mi_ptr_segment(p);  // segment can be NULL
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
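The segment map above is essentially one bit per segment-aligned address range, so a pointer resolves to a (word index, bit index) pair with two divisions. A simplified sketch with made-up constants (mimalloc's real MI_SEGMENT_SIZE, MI_SEGMENT_MAP_WSIZE and MI_MAX_ADDRESS differ):

  #include <atomic>
  #include <cstdint>
  #include <cstddef>

  constexpr unsigned kSegShift = 26;                           // 64 MiB segments (example value)
  constexpr uint64_t kMaxAddr  = 1ull << 48;                   // assume a 48-bit address space
  constexpr size_t   kMapWords = (size_t)((kMaxAddr >> kSegShift) / 64);

  static std::atomic<uint64_t> seg_map[kMapWords];

  static size_t map_index_of(const void* p, size_t* bitidx) {
    const uint64_t slot = (uint64_t)(uintptr_t)p >> kSegShift;  // which segment slot?
    *bitidx = (size_t)(slot % 64);                              // bit within the word
    return (size_t)(slot / 64);                                 // word index
  }

  static bool is_known_segment(const void* p) {
    size_t bitidx;
    const size_t idx = map_index_of(p, &bitidx);
    return (seg_map[idx].load(std::memory_order_relaxed) & (1ull << bitidx)) != 0;
  }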
src/segment.c (517 changes): file diff suppressed because it is too large.
@ -27,6 +27,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "bitmap.c"
#include "heap.c"
#include "init.c"
#include "libc.c"
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
src/stats.c (60 changes)
@ -9,7 +9,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"

#include <stdio.h>  // snprintf
#include <string.h> // memset

#if defined(_MSC_VER) && (_MSC_VER < 1920)
@ -146,7 +145,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void*
  const int64_t pos = (n < 0 ? -n : n);
  if (pos < base) {
    if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column
      snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix));
      _mi_snprintf(buf, len, "%lld %-3s", (long long)n, (n==0 ? "" : suffix));
    }
  }
  else {
@ -158,8 +157,8 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void*
    const long whole = (long)(tens/10);
    const long frac1 = (long)(tens%10);
    char unitdesc[8];
    snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix);
    snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc);
    _mi_snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix);
    _mi_snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc);
  }
  _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf);
}
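The change above only swaps snprintf for the internal _mi_snprintf; the arithmetic that produces the one-decimal "KiB/MiB" style output stays the same. Spelled out as a stand-alone example (illustrative, not mimalloc's code):

  #include <cstdio>
  #include <cstdint>
  #include <cstddef>

  static void format_amount(char* buf, size_t len, int64_t n, int64_t base /* 1000 or 1024 */) {
    const char* magnitudes[] = { "", "K", "M", "G", "T" };
    const int64_t pos = (n < 0 ? -n : n);
    int64_t divisor = 1;
    int m = 0;
    while (pos / divisor >= base && m < 4) { divisor *= base; m++; }
    if (m == 0) {
      std::snprintf(buf, len, "%lld B", (long long)n);
    }
    else {
      const int64_t tens  = (n * 10) / divisor;   // value scaled to one decimal digit
      const long    whole = (long)(tens / 10);
      const long    frac  = (long)(tens % 10);
      std::snprintf(buf, len, "%ld.%ld %s%sB", whole, (frac < 0 ? -frac : frac),
                    magnitudes[m], (base == 1024 ? "i" : ""));
    }
  }
  // e.g. format_amount(buf, sizeof(buf), 3276800, 1024) yields "3.1 MiB".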
@ -176,13 +175,28 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar

static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) {
  _mi_fprintf(out, arg,"%10s:", msg);
  if (unit > 0) {
    mi_print_amount(stat->peak, unit, out, arg);
    mi_print_amount(stat->allocated, unit, out, arg);
    mi_print_amount(stat->freed, unit, out, arg);
    mi_print_amount(stat->current, unit, out, arg);
    mi_print_amount(unit, 1, out, arg);
    mi_print_count(stat->allocated, unit, out, arg);
  if (unit != 0) {
    if (unit > 0) {
      mi_print_amount(stat->peak, unit, out, arg);
      mi_print_amount(stat->allocated, unit, out, arg);
      mi_print_amount(stat->freed, unit, out, arg);
      mi_print_amount(stat->current, unit, out, arg);
      mi_print_amount(unit, 1, out, arg);
      mi_print_count(stat->allocated, unit, out, arg);
    }
    else {
      mi_print_amount(stat->peak, -1, out, arg);
      mi_print_amount(stat->allocated, -1, out, arg);
      mi_print_amount(stat->freed, -1, out, arg);
      mi_print_amount(stat->current, -1, out, arg);
      if (unit == -1) {
        _mi_fprintf(out, arg, "%24s", "");
      }
      else {
        mi_print_amount(-unit, 1, out, arg);
        mi_print_count((stat->allocated / -unit), 0, out, arg);
      }
    }
    if (stat->allocated > stat->freed) {
      _mi_fprintf(out, arg, " ");
      _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok));
@ -192,23 +206,6 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64
      _mi_fprintf(out, arg, " ok\n");
    }
  }
  else if (unit<0) {
    mi_print_amount(stat->peak, -1, out, arg);
    mi_print_amount(stat->allocated, -1, out, arg);
    mi_print_amount(stat->freed, -1, out, arg);
    mi_print_amount(stat->current, -1, out, arg);
    if (unit==-1) {
      _mi_fprintf(out, arg, "%24s", "");
    }
    else {
      mi_print_amount(-unit, 1, out, arg);
      mi_print_count((stat->allocated / -unit), 0, out, arg);
    }
    if (stat->allocated > stat->freed)
      _mi_fprintf(out, arg, " not all freed!\n");
    else
      _mi_fprintf(out, arg, " ok\n");
  }
  else {
    mi_print_amount(stat->peak, 1, out, arg);
    mi_print_amount(stat->allocated, 1, out, arg);
@ -255,7 +252,7 @@ static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const c
  if (bins[i].allocated > 0) {
    found = true;
    int64_t unit = _mi_bin_size((uint8_t)i);
    snprintf(buf, 64, "%s %3lu", fmt, (long)i);
    _mi_snprintf(buf, 64, "%s %3lu", fmt, (long)i);
    mi_stat_print(&bins[i], buf, unit, out, arg);
  }
}
@ -341,6 +338,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
  mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
  mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
  mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
  mi_stat_counter_print(&stats->arena_count, "arenas", out, arg);
  mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg);
  mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg);
  mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
  mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
  mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
@ -455,7 +455,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
  pinfo.page_faults = 0;

  _mi_prim_process_info(&pinfo);

  if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX));
  if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX));
  if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX));
@ -108,7 +108,7 @@ static void various_tests() {
  auto tbuf = new unsigned char[sizeof(Test)];
  t = new (tbuf) Test(42);
  t->~Test();
  delete tbuf;
  delete[] tbuf;
}

class Static {
@ -379,7 +379,7 @@ static void bench_alloc_large(void) {
  static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024;
  std::unique_ptr<char[]> buffers[kNumBuffers];

  std::random_device rd;
  std::random_device rd; (void)rd;
  std::mt19937 gen(42); //rd());
  std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize);
  std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1);
@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)

#include "mimalloc.h"
// #include "mimalloc/internal.h"
#include "mimalloc/types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX
#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX

#include "testhelper.h"

@ -46,6 +46,11 @@ bool test_heap2(void);
bool test_stl_allocator1(void);
bool test_stl_allocator2(void);

bool test_stl_heap_allocator1(void);
bool test_stl_heap_allocator2(void);
bool test_stl_heap_allocator3(void);
bool test_stl_heap_allocator4(void);

bool mem_is_zero(uint8_t* p, size_t size) {
  if (p==NULL) return false;
  for (size_t i = 0; i < size; ++i) {
@ -59,7 +64,7 @@ bool mem_is_zero(uint8_t* p, size_t size) {
// ---------------------------------------------------------------------------
int main(void) {
  mi_option_disable(mi_option_verbose);

  // ---------------------------------------------------
  // Malloc
  // ---------------------------------------------------
@ -154,7 +159,7 @@ int main(void) {
  };
  CHECK_BODY("malloc-aligned6") {
    bool ok = true;
    for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) {
    for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) {
      void* ps[8];
      for (int i = 0; i < 8 && ok; i++) {
        ps[i] = mi_malloc_aligned(align*13 // size
@ -170,16 +175,16 @@ int main(void) {
    result = ok;
  };
  CHECK_BODY("malloc-aligned7") {
    void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX);
    void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX);
    mi_free(p);
    result = ((uintptr_t)p % MI_ALIGNMENT_MAX) == 0;
    result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0;
  };
  CHECK_BODY("malloc-aligned8") {
    bool ok = true;
    for (int i = 0; i < 5 && ok; i++) {
      int n = (1 << i);
      void* p = mi_malloc_aligned(1024, n * MI_ALIGNMENT_MAX);
      ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0;
      void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX);
      ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0;
      mi_free(p);
    }
    result = ok;
@ -187,7 +192,7 @@ int main(void) {
  CHECK_BODY("malloc-aligned9") {
    bool ok = true;
    void* p[8];
    size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGNMENT_MAX, MI_ALIGNMENT_MAX + 1, 2 * MI_ALIGNMENT_MAX, 8 * MI_ALIGNMENT_MAX, 0 };
    size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 };
    for (int i = 0; i < 28 && ok; i++) {
      int align = (1 << i);
      for (int j = 0; j < 8 && ok; j++) {
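Outside the test harness, the API exercised here is simply mi_malloc_aligned and mi_free; the tests sweep alignments up to and beyond MI_BLOCK_ALIGNMENT_MAX (the renamed constant from mimalloc/types.h, as included above). A quick usage sketch, where the 4096 alignment is just an example value:

  #include <mimalloc.h>
  #include <cstdint>
  #include <cassert>

  void demo_aligned(void) {
    void* p = mi_malloc_aligned(1000, 4096);   // 1000 bytes, 4 KiB aligned
    assert(((uintptr_t)p % 4096) == 0);        // returned pointer honors the requested alignment
    mi_free(p);                                // freed like any other mimalloc block
  }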
@ -295,15 +300,22 @@ int main(void) {
  // ---------------------------------------------------
  // various
  // ---------------------------------------------------
#if !defined(MI_TRACK_ASAN) // realpath may leak with ASAN enabled (as the ASAN allocator intercepts it)
  CHECK_BODY("realpath") {
    char* s = mi_realpath( ".", NULL );
    // printf("realpath: %s\n",s);
    mi_free(s);
  };
#endif

  CHECK("stl_allocator1", test_stl_allocator1());
  CHECK("stl_allocator2", test_stl_allocator2());

  CHECK("stl_heap_allocator1", test_stl_heap_allocator1());
  CHECK("stl_heap_allocator2", test_stl_heap_allocator2());
  CHECK("stl_heap_allocator3", test_stl_heap_allocator3());
  CHECK("stl_heap_allocator4", test_stl_heap_allocator4());

  // ---------------------------------------------------
  // Done
  // ---------------------------------------------------
@ -357,3 +369,61 @@ bool test_stl_allocator2(void) {
  return true;
#endif
}

bool test_stl_heap_allocator1(void) {
#ifdef __cplusplus
  std::vector<some_struct, mi_heap_stl_allocator<some_struct> > vec;
  vec.push_back(some_struct());
  vec.pop_back();
  return vec.size() == 0;
#else
  return true;
#endif
}

bool test_stl_heap_allocator2(void) {
#ifdef __cplusplus
  std::vector<some_struct, mi_heap_destroy_stl_allocator<some_struct> > vec;
  vec.push_back(some_struct());
  vec.pop_back();
  return vec.size() == 0;
#else
  return true;
#endif
}

bool test_stl_heap_allocator3(void) {
#ifdef __cplusplus
  mi_heap_t* heap = mi_heap_new();
  bool good = false;
  {
    mi_heap_stl_allocator<some_struct> myAlloc(heap);
    std::vector<some_struct, mi_heap_stl_allocator<some_struct> > vec(myAlloc);
    vec.push_back(some_struct());
    vec.pop_back();
    good = vec.size() == 0;
  }
  mi_heap_delete(heap);
  return good;
#else
  return true;
#endif
}

bool test_stl_heap_allocator4(void) {
#ifdef __cplusplus
  mi_heap_t* heap = mi_heap_new();
  bool good = false;
  {
    mi_heap_destroy_stl_allocator<some_struct> myAlloc(heap);
    std::vector<some_struct, mi_heap_destroy_stl_allocator<some_struct> > vec(myAlloc);
    vec.push_back(some_struct());
    vec.pop_back();
    good = vec.size() == 0;
  }
  mi_heap_destroy(heap);
  return good;
#else
  return true;
#endif
}
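For readers unfamiliar with these allocators: mi_heap_stl_allocator allocates from its own mimalloc heap and frees elements individually, while mi_heap_destroy_stl_allocator releases the whole backing heap at once when the last allocator copy goes away. A hedged usage sketch outside the test harness (element types are arbitrary examples):

  #include <mimalloc.h>
  #include <vector>

  void demo_heap_allocators(void) {
    // Per-element frees; the backing heap is deleted once the last copy of `a` is gone.
    mi_heap_stl_allocator<int> a;
    std::vector<int, mi_heap_stl_allocator<int> > v(a);
    v.push_back(42);

    // Arena-style use: destroying the heap releases all of its blocks at once.
    mi_heap_destroy_stl_allocator<int> b;
    std::vector<int, mi_heap_destroy_stl_allocator<int> > w(b);
    w.push_back(7);
  }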
@ -37,11 +37,12 @@ static int ITER = 50; // N full iterations destructing and re-creating a
// static int THREADS = 8;    // more repeatable if THREADS <= #processors
// static int SCALE = 100;    // scaling factor

#define STRESS   // undefine for leak test
#define STRESS   // undefine for leak test

static bool allow_large_objects = true;  // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0;          // use single object size of `N * sizeof(uintptr_t)`?

static bool main_participates = false;   // main thread participates as a worker too

// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
@ -276,8 +277,8 @@ int main(int argc, char** argv) {

#ifndef USE_STD_MALLOC
#ifndef NDEBUG
  mi_collect(true);
  //mi_debug_show_arenas();
  // mi_collect(true);
  mi_debug_show_arenas(true,true,true);
#endif
  mi_stats_print(NULL);
#endif
@ -301,13 +302,15 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
  thread_entry_fun = fun;
  DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
  HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
  for (uintptr_t i = 0; i < nthreads; i++) {
  const size_t start = (main_participates ? 1 : 0);
  for (size_t i = start; i < nthreads; i++) {
    thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
  }
  for (size_t i = 0; i < nthreads; i++) {
  if (main_participates) fun(0); // run the main thread as well
  for (size_t i = start; i < nthreads; i++) {
    WaitForSingleObject(thandles[i], INFINITE);
  }
  for (size_t i = 0; i < nthreads; i++) {
  for (size_t i = start; i < nthreads; i++) {
    CloseHandle(thandles[i]);
  }
  custom_free(tids);
@ -334,11 +337,13 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
  thread_entry_fun = fun;
  pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
  memset(threads, 0, sizeof(pthread_t) * nthreads);
  const size_t start = (main_participates ? 1 : 0);
  //pthread_setconcurrency(nthreads);
  for (size_t i = 0; i < nthreads; i++) {
  for (size_t i = start; i < nthreads; i++) {
    pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
  }
  for (size_t i = 0; i < nthreads; i++) {
  if (main_participates) fun(0); // run the main thread as well
  for (size_t i = start; i < nthreads; i++) {
    pthread_join(threads[i], NULL);
  }
  custom_free(threads);
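The pattern introduced above with main_participates is simply: skip spawning worker 0 and run it on the calling thread instead. Distilled with std::thread rather than the raw Win32/pthread code of the test (illustrative only):

  #include <thread>
  #include <vector>
  #include <cstdint>
  #include <cstddef>

  static void run_workers(size_t nthreads, void (*fun)(intptr_t), bool main_participates) {
    const size_t start = (main_participates ? 1 : 0);
    std::vector<std::thread> workers;
    for (size_t i = start; i < nthreads; i++) {
      workers.emplace_back(fun, (intptr_t)i);   // spawn workers start..nthreads-1
    }
    if (main_participates) fun(0);              // the main thread runs worker 0 itself
    for (auto& t : workers) t.join();           // then waits for the rest
  }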