mirror of
https://github.com/microsoft/mimalloc.git
synced 2024-12-27 13:33:18 +08:00
merge from dev
This commit is contained in:
commit
1b0de9b4cf
@ -14,6 +14,9 @@ option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macO
|
||||
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
|
||||
option(MI_BUILD_TESTS "Build test executables" ON)
|
||||
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
|
||||
option(MI_PADDING "Enable padding to detect heap block overflow (only in debug mode)" ON)
|
||||
option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
|
||||
option(MI_SHOW_ERRORS "Show error and warning messages by default" OFF)
|
||||
|
||||
include("cmake/mimalloc-config-version.cmake")
|
||||
|
||||
@ -65,6 +68,7 @@ if(MI_OVERRIDE MATCHES "ON")
|
||||
# use zone's on macOS
|
||||
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
|
||||
list(APPEND mi_sources src/alloc-override-osx.c)
|
||||
list(APPEND mi_defines MI_OSX_ZONE=1)
|
||||
if(NOT MI_INTERPOSE MATCHES "ON")
|
||||
message(STATUS " (enabling INTERPOSE as well since zone's require this)")
|
||||
set(MI_INTERPOSE "ON")
|
||||
@ -98,6 +102,21 @@ if(MI_DEBUG_FULL MATCHES "ON")
|
||||
list(APPEND mi_defines MI_DEBUG=3) # full invariant checking
|
||||
endif()
|
||||
|
||||
if(MI_PADDING MATCHES "OFF")
|
||||
message(STATUS "Disable padding of heap blocks in debug mode (MI_PADDING=OFF)")
|
||||
list(APPEND mi_defines MI_PADDING=0)
|
||||
endif()
|
||||
|
||||
if(MI_XMALLOC MATCHES "ON")
|
||||
message(STATUS "Enable abort() calls on memory allocation failure (MI_XMALLOC=ON)")
|
||||
list(APPEND mi_defines MI_XMALLOC=1)
|
||||
endif()
|
||||
|
||||
if(MI_SHOW_ERRORS MATCHES "ON")
|
||||
message(STATUS "Enable printing of error and warning messages by default (MI_SHOW_ERRORS=ON)")
|
||||
list(APPEND mi_defines MI_SHOW_ERRORS=1)
|
||||
endif()
|
||||
|
||||
if(MI_USE_CXX MATCHES "ON")
|
||||
message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)")
|
||||
set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX )
|
||||
@ -139,10 +158,12 @@ endif()
|
||||
if(WIN32)
|
||||
list(APPEND mi_libraries psapi shell32 user32 bcrypt)
|
||||
else()
|
||||
list(APPEND mi_libraries pthread)
|
||||
find_library(LIBRT rt)
|
||||
if(LIBRT)
|
||||
list(APPEND mi_libraries ${LIBRT})
|
||||
if(NOT ${CMAKE_C_COMPILER} MATCHES "android")
|
||||
list(APPEND mi_libraries pthread)
|
||||
find_library(LIBRT rt)
|
||||
if(LIBRT)
|
||||
list(APPEND mi_libraries ${LIBRT})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -18,12 +18,15 @@ jobs:
|
||||
Debug:
|
||||
BuildType: debug
|
||||
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
|
||||
MSBuildConfiguration: Debug
|
||||
Release:
|
||||
BuildType: release
|
||||
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
|
||||
MSBuildConfiguration: Release
|
||||
Secure:
|
||||
BuildType: secure
|
||||
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
|
||||
MSBuildConfiguration: Release
|
||||
steps:
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
@ -32,12 +35,13 @@ jobs:
|
||||
- task: MSBuild@1
|
||||
inputs:
|
||||
solution: $(BuildType)/libmimalloc.sln
|
||||
configuration: '$(MSBuildConfiguration)'
|
||||
- script: |
|
||||
cd $(BuildType)
|
||||
ctest
|
||||
displayName: CTest
|
||||
- upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
artifact: mimalloc-windows-$(BuildType)
|
||||
# - upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
# artifact: mimalloc-windows-$(BuildType)
|
||||
|
||||
- job:
|
||||
displayName: Linux
|
||||
@ -95,8 +99,8 @@ jobs:
|
||||
displayName: Make
|
||||
- script: make test -C $(BuildType)
|
||||
displayName: CTest
|
||||
- upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
artifact: mimalloc-ubuntu-$(BuildType)
|
||||
# - upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
# artifact: mimalloc-ubuntu-$(BuildType)
|
||||
|
||||
- job:
|
||||
displayName: macOS
|
||||
@ -123,5 +127,5 @@ jobs:
|
||||
displayName: Make
|
||||
- script: make test -C $(BuildType)
|
||||
displayName: CTest
|
||||
- upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
artifact: mimalloc-macos-$(BuildType)
|
||||
# - upload: $(Build.SourcesDirectory)/$(BuildType)
|
||||
# artifact: mimalloc-macos-$(BuildType)
|
||||
|
@ -38,7 +38,7 @@ PROJECT_NAME = mi-malloc
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.4
|
||||
PROJECT_NUMBER = 1.6
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
@ -1009,28 +1009,31 @@ or via environment variables.
|
||||
- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
|
||||
- `MIMALLOC_VERBOSE=1`: show verbose messages.
|
||||
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
|
||||
- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce
|
||||
memory fragmentation in long running (server) programs. If performance is impacted,
|
||||
`MIMALLOC_RESET_DELAY=`_msecs_ can be set higher (100ms by default) to make the page
|
||||
reset occur less frequently.
|
||||
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
|
||||
- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS
|
||||
that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
|
||||
programs. By setting it to `0` no such page resets will be done which can improve performance for programs that are not long
|
||||
running. As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page
|
||||
reset occur less frequently instead of turning it off completely.
|
||||
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
|
||||
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
|
||||
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
|
||||
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
|
||||
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible).
|
||||
- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
|
||||
show in the working set even though usually just a small part is committed to physical memory. This is why it
|
||||
turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better
|
||||
to turn it on as it improves performance and has no other drawbacks.
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
|
||||
startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
|
||||
startup and sometimes this can give a large (latency) performance improvement on big workloads.
|
||||
Usually it is better to not use
|
||||
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
|
||||
contiguous physical memory can take a long time when memory is fragmented.
|
||||
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
|
||||
startup only once).
|
||||
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
|
||||
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
|
||||
and allocate just a little to take up space in the huge OS page area (which cannot be reset).
|
||||
|
||||
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
|
||||
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
|
||||
OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments.
|
||||
|
||||
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
|
||||
[windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
|
||||
|
||||
@ -1074,14 +1077,18 @@ resolved to the _mimalloc_ library.
|
||||
Note that certain security restrictions may apply when doing this from
|
||||
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
|
||||
|
||||
Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this
|
||||
(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)).
|
||||
(Note: macOS support for dynamic overriding is recent, please report any issues.)
|
||||
|
||||
|
||||
### Windows
|
||||
|
||||
Overriding on Windows is robust but requires that you link your program explicitly with
|
||||
Overriding on Windows is robust and has the
|
||||
particular advantage to be able to redirect all malloc/free calls that go through
|
||||
the (dynamic) C runtime allocator, including those from other DLL's or libraries.
|
||||
|
||||
The overriding on Windows requires that you link your program explicitly with
|
||||
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
|
||||
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
|
||||
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available
|
||||
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
|
||||
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
|
||||
mimalloc (in `mimalloc-override.dll`).
|
||||
@ -1090,14 +1097,15 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
|
||||
call to the mimalloc API in the `main` function, like `mi_version()`
|
||||
(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
|
||||
for an example on how to use this. For best performance on Windows with C++, it
|
||||
is highly recommended to also override the `new`/`delete` operations (by including
|
||||
is also recommended to also override the `new`/`delete` operations (by including
|
||||
[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project).
|
||||
|
||||
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
|
||||
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
|
||||
|
||||
(Note: in principle, it is possible to patch existing executables
|
||||
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder)
|
||||
(Note: in principle, it is possible to even patch existing executables without any recompilation
|
||||
if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
|
||||
into the import table (and put `mimalloc-redirect.dll` in the same folder)
|
||||
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
|
||||
|
||||
|
||||
|
@ -36,15 +36,15 @@
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="23.706667"
|
||||
inkscape:cx="34.419045"
|
||||
inkscape:cx="24.864771"
|
||||
inkscape:cy="35.79485"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
inkscape:window-width="3840"
|
||||
inkscape:window-height="2160"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-height="2050"
|
||||
inkscape:window-x="-12"
|
||||
inkscape:window-y="-12"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:snap-object-midpoints="false"
|
||||
inkscape:snap-bbox="false"
|
||||
@ -126,17 +126,15 @@
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.28797817px;font-family:RoutedGothicEx;-inkscape-font-specification:'RoutedGothicEx, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.28599727"
|
||||
id="path6525" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
<g
|
||||
aria-label="m"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3694315px;line-height:1.25;font-family:RoutedGothicEx;-inkscape-font-specification:'RoutedGothicEx, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.15923578"
|
||||
x="1.6752419"
|
||||
y="293.17081"
|
||||
id="text848"><tspan
|
||||
y="293.17081"
|
||||
x="1.6752419"
|
||||
sodipodi:role="line"
|
||||
id="tspan872"
|
||||
style="stroke-width:0.15923578">m</tspan></text>
|
||||
id="text848">
|
||||
<path
|
||||
d="m 2.3718985,293.17081 c 0.080862,0 0.1492836,-0.0249 0.211485,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.95313 c 0.037321,-0.0622 0.087082,-0.13062 0.1430634,-0.19282 0.049761,-0.0622 0.1368433,-0.1244 0.2488059,-0.19283 0.1119627,-0.0622 0.2301455,-0.0995 0.3545485,-0.0995 0.1119626,0 0.2177051,0.0498 0.3110074,0.14929 0.087082,0.0995 0.1368432,0.23014 0.1368432,0.39808 v 1.89093 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.124403,0.0871 0.211485,0.0871 0.080862,0 0.1492836,-0.0249 0.2114851,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.89093 -0.0622 c 0.018661,-0.0311 0.043541,-0.0622 0.068422,-0.0995 0.024881,-0.0373 0.062201,-0.0746 0.1119626,-0.1244 0.049761,-0.0498 0.099522,-0.0933 0.1492836,-0.13063 0.049761,-0.0373 0.1181828,-0.0684 0.1928246,-0.0933 0.074642,-0.0249 0.1492835,-0.0373 0.2239253,-0.0373 0.1119626,0 0.2177052,0.0498 0.3110074,0.14929 0.087082,0.0995 0.1368432,0.23014 0.1368432,0.39808 v 1.89093 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.124403,0.0871 0.211485,0.0871 0.080862,0 0.1492836,-0.0249 0.2114851,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.89093 c 0,-0.31722 -0.1057425,-0.58469 -0.3047872,-0.80861 -0.2052649,-0.22393 -0.4540708,-0.33589 -0.7401976,-0.33589 -0.2052649,0 -0.3918693,0.0435 -0.5535932,0.11818 -0.1617238,0.0746 -0.3047872,0.17416 -0.4291902,0.29857 -0.211485,-0.27369 -0.4789514,-0.41675 -0.8086192,-0.41675 -0.2861268,0 -0.5411529,0.0809 -0.7650782,0.23636 -0.024881,-0.0498 -0.062202,-0.0933 -0.1119627,-0.13062 -0.049761,-0.0373 -0.1057425,-0.056 -0.167944,-0.056 -0.087082,0 -0.1555037,0.0311 -0.211485,0.0871 -0.062202,0.0622 -0.087082,0.13062 -0.087082,0.21149 v 2.6871 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.1244029,0.0871 0.211485,0.0871 z"
|
||||
style="stroke-width:0.15923578"
|
||||
id="path834" />
|
||||
</g>
|
||||
<g
|
||||
id="g28"
|
||||
transform="translate(-0.23995531,0.02790178)">
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 18 KiB |
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
@ -107,11 +107,11 @@ $(document).ready(function(){initNavTree('environment.html','');});
|
||||
<li><code>MIMALLOC_SHOW_STATS=1</code>: show statistics when the program terminates.</li>
|
||||
<li><code>MIMALLOC_VERBOSE=1</code>: show verbose messages.</li>
|
||||
<li><code>MIMALLOC_SHOW_ERRORS=1</code>: show error and warning messages.</li>
|
||||
<li><code>MIMALLOC_PAGE_RESET=1</code>: reset (or purge) OS pages when not in use. This can reduce memory fragmentation in long running (server) programs. If performance is impacted, <code>MIMALLOC_RESET_DELAY=</code>_msecs_ can be set higher (100ms by default) to make the page reset occur less frequently.</li>
|
||||
<li><code>MIMALLOC_LARGE_OS_PAGES=1</code>: use large OS pages when available; for some workloads this can significantly improve performance. Use <code>MIMALLOC_VERBOSE</code> to check if the large OS pages are enabled – usually one needs to explicitly allow large OS pages (as on <a href="https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017">Windows</a> and <a href="https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5">Linux</a>). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use <code>MIMALLOC_RESERVE_HUGE_OS_PAGES</code> instead when possible).</li>
|
||||
<li><code>MIMALLOC_EAGER_REGION_COMMIT=1</code>: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks.</li>
|
||||
<li><code>MIMALLOC_RESERVE_HUGE_OS_PAGES=N</code>: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use <code>MIMALLOC_LARGE_OS_PAGES</code> in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented. Note that we usually need to explicitly enable huge OS pages (as on <a href="https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017">Windows</a> and <a href="https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5">Linux</a>)). With huge OS pages, it may be beneficial to set the setting <code>MIMALLOC_EAGER_COMMIT_DELAY=N</code> (with usually <code>N</code> as 1) to delay the initial <code>N</code> segments of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset). </li>
|
||||
<li><code>MIMALLOC_PAGE_RESET=0</code>: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) programs. By setting it to <code>0</code> no such page resets will be done which can improve performance for programs that are not long running. As an alternative, the <code>MIMALLOC_RESET_DELAY=</code><msecs> can be set higher (100ms by default) to make the page reset occur less frequently instead of turning it off completely.</li>
|
||||
<li><code>MIMALLOC_LARGE_OS_PAGES=1</code>: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use <code>MIMALLOC_VERBOSE</code> to check if the large OS pages are enabled – usually one needs to explicitly allow large OS pages (as on <a href="https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017">Windows</a> and <a href="https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5">Linux</a>). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use <code>MIMALLOC_RESERVE_HUGE_OS_PAGES</code> instead when possible).</li>
|
||||
<li><code>MIMALLOC_RESERVE_HUGE_OS_PAGES=N</code>: where N is the number of 1GiB <em>huge</em> OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. Usually it is better to not use <code>MIMALLOC_LARGE_OS_PAGES</code> in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on <a href="https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017">Windows</a> and <a href="https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5">Linux</a>)). With huge OS pages, it may be beneficial to set the setting <code>MIMALLOC_EAGER_COMMIT_DELAY=N</code> (<code>N</code> is 1 by default) to delay the initial <code>N</code> segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset).</li>
|
||||
</ul>
|
||||
<p>Use caution when using <code>fork</code> in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments. </p>
|
||||
</div></div><!-- PageDoc -->
|
||||
</div><!-- contents -->
|
||||
</div><!-- doc-content -->
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
@ -146,7 +146,7 @@ Macros</h2></td></tr>
|
||||
</table>
|
||||
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
|
||||
<p>Typed allocation macros. </p>
|
||||
<h2 class="groupheader">Macro Definition Documentation</h2>
|
||||
<p>For example: </p><div class="fragment"><div class="line"><span class="keywordtype">int</span>* p = <a class="code" href="group__typed.html#ga0619a62c5fd886f1016030abe91f0557">mi_malloc_tp</a>(<span class="keywordtype">int</span>)</div></div><!-- fragment --> <h2 class="groupheader">Macro Definition Documentation</h2>
|
||||
<a id="gae80c47c9d4cab10961fff1a8ac98fc07"></a>
|
||||
<h2 class="memtitle"><span class="permalink"><a href="#gae80c47c9d4cab10961fff1a8ac98fc07">◆ </a></span>mi_calloc_tp</h2>
|
||||
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
@ -127,7 +127,9 @@ $(document).ready(function(){initNavTree('index.html','');});
|
||||
<li><a class="el" href="group__heap.html">Heap Allocation</a></li>
|
||||
<li><a class="el" href="group__typed.html">Typed Macros</a></li>
|
||||
<li><a class="el" href="group__analysis.html">Heap Introspection</a></li>
|
||||
<li><a class="el" href="group__options.html">Runtime Options</a> </li>
|
||||
<li><a class="el" href="group__options.html">Runtime Options</a></li>
|
||||
<li><a class="el" href="group__posix.html">Posix</a></li>
|
||||
<li><a class="el" href="group__cpp.html">C++ wrappers</a> </li>
|
||||
</ul>
|
||||
</div></div><!-- PageDoc -->
|
||||
</div><!-- contents -->
|
||||
|
File diff suppressed because one or more lines are too long
@ -36,15 +36,15 @@
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="23.706667"
|
||||
inkscape:cx="34.419045"
|
||||
inkscape:cx="24.864771"
|
||||
inkscape:cy="35.79485"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
inkscape:window-width="3840"
|
||||
inkscape:window-height="2160"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-height="2050"
|
||||
inkscape:window-x="-12"
|
||||
inkscape:window-y="-12"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:snap-object-midpoints="false"
|
||||
inkscape:snap-bbox="false"
|
||||
@ -126,17 +126,15 @@
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.28797817px;font-family:RoutedGothicEx;-inkscape-font-specification:'RoutedGothicEx, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:0.28599727"
|
||||
id="path6525" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
<g
|
||||
aria-label="m"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.3694315px;line-height:1.25;font-family:RoutedGothicEx;-inkscape-font-specification:'RoutedGothicEx, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.15923578"
|
||||
x="1.6752419"
|
||||
y="293.17081"
|
||||
id="text848"><tspan
|
||||
y="293.17081"
|
||||
x="1.6752419"
|
||||
sodipodi:role="line"
|
||||
id="tspan872"
|
||||
style="stroke-width:0.15923578">m</tspan></text>
|
||||
id="text848">
|
||||
<path
|
||||
d="m 2.3718985,293.17081 c 0.080862,0 0.1492836,-0.0249 0.211485,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.95313 c 0.037321,-0.0622 0.087082,-0.13062 0.1430634,-0.19282 0.049761,-0.0622 0.1368433,-0.1244 0.2488059,-0.19283 0.1119627,-0.0622 0.2301455,-0.0995 0.3545485,-0.0995 0.1119626,0 0.2177051,0.0498 0.3110074,0.14929 0.087082,0.0995 0.1368432,0.23014 0.1368432,0.39808 v 1.89093 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.124403,0.0871 0.211485,0.0871 0.080862,0 0.1492836,-0.0249 0.2114851,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.89093 -0.0622 c 0.018661,-0.0311 0.043541,-0.0622 0.068422,-0.0995 0.024881,-0.0373 0.062201,-0.0746 0.1119626,-0.1244 0.049761,-0.0498 0.099522,-0.0933 0.1492836,-0.13063 0.049761,-0.0373 0.1181828,-0.0684 0.1928246,-0.0933 0.074642,-0.0249 0.1492835,-0.0373 0.2239253,-0.0373 0.1119626,0 0.2177052,0.0498 0.3110074,0.14929 0.087082,0.0995 0.1368432,0.23014 0.1368432,0.39808 v 1.89093 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.124403,0.0871 0.211485,0.0871 0.080862,0 0.1492836,-0.0249 0.2114851,-0.0871 0.055981,-0.056 0.087082,-0.1244 0.087082,-0.21148 v -1.89093 c 0,-0.31722 -0.1057425,-0.58469 -0.3047872,-0.80861 -0.2052649,-0.22393 -0.4540708,-0.33589 -0.7401976,-0.33589 -0.2052649,0 -0.3918693,0.0435 -0.5535932,0.11818 -0.1617238,0.0746 -0.3047872,0.17416 -0.4291902,0.29857 -0.211485,-0.27369 -0.4789514,-0.41675 -0.8086192,-0.41675 -0.2861268,0 -0.5411529,0.0809 -0.7650782,0.23636 -0.024881,-0.0498 -0.062202,-0.0933 -0.1119627,-0.13062 -0.049761,-0.0373 -0.1057425,-0.056 -0.167944,-0.056 -0.087082,0 -0.1555037,0.0311 -0.211485,0.0871 -0.062202,0.0622 -0.087082,0.13062 -0.087082,0.21149 v 2.6871 c 0,0.0871 0.024881,0.1555 0.087082,0.21148 0.055981,0.0622 0.1244029,0.0871 0.211485,0.0871 z"
|
||||
style="stroke-width:0.15923578"
|
||||
id="path834" />
|
||||
</g>
|
||||
<g
|
||||
id="g28"
|
||||
transform="translate(-0.23995531,0.02790178)">
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 18 KiB |
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
@ -116,12 +116,13 @@ $(document).ready(function(){initNavTree('overrides.html','');});
|
||||
<li><code>env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram</code></li>
|
||||
</ul>
|
||||
<p>Note that certain security restrictions may apply when doing this from the <a href="https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash">shell</a>.</p>
|
||||
<p>Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue <a href="https://github.com/microsoft/mimalloc/issues/50"><code>#50</code></a>).</p>
|
||||
<p>(Note: macOS support for dynamic overriding is recent, please report any issues.)</p>
|
||||
<h3>Windows</h3>
|
||||
<p>Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the <code>/MD</code> or <code>/MDd</code> switch). Moreover, you need to ensure the <code>mimalloc-redirect.dll</code> (or <code>mimalloc-redirect32.dll</code>) is available in the same folder as the main <code>mimalloc-override.dll</code> at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in <code>mimalloc-override.dll</code>).</p>
|
||||
<p>To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the <code>main</code> function, like <code>mi_version()</code> (or use the <code>/INCLUDE:mi_version</code> switch on the linker). See the <code>mimalloc-override-test</code> project for an example on how to use this. For best performance on Windows with C++, it is highly recommended to also override the <code>new</code>/<code>delete</code> operations (by including <a href="https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h"><code>mimalloc-new-delete.h</code></a> a single(!) source file in your project).</p>
|
||||
<p>Overriding on Windows is robust and has the particular advantage to be able to redirect all malloc/free calls that go through the (dynamic) C runtime allocator, including those from other DLL's or libraries.</p>
|
||||
<p>The overriding on Windows requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the <code>/MD</code> or <code>/MDd</code> switch). Also, the <code>mimalloc-redirect.dll</code> (or <code>mimalloc-redirect32.dll</code>) must be available in the same folder as the main <code>mimalloc-override.dll</code> at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in <code>mimalloc-override.dll</code>).</p>
|
||||
<p>To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the <code>main</code> function, like <code>mi_version()</code> (or use the <code>/INCLUDE:mi_version</code> switch on the linker). See the <code>mimalloc-override-test</code> project for an example on how to use this. For best performance on Windows with C++, it is also recommended to also override the <code>new</code>/<code>delete</code> operations (by including <a href="https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h"><code>mimalloc-new-delete.h</code></a> a single(!) source file in your project).</p>
|
||||
<p>The environment variable <code>MIMALLOC_DISABLE_REDIRECT=1</code> can be used to disable dynamic overriding at run-time. Use <code>MIMALLOC_VERBOSE=1</code> to check if mimalloc was successfully redirected.</p>
|
||||
<p>(Note: in principle, it is possible to patch existing executables that are linked with the dynamic C runtime (<code>ucrtbase.dll</code>) by just putting the <code>mimalloc-override.dll</code> into the import table (and putting <code>mimalloc-redirect.dll</code> in the same folder) Such patching can be done for example with <a href="https://ntcore.com/?page_id=388">CFF Explorer</a>).</p>
|
||||
<p>(Note: in principle, it is possible to even patch existing executables without any recompilation if they are linked with the dynamic C runtime (<code>ucrtbase.dll</code>) – just put the <code>mimalloc-override.dll</code> into the import table (and put <code>mimalloc-redirect.dll</code> in the same folder) Such patching can be done for example with <a href="https://ntcore.com/?page_id=388">CFF Explorer</a>).</p>
|
||||
<h2>Static override</h2>
|
||||
<p>On Unix systems, you can also statically link with <em>mimalloc</em> to override the standard malloc interface. The recommended way is to link the final program with the <em>mimalloc</em> single object file (<code>mimalloc-override.o</code>). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the <em>mimalloc</em> library, link it as the first object file. For example:</p>
|
||||
<div class="fragment"><div class="line">gcc -o myprogram mimalloc-<span class="keyword">override</span>.o myfile1.c ...</div></div><!-- fragment --><h2>List of Overrides:</h2>
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -37,7 +37,7 @@
|
||||
<td id="projectlogo"><img alt="Logo" src="mimalloc-logo.svg"/></td>
|
||||
<td id="projectalign" style="padding-left: 0.5em;">
|
||||
<div id="projectname">mi-malloc
|
||||
 <span id="projectnumber">1.4</span>
|
||||
 <span id="projectnumber">1.6</span>
|
||||
</div>
|
||||
</td>
|
||||
<td> <div id="MSearchBox" class="MSearchBoxInactive">
|
||||
|
@ -176,14 +176,14 @@
|
||||
</Command>
|
||||
</PostBuildEvent>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\test\main-override.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="mimalloc-override.vcxproj">
|
||||
<Project>{abb5eae7-b3e6-432e-b636-333449892ea7}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\test\main-override.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
|
@ -253,4 +253,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
@ -1,722 +0,0 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
#pragma once
|
||||
#ifndef MIMALLOC_INTERNAL_TLD_H
|
||||
#define MIMALLOC_INTERNAL_TLD_H
|
||||
|
||||
#include "mimalloc-types.h"
|
||||
#include "mimalloc-internal.h"
|
||||
|
||||
#define MI_TLD_DECL 1 // thread local declaration
|
||||
#define MI_TLD_PTHREAD 2 // ptrhead_get/setspecific
|
||||
#define MI_TLD_DECL_GUARD 3 // thread local + recursion guard at initial load
|
||||
#define MI_TLD_PTHREAD_GUARD 4 // ptrhead_get/setspecific + recursion guard at initial load
|
||||
#define MI_TLD_SLOT 5 // steal slot from OS thread local predefined slots
|
||||
#define MI_TLD_PTHREAD_SLOT 6 // steal slot from pthread structure (usually `retval`)
|
||||
|
||||
|
||||
#if !defined(MI_TLD)
|
||||
#if defined(_MSC_VER) || defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
|
||||
// on windows and linux/freeBSD/netBSD (with initial-exec) a __thread always works without recursion into malloc
|
||||
#define MI_TLD MI_TLD_DECL
|
||||
#elif !defined(MI_MIMALLOC_OVERRIDE)
|
||||
// if not overriding, __thread declarations should be fine (use MI_TLD_PTHREAD if your OS does not have __thread)
|
||||
#define MI_TLD MI_TLD_DECL
|
||||
#elif // defined(MI_MALLOC_OVERRIDE)
|
||||
// if overriding, some BSD variants allocate when accessing a thread local the first time
|
||||
#if defined(__APPLE__)
|
||||
#define MI_TLD MI_TLD_SLOT
|
||||
#define MI_TLD_SLOT_NUM 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
|
||||
// possibly unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
|
||||
// #define MI_TLD MI_TLD_PTHREAD_SLOT
|
||||
// #define MI_TLD_PTHREAD_SLOT_OFS (2*sizeof(void*) + sizeof(long) + 2*sizeof(void*) /*TAILQ*/) // offset `tl_exit_value` <https://github.com/apple/darwin-libpthread/blob/master/src/internal.h#L184>
|
||||
#elif defined(__OpenBSD__)
|
||||
#define MI_TLD MI_TLD_PTHREAD_SLOT
|
||||
#define MI_TLD_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
|
||||
#elif defined(__DragonFly__)
|
||||
#define MI_TLD MI_TLD_PTHREAD_SLOT
|
||||
#define MI_TLD_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (MI_DEBUG>0)
|
||||
#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
|
||||
#else
|
||||
#define mi_trace_message(...)
|
||||
#endif
|
||||
|
||||
#define MI_CACHE_LINE 64
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
|
||||
#define mi_decl_noinline __declspec(noinline)
|
||||
#define mi_decl_thread __declspec(thread)
|
||||
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
|
||||
#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc
|
||||
#define mi_decl_noinline __attribute__((noinline))
|
||||
#define mi_decl_thread __thread
|
||||
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
|
||||
#else
|
||||
#define mi_decl_noinline
|
||||
#define mi_decl_thread __thread // hope for the best :-)
|
||||
#define mi_decl_cache_align
|
||||
#endif
|
||||
|
||||
|
||||
// "options.c"
|
||||
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
|
||||
void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
|
||||
void _mi_warning_message(const char* fmt, ...);
|
||||
void _mi_verbose_message(const char* fmt, ...);
|
||||
void _mi_trace_message(const char* fmt, ...);
|
||||
void _mi_options_init(void);
|
||||
void _mi_error_message(int err, const char* fmt, ...);
|
||||
|
||||
// random.c
|
||||
void _mi_random_init(mi_random_ctx_t* ctx);
|
||||
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
|
||||
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
|
||||
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
|
||||
uintptr_t _os_random_weak(uintptr_t extra_seed);
|
||||
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
|
||||
|
||||
// init.c
|
||||
extern mi_stats_t _mi_stats_main;
|
||||
extern const mi_page_t _mi_page_empty;
|
||||
bool _mi_is_main_thread(void);
|
||||
bool _mi_preloading(); // true while the C runtime is not ready
|
||||
|
||||
// os.c
|
||||
size_t _mi_os_page_size(void);
|
||||
void _mi_os_init(void); // called from process init
|
||||
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
|
||||
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
|
||||
size_t _mi_os_good_alloc_size(size_t size);
|
||||
|
||||
// memory.c
|
||||
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
|
||||
void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld);
|
||||
|
||||
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld);
|
||||
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
|
||||
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
|
||||
bool _mi_mem_protect(void* addr, size_t size);
|
||||
bool _mi_mem_unprotect(void* addr, size_t size);
|
||||
|
||||
void _mi_mem_collect(mi_os_tld_t* tld);
|
||||
|
||||
// "segment.c"
|
||||
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
|
||||
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
|
||||
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
|
||||
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
|
||||
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
|
||||
|
||||
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
|
||||
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
|
||||
void _mi_abandoned_await_readers(void);
|
||||
|
||||
|
||||
|
||||
// "page.c"
|
||||
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
|
||||
|
||||
void _mi_page_retire(mi_page_t* page); // free the page if there are no other pages with many free blocks
|
||||
void _mi_page_unfull(mi_page_t* page);
|
||||
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
|
||||
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
|
||||
void _mi_heap_delayed_free(mi_heap_t* heap);
|
||||
void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
|
||||
|
||||
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
|
||||
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
|
||||
void _mi_deferred_free(mi_heap_t* heap, bool force);
|
||||
|
||||
void _mi_page_free_collect(mi_page_t* page,bool force);
|
||||
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
|
||||
|
||||
size_t _mi_bin_size(uint8_t bin); // for stats
|
||||
uint8_t _mi_bin(size_t size); // for stats
|
||||
uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c"
|
||||
|
||||
// "heap.c"
|
||||
void _mi_heap_destroy_pages(mi_heap_t* heap);
|
||||
void _mi_heap_collect_abandon(mi_heap_t* heap);
|
||||
void _mi_heap_set_default_direct(mi_heap_t* heap);
|
||||
|
||||
// "stats.c"
|
||||
void _mi_stats_done(mi_stats_t* stats);
|
||||
|
||||
mi_msecs_t _mi_clock_now(void);
|
||||
mi_msecs_t _mi_clock_end(mi_msecs_t start);
|
||||
mi_msecs_t _mi_clock_start(void);
|
||||
|
||||
// "alloc.c"
|
||||
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
|
||||
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
|
||||
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
|
||||
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
|
||||
bool _mi_free_delayed_block(mi_block_t* block);
|
||||
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
|
||||
|
||||
#if MI_DEBUG>1
|
||||
bool _mi_page_is_valid(mi_page_t* page);
|
||||
#endif
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Branches
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define mi_unlikely(x) __builtin_expect((x),0)
|
||||
#define mi_likely(x) __builtin_expect((x),1)
|
||||
#else
|
||||
#define mi_unlikely(x) (x)
|
||||
#define mi_likely(x) (x)
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Error codes passed to `_mi_fatal_error`
|
||||
All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
|
||||
For portability define undefined error codes using common Unix codes:
|
||||
<https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
|
||||
----------------------------------------------------------- */
|
||||
#include <errno.h>
|
||||
#ifndef EAGAIN // double free
|
||||
#define EAGAIN (11)
|
||||
#endif
|
||||
#ifndef ENOMEM // out of memory
|
||||
#define ENOMEM (12)
|
||||
#endif
|
||||
#ifndef EFAULT // corrupted free-list or meta-data
|
||||
#define EFAULT (14)
|
||||
#endif
|
||||
#ifndef EINVAL // trying to free an invalid pointer
|
||||
#define EINVAL (22)
|
||||
#endif
|
||||
#ifndef EOVERFLOW // count*size overflow
|
||||
#define EOVERFLOW (75)
|
||||
#endif
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Inlined definitions
|
||||
----------------------------------------------------------- */
|
||||
#define UNUSED(x) (void)(x)
|
||||
#if (MI_DEBUG>0)
|
||||
#define UNUSED_RELEASE(x)
|
||||
#else
|
||||
#define UNUSED_RELEASE(x) UNUSED(x)
|
||||
#endif
|
||||
|
||||
#define MI_INIT4(x) x(),x(),x(),x()
|
||||
#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x)
|
||||
#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x)
|
||||
#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x)
|
||||
#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x)
|
||||
#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
|
||||
#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
|
||||
|
||||
|
||||
// Is `x` a power of two? (0 is considered a power of two)
|
||||
static inline bool _mi_is_power_of_two(uintptr_t x) {
|
||||
return ((x & (x - 1)) == 0);
|
||||
}
|
||||
|
||||
// Align upwards
|
||||
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
|
||||
mi_assert_internal(alignment != 0);
|
||||
uintptr_t mask = alignment - 1;
|
||||
if ((alignment & mask) == 0) { // power of two?
|
||||
return ((sz + mask) & ~mask);
|
||||
}
|
||||
else {
|
||||
return (((sz + mask)/alignment)*alignment);
|
||||
}
|
||||
}
|
||||
|
||||
// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
|
||||
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
|
||||
mi_assert_internal(divider != 0);
|
||||
return (divider == 0 ? size : ((size + divider - 1) / divider));
|
||||
}
|
||||
|
||||
// Is memory zero initialized?
|
||||
static inline bool mi_mem_is_zero(void* p, size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (((uint8_t*)p)[i] != 0) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Align a byte size to a size in _machine words_,
|
||||
// i.e. byte size == `wsize*sizeof(void*)`.
|
||||
static inline size_t _mi_wsize_from_size(size_t size) {
|
||||
mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
|
||||
return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
|
||||
}
|
||||
|
||||
|
||||
// Overflow detecting multiply
|
||||
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
|
||||
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
|
||||
#include <limits.h> // UINT_MAX, ULONG_MAX
|
||||
#if (SIZE_MAX == UINT_MAX)
|
||||
return __builtin_umul_overflow(count, size, total);
|
||||
#elif (SIZE_MAX == ULONG_MAX)
|
||||
return __builtin_umull_overflow(count, size, total);
|
||||
#else
|
||||
return __builtin_umulll_overflow(count, size, total);
|
||||
#endif
|
||||
#else /* __builtin_umul_overflow is unavailable */
|
||||
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
|
||||
*total = count * size;
|
||||
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
|
||||
&& size > 0 && (SIZE_MAX / size) < count);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Safe multiply `count*size` into `total`; return `true` on overflow.
|
||||
static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
|
||||
if (count==1) { // quick check for the case where count is one (common for C++ allocators)
|
||||
*total = size;
|
||||
return false;
|
||||
}
|
||||
else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
|
||||
_mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size);
|
||||
*total = SIZE_MAX;
|
||||
return true;
|
||||
}
|
||||
else return false;
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
The thread local default heap
|
||||
----------------------------------------------------------- */
|
||||
|
||||
extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
|
||||
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
|
||||
extern bool _mi_process_is_initialized;
|
||||
|
||||
#if defined(MI_TLS_OSX_FAST)
|
||||
#define MI_TLS_OSX_OFFSET (MI_TLS_OSX_SLOT*sizeof(void*))
|
||||
static inline void* mi_tls_osx_fast_get(void) {
|
||||
void* ret;
|
||||
__asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void**)(MI_TLS_OSX_OFFSET)));
|
||||
return ret;
|
||||
}
|
||||
static inline void mi_tls_osx_fast_set(void* value) {
|
||||
__asm__("movq %1,%%gs:%0" : "=m" (*(void**)(MI_TLS_OSX_OFFSET)) : "rn" (value));
|
||||
}
|
||||
#elif defined(MI_TLS_PTHREADS)
|
||||
extern pthread_key_t _mi_heap_default_key;
|
||||
#else
|
||||
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
|
||||
#endif
|
||||
|
||||
|
||||
static inline mi_heap_t* mi_get_default_heap(void) {
|
||||
#if defined(MI_TLS_OSX_FAST)
|
||||
// Use a fixed slot in the TSD on MacOSX to avoid recursion (since the loader calls malloc).
|
||||
// We use slot 94 (__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4) <https://github.com/apportable/Foundation/blob/master/System/System/src/pthread_machdep.h>
|
||||
// which seems unused except for the more recent Webkit <https://github.com/WebKit/webkit/blob/master/Source/WTF/wtf/FastTLS.h>
|
||||
// Use with care.
|
||||
mi_heap_t* heap = (mi_heap_t*)mi_tls_osx_fast_get();
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#elif defined(MI_TLS_PTHREADS)
|
||||
// Use pthreads for TLS; this is used on macOSX with interpose as the loader calls `malloc`
|
||||
// to allocate TLS storage leading to recursive calls if __thread declared variables are accessed.
|
||||
// Using pthreads allows us to initialize without recursive calls. (performance seems still quite good).
|
||||
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? (mi_heap_t*)&_mi_heap_empty : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#else
|
||||
#if defined(MI_TLS_RECURSE_GUARD)
|
||||
// On some BSD platforms, like openBSD, the dynamic loader calls `malloc`
|
||||
// to initialize thread local data (before our module is loaded).
|
||||
// To avoid recursion, we need to avoid accessing the thread local `_mi_default_heap`
|
||||
// until our module is loaded and use the statically allocated main heap until that time.
|
||||
// TODO: patch ourselves dynamically to avoid this check every time?
|
||||
// if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main;
|
||||
#endif
|
||||
return _mi_heap_default;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool mi_heap_is_default(const mi_heap_t* heap) {
|
||||
return (heap == mi_get_default_heap());
|
||||
}
|
||||
|
||||
static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
|
||||
return (heap->tld->heap_backing == heap);
|
||||
}
|
||||
|
||||
static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
|
||||
mi_assert_internal(heap != NULL);
|
||||
return (heap != &_mi_heap_empty);
|
||||
}
|
||||
|
||||
static inline uintptr_t _mi_ptr_cookie(const void* p) {
|
||||
mi_assert_internal(_mi_heap_main.cookie != 0);
|
||||
return ((uintptr_t)p ^ _mi_heap_main.cookie);
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Pages
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
|
||||
mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
|
||||
const size_t idx = _mi_wsize_from_size(size);
|
||||
mi_assert_internal(idx < MI_PAGES_DIRECT);
|
||||
return heap->pages_free_direct[idx];
|
||||
}
|
||||
|
||||
// Get the page belonging to a certain size class
|
||||
static inline mi_page_t* _mi_get_free_small_page(size_t size) {
|
||||
return _mi_heap_get_free_small_page(mi_get_default_heap(), size);
|
||||
}
|
||||
|
||||
// Segment that contains the pointer
|
||||
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
|
||||
// mi_assert_internal(p != NULL);
|
||||
return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
|
||||
}
|
||||
|
||||
// Segment belonging to a page
|
||||
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
|
||||
mi_segment_t* segment = _mi_ptr_segment(page);
|
||||
mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
|
||||
return segment;
|
||||
}
|
||||
|
||||
// used internally
|
||||
static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
|
||||
// if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
|
||||
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
|
||||
mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
|
||||
uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
|
||||
mi_assert_internal(idx < segment->capacity);
|
||||
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
|
||||
return idx;
|
||||
}
|
||||
|
||||
// Get the page containing the pointer
|
||||
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
|
||||
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
|
||||
return &((mi_segment_t*)segment)->pages[idx];
|
||||
}
|
||||
|
||||
// Quick page start for initialized pages
|
||||
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
|
||||
const size_t bsize = page->xblock_size;
|
||||
mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0);
|
||||
return _mi_segment_page_start(segment, page, bsize, page_size, NULL);
|
||||
}
|
||||
|
||||
// Get the page containing the pointer
|
||||
static inline mi_page_t* _mi_ptr_page(void* p) {
|
||||
return _mi_segment_page_of(_mi_ptr_segment(p), p);
|
||||
}
|
||||
|
||||
// Get the block size of a page (special cased for huge objects)
|
||||
static inline size_t mi_page_block_size(const mi_page_t* page) {
|
||||
const size_t bsize = page->xblock_size;
|
||||
mi_assert_internal(bsize > 0);
|
||||
if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) {
|
||||
return bsize;
|
||||
}
|
||||
else {
|
||||
size_t psize;
|
||||
_mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL);
|
||||
return psize;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the usable block size of a page without fixed padding.
|
||||
// This may still include internal padding due to alignment and rounding up size classes.
|
||||
static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
|
||||
return mi_page_block_size(page) - MI_PADDING_SIZE;
|
||||
}
|
||||
|
||||
|
||||
// Thread free access
|
||||
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
|
||||
return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3);
|
||||
}
|
||||
|
||||
static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
|
||||
return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3);
|
||||
}
|
||||
|
||||
// Heap access
|
||||
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
|
||||
return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap));
|
||||
}
|
||||
|
||||
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
|
||||
mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
|
||||
mi_atomic_write(&page->xheap,(uintptr_t)heap);
|
||||
}
|
||||
|
||||
// Thread free flag helpers
|
||||
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
|
||||
return (mi_block_t*)(tf & ~0x03);
|
||||
}
|
||||
static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
|
||||
return (mi_delayed_t)(tf & 0x03);
|
||||
}
|
||||
static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
|
||||
return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
|
||||
}
|
||||
static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
|
||||
return mi_tf_make(mi_tf_block(tf),delayed);
|
||||
}
|
||||
static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
|
||||
return mi_tf_make(block, mi_tf_delayed(tf));
|
||||
}
|
||||
|
||||
// are all blocks in a page freed?
|
||||
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
|
||||
static inline bool mi_page_all_free(const mi_page_t* page) {
|
||||
mi_assert_internal(page != NULL);
|
||||
return (page->used == 0);
|
||||
}
|
||||
|
||||
// are there any available blocks?
|
||||
static inline bool mi_page_has_any_available(const mi_page_t* page) {
|
||||
mi_assert_internal(page != NULL && page->reserved > 0);
|
||||
return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
|
||||
}
|
||||
|
||||
// are there immediately available blocks, i.e. blocks available on the free list.
|
||||
static inline bool mi_page_immediate_available(const mi_page_t* page) {
|
||||
mi_assert_internal(page != NULL);
|
||||
return (page->free != NULL);
|
||||
}
|
||||
|
||||
// is more than 7/8th of a page in use?
|
||||
static inline bool mi_page_mostly_used(const mi_page_t* page) {
|
||||
if (page==NULL) return true;
|
||||
uint16_t frac = page->reserved / 8U;
|
||||
return (page->reserved - page->used <= frac);
|
||||
}
|
||||
|
||||
static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
|
||||
return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------
|
||||
// Page flags
|
||||
//-----------------------------------------------------------
|
||||
static inline bool mi_page_is_in_full(const mi_page_t* page) {
|
||||
return page->flags.x.in_full;
|
||||
}
|
||||
|
||||
static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
|
||||
page->flags.x.in_full = in_full;
|
||||
}
|
||||
|
||||
static inline bool mi_page_has_aligned(const mi_page_t* page) {
|
||||
return page->flags.x.has_aligned;
|
||||
}
|
||||
|
||||
static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
|
||||
page->flags.x.has_aligned = has_aligned;
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------
|
||||
Encoding/Decoding the free list next pointers
|
||||
|
||||
This is to protect against buffer overflow exploits where the
|
||||
free list is mutated. Many hardened allocators xor the next pointer `p`
|
||||
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
|
||||
values but might be still too weak: if the attacker can guess
|
||||
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
|
||||
Moreover, if multiple blocks can be read as well, the attacker can
|
||||
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
|
||||
about the pointers (and subsequently `k1`).
|
||||
|
||||
Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
|
||||
Since these operations are not associative, the above approaches do not
|
||||
work so well any more even if the `p` can be guesstimated. For example,
|
||||
for the read case we can subtract two entries to discard the `+k1` term,
|
||||
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
|
||||
We include the left-rotation since xor and addition are otherwise linear
|
||||
in the lowest bit. Finally, both keys are unique per page which reduces
|
||||
the re-use of keys by a large factor.
|
||||
|
||||
We also pass a separate `null` value to be used as `NULL` or otherwise
|
||||
`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
|
||||
------------------------------------------------------------------- */
|
||||
|
||||
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
|
||||
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
|
||||
}
|
||||
|
||||
static inline bool mi_is_in_same_page(const void* p, const void* q) {
|
||||
mi_segment_t* segmentp = _mi_ptr_segment(p);
|
||||
mi_segment_t* segmentq = _mi_ptr_segment(q);
|
||||
if (segmentp != segmentq) return false;
|
||||
uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p);
|
||||
uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q);
|
||||
return (idxp == idxq);
|
||||
}
|
||||
|
||||
static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
|
||||
shift %= MI_INTPTR_BITS;
|
||||
return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
|
||||
}
|
||||
static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
|
||||
shift %= MI_INTPTR_BITS;
|
||||
return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
|
||||
}
|
||||
|
||||
static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
|
||||
void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
|
||||
return (mi_unlikely(p==null) ? NULL : p);
|
||||
}
|
||||
|
||||
static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
|
||||
uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p);
|
||||
return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
|
||||
}
|
||||
|
||||
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
|
||||
#ifdef MI_ENCODE_FREELIST
|
||||
return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
|
||||
#else
|
||||
UNUSED(keys); UNUSED(null);
|
||||
return (mi_block_t*)block->next;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
|
||||
#ifdef MI_ENCODE_FREELIST
|
||||
block->next = mi_ptr_encode(null, next, keys);
|
||||
#else
|
||||
UNUSED(keys); UNUSED(null);
|
||||
block->next = (mi_encoded_t)next;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
|
||||
#ifdef MI_ENCODE_FREELIST
|
||||
mi_block_t* next = mi_block_nextx(page,block,page->keys);
|
||||
// check for free list corruption: is `next` at least in the same page?
|
||||
// TODO: check if `next` is `page->block_size` aligned?
|
||||
if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
|
||||
_mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
|
||||
next = NULL;
|
||||
}
|
||||
return next;
|
||||
#else
|
||||
UNUSED(page);
|
||||
return mi_block_nextx(page,block,NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
|
||||
#ifdef MI_ENCODE_FREELIST
|
||||
mi_block_set_nextx(page,block,next, page->keys);
|
||||
#else
|
||||
UNUSED(page);
|
||||
mi_block_set_nextx(page,block,next,NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Fast "random" shuffle
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
|
||||
if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
|
||||
#if (MI_INTPTR_SIZE==8)
|
||||
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
|
||||
x ^= x >> 30;
|
||||
x *= 0xbf58476d1ce4e5b9UL;
|
||||
x ^= x >> 27;
|
||||
x *= 0x94d049bb133111ebUL;
|
||||
x ^= x >> 31;
|
||||
#elif (MI_INTPTR_SIZE==4)
|
||||
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
|
||||
x ^= x >> 16;
|
||||
x *= 0x7feb352dUL;
|
||||
x ^= x >> 15;
|
||||
x *= 0x846ca68bUL;
|
||||
x ^= x >> 16;
|
||||
#endif
|
||||
return x;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Optimize numa node access for the common case (= one node)
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
int _mi_os_numa_node_get(mi_os_tld_t* tld);
|
||||
size_t _mi_os_numa_node_count_get(void);
|
||||
|
||||
extern size_t _mi_numa_node_count;
|
||||
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
|
||||
if (mi_likely(_mi_numa_node_count == 1)) return 0;
|
||||
else return _mi_os_numa_node_get(tld);
|
||||
}
|
||||
static inline size_t _mi_os_numa_node_count(void) {
|
||||
if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
|
||||
else return _mi_os_numa_node_count_get();
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Getting the thread id should be performant
|
||||
// as it is called in the fast path of `_mi_free`,
|
||||
// so we specialize for various platforms.
|
||||
// -------------------------------------------------------------------
|
||||
#if defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
// Windows: works on Intel and ARM in both 32- and 64-bit
|
||||
return (uintptr_t)NtCurrentTeb();
|
||||
}
|
||||
#elif (defined(__GNUC__) || defined(__clang__)) && \
|
||||
(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
|
||||
// TLS register on x86 is in the FS or GS register
|
||||
// see: https://akkadia.org/drepper/tls.pdf
|
||||
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
uintptr_t tid;
|
||||
#if defined(__i386__)
|
||||
__asm__("movl %%gs:0, %0" : "=r" (tid) : : ); // 32-bit always uses GS
|
||||
#elif defined(__MACH__)
|
||||
__asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS
|
||||
#elif defined(__x86_64__)
|
||||
__asm__("movq %%fs:0, %0" : "=r" (tid) : : ); // x86_64 Linux, BSD uses FS
|
||||
#elif defined(__arm__)
|
||||
asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid));
|
||||
#elif defined(__aarch64__)
|
||||
asm volatile ("mrs %0, tpidr_el0" : "=r" (tid));
|
||||
#endif
|
||||
return tid;
|
||||
}
|
||||
#else
|
||||
// otherwise use standard C
|
||||
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
return (uintptr_t)&_mi_heap_default;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
@ -57,11 +57,10 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
// Encoded free lists allow detection of corrupted free lists
|
||||
// and can detect buffer overflows, modify after free, and double `free`s.
|
||||
#if (MI_SECURE>=3 || MI_DEBUG>=1 || defined(MI_PADDING))
|
||||
#if (MI_SECURE>=3 || MI_DEBUG>=1 || MI_PADDING > 0)
|
||||
#define MI_ENCODE_FREELIST 1
|
||||
#endif
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Platform specific values
|
||||
// ------------------------------------------------------
|
||||
@ -101,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
// Main tuning parameters for segment and page sizes
|
||||
// Sizes for 64-bit, divide by two for 32-bit
|
||||
#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb
|
||||
#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb
|
||||
#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8mb
|
||||
|
||||
#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb
|
||||
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb
|
||||
@ -332,7 +331,7 @@ typedef struct mi_random_cxt_s {
|
||||
|
||||
|
||||
// In debug mode there is a padding stucture at the end of the blocks to check for buffer overflows
|
||||
#if defined(MI_PADDING)
|
||||
#if (MI_PADDING)
|
||||
typedef struct mi_padding_s {
|
||||
uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
|
||||
uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
|
||||
|
@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#ifndef MIMALLOC_H
|
||||
#define MIMALLOC_H
|
||||
|
||||
#define MI_MALLOC_VERSION 161 // major + 2 digits minor
|
||||
#define MI_MALLOC_VERSION 163 // major + 2 digits minor
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Compiler specific attributes
|
||||
@ -28,13 +28,13 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#define mi_decl_nodiscard [[nodiscard]]
|
||||
#elif (__GNUC__ >= 4) || defined(__clang__) // includes clang, icc, and clang-cl
|
||||
#define mi_decl_nodiscard __attribute__((warn_unused_result))
|
||||
#elif (_MSC_VER >= 1700)
|
||||
#elif (_MSC_VER >= 1700)
|
||||
#define mi_decl_nodiscard _Check_return_
|
||||
#else
|
||||
#define mi_decl_nodiscard
|
||||
#endif
|
||||
#else
|
||||
#define mi_decl_nodiscard
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#if !defined(MI_SHARED_LIB)
|
||||
#define mi_decl_export
|
||||
#elif defined(MI_SHARED_LIB_EXPORT)
|
||||
@ -42,13 +42,18 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#else
|
||||
#define mi_decl_export __declspec(dllimport)
|
||||
#endif
|
||||
#if (_MSC_VER >= 1900) && !defined(__EDG__)
|
||||
#define mi_decl_restrict __declspec(allocator) __declspec(restrict)
|
||||
#if defined(__MINGW32__)
|
||||
#define mi_decl_restrict
|
||||
#define mi_attr_malloc __attribute__((malloc))
|
||||
#else
|
||||
#define mi_decl_restrict __declspec(restrict)
|
||||
#if (_MSC_VER >= 1900) && !defined(__EDG__)
|
||||
#define mi_decl_restrict __declspec(allocator) __declspec(restrict)
|
||||
#else
|
||||
#define mi_decl_restrict __declspec(restrict)
|
||||
#endif
|
||||
#define mi_attr_malloc
|
||||
#endif
|
||||
#define mi_cdecl __cdecl
|
||||
#define mi_attr_malloc
|
||||
#define mi_attr_alloc_size(s)
|
||||
#define mi_attr_alloc_size2(s1,s2)
|
||||
#define mi_attr_alloc_align(p)
|
||||
|
54
readme.md
54
readme.md
@ -11,7 +11,7 @@ mimalloc (pronounced "me-malloc")
|
||||
is a general purpose allocator with excellent [performance](#performance) characteristics.
|
||||
Initially developed by Daan Leijen for the run-time systems of the
|
||||
[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages.
|
||||
Latest release:`v1.6.1` (2020-02-17).
|
||||
Latest release:`v1.6.2` (2020-04-20).
|
||||
|
||||
It is a drop-in replacement for `malloc` and can be used in other programs
|
||||
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
|
||||
@ -57,6 +57,9 @@ Enjoy!
|
||||
|
||||
### Releases
|
||||
|
||||
* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda,
|
||||
stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload,
|
||||
fix aligned debug padding.
|
||||
* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects).
|
||||
* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding
|
||||
and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise
|
||||
@ -75,6 +78,11 @@ free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af
|
||||
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
|
||||
* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
|
||||
|
||||
Special thanks to:
|
||||
|
||||
* Jason Gibson (@jasongibson) for exhaustive testing on large workloads and server environments and finding complex bugs in (early versions of) `mimalloc`.
|
||||
* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation.
|
||||
|
||||
# Building
|
||||
|
||||
## Windows
|
||||
@ -208,31 +216,42 @@ or via environment variables.
|
||||
- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
|
||||
- `MIMALLOC_VERBOSE=1`: show verbose messages.
|
||||
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
|
||||
- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce
|
||||
memory fragmentation in long running (server) programs. If performance is impacted,
|
||||
`MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page
|
||||
reset occur less frequently.
|
||||
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
|
||||
- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS
|
||||
that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
|
||||
programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs.
|
||||
As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page
|
||||
reset occur less frequently instead of turning it off completely.
|
||||
- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected
|
||||
at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
|
||||
the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
|
||||
nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
|
||||
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
|
||||
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
|
||||
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
|
||||
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
|
||||
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible).
|
||||
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
|
||||
<!--
|
||||
- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
|
||||
show in the working set even though usually just a small part is committed to physical memory. This is why it
|
||||
turned off by default on Windows as it looks not good in the task manager. However, turning it on has no
|
||||
real drawbacks and may improve performance by a little.
|
||||
-->
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
|
||||
startup and can give quite a (latency) performance improvement on long running workloads. Usually it is better to not use
|
||||
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
|
||||
startup and sometimes this can give a large (latency) performance improvement on big workloads.
|
||||
Usually it is better to not use
|
||||
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
|
||||
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
|
||||
startup only once).
|
||||
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments
|
||||
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
|
||||
With huge OS pages, it may be beneficial to set the setting
|
||||
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
|
||||
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
|
||||
and allocate just a little to take up space in the huge OS page area (which cannot be reset).
|
||||
|
||||
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
|
||||
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
|
||||
OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments.
|
||||
|
||||
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
|
||||
[windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
|
||||
|
||||
@ -280,9 +299,13 @@ the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-i
|
||||
|
||||
### Override on Windows
|
||||
|
||||
<span id="override_on_windows">Overriding on Windows</span> is robust but requires that you link your program explicitly with
|
||||
<span id="override_on_windows">Overriding on Windows</span> is robust and has the
|
||||
particular advantage to be able to redirect all malloc/free calls that go through
|
||||
the (dynamic) C runtime allocator, including those from other DLL's or libraries.
|
||||
|
||||
The overriding on Windows requires that you link your program explicitly with
|
||||
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
|
||||
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
|
||||
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available
|
||||
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
|
||||
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
|
||||
mimalloc (in `mimalloc-override.dll`).
|
||||
@ -297,8 +320,9 @@ is also recommended to also override the `new`/`delete` operations (by including
|
||||
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
|
||||
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
|
||||
|
||||
(Note: in principle, it is possible to patch existing executables
|
||||
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder)
|
||||
(Note: in principle, it is possible to even patch existing executables without any recompilation
|
||||
if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
|
||||
into the import table (and put `mimalloc-redirect.dll` in the same folder)
|
||||
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
|
||||
|
||||
|
||||
|
@ -41,8 +41,11 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
|
||||
------------------------------------------------------ */
|
||||
|
||||
static size_t zone_size(malloc_zone_t* zone, const void* p) {
|
||||
UNUSED(zone); UNUSED(p);
|
||||
return 0; // as we cannot guarantee that `p` comes from us, just return 0
|
||||
UNUSED(zone);
|
||||
if (!mi_is_in_heap_region(p))
|
||||
return 0; // not our pointer, bail out
|
||||
|
||||
return mi_usable_size(p);
|
||||
}
|
||||
|
||||
static void* zone_malloc(malloc_zone_t* zone, size_t size) {
|
||||
|
@ -163,18 +163,30 @@ extern "C" {
|
||||
// Posix & Unix functions definitions
|
||||
// ------------------------------------------------------
|
||||
|
||||
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize);
|
||||
size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p);
|
||||
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p);
|
||||
void cfree(void* p) MI_FORWARD0(mi_free, p);
|
||||
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize);
|
||||
size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p);
|
||||
#if !defined(__ANDROID__)
|
||||
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p);
|
||||
#else
|
||||
size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p);
|
||||
#endif
|
||||
|
||||
// no forwarding here due to aliasing/name mangling issues
|
||||
void* valloc(size_t size) { return mi_valloc(size); }
|
||||
void* pvalloc(size_t size) { return mi_pvalloc(size); }
|
||||
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
|
||||
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
|
||||
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
|
||||
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
|
||||
void* valloc(size_t size) { return mi_valloc(size); }
|
||||
void* pvalloc(size_t size) { return mi_pvalloc(size); }
|
||||
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
|
||||
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
|
||||
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
|
||||
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
|
||||
|
||||
// on some glibc `aligned_alloc` is declared `static inline` so we cannot override it (e.g. Conda). This happens
|
||||
// when _GLIBCXX_HAVE_ALIGNED_ALLOC is not defined. However, in those cases it will use `memalign`, `posix_memalign`,
|
||||
// or `_aligned_malloc` and we can avoid overriding it ourselves.
|
||||
#if _GLIBCXX_HAVE_ALIGNED_ALLOC
|
||||
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__GLIBC__) && defined(__linux__)
|
||||
// forward __libc interface (needed for glibc-based Linux distributions)
|
||||
@ -184,10 +196,10 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
|
||||
void __libc_free(void* p) MI_FORWARD0(mi_free,p);
|
||||
void __libc_cfree(void* p) MI_FORWARD0(mi_free,p);
|
||||
|
||||
void* __libc_valloc(size_t size) { return mi_valloc(size); }
|
||||
void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); }
|
||||
void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); }
|
||||
int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); }
|
||||
void* __libc_valloc(size_t size) { return mi_valloc(size); }
|
||||
void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); }
|
||||
void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); }
|
||||
int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); }
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
24
src/alloc.c
24
src/alloc.c
@ -44,7 +44,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
|
||||
mi_heap_stat_increase(heap, normal[bin], 1);
|
||||
}
|
||||
#endif
|
||||
#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
|
||||
#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST)
|
||||
mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
|
||||
ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
|
||||
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
|
||||
@ -203,7 +203,7 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
|
||||
// Check for heap block overflow by setting up padding at the end of the block
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
|
||||
#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST)
|
||||
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
|
||||
*bsize = mi_page_usable_block_size(page);
|
||||
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
|
||||
@ -506,15 +506,16 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
|
||||
if (p==NULL) return 0;
|
||||
const mi_segment_t* const segment = _mi_ptr_segment(p);
|
||||
const mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
const mi_block_t* const block = (const mi_block_t*)p;
|
||||
const size_t size = mi_page_usable_size_of(page, block);
|
||||
const mi_block_t* block = (const mi_block_t*)p;
|
||||
if (mi_unlikely(mi_page_has_aligned(page))) {
|
||||
ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
|
||||
block = _mi_page_ptr_unalign(segment, page, p);
|
||||
size_t size = mi_page_usable_size_of(page, block);
|
||||
ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block;
|
||||
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
|
||||
return (size - adjust);
|
||||
}
|
||||
else {
|
||||
return size;
|
||||
return mi_page_usable_size_of(page, block);
|
||||
}
|
||||
}
|
||||
|
||||
@ -677,12 +678,13 @@ mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept {
|
||||
// `strndup` using mi_malloc
|
||||
mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept {
|
||||
if (s == NULL) return NULL;
|
||||
size_t m = strlen(s);
|
||||
if (n > m) n = m;
|
||||
char* t = (char*)mi_heap_malloc(heap, n+1);
|
||||
const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found)
|
||||
const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string
|
||||
mi_assert_internal(m <= n);
|
||||
char* t = (char*)mi_heap_malloc(heap, m+1);
|
||||
if (t == NULL) return NULL;
|
||||
memcpy(t, s, n);
|
||||
t[n] = 0;
|
||||
memcpy(t, s, m);
|
||||
t[m] = 0;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
10
src/init.c
10
src/init.c
@ -35,9 +35,9 @@ const mi_page_t _mi_page_empty = {
|
||||
|
||||
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
|
||||
|
||||
#if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8)
|
||||
#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8)
|
||||
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
|
||||
#elif defined(MI_PADDING)
|
||||
#elif (MI_PADDING>0)
|
||||
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
|
||||
#else
|
||||
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
|
||||
@ -312,6 +312,12 @@ static void _mi_thread_done(mi_heap_t* default_heap);
|
||||
// use thread local storage keys to detect thread ending
|
||||
#include <windows.h>
|
||||
#include <fibersapi.h>
|
||||
#if (_WIN32_WINNT < 0x600) // before Windows Vista
|
||||
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
|
||||
WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
|
||||
WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
|
||||
WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex);
|
||||
#endif
|
||||
static DWORD mi_fls_key = (DWORD)(-1);
|
||||
static void NTAPI mi_fls_done(PVOID value) {
|
||||
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
|
||||
|
@ -51,7 +51,11 @@ typedef struct mi_option_desc_s {
|
||||
static mi_option_desc_t options[_mi_option_last] =
|
||||
{
|
||||
// stable options
|
||||
{ MI_DEBUG, UNINIT, MI_OPTION(show_errors) },
|
||||
#if MI_DEBUG || defined(MI_SHOW_ERRORS)
|
||||
{ 1, UNINIT, MI_OPTION(show_errors) },
|
||||
#else
|
||||
{ 0, UNINIT, MI_OPTION(show_errors) },
|
||||
#endif
|
||||
{ 0, UNINIT, MI_OPTION(show_stats) },
|
||||
{ 0, UNINIT, MI_OPTION(verbose) },
|
||||
|
||||
@ -262,13 +266,17 @@ static void mi_recurse_exit(void) {
|
||||
}
|
||||
|
||||
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
|
||||
if (!mi_recurse_enter()) return;
|
||||
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
|
||||
if (!mi_recurse_enter()) return;
|
||||
out = mi_out_get_default(&arg);
|
||||
if (prefix != NULL) out(prefix, arg);
|
||||
out(message, arg);
|
||||
mi_recurse_exit();
|
||||
}
|
||||
else {
|
||||
if (prefix != NULL) out(prefix, arg);
|
||||
out(message, arg);
|
||||
}
|
||||
if (prefix != NULL) out(prefix,arg);
|
||||
out(message,arg);
|
||||
mi_recurse_exit();
|
||||
}
|
||||
|
||||
// Define our own limited `fprintf` that avoids memory allocation.
|
||||
@ -350,6 +358,11 @@ static void mi_error_default(int err) {
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
#if defined(MI_XMALLOC)
|
||||
if (err==ENOMEM || err==EOVERFLOW) { // abort on memory allocation fails in xmalloc mode
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void mi_register_error(mi_error_fun* fun, void* arg) {
|
||||
|
22
src/os.c
22
src/os.c
@ -211,10 +211,12 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
|
||||
void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
|
||||
if (p != NULL) return p;
|
||||
DWORD err = GetLastError();
|
||||
if (err != ERROR_INVALID_ADDRESS) { // if linked with multiple instances, we may have tried to allocate at an already allocated area
|
||||
if (err != ERROR_INVALID_ADDRESS && // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
|
||||
err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
// fall through
|
||||
}
|
||||
#endif
|
||||
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
|
||||
// on modern Windows try use VirtualAlloc2 for aligned allocation
|
||||
@ -227,6 +229,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
|
||||
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1);
|
||||
}
|
||||
#endif
|
||||
// last resort
|
||||
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
@ -614,7 +617,7 @@ static void mi_mprotect_hint(int err) {
|
||||
}
|
||||
|
||||
// Commit/Decommit memory.
|
||||
// Usuelly commit is aligned liberal, while decommit is aligned conservative.
|
||||
// Usually commit is aligned liberal, while decommit is aligned conservative.
|
||||
// (but not for the reset version where we want commit to be conservative as well)
|
||||
static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) {
|
||||
// page align in the range, commit liberally, decommit conservative
|
||||
@ -822,7 +825,7 @@ and possibly associated with a specific NUMA node. (use `numa_node>=0`)
|
||||
-----------------------------------------------------------------------------*/
|
||||
#define MI_HUGE_OS_PAGE_SIZE (GiB)
|
||||
|
||||
#if defined(WIN32) && (MI_INTPTR_SIZE >= 8)
|
||||
#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8)
|
||||
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
|
||||
{
|
||||
mi_assert_internal(size%GiB == 0);
|
||||
@ -865,6 +868,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
|
||||
params[0].ULong = (unsigned)numa_node;
|
||||
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
|
||||
}
|
||||
#else
|
||||
UNUSED(numa_node);
|
||||
#endif
|
||||
// otherwise use regular virtual alloc on older windows
|
||||
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
|
||||
@ -904,6 +909,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
|
||||
}
|
||||
#else
|
||||
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
|
||||
UNUSED(addr); UNUSED(size); UNUSED(numa_node);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
@ -939,6 +945,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
||||
}
|
||||
#else
|
||||
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
||||
UNUSED(pages);
|
||||
if (total_size != NULL) *total_size = 0;
|
||||
return NULL;
|
||||
}
|
||||
@ -1011,7 +1018,12 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
|
||||
/* ----------------------------------------------------------------------------
|
||||
Support NUMA aware allocation
|
||||
-----------------------------------------------------------------------------*/
|
||||
#ifdef WIN32
|
||||
#ifdef _WIN32
|
||||
#if (_WIN32_WINNT < 0x601) // before Win7
|
||||
typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
|
||||
WINBASEAPI VOID WINAPI GetCurrentProcessorNumberEx(_Out_ PPROCESSOR_NUMBER ProcNumber);
|
||||
WINBASEAPI BOOL WINAPI GetNumaProcessorNodeEx(_In_ PPROCESSOR_NUMBER Processor, _Out_ PUSHORT NodeNumber);
|
||||
#endif
|
||||
static size_t mi_os_numa_nodex() {
|
||||
PROCESSOR_NUMBER pnum;
|
||||
USHORT numa_node = 0;
|
||||
|
@ -49,7 +49,7 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
|
||||
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
|
||||
|
||||
// arena.c
|
||||
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
|
||||
void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats);
|
||||
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
|
||||
|
||||
@ -187,7 +187,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
const uintptr_t idx = mi_atomic_increment(®ions_count);
|
||||
if (idx >= MI_REGION_MAX) {
|
||||
mi_atomic_decrement(®ions_count);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
|
||||
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
|
||||
return false;
|
||||
}
|
||||
@ -391,7 +391,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
|
||||
mem_region_t* region;
|
||||
if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) {
|
||||
// was a direct arena allocation, pass through
|
||||
_mi_arena_free(p, size, arena_memid, tld->stats);
|
||||
_mi_arena_free(p, size, arena_memid, full_commit, tld->stats);
|
||||
}
|
||||
else {
|
||||
// allocated in a region
|
||||
@ -454,12 +454,13 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
|
||||
// on success, free the whole region
|
||||
uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start);
|
||||
size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid);
|
||||
uintptr_t commit = mi_atomic_read_relaxed(®ions[i].commit);
|
||||
memset(®ions[i], 0, sizeof(mem_region_t));
|
||||
// and release the whole region
|
||||
mi_atomic_write(®ion->info, 0);
|
||||
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
||||
_mi_abandoned_await_readers(); // ensure no pending reads
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -202,7 +202,6 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
|
||||
uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
|
||||
/*
|
||||
if (idx == 0) {
|
||||
|
||||
// the first page starts after the segment info (and possible guard page)
|
||||
p += segment->segment_info_size;
|
||||
psize -= segment->segment_info_size;
|
||||
@ -1300,7 +1299,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
|
||||
mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
|
||||
|
||||
// claim it and free
|
||||
mi_heap_t* heap = mi_get_default_heap();
|
||||
mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
|
||||
// paranoia: if this it the last reference, the cas should always succeed
|
||||
if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
|
||||
mi_block_set_next(page, block, page->free);
|
||||
@ -1308,16 +1307,16 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
|
||||
page->used--;
|
||||
page->is_zero = false;
|
||||
mi_assert(page->used == 0);
|
||||
mi_segments_tld_t* tld = &heap->tld->segments;
|
||||
mi_tld_t* tld = heap->tld;
|
||||
const size_t bsize = mi_page_usable_block_size(page);
|
||||
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
|
||||
_mi_stat_decrease(&tld->stats->large, bsize);
|
||||
_mi_stat_decrease(&tld->stats.large, bsize);
|
||||
}
|
||||
else {
|
||||
_mi_stat_decrease(&tld->stats->huge, bsize);
|
||||
_mi_stat_decrease(&tld->stats.huge, bsize);
|
||||
}
|
||||
// mi_segments_track_size((long)segment->segment_size, tld);
|
||||
_mi_segment_page_free(page, true, tld);
|
||||
_mi_segment_page_free(page, true, &tld->segments);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,5 +24,8 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include "alloc.c"
|
||||
#include "alloc-aligned.c"
|
||||
#include "alloc-posix.c"
|
||||
#if MI_OSX_ZONE
|
||||
#include "alloc-override-osx.c"
|
||||
#endif
|
||||
#include "init.c"
|
||||
#include "options.c"
|
||||
|
46
src/stats.c
46
src/stats.c
@ -237,9 +237,51 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Use an output wrapper for line-buffered output
|
||||
// (which is nice when using loggers etc.)
|
||||
//------------------------------------------------------------
|
||||
typedef struct buffered_s {
|
||||
mi_output_fun* out; // original output function
|
||||
void* arg; // and state
|
||||
char* buf; // local buffer of at least size `count+1`
|
||||
size_t used; // currently used chars `used <= count`
|
||||
size_t count; // total chars available for output
|
||||
} buffered_t;
|
||||
|
||||
static void mi_buffered_flush(buffered_t* buf) {
|
||||
buf->buf[buf->used] = 0;
|
||||
_mi_fputs(buf->out, buf->arg, NULL, buf->buf);
|
||||
buf->used = 0;
|
||||
}
|
||||
|
||||
static void mi_buffered_out(const char* msg, void* arg) {
|
||||
buffered_t* buf = (buffered_t*)arg;
|
||||
if (msg==NULL || buf==NULL) return;
|
||||
for (const char* src = msg; *src != 0; src++) {
|
||||
char c = *src;
|
||||
if (buf->used >= buf->count) mi_buffered_flush(buf);
|
||||
mi_assert_internal(buf->used < buf->count);
|
||||
buf->buf[buf->used++] = c;
|
||||
if (c == '\n') mi_buffered_flush(buf);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Print statistics
|
||||
//------------------------------------------------------------
|
||||
|
||||
static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit);
|
||||
|
||||
static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) mi_attr_noexcept {
|
||||
static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
|
||||
// wrap the output function to be line buffered
|
||||
char buf[256];
|
||||
buffered_t buffer = { out0, arg0, buf, 0, 255 };
|
||||
mi_output_fun* out = &mi_buffered_out;
|
||||
void* arg = &buffer;
|
||||
|
||||
// and print using that
|
||||
mi_print_header(out,arg);
|
||||
#if MI_STAT>1
|
||||
mi_stat_count_t normal = { 0,0,0,0 };
|
||||
@ -287,7 +329,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun
|
||||
_mi_fprintf(out, arg, ", commit charge: ");
|
||||
mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s");
|
||||
}
|
||||
_mi_fprintf(out, arg, "\n");
|
||||
_mi_fprintf(out, arg, "\n");
|
||||
}
|
||||
|
||||
static mi_msecs_t mi_time_start; // = 0
|
||||
|
@ -184,7 +184,7 @@ int main() {
|
||||
// double_free1();
|
||||
// double_free2();
|
||||
// corrupt_free();
|
||||
// block_overflow1();
|
||||
block_overflow1();
|
||||
|
||||
void* p1 = malloc(78);
|
||||
void* p2 = malloc(24);
|
||||
|
@ -20,16 +20,18 @@ static void msleep(unsigned long msecs) { Sleep(msecs); }
|
||||
static void msleep(unsigned long msecs) { usleep(msecs * 1000UL); }
|
||||
#endif
|
||||
|
||||
void heap_no_delete();
|
||||
void heap_late_free();
|
||||
void padding_shrink();
|
||||
void heap_thread_free_large(); // issue #221
|
||||
void heap_no_delete(); // issue #202
|
||||
void heap_late_free(); // issue #204
|
||||
void padding_shrink(); // issue #209
|
||||
void various_tests();
|
||||
|
||||
int main() {
|
||||
mi_stats_reset(); // ignore earlier allocations
|
||||
// heap_no_delete(); // issue #202
|
||||
// heap_late_free(); // issue #204
|
||||
padding_shrink(); // issue #209
|
||||
heap_thread_free_large();
|
||||
heap_no_delete();
|
||||
heap_late_free();
|
||||
padding_shrink();
|
||||
various_tests();
|
||||
mi_stats_print(NULL);
|
||||
return 0;
|
||||
@ -157,3 +159,17 @@ void padding_shrink(void)
|
||||
t1.join();
|
||||
mi_free(shared_p);
|
||||
}
|
||||
|
||||
|
||||
// Issue #221
|
||||
void heap_thread_free_large_worker() {
|
||||
mi_free(shared_p);
|
||||
}
|
||||
|
||||
void heap_thread_free_large() {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8);
|
||||
auto t1 = std::thread(heap_thread_free_large_worker);
|
||||
t1.join();
|
||||
}
|
||||
}
|
||||
|
@ -152,6 +152,9 @@ int main() {
|
||||
}
|
||||
result = ok;
|
||||
});
|
||||
CHECK_BODY("malloc-aligned5", {
|
||||
void* p = mi_malloc_aligned(4097,4096); size_t usable = mi_usable_size(p); result = usable >= 4097 && usable < 10000; mi_free(p);
|
||||
});
|
||||
CHECK_BODY("malloc-aligned-at1", {
|
||||
void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p);
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user