merge from dev-exp

daan 2019-11-21 17:03:30 -08:00
commit 1066be1594
30 changed files with 1167 additions and 356 deletions

View File

@ -6,18 +6,15 @@ set(CMAKE_CXX_STANDARD 17)
option(MI_OVERRIDE "Override the standard malloc interface" ON)
option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF)
option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
include("cmake/mimalloc-config-version.cmake")
include("CheckIncludeFile")
set(mi_install_dir "lib/mimalloc-${mi_version}")
set(mi_sources
src/stats.c
@ -32,29 +29,33 @@ set(mi_sources
src/options.c
src/init.c)
# Set default build type
# -----------------------------------------------------------------------------
# Convenience: set default build type depending on the build directory
# -----------------------------------------------------------------------------
if (NOT CMAKE_BUILD_TYPE)
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$")
message(STATUS "No build type selected, default to *** Debug ***")
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON")
message(STATUS "No build type selected, default to: Debug")
set(CMAKE_BUILD_TYPE "Debug")
else()
message(STATUS "No build type selected, default to *** Release ***")
message(STATUS "No build type selected, default to: Release")
set(CMAKE_BUILD_TYPE "Release")
endif()
else()
message(STATUS "Build type specified as *** ${CMAKE_BUILD_TYPE} ***")
endif()
if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
message(STATUS "Default to secure build")
set(MI_SECURE "ON")
endif()
# -----------------------------------------------------------------------------
# Process options
# -----------------------------------------------------------------------------
if(CMAKE_C_COMPILER_ID MATCHES "MSVC")
set(MI_USE_CXX "ON")
endif()
# Options
if(MI_OVERRIDE MATCHES "ON")
message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
if(APPLE)
@ -70,15 +71,9 @@ if(MI_OVERRIDE MATCHES "ON")
endif()
endif()
if(MI_SECURE_FULL MATCHES "ON")
message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)")
if(MI_SECURE MATCHES "ON")
message(STATUS "Set full secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=4)
set(MI_SECURE "ON")
else()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
endif()
endif()
if(MI_SEE_ASM MATCHES "ON")
@ -87,7 +82,12 @@ if(MI_SEE_ASM MATCHES "ON")
endif()
if(MI_CHECK_FULL MATCHES "ON")
message(STATUS "Set debug level to full invariant checking (MI_CHECK_FULL=ON)")
message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead")
set(MI_DEBUG_FULL "ON")
endif()
if(MI_DEBUG_FULL MATCHES "ON")
message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)")
list(APPEND mi_defines MI_DEBUG=3) # full invariant checking
endif()
@ -97,16 +97,6 @@ if(MI_USE_CXX MATCHES "ON")
set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX )
endif()
CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H)
if(MI_HAVE_NUMA_H)
list(APPEND mi_defines MI_HAS_NUMA)
list(APPEND mi_libraries numa)
else()
if (NOT(WIN32))
message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)")
endif()
endif()
# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
@ -122,19 +112,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
endif()
endif()
if(NOT(CMAKE_BUILD_TYPE MATCHES "Release|release|RelWithDebInfo|relwithdebinfo"))
string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type)
set(mi_basename "mimalloc-${build_type}")
else()
if(MI_SECURE MATCHES "ON")
set(mi_basename "mimalloc-secure")
else()
set(mi_basename "mimalloc")
endif()
endif()
message(STATUS "Output library name : ${mi_basename}")
message(STATUS "Installation directory: ${mi_install_dir}")
# extra needed libraries
if(WIN32)
list(APPEND mi_libraries psapi shell32 user32)
@ -147,9 +124,28 @@ else()
endif()
# -----------------------------------------------------------------------------
# Main targets
# Install and output names
# -----------------------------------------------------------------------------
set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}")
if(MI_SECURE MATCHES "ON")
set(mi_basename "mimalloc-secure")
else()
set(mi_basename "mimalloc")
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$"))
set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version
endif()
message(STATUS "")
message(STATUS "Library base name: ${mi_basename}")
message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}")
message(STATUS "Install directory: ${mi_install_dir}")
message(STATUS "")
# -----------------------------------------------------------------------------
# Main targets
# -----------------------------------------------------------------------------
# shared library
add_library(mimalloc SHARED ${mi_sources})
@ -251,7 +247,7 @@ endif()
if (MI_OVERRIDE MATCHES "ON")
target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE)
if(NOT WIN32)
# It is only possible to override malloc on Windows when building as a DLL. (src/alloc-override.c)
# It is only possible to override malloc on Windows when building as a DLL.
target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE)
target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE)
endif()
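
For reference, a minimal out-of-source configuration using the renamed options might look as follows (directory names and flags are illustrative; `MI_CHECK_FULL` still works but now only forwards to `MI_DEBUG_FULL`, and a build directory ending in `secure` enables `MI_SECURE` by default):
```
> mkdir -p out/debug
> cd out/debug
> cmake -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON ../..
> cmake --build .

> mkdir -p out/secure
> cd out/secure
> cmake -DMI_SECURE=ON ../..
> cmake --build .
```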

View File

@ -35,22 +35,32 @@ jobs:
CC: gcc
CXX: g++
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
CC: gcc
CXX: g++
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure:
CC: gcc
CXX: g++
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
Debug Clang:
CC: clang
CXX: clang++
BuildType: debug-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release Clang:
CC: clang
CXX: clang++
BuildType: release-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure Clang:
CC: clang
CXX: clang++
BuildType: secure-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
steps:
- task: CMake@1

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,5 +1,5 @@
set(mi_version_major 1)
set(mi_version_minor 1)
set(mi_version_minor 2)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})

View File

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\options.c">
<Filter>Header Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-atomic.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-override.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-types.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Header Files">
<UniqueIdentifier>{f1fccf27-17b9-42dd-ba51-6070baff85c6}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files">
<UniqueIdentifier>{39cb7e38-69d0-43fb-8406-6a0f7cefc3b4}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@ -149,8 +149,8 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="mimalloc-override.vcxproj">
<Project>{abb5eae7-b3e6-432e-b636-333449892ea7}</Project>
<ProjectReference Include="mimalloc.vcxproj">
<Project>{abb5eae7-b3e6-432e-b636-333449892ea6}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@ -0,0 +1,78 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override-osx.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\options.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-atomic.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-types.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Header Files">
<UniqueIdentifier>{2b556b10-f559-4b2d-896e-142652adbf0c}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files">
<UniqueIdentifier>{852a14ae-6dde-4e95-8077-ca705e97e5af}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@ -241,7 +241,7 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x)
#endif
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield() {
static inline void mi_atomic_yield(void) {
sched_yield();
}
#else

View File

@ -22,13 +22,13 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(_MSC_VER)
#define mi_decl_noinline __declspec(noinline)
#define mi_attr_noreturn
#define mi_attr_noreturn
#elif defined(__GNUC__) || defined(__clang__)
#define mi_decl_noinline __attribute__((noinline))
#define mi_attr_noreturn __attribute__((noreturn))
#else
#define mi_decl_noinline
#define mi_attr_noreturn
#define mi_attr_noreturn
#endif
@ -55,8 +55,6 @@ size_t _mi_os_page_size(void);
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
int _mi_os_numa_node(mi_os_tld_t* tld);
int _mi_os_numa_node_count(void);
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
@ -77,6 +75,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
// "page.c"
@ -103,11 +102,11 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
uintptr_t _mi_heap_random(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
typedef int64_t mi_msecs_t;
mi_msecs_t _mi_clock_now(void);
mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);
@ -409,56 +408,86 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// Note: we pass a `null` value to be used as the `NULL` value for the
// end of a free list. This is to prevent the cookie itself from ever
// being present among user blocks (as `cookie^0==cookie`).
// -------------------------------------------------------------------
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
}
static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) {
static inline bool mi_is_in_same_page(const void* p, const void* q) {
mi_segment_t* segment = _mi_ptr_segment(p);
if (_mi_ptr_segment(q) != segment) return false;
return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
#ifdef MI_ENCODE_FREELIST
return (mi_block_t*)(block->next ^ cookie);
mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
if (mi_unlikely((void*)b==null)) { b = NULL; }
return b;
#else
UNUSED(cookie);
UNUSED(cookie); UNUSED(null);
return (mi_block_t*)block->next;
#endif
}
static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) {
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
#ifdef MI_ENCODE_FREELIST
if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
block->next = (mi_encoded_t)next ^ cookie;
#else
UNUSED(cookie);
UNUSED(cookie); UNUSED(null);
block->next = (mi_encoded_t)next;
#endif
}
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* next = mi_block_nextx(page->cookie,block);
mi_block_t* next = mi_block_nextx(page,block,page->cookie);
// check for free list corruption: is `next` at least in our segment range?
// TODO: it is better to check if it is actually inside our page but that is more expensive
// to calculate. Perhaps with a relative free list this becomes feasible?
if (next!=NULL && !mi_is_in_same_segment(block, next)) {
// TODO: check if `next` is `page->block_size` aligned?
if (next!=NULL && !mi_is_in_same_page(block, next)) {
_mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
next = NULL;
}
}
return next;
#else
UNUSED(page);
return mi_block_nextx(0, block);
return mi_block_nextx(page,block,0);
#endif
}
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#ifdef MI_ENCODE_FREELIST
mi_block_set_nextx(page->cookie,block,next);
mi_block_set_nextx(page,block,next, page->cookie);
#else
UNUSED(page);
mi_block_set_nextx(0, block, next);
mi_block_set_nextx(page,block, next,0);
#endif
}
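
To make the encoding above easier to follow in isolation, here is a small standalone sketch (not mimalloc code; the struct, cookie value, and sentinel are invented for the example). It shows how a per-page cookie XOR-encodes the `next` field and how a non-zero `null` sentinel (mimalloc passes the page pointer) keeps the raw cookie from ever appearing in a freed block:
```
#include <stdint.h>
#include <stdio.h>

typedef struct block_s { uintptr_t next; } block_t;

static block_t* block_nextx(const void* null, const block_t* block, uintptr_t cookie) {
  block_t* b = (block_t*)(block->next ^ cookie);       // decode
  return ((const void*)b == null ? NULL : b);          // map sentinel back to NULL
}

static void block_set_nextx(const void* null, block_t* block, const block_t* next, uintptr_t cookie) {
  if (next == NULL) next = (const block_t*)null;       // encode NULL as the sentinel
  block->next = (uintptr_t)next ^ cookie;
}

int main(void) {
  uintptr_t cookie = 0xA5A5A5A5u;   // made-up per-page cookie
  block_t sentinel;                 // stands in for the page address mimalloc passes as `null`
  block_t a, b;
  block_set_nextx(&sentinel, &a, &b, cookie);    // a -> b
  block_set_nextx(&sentinel, &b, NULL, cookie);  // b -> end of list
  printf("a->next decodes to b   : %d\n", block_nextx(&sentinel, &a, cookie) == &b);
  printf("b->next decodes to NULL: %d\n", block_nextx(&sentinel, &b, cookie) == NULL);
  printf("stored end marker != cookie: %d\n", b.next != cookie);  // with a plain 0, the stored value would equal the cookie
  return 0;
}
```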
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------
int _mi_os_numa_node_get(mi_os_tld_t* tld);
size_t _mi_os_numa_node_count_get(void);
extern size_t _mi_numa_node_count;
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
if (mi_likely(_mi_numa_node_count == 1)) return 0;
else return _mi_os_numa_node_get(tld);
}
static inline size_t _mi_os_numa_node_count(void) {
if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
else return _mi_os_numa_node_count_get();
}
// -------------------------------------------------------------------
// Getting the thread id should be performant
// as it is called in the fast path of `_mi_free`,

View File

@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_DEBUG for debug mode
// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
// #define MI_DEBUG 2 // + internal assertion checks
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON)
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
#if !defined(MI_DEBUG)
#if !defined(NDEBUG) || defined(_DEBUG)
#define MI_DEBUG 2
@ -401,7 +401,6 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
@ -416,6 +415,16 @@ typedef struct mi_span_queue_s {
#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
typedef int64_t mi_msecs_t;
// OS thread local data
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
// Segments thread local data
typedef struct mi_segments_tld_s {
mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
@ -427,14 +436,9 @@ typedef struct mi_segments_tld_s {
size_t cache_size; // total size of all segments in the cache
mi_segment_t* cache; // (small) cache of segments
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
} mi_segments_tld_t;
// OS thread local data
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
// Thread local data
struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count

View File

@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 110 // major + 2 digits minor
#define MI_MALLOC_VERSION 120 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
// deprecated
@ -270,13 +270,13 @@ typedef enum mi_option_e {
mi_option_reserve_huge_os_pages,
mi_option_segment_cache,
mi_option_page_reset,
mi_option_cache_reset,
mi_option_segment_reset,
mi_option_reset_decommits,
mi_option_eager_commit_delay,
mi_option_allow_decommit,
mi_option_segment_reset,
mi_option_reset_delay,
mi_option_use_numa_nodes,
mi_option_os_tag,
mi_option_max_numa_node,
mi_option_max_errors,
_mi_option_last
} mi_option_t;
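
As a usage sketch of the changed `mi_reserve_huge_os_pages_interleave` signature (the page count here is arbitrary): passing `0` for `numa_nodes` falls back to the detected node count, and the timeout mirrors the `pages*500` milliseconds used internally when `MIMALLOC_RESERVE_HUGE_OS_PAGES` is set:
```
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  size_t pages = 4;  // arbitrary: reserve 4 x 1GiB huge OS pages
  // 0 = spread over all detected NUMA nodes; timeout of 500ms per page
  int err = mi_reserve_huge_os_pages_interleave(pages, 0, pages * 500);
  if (err != 0) fprintf(stderr, "huge page reservation failed: %d\n", err);
  printf("mimalloc version: %d\n", mi_version());
  return 0;
}
```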

View File

@ -1,7 +1,7 @@
<img align="left" width="100" height="100" src="doc/mimalloc-logo.png"/>
[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=master"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=dev"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
# mimalloc
@ -37,7 +37,7 @@ Notable aspects of the design include:
programs.
- __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
randomized allocation, encrypted free lists, etc. to protect against various
heap vulnerabilities. The performance penalty is only around 3% on average
heap vulnerabilities. The performance penalty is usually around 10% on average
over our benchmarks.
- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
A heap can be destroyed at once instead of deallocating each object separately.
@ -56,6 +56,7 @@ Enjoy!
### Releases
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
* 2019-10-07, `v1.1.0`: stable release 1.1.
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
@ -64,7 +65,7 @@ Enjoy!
## Windows
Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build.
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`).
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc
in the entire program.
@ -97,7 +98,7 @@ maintains detailed statistics as:
This will name the shared library as `libmimalloc-debug.so`.
Finally, you can build a _secure_ version that uses guard pages, encrypted
free lists, etc, as:
free lists, etc., as:
```
> mkdir -p out/secure
> cd out/secure
@ -138,6 +139,9 @@ target_link_libraries(myapp PUBLIC mimalloc-static)
```
to link with the static library. See `test\CMakeLists.txt` for an example.
For best performance in C++ programs, it is also recommended to override the
global `new` and `delete` operators. For convenience, mimalloc provides
[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
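
For instance, a minimal C++ program using it might look like this (the file name is illustrative; link against the mimalloc library as shown above):
```
// main.cpp -- include the header in exactly one source file of the program
#include <mimalloc-new-delete.h>   // defines the global new/delete operators in terms of mimalloc
#include <mimalloc.h>
#include <iostream>

int main() {
  int* p = new int(42);            // now served by mimalloc
  std::cout << "mimalloc version: " << mi_version() << "\n";
  delete p;
  return 0;
}
```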
You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
@ -188,18 +192,18 @@ or via environment variables.
- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
- `MIMALLOC_VERBOSE=1`: show verbose messages.
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
can have fragmented memory.
- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
show in the working set even though usually just a small part is committed to physical memory. This is why it is
turned off by default on Windows, as it does not look good in the task manager. However, in reality it is always better
to turn it on as it improves performance and has no other drawbacks.
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
contiguous physical memory can take a long time when memory is fragmented. Still experimental.
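
For example, these variables can simply be set on the command line when launching a program (`./myprogram` is a placeholder):
```
> MIMALLOC_SHOW_STATS=1 MIMALLOC_VERBOSE=1 MIMALLOC_RESERVE_HUGE_OS_PAGES=4 ./myprogram
```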
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
@ -211,7 +215,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically
## Dynamic override
This is the recommended way to override the standard malloc interface.
### Linux, BSD
@ -244,29 +248,31 @@ resolved to the _mimalloc_ library.
Note that certain security restrictions may apply when doing this from
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
Note: unfortunately, at this time, dynamic overriding on macOS seems broken but is actively being worked on
(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)).
### Windows
On Windows you need to link your program explicitly with the mimalloc
DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
in the same folder as the mimalloc DLL at runtime (as it is referred to by the mimalloc DLL).
The redirection DLLs ensure all calls to the C runtime malloc API get redirected to mimalloc.
DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
mimalloc (in `mimalloc-override.dll`).
To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
call to the mimalloc API in the `main` function, like `mi_version()`
(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
for an example on how to use this.
for an example on how to use this. For best performance on Windows with C++, it
is highly recommended to also override the `new`/`delete` operations (as described
in the introduction).
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected.
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
(Note: in principle, it should be possible to patch existing executables
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into
the import table (and putting `mimalloc-redirect.dll` in the same folder)
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
(Note: in principle, it is possible to patch existing executables
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder)
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
## Static override
@ -282,6 +288,12 @@ object file. For example:
> gcc -o myprogram mimalloc-override.o myfile1.c ...
```
Another way to override statically, which works on all platforms, is to
link statically to mimalloc (as shown in the introduction) and include a
header file in each source file that re-defines `malloc` etc. to `mi_malloc`.
This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). Note that this only works reliably if all sources are
under your control; otherwise pointers from different heaps may get mixed.
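
A minimal sketch of this approach (file contents are illustrative) is to include the header at the top of each source file and link against mimalloc as before:
```
// myfile.c -- hypothetical example source file
#include <mimalloc-override.h>  // re-defines malloc, free, etc. to the mi_ equivalents
#include <stdio.h>

int main(void) {
  void* p = malloc(64);   // routed to mi_malloc by the override header
  printf("allocated at %p\n", p);
  free(p);                // routed to mi_free
  return 0;
}
```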
# Performance

View File

@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con
}
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = mi_block_nextx(page->cookie, block); // pretend it is freed, and get the decoded first field
mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL?
{
@ -230,14 +230,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
}
else {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* heap = page->heap;
mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap->cookie,block,dfree);
mi_block_set_nextx(heap,block,dfree, heap->cookie);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}

View File

@ -33,6 +33,7 @@ of 256MiB in practice.
#include "bitmap.inc.c" // atomic bitmap
// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
@ -40,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
int _mi_os_numa_node_count(void);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
/* -----------------------------------------------------------
@ -61,13 +62,15 @@ typedef uintptr_t mi_block_info_t;
typedef struct mi_arena_s {
uint8_t* start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
int numa_node; // associated NUMA node
bool is_zero_init; // is the arena zero initialized?
bool is_committed; // is the memory committed
bool is_large; // large OS page allocated
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks
mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed?
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
@ -109,7 +112,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0; // wrap around
if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {
if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
return true;
}
@ -121,8 +124,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
/* -----------------------------------------------------------
Arena cache
----------------------------------------------------------- */
#define MI_CACHE_MAX (8)
#define MI_MAX_NUMA (64)
#define MI_CACHE_MAX (64)
#define MI_MAX_NUMA (16)
#define MI_SLOT_IN_USE ((void*)1)
@ -215,25 +218,42 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
----------------------------------------------------------- */
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero, size_t* memid)
bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_bitmap_index_t bitmap_index;
if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) {
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*memid = mi_memid_create(arena_index, bitmap_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE));
if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
*memid = mi_memid_create(arena_index, bitmap_index);
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*large = arena->is_large;
if (arena->is_committed) {
// always committed
*commit = true;
}
return NULL;
else if (commit) {
// ensure commit now
bool any_uncommitted;
mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
if (any_uncommitted) {
bool commit_zero;
_mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
}
return p;
}
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
bool* commit, bool* large, bool* is_zero,
size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = MI_MEMID_OS;
*is_zero = false;
@ -258,7 +278,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
@ -270,7 +290,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
@ -285,9 +305,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
// finally, fall back to the OS
*is_zero = true;
*memid = MI_MEMID_OS;
if (*large) {
*large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed
}
return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
}
@ -329,7 +346,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
return;
}
const size_t blocks = mi_block_count_of_size(size);
bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);
bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
if (!ones) {
_mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
return;
@ -389,15 +406,17 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = true;
arena->is_zero_init = true;
arena->is_committed = true;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_map[bcount];
arena->blocks_dirty = &arena->blocks_inuse[bcount];
arena->blocks_committed = NULL;
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
if (post > 0) {
// don't use leftover bits at the end
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL);
mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
}
mi_arena_add(arena);
@ -405,22 +424,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
}
// reserve huge pages evenly among all numa nodes.
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept {
// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
if (pages == 0) return 0;
// pages per numa node
int numa_count = _mi_os_numa_node_count();
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs / numa_count) + 50;
// reserve evenly among numa nodes
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
@ -437,7 +456,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0));
int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}

View File

@ -8,11 +8,11 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
This file is meant to be included in other files for efficiency.
It implements a bitmap that can set/reset sequences of bits atomically
and is used to concurrently claim memory ranges.
A bitmap is an array of fields where each field is a machine word (`uintptr_t`)
A current limitation is that the bit sequences cannot cross fields
and that the sequence must be smaller or equal to the bits in a field.
---------------------------------------------------------------------------- */
#pragma once
@ -59,7 +59,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
return ((((uintptr_t)1 << count) - 1) << bitidx);
@ -104,10 +104,30 @@ static inline size_t mi_bsr(uintptr_t x) {
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// Try to atomically claim a sequence of `count` bits at `idx`
// in the bitmap field. Returns `true` on success.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS);
mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]);
if ((field & mask) == 0) { // free?
if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) {
// claimed!
return true;
}
}
return false;
}
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
volatile _Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_read(field);
@ -136,7 +156,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con
continue;
}
else {
// success, we claimed the bits!
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
}
@ -160,9 +180,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
for (size_t idx = 0; idx < bitmap_fields; idx++) {
if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) {
if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
@ -170,39 +190,51 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields,
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously
// Returns `true` if all `count` bits were 1 previously.
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
mi_assert_internal((bitmap[idx] & mask) == mask);
// mi_assert_internal((bitmap[idx] & mask) == mask);
uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == 0);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
uintptr_t prev = mi_atomic_or(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
return ((prev & mask) == 0);
}
// Returns `true` if all `count` bits were 1
static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == 0);
return ((mi_atomic_read(&bitmap[idx]) & mask) == mask);
mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]);
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
return ((field & mask) == mask);
}
#endif
static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}
#endif

View File

@ -223,7 +223,7 @@ static void mi_heap_free(mi_heap_t* heap) {
// reset default
if (mi_heap_is_default(heap)) {
_mi_heap_default = heap->tld->heap_backing;
_mi_heap_set_default_direct(heap->tld->heap_backing);
}
// and free the used memory
mi_free(heap);
@ -354,8 +354,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
if (!mi_heap_is_initialized(heap)) return NULL;
mi_assert_expensive(mi_heap_is_valid(heap));
mi_heap_t* old = _mi_heap_default;
_mi_heap_default = heap;
mi_heap_t* old = mi_get_default_heap();
_mi_heap_set_default_direct(heap);
return old;
}

View File

@ -19,7 +19,7 @@ const mi_page_t _mi_page_empty = {
0,
#endif
0, // used
NULL,
NULL,
ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0),
0, NULL, NULL, NULL
#ifndef MI_ENCODE_FREELIST
@ -103,28 +103,31 @@ const mi_heap_t _mi_heap_empty = {
};
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))
static const mi_tld_t tld_empty = {
0,
false,
NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats }, // segments
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats, tld_empty_os }, // segments
{ 0, tld_empty_stats }, // os
{ MI_STATS_NULL } // stats
};
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os)))
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
@ -214,7 +217,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_tld_t tld;
} mi_thread_data_t;
// Initialize the thread local default heap, called from `mi_thread_init`
@ -222,8 +225,8 @@ static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (_mi_is_main_thread()) {
// the main heap is statically allocated
_mi_heap_default = &_mi_heap_main;
mi_assert_internal(_mi_heap_default->tld->heap_backing == _mi_heap_default);
_mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
@ -242,26 +245,26 @@ static bool _mi_heap_init(void) {
heap->tld = tld;
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
_mi_heap_set_default_direct(heap);
}
return false;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_heap_done(void) {
mi_heap_t* heap = _mi_heap_default;
static bool _mi_heap_done(mi_heap_t* heap) {
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_default = (_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
_mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
// todo: delete all non-backing heaps?
// switch to backing heap and free it
heap = heap->tld->heap_backing;
if (!mi_heap_is_initialized(heap)) return false;
// collect if not the main thread
if (heap != &_mi_heap_main) {
_mi_heap_collect_abandon(heap);
@ -301,6 +304,8 @@ static bool _mi_heap_done(void) {
// to set up the thread local keys.
// --------------------------------------------------------
static void _mi_thread_done(mi_heap_t* default_heap);
#ifdef __wasi__
// no pthreads in the WebAssembly Standard Interface
#elif !defined(_WIN32)
@ -315,14 +320,14 @@ static bool _mi_heap_done(void) {
#include <fibersapi.h>
static DWORD mi_fls_key;
static void NTAPI mi_fls_done(PVOID value) {
if (value!=NULL) mi_thread_done();
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
}
#elif defined(MI_USE_PTHREADS)
// use pthread local storage keys to detect thread ending
#include <pthread.h>
static pthread_key_t mi_pthread_key;
static void mi_pthread_done(void* value) {
if (value!=NULL) mi_thread_done();
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
}
#elif defined(__wasi__)
// no pthreads in the WebAssembly Standard Interface
@ -356,6 +361,8 @@ void mi_thread_init(void) mi_attr_noexcept
mi_process_init();
// initialize the thread local default heap
// (this will call `_mi_heap_set_default_direct` and thus set the
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_heap_init()) return; // returns true if already initialized
// don't further initialize for the main thread
@ -363,33 +370,38 @@ void mi_thread_init(void) mi_attr_noexcept
_mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1);
// set hooks so our mi_thread_done() will be called
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_fls_done` is called
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
#endif
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
void mi_thread_done(void) mi_attr_noexcept {
_mi_thread_done(mi_get_default_heap());
}
static void _mi_thread_done(mi_heap_t* heap) {
// stats
mi_heap_t* heap = mi_get_default_heap();
if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) {
_mi_stat_decrease(&heap->tld->stats.threads, 1);
}
// abandon the thread local heap
if (_mi_heap_done()) return; // returns true if already ran
//if (!_mi_is_main_thread()) {
// _mi_verbose_message("thread done: 0x%zx\n", _mi_thread_id());
//}
if (_mi_heap_done(heap)) return; // returns true if already ran
}
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
_mi_heap_default = heap;
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, heap);
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, heap);
#endif
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
@ -409,7 +421,7 @@ bool mi_is_redirected() mi_attr_noexcept {
}
// Communicate with the redirection module on Windows
#if defined(_WIN32) && defined(MI_SHARED_LIB)
#if defined(_WIN32) && defined(MI_SHARED_LIB)
#ifdef __cplusplus
extern "C" {
#endif
@ -455,11 +467,6 @@ static void mi_process_load(void) {
if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) {
_mi_fputs(NULL,NULL,msg);
}
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, pages*500);
}
}
// Initialize the process; called by thread_init or the process loader
@ -469,7 +476,7 @@ void mi_process_init(void) mi_attr_noexcept {
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_heap_t* h = _mi_heap_default;
mi_heap_t* h = mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
@ -484,8 +491,14 @@ void mi_process_init(void) mi_attr_noexcept {
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
mi_thread_init();
mi_stats_reset(); // only call stat reset *after* thread init (otherwise the heap tld == NULL)
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
}
// Called when the process is done (through `at_exit`)
@ -512,7 +525,7 @@ static void mi_process_done(void) {
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// Windows DLL: easy to hook into process_init and thread_done
// Windows DLL: easy to hook into process_init and thread_done
__declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
UNUSED(reserved);
UNUSED(inst);

src/memory.c (new file)

@ -0,0 +1,485 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads and
need to be accessed using atomic operations.
We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
simpler. Generally though that requires shrinking of huge pages, and may overuse
memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to
claim blocks which may be contended
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
linearly. At what point will direct OS calls be faster? Is there a way to
do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
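/* A rough layering sketch of what is described above (a sketch; names as used in this file):

     segment.c / huge object allocation
        |   _mi_mem_alloc_aligned / _mi_mem_free / _mi_mem_commit / ...   <- this file (regions)
        v
     arena.c   _mi_arena_alloc_aligned / _mi_arena_free    (pre-reserved (huge page) arenas)
        v
     os.c      VirtualAlloc / mmap / sbrk                  (raw OS memory)
*/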
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
#include "bitmap.inc.c"
// Internal raw OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
// arena.c
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
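// Worked example of the constants above on a 64-bit build (a sketch, assuming
// MI_SEGMENT_SIZE is 4MiB and MI_BITMAP_FIELD_BITS is 64):
//   MI_REGION_SIZE          = 4MiB * 64        = 256MiB   (one bitmap field per region)
//   MI_REGION_MAX           = 256GiB / 256MiB  = 1024     (entries in the region map)
//   MI_REGION_MAX_OBJ_SIZE  = (64/4) * 4MiB    = 64MiB    (largest request served from a region;
//                                                          larger requests fall back to the arena/OS)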
// Region info packs a validity flag, an `is_large` flag, and the associated
// numa node into a single word so it can be read and written atomically.
typedef union mi_region_info_u {
uintptr_t value;
struct {
bool valid;
bool is_large;
int numa_node;
};
} mi_region_info_t;
// A region owns a chunk of MI_REGION_SIZE (256MiB) virtual memory, with
// a bitmap that tracks one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association)
volatile _Atomic(void*) start; // start of the memory area (and flags)
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed))
mi_bitmap_field_t reset; // track reset per block
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
} mem_region_t;
// The region map
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
return _mi_divide_up(size, MI_SEGMENT_SIZE);
}
/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size());
}
*/
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false;
size_t count = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
}
return false;
}
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
void* start = mi_atomic_read_ptr(&region->start);
mi_assert_internal(start != NULL);
return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(&regions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}
static size_t mi_memid_create_from_arena(size_t arena_memid) {
return (arena_memid << 1) | 1;
}
static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if ((id&1)==1) {
if (arena_memid != NULL) *arena_memid = (id>>1);
return true;
}
else {
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = &regions[idx];
return false;
}
}
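// The `memid` encoding used by the helpers above, by example (a sketch):
//   region block:  id = (region_idx*MI_BITMAP_FIELD_BITS + bit_idx) << 1   (low bit 0)
//   arena memory:  id = (arena_memid << 1) | 1                             (low bit 1)
// so `mi_memid_is_arena` only needs to test the low bit, and `_mi_mem_free`
// can route a pointer back to either the region bitmaps or `_mi_arena_free`.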
/* ----------------------------------------------------------------------------
Allocate a fresh region from the OS (or an arena)
-----------------------------------------------------------------------------*/
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// not out of regions yet?
if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
// try to allocate a fresh region from the OS
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
bool region_large = (commit && allow_large);
bool is_zero = false;
size_t arena_memid = 0;
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
mi_assert_internal(!region_large || region_commit);
// claim a fresh slot
const uintptr_t idx = mi_atomic_increment(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
return false;
}
// allocated, initialize and claim the initial blocks
mem_region_t* r = &regions[idx];
r->arena_memid = arena_memid;
mi_atomic_write(&r->in_use, 0);
mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
mi_atomic_write(&r->reset, 0);
*bit_idx = 0;
mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_write_ptr(&r->start, start);
// and share it
mi_region_info_t info;
info.valid = true;
info.is_large = region_large;
info.numa_node = _mi_os_numa_node(tld);
mi_atomic_write(&r->info, info.value); // now make it available to others
*region = r;
return true;
}
/* ----------------------------------------------------------------------------
Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
// initialized at all?
mi_region_info_t info;
info.value = mi_atomic_read_relaxed(&region->info);
if (info.value==0) return false;
// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = info.numa_node;
if (rnode >= 0 && rnode != numa_node) return false;
}
// check allow-large
if (!allow_large && info.is_large) return false;
return true;
}
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
if (mi_region_is_suitable(r, numa_node, allow_large)) {
if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
}
}
return false;
}
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
// try to claim in existing regions
if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// found a region and claimed `blocks` at `bit_idx`
mi_assert_internal(region != NULL);
mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
mi_region_info_t info;
info.value = mi_atomic_read(&region->info);
void* start = mi_atomic_read_ptr(&region->start);
mi_assert_internal(!(info.is_large && !*is_large));
mi_assert_internal(start != NULL);
*is_zero = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx);
*is_large = info.is_large;
*memid = mi_memid_create(region, bit_idx);
void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
// commit
if (*commit) {
// ensure commit
bool any_uncommitted;
mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
if (any_uncommitted) {
mi_assert_internal(!info.is_large);
bool commit_zero;
_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld);
if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
}
mi_assert_internal(mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
mi_assert_internal(!info.is_large);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit);
mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
bool reset_zero;
_mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
if (reset_zero) *is_zero = true;
}
mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
// and return the allocation
mi_assert_internal(p != NULL);
return p;
}
/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/
// Allocate `size` memory aligned at `alignment`. Return non-NULL on success, with a given memory `id`.
// (`id` is abstract: it encodes either a region block (see `mi_memid_create`) or an arena allocation)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = 0;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// allocate from regions if possible
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL) {
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
return p;
}
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
// and otherwise fall back to the OS
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
return p;
}
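// Usage sketch for this pair of entry points (hedged; `tld` stands for the caller's
// `mi_os_tld_t`, as the segment allocator would pass it):
//
//   bool   commit = true, large = false, is_zero = false;
//   size_t memid  = 0;
//   void*  p = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_ALIGN,
//                                    &commit, &large, &is_zero, &memid, tld);
//   if (p != NULL) {
//     // ... use the memory ...
//     _mi_mem_free(p, MI_SEGMENT_SIZE, memid, commit /* full_commit */,
//                  false /* any_reset */, tld);
//   }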
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
size = _mi_align_up(size, _mi_os_page_size());
size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, tld->stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
const size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
mi_region_info_t info;
info.value = mi_atomic_read(&region->info);
mi_assert_internal(info.value != 0);
void* blocks_start = mi_region_blocks_start(region, bit_idx);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
// committed?
if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
}
if (any_reset) {
// set the is_reset bits if any pages were reset
mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
}
// reset the blocks to reduce the working set.
if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) &&
mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead
{
bool any_unreset;
mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
if (any_unreset) {
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
}
// and unclaim
bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
}
}
/* ----------------------------------------------------------------------------
collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region
uintptr_t m;
do {
m = mi_atomic_read_relaxed(&region->in_use);
} while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
if (m == 0) {
// on success, free the whole region
void* start = mi_atomic_read_ptr(&regions[i].start);
size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid);
memset(&regions[i], 0, sizeof(mem_region_t));
// and release the whole region
mi_atomic_write(&region->info, 0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
}
}
}
}
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_reset(p, size, tld->stats);
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_unreset(p, size, is_zero, tld->stats);
}
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_commit(p, size, is_zero, tld->stats);
}
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_decommit(p, size, tld->stats);
}
bool _mi_mem_protect(void* p, size_t size) {
return _mi_os_protect(p, size);
}
bool _mi_mem_unprotect(void* p, size_t size) {
return _mi_os_unprotect(p, size);
}


@ -60,15 +60,15 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) },
{ 0, UNINIT, MI_OPTION(cache_reset) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed
{ 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node
{ 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output
};
static void mi_option_init(mi_option_desc_t* desc);
@ -84,7 +84,7 @@ void _mi_options_init(void) {
mi_option_desc_t* desc = &options[option];
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
}
}
}
mi_max_error_count = mi_option_get(mi_option_max_errors);
}

src/os.c

@ -299,7 +299,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if !defined(MAP_NORESERVE)
#define MAP_NORESERVE 0
#endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
int fd = -1;
#if defined(MAP_ALIGNED) // BSD
if (try_alignment > 0) {
@ -625,31 +628,41 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
}
#elif defined(__wasi__)
// WebAssembly guests can't control memory protection
#elif defined(MAP_FIXED)
if (!commit) {
// use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge)
void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0);
if (p != start) { err = errno; }
}
else {
// for commit, just change the protection
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
}
#else
err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE));
if (err != 0) { err = errno; }
#endif
if (err != 0) {
_mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err);
_mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
}
mi_assert_internal(err == 0);
return (err == 0);
}
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) {
return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats);
return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats);
}
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) {
bool is_zero;
return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats);
return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats);
}
bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) {
return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats);
return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats);
}
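// Note on the conservative/liberal flag (inferred from the call sites above, not
// spelled out here): a liberal commit may round the range outward to whole OS pages
// so the full request is committed, while a conservative decommit/unreset rounds
// inward so nothing outside [addr, addr+size) is ever decommitted or reset.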
// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
@ -708,7 +721,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
if (mi_option_is_enabled(mi_option_reset_decommits)) {
return _mi_os_decommit(addr,size,stats);
return _mi_os_decommit(addr, size, stats);
}
else {
return mi_os_resetx(addr, size, true, stats);
@ -799,9 +812,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
mi_win_enable_large_os_pages();
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };
MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };
// on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
static bool mi_huge_pages_available = true;
if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
@ -831,7 +844,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
// on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
if (pVirtualAlloc2 != NULL && numa_node >= 0) {
params[0].Type = MemExtendedParameterNumaNode;
params[0].ULong = (unsigned)numa_node;
params[0].ULong = (unsigned)numa_node;
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
}
#endif
@ -840,28 +853,35 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
}
#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
#ifdef MI_HAS_NUMA
#include <numaif.h> // mbind, and use -lnuma
#include <sys/syscall.h>
#ifndef MPOL_PREFERRED
#define MPOL_PREFERRED 1
#endif
#if defined(SYS_mbind)
static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
}
#else
static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags);
return 0;
}
#endif
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
mi_assert_internal(size%GiB == 0);
bool is_large = true;
void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
if (p == NULL) return NULL;
#ifdef MI_HAS_NUMA
if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
uintptr_t numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
// TODO: does `mbind` work correctly for huge OS pages? should we
// use `set_mempolicy` before calling mmap instead?
// see: <https://lkml.org/lkml/2017/2/9/875>
long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
if (err != 0) {
_mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
}
}
#else
UNUSED(numa_node);
#endif
return p;
}
#else
@ -870,7 +890,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
}
#endif
#if (MI_INTPTR_SIZE >= 8)
#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static _Atomic(uintptr_t) mi_huge_start; // = 0
@ -913,7 +933,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
size_t size = 0;
uint8_t* start = mi_os_claim_huge_pages(pages, &size);
if (start == NULL) return NULL; // or 32-bit systems
// Allocate one page at a time but try to place them contiguously
// We allocate one page at a time to be able to abort if it takes too long
// or to at least allocate as many as available on the system.
@ -933,11 +953,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
}
break;
}
// success, record it
_mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
_mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
// check for timeout
if (max_msecs > 0) {
mi_msecs_t elapsed = _mi_clock_end(start_t);
@ -971,88 +991,76 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
}
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
#ifdef WIN32
static int mi_os_numa_nodex() {
static size_t mi_os_numa_nodex() {
PROCESSOR_NUMBER pnum;
USHORT numa_node = 0;
GetCurrentProcessorNumberEx(&pnum);
GetNumaProcessorNodeEx(&pnum,&numa_node);
return (int)numa_node;
return numa_node;
}
static int mi_os_numa_node_countx(void) {
static size_t mi_os_numa_node_countx(void) {
ULONG numa_max = 0;
GetNumaHighestNodeNumber(&numa_max);
return (int)(numa_max + 1);
return (numa_max + 1);
}
#elif defined(__linux__)
#include <dirent.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/syscall.h> // getcpu
#include <stdio.h> // access
static int mi_os_numa_nodex(void) {
static size_t mi_os_numa_nodex(void) {
#ifdef SYS_getcpu
unsigned node = 0;
unsigned ncpu = 0;
int err = syscall(SYS_getcpu, &ncpu, &node, NULL);
unsigned long node = 0;
unsigned long ncpu = 0;
long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
if (err != 0) return 0;
return (int)node;
return node;
#else
return 0;
#endif
}
static int mi_os_numa_node_countx(void) {
DIR* d = opendir("/sys/devices/system/node");
if (d==NULL) return 1;
struct dirent* de;
int max_node_num = 0;
while ((de = readdir(d)) != NULL) {
int node_num;
if (strncmp(de->d_name, "node", 4) == 0) {
node_num = (int)strtol(de->d_name+4, NULL, 0);
if (max_node_num < node_num) max_node_num = node_num;
}
static size_t mi_os_numa_node_countx(void) {
char buf[128];
unsigned node = 0;
for(node = 0; node < 256; node++) {
// enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
if (access(buf,R_OK) != 0) break;
}
closedir(d);
return (max_node_num + 1);
return (node+1);
}
#else
static int mi_os_numa_nodex(void) {
static size_t mi_os_numa_nodex(void) {
return 0;
}
static int mi_os_numa_node_countx(void) {
static size_t mi_os_numa_node_countx(void) {
return 1;
}
#endif
int _mi_os_numa_node_count(void) {
static int numa_node_count = 0; // cache the node count
if (mi_unlikely(numa_node_count <= 0)) {
int ncount = mi_os_numa_node_countx();
int ncount0 = ncount;
// never more than max numa node and at least 1
int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node);
if (ncount > nmax) ncount = nmax;
if (ncount <= 0) ncount = 1;
numa_node_count = ncount;
_mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0);
size_t _mi_numa_node_count = 0; // cache the node count
size_t _mi_os_numa_node_count_get(void) {
if (mi_unlikely(_mi_numa_node_count <= 0)) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically
_mi_numa_node_count = (size_t)(ncount <= 0 ? 1 : ncount);
_mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count);
}
mi_assert_internal(numa_node_count >= 1);
return numa_node_count;
mi_assert_internal(_mi_numa_node_count >= 1);
return _mi_numa_node_count;
}
int _mi_os_numa_node(mi_os_tld_t* tld) {
int _mi_os_numa_node_get(mi_os_tld_t* tld) {
UNUSED(tld);
int numa_count = _mi_os_numa_node_count();
size_t numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
int numa_node = mi_os_numa_nodex();
size_t numa_node = mi_os_numa_nodex();
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
if (numa_node < 0) numa_node = 0;
return numa_node;
return (int)numa_node;
}
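// Example (a sketch): with mi_option_use_numa_nodes=2 on a machine where getcpu
// reports node 3, the cached _mi_numa_node_count is 2 and the node is mapped to
// 3 % 2 == 1, so allocations prefer region/arena memory associated with node 1.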


@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
page->heap->page_count--;
page->next = NULL;
page->prev = NULL;
page->heap = NULL;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false);
}
@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
page->heap = heap;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
page->heap = heap;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
count++;
}


@ -75,6 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -227,7 +228,10 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(page->heap == NULL);
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_assert_internal(!page->is_reset);
_mi_page_free_collect(page,false);
mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
mi_page_queue_push(heap, pq, page);
@ -282,7 +286,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap->cookie,block);
mi_block_t* next = mi_block_nextx(heap,block, heap->cookie);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
@ -290,7 +294,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap->cookie, block, dfree);
mi_block_set_nextx(heap, block, dfree, heap->cookie);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
@ -341,19 +345,25 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(page->heap != NULL);
#if MI_DEBUG > 1
mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
#endif
// remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
_mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE);
#if MI_DEBUG>1
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) {
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
// and then remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// and abandon it
mi_assert_internal(page->heap == NULL);
_mi_segment_page_abandon(page,segments_tld);
@ -588,7 +598,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(block_size > 0);
// set fields
size_t page_size;
_mi_segment_page_start(segment, page, &page_size);
page->block_size = block_size;
mi_assert_internal(page->block_size <= page_size);
mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
@ -755,6 +767,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
if (page != NULL) {
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size);
if (pq == NULL) {
// huge pages are directly abandoned
mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);


@ -17,8 +17,6 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment);
static void mi_segment_map_freed_at(const mi_segment_t* segment);
/* -----------------------------------------------------------
Segment allocation
@ -191,10 +189,12 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
}
#endif
/* -----------------------------------------------------------
Segment size calculations
----------------------------------------------------------- */
static size_t mi_segment_size(mi_segment_t* segment) {
return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
}
@ -212,8 +212,9 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
/*
if (idx == 0) {
// the first page starts after the segment info (and possible guard page)
p += segment->segment_info_size;
p += segment->segment_info_size;
psize -= segment->segment_info_size;
// for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
// to ensure this, we over-estimate and align with the OS page size
const size_t asize = _mi_os_page_size();
@ -234,11 +235,12 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
*/
if (page_size != NULL) *page_size = psize;
mi_assert_internal(_mi_ptr_page(p) == page);
mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page);
mi_assert_internal(_mi_ptr_segment(p) == segment);
return p;
}
static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) {
size_t page_size = _mi_os_page_size();
size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size);
@ -283,6 +285,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
if (MI_SECURE>0) {
_mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
}
// _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
_mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats);
}
@ -330,9 +333,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
}
mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT);
if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) {
_mi_os_reset((uint8_t*)segment + mi_segment_info_size(segment), mi_segment_size(segment) - mi_segment_info_size(segment), tld->stats);
}
mi_assert_internal(segment->next == NULL);
segment->next = tld->cache;
tld->cache = segment;
tld->cache_count++;
@ -706,7 +707,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
@ -896,7 +896,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
}
else {
// otherwise reclaim it
// otherwise reclaim it
_mi_page_reclaim(heap,page);
}
}


@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE)
endif()
# Import mimalloc (if installed)
find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}")
# overriding with a dynamic library


@ -181,9 +181,9 @@ int main() {
// mi_bins();
// detect double frees and heap corruption
//double_free1();
//double_free2();
//corrupt_free();
// double_free1();
// double_free2();
// corrupt_free();
void* p1 = malloc(78);
void* p2 = malloc(24);


@ -1,7 +1,7 @@
Testing allocators is difficult as bugs may only surface after particular
allocation patterns. The main approach to testing _mimalloc_ is therefore
to have extensive internal invariant checking (see `page_is_valid` in `page.c`
for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`.
for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`.
The main testing strategy is then to run [`mimalloc-bench`][bench] using full
invariant checking to catch any potential problems over a wide range of intensive
allocation benchmarks and programs.


@ -6,7 +6,8 @@ terms of the MIT license.
/* This is a stress test for the allocator, using multiple threads and
transferring objects between threads. This is not a typical workload
but uses a random linear size distribution. Do not use this test as a benchmark!
but uses a random linear size distribution. Timing can also depend on
(random) thread scheduling. Do not use this test as a benchmark!
*/
#include <stdio.h>
@ -16,17 +17,35 @@ terms of the MIT license.
#include <string.h>
#include <mimalloc.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
// argument defaults
static int THREADS = 8; // more repeatable if THREADS <= #processors
static int N = 200; // scaling factor
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
static int ITER = 10; // N full iterations re-creating all threads
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int N = 100; // scaling factor
// static int SCALE = 100; // scaling factor
static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of N uintptr_t?
#ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#define custom_malloc(s) mi_malloc(s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
// transfer pointer between threads
#define TRANSFERS (1000)
static volatile void* transfer[TRANSFERS];
#if (UINTPTR_MAX != UINT32_MAX)
const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
#else
@ -39,21 +58,21 @@ typedef uintptr_t* random_t;
static uintptr_t pick(random_t r) {
uintptr_t x = *r;
#if (UINTPTR_MAX > UINT32_MAX)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
#if (UINTPTR_MAX > UINT32_MAX)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#else
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
#else
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
#endif
*r = x;
return x;
}
@ -64,12 +83,13 @@ static bool chance(size_t perc, random_t r) {
static void* alloc_items(size_t items, random_t r) {
if (chance(1, r)) {
if (chance(1, r)) items *= 1000; // 0.01% giant
else if (chance(10, r)) items *= 100; // 0.1% huge
else items *= 10; // 1% large objects;
if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
else items *= 100; // 1% large objects;
}
if (items==40) items++; // pthreads uses that size for stack increases
uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
if (items == 40) items++; // pthreads uses that size for stack increases
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
}
@ -81,42 +101,42 @@ static void free_items(void* p) {
uintptr_t* q = (uintptr_t*)p;
uintptr_t items = (q[0] ^ cookie);
for (uintptr_t i = 0; i < items; i++) {
if ((q[i]^cookie) != items - i) {
if ((q[i] ^ cookie) != items - i) {
fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
abort();
}
}
}
mi_free(p);
custom_free(p);
}
static void stress(intptr_t tid) {
//bench_start_thread();
uintptr_t r = tid ^ 42;
const size_t max_item = 128; // in words
const size_t max_item_retained = 10*max_item;
size_t allocs = 25*N*(tid%8 + 1); // some threads do more
size_t retain = allocs/2;
uintptr_t r = tid * 43;
const size_t max_item_shift = 5; // 128
const size_t max_item_retained_shift = max_item_shift + 2;
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
size_t retain = allocs / 2;
void** data = NULL;
size_t data_size = 0;
size_t data_top = 0;
void** retained = (void**)mi_malloc(retain*sizeof(void*));
void** retained = (void**)custom_malloc(retain * sizeof(void*));
size_t retain_top = 0;
while (allocs>0 || retain>0) {
while (allocs > 0 || retain > 0) {
if (retain == 0 || (chance(50, &r) && allocs > 0)) {
// 50%+ alloc
allocs--;
if (data_top >= data_size) {
data_size += 100000;
data = (void**)mi_realloc(data, data_size*sizeof(void*));
data = (void**)custom_realloc(data, data_size * sizeof(void*));
}
data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r);
data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
}
else {
// 25% retain
retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r);
retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
retain--;
}
if (chance(66, &r) && data_top > 0) {
@ -126,7 +146,7 @@ static void stress(intptr_t tid) {
data[idx] = NULL;
}
if (chance(25, &r) && data_top > 0) {
// 25% transfer-swap
// 25% exchange a local pointer with the (shared) transfer buffer.
size_t data_idx = pick(&r) % data_top;
size_t transfer_idx = pick(&r) % TRANSFERS;
void* p = data[data_idx];
@ -141,38 +161,54 @@ static void stress(intptr_t tid) {
for (size_t i = 0; i < data_top; i++) {
free_items(data[i]);
}
mi_free(retained);
mi_free(data);
custom_free(retained);
custom_free(data);
//bench_end_thread();
}
static void run_os_threads(size_t nthreads);
int main(int argc, char** argv) {
if (argc>=2) {
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
if (argc >= 2) {
char* end;
long n = strtol(argv[1], &end, 10);
if (n > 0) THREADS = n;
}
if (argc>=3) {
if (argc >= 3) {
char* end;
long n = (strtol(argv[2], &end, 10));
if (n > 0) N = n;
if (n > 0) SCALE = n;
}
printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N);
if (argc >= 4) {
char* end;
long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n;
}
printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
//int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res);
//bench_start_program();
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
mi_stats_reset();
memset((void*)transfer, 0, TRANSFERS*sizeof(void*));
run_os_threads(THREADS);
for (int i = 0; i < TRANSFERS; i++) {
free_items((void*)transfer[i]);
uintptr_t r = 43 * 43;
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); }
#endif
}
#ifndef NDEBUG
mi_collect(false);
#endif
mi_collect(true);
mi_stats_print(NULL);
//bench_end_program();
return 0;
@ -189,8 +225,8 @@ static DWORD WINAPI thread_entry(LPVOID param) {
}
static void run_os_threads(size_t nthreads) {
DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD));
HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE));
DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) {
thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
}
@ -200,16 +236,16 @@ static void run_os_threads(size_t nthreads) {
for (size_t i = 0; i < nthreads; i++) {
CloseHandle(thandles[i]);
}
free(tids);
free(thandles);
custom_free(tids);
custom_free(thandles);
}
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == UINT32_MAX)
#if (INTPTR_MAX == UINT32_MAX)
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else
#else
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
#endif
#endif
}
#else
@ -222,8 +258,8 @@ static void* thread_entry(void* param) {
}
static void run_os_threads(size_t nthreads) {
pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t)*nthreads);
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads);
//pthread_setconcurrency(nthreads);
for (uintptr_t i = 0; i < nthreads; i++) {
pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
@ -231,6 +267,7 @@ static void run_os_threads(size_t nthreads) {
for (size_t i = 0; i < nthreads; i++) {
pthread_join(threads[i], NULL);
}
custom_free(threads);
}
static void* atomic_exchange_ptr(volatile void** p, void* newval) {