diff --git a/CMakeLists.txt b/CMakeLists.txt index cde8144f..ca69fc6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON) +option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF) option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF) option(MI_BUILD_SHARED "Build shared library" ON) @@ -35,7 +36,7 @@ option(MI_NO_THP "Disable transparent huge pages support on Linux/And option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "") # deprecated options -option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination" OFF) +option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination (deprecated)" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) @@ -90,7 +91,7 @@ endif() if (CMAKE_GENERATOR MATCHES "^Visual Studio.*$") message(STATUS "Note: when building with Visual Studio the build type is specified when building.") - message(STATUS "For example: 'cmake --build . --config=Release") + message(STATUS "For example: 'cmake --build . --config=Release") endif() if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") @@ -322,10 +323,15 @@ if(MI_LIBC_MUSL) endif() if(MI_WIN_USE_FLS) - message(STATUS "Use the Fiber API to detect thread termination (MI_WIN_USE_FLS=ON)") + message(STATUS "Use the Fiber API to detect thread termination (deprecated) (MI_WIN_USE_FLS=ON)") list(APPEND mi_defines MI_WIN_USE_FLS=1) endif() +if(MI_WIN_USE_FIXED_TLS) + message(STATUS "Use fixed TLS slot on Windows to avoid extra tests in the malloc fast path (MI_WIN_USE_FIXED_TLS=ON)") + list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) +endif() + # Determine architecture set(MI_OPT_ARCH_FLAGS "") set(MI_ARCH "unknown") @@ -543,7 +549,7 @@ if(MI_BUILD_SHARED) elseif(MI_ARCH STREQUAL "x64") set(MIMALLOC_REDIRECT_SUFFIX "") if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") - message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'") + message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'") message(STATUS " with 'mimalloc-redirect-arm64ec.dll'. See the 'bin\\readme.md' for more information.") endif() elseif(MI_ARCH STREQUAL "x86") @@ -667,7 +673,7 @@ endif() # ----------------------------------------------------------------------------- if (MI_OVERRIDE) if (MI_BUILD_SHARED) - target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) + target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) endif() if(NOT WIN32) # It is only possible to override malloc on Windows when building as a DLL. diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj similarity index 99% rename from ide/vs2022/mimalloc.vcxproj rename to ide/vs2022/mimalloc-lib.vcxproj index 9964310d..6c652b8a 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -37,7 +37,7 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA6} - mimalloc + mimalloc-lib 10.0 mimalloc-lib diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override-dll.vcxproj similarity index 99% rename from ide/vs2022/mimalloc-override.vcxproj rename to ide/vs2022/mimalloc-override-dll.vcxproj index 1278cd0f..c1849bb2 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override-dll.vcxproj @@ -37,7 +37,7 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA7} - mimalloc-override + mimalloc-override-dll 10.0 mimalloc-override-dll diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index 0e87cf36..427a75ae 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -344,7 +344,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters deleted file mode 100644 index a9f66c35..00000000 --- a/ide/vs2022/mimalloc-override.vcxproj.filters +++ /dev/null @@ -1,113 +0,0 @@ - - - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - - - {9ef1cf48-7bb2-4af1-8cc1-603486e08a7a} - - - {cfcf1674-81e3-487a-a8dd-5f956ae4007d} - - - - - Headers - - - \ No newline at end of file diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 27247569..b7f97ad2 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -282,7 +282,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index fd88cd8e..cb761f94 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -279,7 +279,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index 6e4576fd..83202dbe 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -276,7 +276,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc.sln b/ide/vs2022/mimalloc.sln index 5a55c98b..040af3ac 100644 --- a/ide/vs2022/mimalloc.sln +++ b/ide/vs2022/mimalloc.sln @@ -3,11 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.12.35527.113 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-lib", "mimalloc-lib.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-dll", "mimalloc-override-dll.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" EndProject diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters deleted file mode 100644 index 54ee0fcb..00000000 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ /dev/null @@ -1,105 +0,0 @@ - - - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - - - {dd2da697-c33c-4348-bf80-a802ebaa06fb} - - - {8027057b-4b93-4321-b93c-d51dd0c8077b} - - - \ No newline at end of file diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 48ec3f1c..ecb49a4e 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -419,7 +419,7 @@ static inline bool mi_heap_is_backing(const mi_heap_t* heap) { static inline bool mi_heap_is_initialized(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - return (heap != &_mi_heap_empty); + return (heap != NULL && heap != &_mi_heap_empty); } static inline uintptr_t _mi_ptr_cookie(const void* p) { diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 56715df4..c21ffa63 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -129,6 +129,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); // for each thread (unequal to zero). //------------------------------------------------------------------- + // On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. // The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) @@ -206,8 +207,40 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif } +#elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) + +// On windows we can store the thread-local heap at a fixed TLS slot to avoid +// thread-local initialization checks in the fast path. This use a fixed location +// in the TCB though (last user-reserved slot by default) which may clash with other applications. + +#define MI_HAS_TLS_SLOT 2 // 2 = we can reliable initialize the slot (saving a test on each malloc) + +#if MI_WIN_USE_FIXED_TLS > 1 +#define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) +#elif MI_SIZE_SIZE == 4 +#define MI_TLS_SLOT (0x710) // Last user-reserved slot +// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) +#else +#define MI_TLS_SLOT (0x888) // Last user-reserved slot +// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) #endif +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + #if (_M_X64 || _M_AMD64) && !defined(_M_ARM64EC) + return (void*)__readgsqword((unsigned long)slot); // direct load at offset from gs + #elif _M_IX86 && !defined(_M_ARM64EC) + return (void*)__readfsdword((unsigned long)slot); // direct load at offset from fs + #else + return ((void**)NtCurrentTeb())[slot / sizeof(void*)]; + #endif +} +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + ((void**)NtCurrentTeb())[slot / sizeof(void*)] = value; +} + +#endif + + // Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id // but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) // Nevertheless, it seems needed on older graviton platforms (see issue #851). @@ -322,12 +355,14 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + #if MI_TLS_SLOT == 1 // check if the TLS slot is initialized if mi_unlikely(heap == NULL) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif heap = (mi_heap_t*)&_mi_heap_empty; } + #endif return heap; } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1d3d6f41..ac228b53 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -622,6 +622,11 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); + #if MI_TLS_SLOT >= 2 + if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #endif if (reason==DLL_PROCESS_ATTACH) { _mi_process_load(); } @@ -630,7 +635,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { _mi_thread_done(NULL); - } + } } @@ -783,6 +788,11 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain + #if MI_TLS_SLOT >= 2 + if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #endif if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; }