mirror of
https://github.com/microsoft/mimalloc.git
synced 2024-12-27 13:33:18 +08:00
add initial fast tls for macOSX
This commit is contained in:
parent
fea903900d
commit
0989562c2d
@ -11,7 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include "mimalloc-types.h"
|
||||
|
||||
#if defined(MI_MALLOC_OVERRIDE)
|
||||
#if defined(__APPLE__)
|
||||
#if defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__))
|
||||
#define MI_TLS_OSX_FAST
|
||||
#define MI_TLS_OSX_SLOT 94 // seems unused, except in Webkit? See: <https://github.com/WebKit/webkit/blob/master/Source/WTF/wtf/FastTLS.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <pthread.h>
|
||||
#define MI_TLS_PTHREADS
|
||||
#elif (defined(__OpenBSD__) || defined(__DragonFly__))
|
||||
@ -284,14 +287,31 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o
|
||||
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
|
||||
extern bool _mi_process_is_initialized;
|
||||
|
||||
#if defined(MI_TLS_PTHREADS)
|
||||
#if defined(MI_TLS_OSX_FAST)
|
||||
#define MI_TLS_OSX_OFFSET (MI_TLS_OSX_SLOT*sizeof(void*))
|
||||
static inline void* mi_tls_osx_fast_get(void) {
|
||||
void* ret;
|
||||
__asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void**)(MI_TLS_OSX_OFFSET)));
|
||||
return ret;
|
||||
}
|
||||
static inline void mi_tls_osx_fast_set(void* value) {
|
||||
__asm__("movq %1,%%gs:%0" : "=m" (*(void**)(MI_TLS_OSX_OFFSET)) : "rn" (value));
|
||||
}
|
||||
#elif defined(MI_TLS_PTHREADS)
|
||||
extern pthread_key_t _mi_heap_default_key;
|
||||
#else
|
||||
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
|
||||
#endif
|
||||
|
||||
static inline mi_heap_t* mi_get_default_heap(void) {
|
||||
#if defined(MI_TLS_PTHREADS)
|
||||
#if defined(MI_TLS_OSX_FAST)
|
||||
// Use a fixed slot in the TSD on MacOSX to avoid recursion (since the loader calls malloc).
|
||||
// We use slot 94 (__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4) <https://github.com/apportable/Foundation/blob/master/System/System/src/pthread_machdep.h>
|
||||
// which seems unused except for the more recent Webkit <https://github.com/WebKit/webkit/blob/master/Source/WTF/wtf/FastTLS.h>
|
||||
// Use with care.
|
||||
mi_heap_t* heap = (mi_heap_t*)mi_tls_osx_fast_get();
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#elif defined(MI_TLS_PTHREADS)
|
||||
// Use pthreads for TLS; this is used on macOSX with interpose as the loader calls `malloc`
|
||||
// to allocate TLS storage leading to recursive calls if __thread declared variables are accessed.
|
||||
// Using pthreads allows us to initialize without recursive calls. (performance seems still quite good).
|
||||
@ -300,9 +320,9 @@ static inline mi_heap_t* mi_get_default_heap(void) {
|
||||
#else
|
||||
#if defined(MI_TLS_RECURSE_GUARD)
|
||||
// On some BSD platforms, like openBSD, the dynamic loader calls `malloc`
|
||||
// to initialize thread local data. To avoid recursion, we need to avoid
|
||||
// accessing the thread local `_mi_default_heap` until our module is loaded
|
||||
// and use the statically allocated main heap until that time.
|
||||
// to initialize thread local data (before our module is loaded).
|
||||
// To avoid recursion, we need to avoid accessing the thread local `_mi_heap_default`
|
||||
// until our module is loaded and use the statically allocated main heap until that time.
|
||||
// TODO: patch ourselves dynamically to avoid this check every time?
|
||||
if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main;
|
||||
#endif
|
||||
|
19
src/init.c
19
src/init.c
@ -260,14 +260,15 @@ static void _mi_thread_done(mi_heap_t* default_heap);
|
||||
// use thread local storage keys to detect thread ending
|
||||
#include <windows.h>
|
||||
#include <fibersapi.h>
|
||||
static DWORD mi_fls_key;
|
||||
static DWORD mi_fls_key = (DWORD)(-1);
|
||||
// Fiber-local-storage callback: forwards the stored slot value, interpreted as
// a heap pointer, to `_mi_thread_done`. A NULL slot value is ignored.
static void NTAPI mi_fls_done(PVOID value) {
  if (value == NULL) {
    return;
  }
  mi_heap_t* const heap = (mi_heap_t*)value;
  _mi_thread_done(heap);
}
|
||||
#elif defined(MI_USE_PTHREADS)
|
||||
// use pthread locol storage keys to detect thread ending
|
||||
// use pthread local storage keys to detect thread ending
|
||||
// (and used with MI_TLS_PTHREADS for the default heap)
|
||||
#include <pthread.h>
|
||||
pthread_key_t _mi_heap_default_key;
|
||||
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
|
||||
static void mi_pthread_done(void* value) {
|
||||
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
|
||||
}
|
||||
@ -287,6 +288,7 @@ static void mi_process_setup_auto_thread_done(void) {
|
||||
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
|
||||
mi_fls_key = FlsAlloc(&mi_fls_done);
|
||||
#elif defined(MI_USE_PTHREADS)
|
||||
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
|
||||
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
|
||||
#endif
|
||||
_mi_heap_set_default_direct(&_mi_heap_main);
|
||||
@ -331,9 +333,14 @@ static void _mi_thread_done(mi_heap_t* heap) {
|
||||
|
||||
void _mi_heap_set_default_direct(mi_heap_t* heap) {
|
||||
mi_assert_internal(heap != NULL);
|
||||
#if !defined(MI_TLS_PTHREADS)
|
||||
#if defined(MI_TLS_OSX_FAST)
|
||||
mi_tls_osx_fast_set(heap);
|
||||
#elif defined(MI_TLS_PTHREADS)
|
||||
// we use _mi_heap_default_key
|
||||
#else
|
||||
_mi_heap_default = heap;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// ensure the default heap is passed to `_mi_thread_done`
|
||||
// setting to a non-NULL value also ensures `mi_thread_done` is called.
|
||||
#if defined(_WIN32) && defined(MI_SHARED_LIB)
|
||||
@ -342,7 +349,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
|
||||
mi_assert_internal(mi_fls_key != 0);
|
||||
FlsSetValue(mi_fls_key, heap);
|
||||
#elif defined(MI_USE_PTHREADS)
|
||||
// mi_assert_internal(_mi_heap_default_key != 0); // often 0 is also the allocated key
|
||||
mi_assert_internal(_mi_heap_default_key != (pthread_key_t)(-1));
|
||||
pthread_setspecific(_mi_heap_default_key, heap);
|
||||
#endif
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ terms of the MIT license.
|
||||
// argument defaults
|
||||
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
||||
static int SCALE = 10; // scaling factor
|
||||
static int ITER = 5; // N full iterations destructing and re-creating all threads
|
||||
static int ITER = 50; // N full iterations destructing and re-creating all threads
|
||||
|
||||
// static int THREADS = 8; // more repeatable if THREADS <= #processors
|
||||
// static int SCALE = 100; // scaling factor
|
||||
@ -250,7 +250,7 @@ int main(int argc, char** argv) {
|
||||
#endif
|
||||
|
||||
// mi_collect(true);
|
||||
// mi_stats_print(NULL);
|
||||
mi_stats_print(NULL);
|
||||
//bench_end_program();
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user