From a6499be074a52232ed131eeabb3bd8040f2743c3 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 1 Nov 2019 19:53:07 -0700
Subject: [PATCH] initial numa support for arenas

---
 include/mimalloc-internal.h |   1 +
 include/mimalloc.h          |   8 +-
 src/arena.c                 | 128 +++++++++++++-----
 src/init.c                  |   2 +-
 src/options.c               |   3 +-
 src/os.c                    | 252 +++++++++++++++++++-----------
 6 files changed, 241 insertions(+), 153 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 2b881ac9..dd677a02 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -56,6 +56,7 @@ void _mi_os_init(void);            // called fro
 void* _mi_os_alloc(size_t size, mi_stats_t* stats);           // to allocate thread local data
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);   // to free thread local data
 size_t _mi_os_good_alloc_size(size_t size);
+int _mi_os_numa_node(void);
 
 // memory.c
diff --git a/include/mimalloc.h b/include/mimalloc.h
index b63ed79d..b155aca6 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -228,9 +228,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
 
 // Experimental
 mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
-mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
 mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
 
+mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept;
+mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept;
+
+// deprecated
+mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
+
 // ------------------------------------------------------
 // Convenience
 // ------------------------------------------------------
@@ -271,6 +276,7 @@ typedef enum mi_option_e {
   mi_option_eager_commit_delay,
   mi_option_segment_reset,
   mi_option_os_tag,
+  mi_option_max_numa_node,
   _mi_option_last
 } mi_option_t;
 
diff --git a/src/arena.c b/src/arena.c
index 469755f2..5bc3900c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -25,8 +25,10 @@ with on-demand coalescing.
 
 // os.c
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
-int   _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
+//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize);
+int   _mi_os_numa_node_count(void);
 
 /* -----------------------------------------------------------
   Arena allocation
 ----------------------------------------------------------- */
@@ -44,6 +46,7 @@ typedef uintptr_t mi_block_info_t;
 typedef struct mi_arena_s {
   uint8_t* start;                         // the start of the memory area
   size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  int      numa_node;                     // associated NUMA node
   bool     is_zero_init;                  // is the arena zero initialized?
   bool     is_large;                      // large OS page allocated
   _Atomic(uintptr_t) block_bottom;        // optimization to start the search for free blocks
@@ -223,7 +226,31 @@ static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_ze
   Arena Allocation
 ----------------------------------------------------------- */
 
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
+static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
+                                 bool* commit, bool* large, bool* is_zero,
+                                 size_t* memid)
+{
+  size_t block_index = SIZE_MAX;
+  void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index);
+  if (p != NULL) {
+    mi_assert_internal(block_index != SIZE_MAX);
+#if MI_DEBUG>=1
+    _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
+    mi_block_info_t binfo = mi_atomic_read(block);
+    mi_assert_internal(mi_block_is_in_use(binfo));
+    mi_assert_internal(mi_block_count(binfo) >= needed_bcount);
+#endif
+    *memid = mi_memid_create(arena_index, block_index);
+    *commit = true; // TODO: support commit on demand?
+    *large = arena->is_large;
+  }
+  return p;
+}
+
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
+                              bool* commit, bool* large, bool* is_zero,
+                              size_t* memid, mi_os_tld_t* tld)
+{
   mi_assert_internal(memid != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *memid = MI_MEMID_OS;
@@ -240,33 +267,36 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool*
   {
     size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
     size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
+    int numa_node = _mi_os_numa_node(); // current numa node
     mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+    // try numa affine allocation
     for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
       mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
-      if (arena==NULL) break;
-      if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages
-        size_t block_index = SIZE_MAX;
-        void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index);
-        if (p != NULL) {
-          mi_assert_internal(block_index != SIZE_MAX);
-          #if MI_DEBUG>=1
-          _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
-          mi_block_info_t binfo = mi_atomic_read(block);
-          mi_assert_internal(mi_block_is_in_use(binfo));
-          mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
-          #endif
-          *memid = mi_memid_create(i, block_index);
-          *commit = true; // TODO: support commit on demand?
-          *large = arena->is_large;
-          mi_assert_internal((uintptr_t)p % alignment == 0);
-          return p;
-        }
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
+          (*large || !arena->is_large))                          // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+    // try from another numa node instead..
+    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
+      mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
+          (*large || !arena->is_large))                          // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
       }
     }
   }
 
-  // fall back to the OS
+  // finally, fall back to the OS
   *is_zero = true;
   *memid = MI_MEMID_OS;
   return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
@@ -350,31 +380,61 @@ static bool mi_arena_add(mi_arena_t* arena) {
 ----------------------------------------------------------- */
 #include <errno.h> // ENOMEM
 
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
-  size_t pages_reserved_default = 0;
-  if (pages_reserved==NULL) pages_reserved = &pages_reserved_default;
+// reserve at a specific numa node
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept {
   size_t hsize = 0;
-  void* p = NULL;
-  int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize);
-  _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved);
-  if (p==NULL) return err;
-  // err might be != 0 but that is fine, we just got less pages.
-  mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages);
+  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize);
+  if (p==NULL) return ENOMEM;
+  _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages);
   size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
-  size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much
-  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main);
+  size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
   if (arena == NULL) {
-    *pages_reserved = 0;
     _mi_os_free(p, hsize, &_mi_stats_main);
     return ENOMEM;
   }
   arena->block_count = bcount;
   arena->start = (uint8_t*)p;
   arena->block_bottom = 0;
+  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
   arena->is_large = true;
   arena->is_zero_init = true;
   memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
-  //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false));
   mi_arena_add(arena);
   return 0;
 }
+
+
+// reserve huge pages evenly among all numa nodes.
+int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
+  if (pages == 0) return 0;
+
+  // pages per numa node
+  int numa_count = _mi_os_numa_node_count();
+  if (numa_count <= 0) numa_count = 1;
+  size_t pages_per = pages / numa_count;
+  if (pages_per == 0) pages_per = 1;
+
+  // reserve evenly among numa nodes
+  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+    int err = mi_reserve_huge_os_pages_at((pages_per > pages ? pages : pages_per), numa_node);
+    if (err) return err;
+    if (pages < pages_per) {
+      pages = 0;
+    }
+    else {
+      pages -= pages_per;
+    }
+  }
+
+  return 0;
+}
+
+int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+  _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
+  if (pages_reserved != NULL) *pages_reserved = 0;
+  int err = mi_reserve_huge_os_pages_interleave(pages);
+  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
+  return err;
+}
diff --git a/src/init.c b/src/init.c
index e15d82eb..138b54aa 100644
--- a/src/init.c
+++ b/src/init.c
@@ -435,7 +435,7 @@ static void mi_process_load(void) {
   if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
     size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
     double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB)
-    mi_reserve_huge_os_pages(pages, max_secs, NULL);
+    mi_reserve_huge_os_pages_interleave(pages);
   }
 }
 
diff --git a/src/options.c b/src/options.c
index a49c46ed..32f13d54 100644
--- a/src/options.c
+++ b/src/options.c
@@ -66,7 +66,8 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(reset_decommits) },    // note: cannot enable this if secure is on
   { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
   { 0, UNINIT, MI_OPTION(segment_reset) },      // reset segment memory on free (needs eager commit)
-  { 100, UNINIT, MI_OPTION(os_tag) }            // only apple specific for now but might serve more or less related purpose
+  { 100, UNINIT, MI_OPTION(os_tag) },           // only apple specific for now but might serve more or less related purpose
+  { 256, UNINIT, MI_OPTION(max_numa_node) }     // maximum allowed numa node
 };
 
 static void mi_option_init(mi_option_desc_t* desc);
diff --git a/src/os.c b/src/os.c
index b7bffa64..c0564174 100644
--- a/src/os.c
+++ b/src/os.c
@@ -170,7 +170,7 @@ void _mi_os_init() {
     os_alloc_granularity = os_page_size;
   }
   if (mi_option_is_enabled(mi_option_large_os_pages)) {
-    large_os_page_size = (1UL << 21); // 2MiB
+    large_os_page_size = 2*MiB;
   }
 }
 #endif
@@ -207,31 +207,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
-#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
-  // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
-  if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
-    && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0
-    && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
-    && pNtAllocateVirtualMemoryEx != NULL)
-  {
-    #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
-    #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
-    #endif
-    MEM_EXTENDED_PARAMETER param = { 0, 0 };
-    param.Type = 5; // == MemExtendedParameterAttributeFlags;
-    param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
-    SIZE_T psize = size;
-    void* base = addr;
-    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, &param, 1);
-    if (err == 0) {
-      return base;
-    }
-    else {
-      // else fall back to regular large OS pages
-      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err);
-    }
-  }
-#endif
 #if (MI_INTPTR_SIZE >= 8)
   // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations
   void* hint;
@@ -364,7 +339,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
       lflags |= MAP_HUGETLB;
       #endif
       #ifdef MAP_HUGE_1GB
-      if ((size % ((uintptr_t)1 << 30)) == 0) {
+      if ((size % GiB) == 0) {
         lflags |= MAP_HUGE_1GB;
       }
       else
@@ -400,10 +375,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
     #if defined(MADV_HUGEPAGE)
     // Many Linux systems don't allow MAP_HUGETLB but they support instead
-    // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE
+    // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
     // though since properly aligned allocations will already use large pages if available
     // in that case -- in particular for our large regions (in `memory.c`).
-    // However, some systems only allow TPH if called with explicit `madvise`, so
+    // However, some systems only allow THP if called with explicit `madvise`, so
    // when large OS pages are enabled for mimalloc, we call `madvice` anyways.
     if (allow_large && use_large_os_page(size, try_alignment)) {
       if (madvise(p, size, MADV_HUGEPAGE) == 0) {
@@ -810,101 +785,146 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
 
 /* ----------------------------------------------------------------------------
-Support for huge OS pages (1Gib) that are reserved up-front and never
-released. Only regions are allocated in here (see `memory.c`) so the memory
-will be reused.
+Support for allocating huge OS pages (1GiB) that are reserved up-front
+and possibly associated with a specific NUMA node. (use `numa_node>=0`)
 -----------------------------------------------------------------------------*/
-#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30)  // 1GiB
+#define MI_HUGE_OS_PAGE_SIZE (GiB)
+
+#if defined(WIN32) && (MI_INTPTR_SIZE >= 8)
+static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node)
+{
+  mi_assert_internal(size%GiB == 0);
-#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
-int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept {
-  UNUSED(pages); UNUSED(max_secs);
-  if (start != NULL) *start = NULL;
-  if (pages_reserved != NULL) *pages_reserved = 0;
-  if (size != NULL) *size = 0;
-  return ENOMEM;
-}
-#else
-static _Atomic(uintptr_t) huge_top; // = 0
-
-int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept
-{
-  *pstart = NULL;
-  *pages_reserved = 0;
-  *psize = 0;
-  if (max_secs==0) return ETIMEDOUT; // timeout
-  if (pages==0) return 0; // ok
-
-  // Atomically claim a huge address range
-  size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
-  uint8_t* start;
-  do {
-    start = (uint8_t*)mi_atomic_addu(&huge_top, size);
-    if (start == NULL) {
-      uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address
-      #if (MI_SECURE>0 || MI_DEBUG==0)       // security: randomize start of huge pages unless in debug mode
-      uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages);
-      top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
-      #endif
-      mi_atomic_cas_strong(&huge_top, top, 0);
-    }
-  } while (start == NULL);
-
+  #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
+  DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
+  MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} };
+  MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0};
+  reqs.HighestEndingAddress = NULL;
+  reqs.LowestStartingAddress = NULL;
+  reqs.Alignment = MI_SEGMENT_SIZE;

-  // Allocate one page at the time but try to place them contiguously
-  // We allocate one page at the time to be able to abort if it takes too long
-  double start_t = _mi_clock_start();
-  uint8_t* addr = start;  // current top of the allocations
-  for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
-    // allocate a page
-    void* p = NULL;
-    bool is_large = true;
-    #ifdef _WIN32
-    if (page==0) { mi_win_enable_large_os_pages(); }
-    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large);
-    #elif defined(MI_OS_USE_MMAP)
-    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large);
-    #else
-    // always fail
-    #endif
-
-    // Did we succeed at a contiguous address?
-    if (p != addr) {
-      // no success, issue a warning and return with an error
-      if (p != NULL) {
-        _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr);
-        _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main );
-      }
-      else {
-        #ifdef _WIN32
-        int err = GetLastError();
-        #else
-        int err = errno;
-        #endif
-        _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err);
-      }
-      return ENOMEM;
+  // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
+  if (pNtAllocateVirtualMemoryEx != NULL) {
+    #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
+    #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
+    #endif
+    params[0].Type = MemExtendedParameterAddressRequirements;
+    params[0].Pointer = &reqs;
+    params[1].Type = 5; // == MemExtendedParameterAttributeFlags;
+    params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
+    size_t param_count = 2;
+    if (numa_node >= 0) {
+      param_count++;
+      params[2].Type = MemExtendedParameterNumaNode;
+      params[2].ULong = (unsigned)numa_node;
     }
-    // success, record it
-    if (page==0) {
-      *pstart = addr;
+    SIZE_T psize = size;
+    void* base = NULL;
+    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
+    if (err == 0) {
+      return base;
     }
-    *psize += MI_HUGE_OS_PAGE_SIZE;
-    *pages_reserved += 1;
-    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
-    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
-
-    // check for timeout
-    double elapsed = _mi_clock_end(start_t);
-    if (elapsed > max_secs) return ETIMEDOUT;
-    if (page >= 1) {
-      double estimate = ((elapsed / (double)(page+1)) * (double)pages);
-      if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout
+    else {
+      // fall back to regular huge pages
+      _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err);
     }
   }
-  mi_assert_internal(*psize == size);
-  return 0;
+  // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation
+  if (pVirtualAlloc2 != NULL) {
+    params[0].Type = MemExtendedParameterAddressRequirements;
+    params[0].Pointer = &reqs;
+    size_t param_count = 1;
+    if (numa_node >= 0) {
+      param_count++;
+      params[1].Type = MemExtendedParameterNumaNode;
+      params[1].ULong = (unsigned)numa_node;
+    }
+    return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count);
+  }
+  #endif
+  return NULL; // give up on older Windows..
+}
+#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
+#ifdef MI_HAS_NUMA
+#include <numaif.h> // mbind, and use -lnuma
+#endif
+static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
+  mi_assert_internal(size%GiB == 0);
+  bool is_large = true;
+  void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
+  if (p == NULL) return NULL;
+  #ifdef MI_HAS_NUMA
+  if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) {
+    uintptr_t numa_mask = (1UL << numa_node);
+    long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
+    if (err != 0) {
+      _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
+    }
+  }
+  #endif
+  return p;
+}
+#else
+static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
+  return NULL;
 }
 #endif
+
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) {
+  if (psize != NULL) *psize = 0;
+  size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
+  void* p = mi_os_alloc_huge_os_pagesx(size, numa_node);
+  if (p==NULL) return NULL;
+  if (psize != NULL) *psize = size;
+  _mi_stat_increase(&_mi_stats_main.committed, size);
+  _mi_stat_increase(&_mi_stats_main.reserved, size);
+  return p;
+}
+
+#ifdef WIN32
+static int mi_os_numa_nodex(void) {
+  PROCESSOR_NUMBER pnum;
+  USHORT numa_node = 0;
+  GetCurrentProcessorNumberEx(&pnum);
+  GetNumaProcessorNodeEx(&pnum,&numa_node);
+  return (int)numa_node;
+}
+
+static int mi_os_numa_node_countx(void) {
+  ULONG numa_max = 0;
+  GetNumaHighestNodeNumber(&numa_max);
+  return (int)(numa_max + 1);
+}
+#elif MI_HAS_NUMA
+#include <numa.h>
+static int mi_os_numa_nodex(void) {
+  return numa_preferred();
+}
+static int mi_os_numa_node_countx(void) {
+  return (numa_max_node() + 1);
+}
+#else
+static int mi_os_numa_nodex(void) {
+  return 0;
+}
+static int mi_os_numa_node_countx(void) {
+  return 1;
+}
+#endif
+
+int _mi_os_numa_node_count(void) {
+  long ncount = mi_os_numa_node_countx();
+  // never more than max numa node and at least 1
+  long nmax = 1 + mi_option_get(mi_option_max_numa_node);
+  if (ncount > nmax) ncount = nmax;
+  if (ncount <= 0) ncount = 1;
+  return ncount;
+}
+
+int _mi_os_numa_node(void) {
+  int nnode = mi_os_numa_nodex();
+  // never more than the node count
+  int ncount = _mi_os_numa_node_count();
+  if (nnode >= ncount) { nnode = nnode % ncount; }
+  return nnode;
+}
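
Usage note (not part of the patch itself): the short program below is a sketch of how a
host application could exercise the reservation API introduced above. The page counts are
arbitrary example values; only mi_reserve_huge_os_pages_interleave, mi_reserve_huge_os_pages_at,
mi_malloc and mi_free from the public mimalloc API are assumed.

  #include <stdio.h>
  #include <mimalloc.h>

  int main(void) {
    // Reserve 4 huge (1GiB) OS pages, spread evenly over the detected NUMA nodes.
    // Per this patch, the call returns 0 on success and ENOMEM if reservation fails.
    int err = mi_reserve_huge_os_pages_interleave(4);
    if (err != 0) {
      fprintf(stderr, "huge page reservation failed (error %d)\n", err);
    }
    // Alternatively, pin a reservation to a single node (here node 0):
    //   mi_reserve_huge_os_pages_at(2, 0);

    // Large allocations can now be served from a NUMA-local arena when possible.
    void* p = mi_malloc(16 * 1024 * 1024);
    mi_free(p);
    return 0;
  }

On Linux the mbind-based NUMA binding is only compiled when MI_HAS_NUMA is defined;
presumably the build then also has to link against libnuma (-lnuma), since this patch
does not include the corresponding build-system change.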