mirror of https://github.com/microsoft/mimalloc.git
refactor and improve atomic bitmap usage
This commit is contained in:
parent  b09282bc0d
commit  378716c467
@@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF)
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
 option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
 option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
+option(MI_SECURE_FULL "Use full security mitigations (like double free protection, more expensive)" OFF)
 option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
 option(MI_BUILD_TESTS "Build test executables" ON)
@@ -70,9 +71,14 @@ if(MI_OVERRIDE MATCHES "ON")
   endif()
 endif()
 
-if(MI_SECURE MATCHES "ON")
-  message(STATUS "Set secure build (MI_SECURE=ON)")
-  list(APPEND mi_defines MI_SECURE=3)
+if(MI_SECURE_FULL MATCHES "ON")
+  message(STATUS "Set full secure build (experimental) (MI_SECURE_FULL=ON)")
+  list(APPEND mi_defines MI_SECURE=4)
+else()
+  if(MI_SECURE MATCHES "ON")
+    message(STATUS "Set secure build (MI_SECURE=ON)")
+    list(APPEND mi_defines MI_SECURE=3)
+  endif()
 endif()
 
 if(MI_SEE_ASM MATCHES "ON")
@@ -232,6 +232,9 @@
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
     <ClCompile Include="..\..\src\arena.c" />
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -218,7 +218,9 @@
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
     <ClCompile Include="..\..\src\arena.c" />
-    <ClCompile Include="..\..\src\bitmap.inc.c" />
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page);
 
 
 // Overflow detecting multiply
-#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))   // sqrt(SIZE_MAX)
 static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
 #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
 #include <limits.h>   // UINT_MAX, ULONG_MAX
@@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
   return __builtin_umulll_overflow(count, size, total);
 #endif
 #else /* __builtin_umul_overflow is unavailable */
+  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))   // sqrt(SIZE_MAX)
   *total = count * size;
   return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
     && size > 0 && (SIZE_MAX / size) < count);
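As a side note, the portable branch works by doing the wrapping multiply first and then contradicting it with a division; a minimal standalone sketch of that check (hypothetical names, not part of the diff), compilable with any C99 compiler:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))   // sqrt(SIZE_MAX)

// Returns true when count*size does not fit in size_t; *total holds the wrapped product.
static bool mul_overflow(size_t count, size_t size, size_t* total) {
  *total = count * size;                                 // may wrap; detected below
  return ((size >= NO_OVERFLOW || count >= NO_OVERFLOW)  // cheap filter: both small => safe
          && size > 0 && (SIZE_MAX / size) < count);     // exact division check
}

int main(void) {
  size_t total;
  printf("%d\n", mul_overflow(8, 16, &total));           // 0, total == 128
  printf("%d\n", mul_overflow(SIZE_MAX/2, 4, &total));   // 1, overflow detected
  return 0;
}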
@@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {
 
 // Align upwards
 static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
+  mi_assert_internal(alignment != 0);
   uintptr_t mask = alignment - 1;
   if ((alignment & mask) == 0) {  // power of two?
     return ((sz + mask) & ~mask);
@@ -197,6 +198,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
   }
 }
 
+// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
+static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
+  mi_assert_internal(divider != 0);
+  return (divider == 0 ? size : ((size + divider - 1) / divider));
+}
+
 // Is memory zero initialized?
 static inline bool mi_mem_is_zero(void* p, size_t size) {
   for (size_t i = 0; i < size; i++) {
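The new `_mi_divide_up` pairs naturally with `_mi_align_up`; a small self-contained sketch with checked examples (illustrative names only):

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

// Round sz up to a multiple of alignment (fast path for powers of two).
static uintptr_t align_up(uintptr_t sz, size_t alignment) {
  uintptr_t mask = alignment - 1;
  if ((alignment & mask) == 0) return (sz + mask) & ~mask;   // power of two
  return ((sz + mask) / alignment) * alignment;              // generic case
}

// Divide upwards: s <= divide_up(s,d)*d < s+d  (for d > 0).
static uintptr_t divide_up(uintptr_t size, size_t divider) {
  return (divider == 0 ? size : (size + divider - 1) / divider);
}

int main(void) {
  assert(align_up(100, 64) == 128);
  assert(align_up(128, 64) == 128);
  assert(divide_up(100, 64) == 2);   // 100 <= 2*64 < 100+64
  assert(divide_up(128, 64) == 2);
  return 0;
}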
@@ -283,7 +290,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
 static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
   // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0];  // huge pages
   ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
-  mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
+  mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
   uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
   mi_assert_internal(idx < segment->capacity);
   mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
@@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // #define MI_SECURE 4  // experimental, may be more expensive: checks for double free.
 
 #if !defined(MI_SECURE)
-#define MI_SECURE 0
+#define MI_SECURE 4
 #endif
 
 // Define MI_DEBUG for debug mode
@@ -93,12 +93,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_SEGMENT_SHIFT        ( MI_LARGE_PAGE_SHIFT)   // 4mb
 
 // Derived constants
-#define MI_SEGMENT_SIZE         (1<<MI_SEGMENT_SHIFT)
+#define MI_SEGMENT_SIZE         (1UL<<MI_SEGMENT_SHIFT)
 #define MI_SEGMENT_MASK         ((uintptr_t)MI_SEGMENT_SIZE - 1)
 
-#define MI_SMALL_PAGE_SIZE      (1<<MI_SMALL_PAGE_SHIFT)
-#define MI_MEDIUM_PAGE_SIZE     (1<<MI_MEDIUM_PAGE_SHIFT)
-#define MI_LARGE_PAGE_SIZE      (1<<MI_LARGE_PAGE_SHIFT)
+#define MI_SMALL_PAGE_SIZE      (1UL<<MI_SMALL_PAGE_SHIFT)
+#define MI_MEDIUM_PAGE_SIZE     (1UL<<MI_MEDIUM_PAGE_SHIFT)
+#define MI_LARGE_PAGE_SIZE      (1UL<<MI_LARGE_PAGE_SHIFT)
 
 #define MI_SMALL_PAGES_PER_SEGMENT   (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
 #define MI_MEDIUM_PAGES_PER_SEGMENT  (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
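The switch to `1UL<<` matters because a plain `1<<shift` is a signed int; once the shift approaches the width of int, or the result is mixed into `uintptr_t` arithmetic, that becomes fragile. A rough sketch of the failure mode, assuming an LP64 platform (illustrative values only):

#include <stdio.h>
#include <stdint.h>

int main(void) {
  // With a 32-bit int, 1 << 31 overflows signed int (undefined behaviour),
  // and 1 << 22 is a signed int that is converted when mixed with size_t math.
  uintptr_t mask_int = (uintptr_t)(1  << 22) - 1;   // works only because the shift is small
  uintptr_t mask_ul  = (uintptr_t)(1UL << 22) - 1;  // unsigned from the start
  printf("%#lx %#lx\n", (unsigned long)mask_int, (unsigned long)mask_ul);  // both 0x3fffff
  printf("%#lx\n", (unsigned long)(1UL << 35));     // well defined with a 64-bit long; 1 << 35 is not
  return 0;
}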
src/arena.c (62 changed lines)
@@ -7,12 +7,16 @@ terms of the MIT license. A copy of the license can be found in the file
 
 /* ----------------------------------------------------------------------------
 "Arenas" are fixed area's of OS memory from which we can allocate
-large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to
-allocate in one arena consisting of huge OS pages -- otherwise it
-delegates to direct allocation from the OS.
+large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
+In contrast to the rest of mimalloc, the arenas are shared between
+threads and need to be accessed using atomic operations.
 
-In the future, we can expose an API to manually add more arenas which
-is sometimes needed for embedded devices or shared memory for example.
+Currently arenas are only used to for huge OS page (1GiB) reservations,
+otherwise it delegates to direct allocation from the OS.
+In the future, we can expose an API to manually add more kinds of arenas
+which is sometimes needed for embedded devices or shared memory for example.
+(We can also employ this with WASI or `sbrk` systems to reserve large arenas
+on demand and be able to reuse them efficiently).
 
 The arena allocation needs to be thread safe and we use an atomic
 bitmap to allocate. The current implementation of the bitmap can
@@ -48,10 +52,6 @@ int _mi_os_numa_node_count(void);
 #define MI_ARENA_MIN_OBJ_SIZE  (MI_ARENA_BLOCK_SIZE/2)  // 16MiB
 #define MI_MAX_ARENAS          (64)                     // not more than 256 (since we use 8 bits in the memid)
 
-// Block info: bit 0 contains the `in_use` bit, the upper bits the
-// size in count of arena blocks.
-typedef uintptr_t mi_block_info_t;
-
 // A memory arena descriptor
 typedef struct mi_arena_s {
   uint8_t* start;   // the start of the memory area
@@ -61,8 +61,8 @@ typedef struct mi_arena_s {
   bool is_zero_init;                        // is the arena zero initialized?
   bool is_large;                            // large OS page allocated
   volatile _Atomic(uintptr_t) search_idx;   // optimization to start the search for free blocks
   mi_bitmap_field_t* blocks_dirty;          // are the blocks potentially non-zero?
   mi_bitmap_field_t  blocks_map[1];         // bitmap of in-use blocks
 } mi_arena_t;
@@ -81,6 +81,7 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
 
 static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
   mi_assert_internal(arena_index < 0xFE);
+  mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
   return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
 }
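The memid packs the arena index plus one into the low 8 bits (presumably so that a plain OS allocation can keep memid 0, matching `MI_MEMID_OS` used further down) and the bitmap index into the remaining bits; a standalone round-trip sketch with hypothetical names:

#include <assert.h>
#include <stddef.h>

// Encode: low 8 bits = arena_index+1, upper bits = the bitmap index.
static size_t memid_create(size_t arena_index, size_t bitmap_index) {
  return ((bitmap_index << 8) | ((arena_index + 1) & 0xFF));
}

// Decode back into the two components (natural inverse of the encoding above).
static void memid_indices(size_t memid, size_t* arena_index, size_t* bitmap_index) {
  *arena_index  = (memid & 0xFF) - 1;
  *bitmap_index = (memid >> 8);
}

int main(void) {
  size_t arena, bit;
  memid_indices(memid_create(3, 1234), &arena, &bit);
  assert(arena == 3 && bit == 1234);
  return 0;
}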
@@ -90,30 +91,25 @@ static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_
   *bitmap_index = (memid >> 8);
 }
 
-static size_t mi_arena_block_count_of_size(size_t size) {
-  const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
-  const size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
-  return bcount;
+static size_t mi_block_count_of_size(size_t size) {
+  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
 }
 
 /* -----------------------------------------------------------
   Thread safe allocation in an arena
 ----------------------------------------------------------- */
-static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx)
+static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
 {
   const size_t fcount = arena->field_count;
   size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
   for (size_t visited = 0; visited < fcount; visited++, idx++) {
     if (idx >= fcount) idx = 0;  // wrap around
     if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {
-      // claimed it! set the dirty bits
-      *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx);
       mi_atomic_write(&arena->search_idx, idx);  // start search from here next time
-      return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE);
+      return true;
     }
   }
-  return NULL;
+  return false;
 }
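The search loop above visits each bitmap field exactly once, starting from the remembered `search_idx` and wrapping around; the same iteration pattern in isolation (a sketch, with a simple stand-in for the claim test):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

// Visit every field once, starting from a remembered position and wrapping around.
static bool search_from(size_t start_idx, size_t field_count, size_t target, size_t* found) {
  size_t idx = start_idx;
  for (size_t visited = 0; visited < field_count; visited++, idx++) {
    if (idx >= field_count) idx = 0;                      // wrap around
    if (idx == target) { *found = idx; return true; }     // stand-in for a successful claim
  }
  return false;                                           // every field visited, none claimed
}

int main(void) {
  size_t found;
  assert(search_from(5, 8, 2, &found) && found == 2);     // wraps past the end to reach field 2
  assert(!search_from(0, 8, 9, &found));                  // target never present: all 8 fields visited
  return 0;
}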
@@ -125,13 +121,15 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
                                  bool* commit, bool* large, bool* is_zero, size_t* memid)
 {
   mi_bitmap_index_t bitmap_index;
-  void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index);
-  if (p != NULL) {
-    *memid = mi_memid_create(arena_index, bitmap_index);
-    *commit = true;   // TODO: support commit on demand?
-    *large = arena->is_large;
+  if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) {
+    // claimed it! set the dirty bits (todo: no need for an atomic op here?)
+    *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index);
+    *memid   = mi_memid_create(arena_index, bitmap_index);
+    *commit  = true;  // TODO: support commit on demand?
+    *large   = arena->is_large;
+    return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE));
   }
-  return p;
+  return NULL;
 }
 
 void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
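Once a bit range is claimed, the returned pointer is just the arena start plus the absolute bit index times the block size; the address arithmetic with concrete numbers (a sketch assuming the 32MiB block size mentioned in the arena header comment):

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

#define BLOCK_SIZE ((size_t)32 * 1024 * 1024)   // 32MiB, as MI_ARENA_BLOCK_SIZE above

// The absolute bit index of a claimed range gives the offset from the arena start.
static uintptr_t block_at(uintptr_t arena_start, size_t bit_index) {
  return arena_start + bit_index * BLOCK_SIZE;
}

int main(void) {
  uintptr_t start = 0x10000000;                           // illustrative arena base address
  assert(block_at(start, 0) == start);
  assert(block_at(start, 3) == start + 3*BLOCK_SIZE);     // third block, 96MiB into the arena
  return 0;
}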
@@ -140,7 +138,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
 {
   mi_assert_internal(memid != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *memid   = MI_MEMID_OS;
   *is_zero = false;
   bool default_large = false;
   if (large==NULL) large = &default_large;  // ensure `large != NULL`
@@ -151,7 +149,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
       size <= MI_ARENA_MAX_OBJ_SIZE &&
       size >= MI_ARENA_MIN_OBJ_SIZE)
   {
-    const size_t bcount = mi_arena_block_count_of_size(size);
+    const size_t bcount = mi_block_count_of_size(size);
     const int numa_node = _mi_os_numa_node(tld);  // current numa node
 
     mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
@@ -221,7 +219,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
     _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
     return;
   }
-  const size_t blocks = mi_arena_block_count_of_size(size);
+  const size_t blocks = mi_block_count_of_size(size);
   bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);
   if (!ones) {
     _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
@@ -268,7 +266,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
   }
   _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
 
-  size_t bcount = mi_arena_block_count_of_size(hsize);
+  size_t bcount = mi_block_count_of_size(hsize);
   size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
   size_t asize  = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
   mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main);  // TODO: can we avoid allocating from the OS?
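To get a feel for the sizes: a hypothetical reservation of 4 one-GiB huge pages needs 128 arena blocks and, on a 64-bit machine, 2 bitmap fields per bitmap; a sketch of the same arithmetic (assumed sizes, not mimalloc's actual structs):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define GIB               ((size_t)1 << 30)
#define ARENA_BLOCK_SIZE  ((size_t)32 << 20)       // 32MiB blocks
#define FIELD_BITS        (8 * sizeof(uintptr_t))  // 64 on a 64-bit platform

int main(void) {
  size_t hsize  = 4 * GIB;                                              // 4 huge pages of 1GiB
  size_t bcount = (hsize + ARENA_BLOCK_SIZE - 1) / ARENA_BLOCK_SIZE;    // 128 blocks
  size_t fields = (bcount + FIELD_BITS - 1) / FIELD_BITS;               // 2 fields on 64-bit
  size_t bitmap_bytes = 2 * fields * sizeof(uintptr_t);                 // blocks_map + blocks_dirty
  printf("blocks=%zu fields=%zu bitmap bytes=%zu\n", bcount, fields, bitmap_bytes);
  return 0;
}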
@@ -284,6 +282,8 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
   arena->is_zero_init = true;
   arena->search_idx = 0;
+  arena->blocks_dirty = &arena->blocks_map[bcount];
+  // the bitmaps are already zero initialized due to os_alloc
   // just claim leftover blocks if needed
   size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
   if (post > 0) {
     // don't use leftover bits at the end
src/bitmap.inc.c (110 changed lines)
@@ -1,41 +1,30 @@
 /* ----------------------------------------------------------------------------
 Copyright (c) 2019, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
 -----------------------------------------------------------------------------*/
 
+/* ----------------------------------------------------------------------------
+This file is meant to be included in other files for efficiency.
+It implements a bitmap that can set/reset sequences of bits atomically
+and is used to concurrently claim memory ranges.
+
+A bitmap is an array of fields where each field is a machine word (`uintptr_t`)
+
+A current limitation is that the bit sequences cannot cross fields
+and that the sequence must be smaller or equal to the bits in a field.
+---------------------------------------------------------------------------- */
 #pragma once
-#ifndef MI_BITMAP_H
-#define MI_BITMAP_H
+#ifndef MI_BITMAP_C
+#define MI_BITMAP_C
 
 #include "mimalloc.h"
 #include "mimalloc-internal.h"
 
-// Use bit scan forward to quickly find the first zero bit if it is available
-#if defined(_MSC_VER)
-#define MI_HAVE_BITSCAN
-#include <intrin.h>
-static inline size_t mi_bsf(uintptr_t x) {
-  if (x==0) return 8*MI_INTPTR_SIZE;
-  DWORD idx;
-  MI_64(_BitScanForward)(&idx, x);
-  return idx;
-}
-static inline size_t mi_bsr(uintptr_t x) {
-  if (x==0) return 8*MI_INTPTR_SIZE;
-  DWORD idx;
-  MI_64(_BitScanReverse)(&idx, x);
-  return idx;
-}
-#elif defined(__GNUC__) || defined(__clang__)
-#define MI_HAVE_BITSCAN
-#if (INTPTR_MAX == LONG_MAX)
-# define MI_L(x)  x##l
-#else
-# define MI_L(x)  x##ll
-#endif
-static inline size_t mi_bsf(uintptr_t x) {
-  return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
-}
-static inline size_t mi_bsr(uintptr_t x) {
-  return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
-}
-#endif
 
 /* -----------------------------------------------------------
   Bitmap definition
 ----------------------------------------------------------- */
 
 #define MI_BITMAP_FIELD_BITS   (8*MI_INTPTR_SIZE)
 #define MI_BITMAP_FIELD_FULL   (~((uintptr_t)0))   // all bits set
@@ -63,14 +52,59 @@ static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx)
   return (bitmap_idx % MI_BITMAP_FIELD_BITS);
 }
 
+// Get the full bit index
+static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
+  return bitmap_idx;
+}
+
 // The bit mask for a given number of blocks at a specified bit index.
 static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
   mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
   return ((((uintptr_t)1 << count) - 1) << bitidx);
 }
 
-// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`.
-// Returns `true` on success.
+/* -----------------------------------------------------------
+  Use bit scan forward/reverse to quickly find the first zero bit if it is available
+----------------------------------------------------------- */
+#if defined(_MSC_VER)
+#define MI_HAVE_BITSCAN
+#include <intrin.h>
+static inline size_t mi_bsf(uintptr_t x) {
+  if (x==0) return 8*MI_INTPTR_SIZE;
+  DWORD idx;
+  MI_64(_BitScanForward)(&idx, x);
+  return idx;
+}
+static inline size_t mi_bsr(uintptr_t x) {
+  if (x==0) return 8*MI_INTPTR_SIZE;
+  DWORD idx;
+  MI_64(_BitScanReverse)(&idx, x);
+  return idx;
+}
+#elif defined(__GNUC__) || defined(__clang__)
+#include <limits.h>   // LONG_MAX
+#define MI_HAVE_BITSCAN
+#if (INTPTR_MAX == LONG_MAX)
+# define MI_L(x)  x##l
+#else
+# define MI_L(x)  x##ll
+#endif
+static inline size_t mi_bsf(uintptr_t x) {
+  return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
+}
+static inline size_t mi_bsr(uintptr_t x) {
+  return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
+}
+#endif
+
+/* -----------------------------------------------------------
+  Claim a bit sequence atomically
+----------------------------------------------------------- */
+
+// Try to atomically claim a sequence of `count` bits in a single
+// field at `idx` in `bitmap`. Returns `true` on success.
 static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
 {
   mi_assert_internal(bitmap_idx != NULL);
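The mask helper builds a run of `count` one-bits starting at `bitidx`, and claiming a range then means CAS-ing `map | mask` over `map` once `(map & mask) == 0`; a standalone sketch worked out on small values:

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

// A run of `count` set bits starting at `bitidx` (count + bitidx must fit in the word).
static uintptr_t bitmap_mask(size_t count, size_t bitidx) {
  return ((((uintptr_t)1 << count) - 1) << bitidx);
}

int main(void) {
  assert(bitmap_mask(3, 0) == 0x07);    // 0b00000111
  assert(bitmap_mask(3, 5) == 0xE0);    // 0b11100000
  assert(bitmap_mask(1, 7) == 0x80);    // a single bit
  // a claim succeeds only when none of the masked bits were already set in the field
  return 0;
}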
@@ -93,7 +127,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con
   while (bitidx <= bitidx_max) {
     if ((map & m) == 0) {  // are the mask bits free at bitidx?
       mi_assert_internal((m >> bitidx) == mask);  // no overflow?
-      uintptr_t newmap = map | m;
+      const uintptr_t newmap = map | m;
       mi_assert_internal((newmap^map) >> bitidx == mask);
       if (!mi_atomic_cas_weak(field, newmap, map)) {  // TODO: use strong cas here?
         // no success, another thread claimed concurrently.. keep going
@@ -109,10 +143,10 @@
     else {
       // on to the next bit range
 #ifdef MI_HAVE_BITSCAN
-      size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
+      const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
       mi_assert_internal(shift > 0 && shift <= count);
 #else
-      size_t shift = 1;
+      const size_t shift = 1;
 #endif
       bitidx += shift;
       m <<= shift;
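The `mi_bsr`-based shift skips past the highest blocking bit inside the candidate window instead of advancing one bit at a time; a small worked example (illustrative helper, assuming the GCC/Clang builtin):

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

// Index of the highest set bit (x must be non-zero); mirrors mi_bsr on 64-bit GCC/Clang.
static size_t bsr(uintptr_t x) {
  return (8*sizeof(uintptr_t) - 1) - (size_t)__builtin_clzll((unsigned long long)x);
}

int main(void) {
  // Looking for count = 3 free bits starting at bitidx = 2: the window mask m covers bits 2..4.
  uintptr_t map = 0x10;                        // bit 4 is already taken
  uintptr_t m   = 0x1C;                        // bits 2..4
  size_t bitidx = 2;
  size_t shift  = bsr(map & m) - bitidx + 1;   // = 4 - 2 + 1 = 3
  assert(shift == 3);                          // jump straight past the blocking bit
  return 0;
}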
src/memory.c (96 changed lines)
@@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of:
 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
    to reuse memory effectively.
 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
-   an OS allocation/free is still (much) too expensive relative to the accesses in that
-   object :-( (`malloc-large` tests this). This means we need a cheaper way to
-   reuse memory.
-3. This layer can help with a NUMA aware allocation in the future.
+   an OS allocation/free is still (much) too expensive relative to the accesses
+   in that object :-( (`malloc-large` tests this). This means we need a cheaper
+   way to reuse memory.
+3. This layer allows for NUMA aware allocation.
 
 Possible issues:
 - (2) can potentially be addressed too with a small cache per thread which is much
@@ -47,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
 bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
 bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
-//void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
 
 // arena.c
 void  _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
@@ -58,18 +56,18 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
 
 // Constants
 #if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE   (256 * GiB)  // 16KiB for the region map
+#define MI_HEAP_REGION_MAX_SIZE   (256 * GiB)  // 40KiB for the region map
 #elif (MI_INTPTR_SIZE==4)
-#define MI_HEAP_REGION_MAX_SIZE   (3 * GiB)    // 196 bytes for the region map
+#define MI_HEAP_REGION_MAX_SIZE   (3 * GiB)    // ~ KiB for the region map
 #else
 #error "define the maximum heap space allowed for regions on this platform"
 #endif
 
 #define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
 
-#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB
+#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
 #define MI_REGION_MAX_ALLOC_SIZE  (MI_REGION_SIZE/4)                          // 64MiB
-#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
+#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
 
 
 // Region info is a pointer to the memory region and two bits for
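The updated comments follow from the constants: with 64-bit bitmap fields a region covers 64 segments of 4MiB, i.e. 256MiB, so a 256GiB maximum gives 1024 region entries. A sketch of that arithmetic (the ~40KiB figure also depends on the assumed size of a region entry, here taken as 5 words):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define GIB           ((size_t)1 << 30)
#define SEGMENT_SIZE  ((size_t)4 << 20)          // 4MiB segments
#define FIELD_BITS    (8 * sizeof(uintptr_t))    // 64 on a 64-bit platform

int main(void) {
  size_t region_size = SEGMENT_SIZE * FIELD_BITS;           // 256MiB
  size_t region_max  = (256 * GIB) / region_size;           // 1024 regions
  size_t approx_map  = region_max * 5 * sizeof(uintptr_t);  // assumed 5 words per entry: ~40KiB
  printf("region=%zuMiB count=%zu map~=%zuKiB\n",
         region_size >> 20, region_max, approx_map >> 10);
  return 0;
}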
@@ -95,7 +93,7 @@ typedef struct mem_region_s {
   size_t arena_memid;  // if allocated from a (huge page) arena
 } mem_region_t;
 
-// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
+// The region map
 static mem_region_t regions[MI_REGION_MAX];
 
 // A bit mask per region for its claimed MI_SEGMENT_SIZE blocks.
@@ -173,7 +171,7 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i
   bool region_large = allow_large;
   bool is_zero = false;
   size_t arena_memid = 0;
-  void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
+  void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
   mi_assert_internal(!(region_large && !allow_large));
 
   if (start == NULL) {
@@ -183,35 +181,31 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i
   }
 
   // set the newly allocated region
+  // try to initialize any region up to 4 beyond the current one in
+  // care multiple threads are doing this concurrently (common at startup)
   info = mi_region_info_create(start, region_large, region_commit);
-  if (mi_atomic_cas_strong(&regions[idx].info, info, 0)) {
-    // update the region count
-    regions[idx].arena_memid = arena_memid;
-    mi_atomic_write(&regions[idx].numa_node, _mi_os_numa_node(tld) + 1);
-    mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0));
-    mi_atomic_increment(&regions_count);
-  }
-  else {
-    // failed, another thread allocated just before us!
-    // we assign it to a later slot instead (up to 4 tries).
-    for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
-      if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
-        regions[idx+i].arena_memid = arena_memid;
-        mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
-        mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0));
-        mi_atomic_increment(&regions_count);
-        start = NULL;
-        break;
-      }
-    }
-    if (start != NULL) {
-      // free it if we didn't succeed to save it to some other region
-      _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
-      // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
-    }
-    // and continue with the memory at our index
-    info = mi_atomic_read(&regions[idx].info);
-  }
+  bool claimed = false;
+  for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) {
+    if (!is_zero) {
+      // set dirty bits before CAS; this might race with a zero block but that is ok.
+      // (but writing before cas prevents a concurrent allocation to assume it is not dirty)
+      mi_atomic_write(&regions_dirty[idx+i], MI_BITMAP_FIELD_FULL);
+    }
+    if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
+      // claimed!
+      regions[idx+i].arena_memid = arena_memid;
+      mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
+      mi_atomic_increment(&regions_count);
+      claimed = true;
+    }
+  }
+  if (!claimed) {
+    // free our OS allocation if we didn't succeed to store it in some region
+    _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+  }
+  // continue with the actual info at our index in case another thread was quicker with the allocation
+  info = mi_atomic_read(&regions[idx].info);
+  mi_assert_internal(info != 0);
 }
 mi_assert_internal(info == mi_atomic_read(&regions[idx].info));
 mi_assert_internal(info != 0);
@@ -290,19 +284,21 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a
     int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1;
     if (rnode != numa_node) return false;
   }
-  if (mi_unlikely(!(commit || allow_large))) {
-    // otherwise skip incompatible regions if possible.
-    // this is not guaranteed due to multiple threads allocating at the same time but
-    // that's ok. In secure mode, large is never allowed for any thread, so that works out;
-    // otherwise we might just not be able to reset/decommit individual pages sometimes.
-    mi_region_info_t info = mi_atomic_read_relaxed(&regions->info);
-    bool is_large;
-    bool is_committed;
-    void* start = mi_region_info_read(info, &is_large, &is_committed);
-    bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation?
-    if (!ok) return false;
-  }
-  return true;
+  if (commit && allow_large) return true; // always ok
+
+  // otherwise skip incompatible regions if possible.
+  // this is not guaranteed due to multiple threads allocating at the same time but
+  // that's ok. In secure mode, large is never allowed for any thread, so that works out;
+  // otherwise we might just not be able to reset/decommit individual pages sometimes.
+  mi_region_info_t info = mi_atomic_read_relaxed(&regions->info);
+  bool is_large;
+  bool is_committed;
+  void* start = mi_region_info_read(info, &is_large, &is_committed);
+  // note: we also skip if commit is false and the region is committed,
+  // that is a bit strong but prevents allocation of eager delayed segments in
+  // committed memory
+  bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation?
+  return ok;
 }
 
 // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
@@ -497,8 +497,10 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
 static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
 {
   UNUSED(stats);
+  #if (MI_SECURE <= 2)
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->local_free == NULL);
+  #endif
   mi_assert_internal(page->capacity + extend <= page->reserved);
   void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
   size_t bsize = page->block_size;
@@ -66,7 +66,9 @@ static void* alloc_items(size_t items, random_t r) {
   if (chance(1, r)) items *= 100;   // 1% huge objects;
   if (items==40) items++;           // pthreads uses that size for stack increases
   uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
-  for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
+  if (p != NULL) {
+    for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
+  }
   return p;
 }