refactor and improve atomic bitmap usage

This commit is contained in:
daan 2019-11-07 10:26:52 -08:00
parent b09282bc0d
commit 378716c467
10 changed files with 183 additions and 131 deletions

View File

@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
option(MI_SECURE_FULL "Use full security mitigations (like double free protection, more expensive)" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)
@ -70,9 +71,14 @@ if(MI_OVERRIDE MATCHES "ON")
endif()
endif()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
if(MI_SECURE_FULL MATCHES "ON")
message(STATUS "Set full secure build (experimental) (MI_SECURE_FULL=ON)")
list(APPEND mi_defines MI_SECURE=4)
else()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
endif()
endif()
if(MI_SEE_ASM MATCHES "ON")

View File

@ -232,6 +232,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\memory.c" />

View File

@ -218,7 +218,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\memory.c" />

View File

@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page);
// Overflow detecting multiply
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#include <limits.h> // UINT_MAX, ULONG_MAX
@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
return __builtin_umulll_overflow(count, size, total);
#endif
#else /* __builtin_umul_overflow is unavailable */
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
*total = count * size;
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
&& size > 0 && (SIZE_MAX / size) < count);
@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {
// Align upwards
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return ((sz + mask) & ~mask);
@ -197,6 +198,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
}
}
// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
mi_assert_internal(divider != 0);
return (divider == 0 ? size : ((size + divider - 1) / divider));
}
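A quick check of the stated bound with illustrative numbers: for s = 10 and d = 4, _mi_divide_up(10, 4) == (10 + 4 - 1) / 4 == 3, and indeed 10 <= 3*4 = 12 < 10 + 4 = 14.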
// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
for (size_t i = 0; i < size; i++) {
@ -283,7 +290,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
// if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
mi_assert_internal(idx < segment->capacity);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);

View File

@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free.
#if !defined(MI_SECURE)
#define MI_SECURE 0
#define MI_SECURE 4
#endif
// Define MI_DEBUG for debug mode
@ -93,12 +93,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb
// Derived constants
#define MI_SEGMENT_SIZE (1<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_SIZE (1UL<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_MASK ((uintptr_t)MI_SEGMENT_SIZE - 1)
#define MI_SMALL_PAGE_SIZE (1<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (1<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (1<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGE_SIZE (1UL<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (1UL<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (1UL<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)

View File

@ -7,12 +7,16 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
"Arenas" are fixed area's of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to
allocate in one arena consisting of huge OS pages -- otherwise it
delegates to direct allocation from the OS.
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.
In the future, we can expose an API to manually add more arenas which
is sometimes needed for embedded devices or shared memory for example.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).
The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
@ -48,10 +52,6 @@ int _mi_os_numa_node_count(void);
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB
#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid)
// Block info: bit 0 contains the `in_use` bit, the upper bits the
// size in count of arena blocks.
typedef uintptr_t mi_block_info_t;
// A memory arena descriptor
typedef struct mi_arena_s {
uint8_t* start; // the start of the memory area
@ -61,8 +61,8 @@ typedef struct mi_arena_s {
bool is_zero_init; // is the arena zero initialized?
bool is_large; // large OS page allocated
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks
} mi_arena_t;
@ -81,6 +81,7 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
mi_assert_internal(arena_index < 0xFE);
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}
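A standalone round-trip sketch of this memid packing (hypothetical helper names; the decode of the arena index is assumed to be the inverse of the +1 / & 0xFF step shown above):

#include <assert.h>
#include <stddef.h>

// encode: low 8 bits hold (arena_index + 1), upper bits hold the bitmap index
static size_t memid_pack(size_t arena_index, size_t bitmap_index) {
  return (bitmap_index << 8) | ((arena_index + 1) & 0xFF);
}

// decode: assumed inverse of the packing above
static void memid_unpack(size_t memid, size_t* arena_index, size_t* bitmap_index) {
  *arena_index  = (memid & 0xFF) - 1;
  *bitmap_index = memid >> 8;
}

int main(void) {
  size_t a, b;
  memid_unpack(memid_pack(3, 1000), &a, &b);
  assert(a == 3 && b == 1000);  // round-trips as long as the bitmap index fits in the upper bits
  return 0;
}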
@ -90,30 +91,25 @@ static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_
*bitmap_index = (memid >> 8);
}
static size_t mi_arena_block_count_of_size(size_t size) {
const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
const size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
return bcount;
static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
/* -----------------------------------------------------------
Thread safe allocation in an arena
----------------------------------------------------------- */
static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx)
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
const size_t fcount = arena->field_count;
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0; // wrap around
if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {
// claimed it! set the dirty bits
*is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx);
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE);
return true;
}
}
return NULL;
return false;
}
@ -125,13 +121,15 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
bool* commit, bool* large, bool* is_zero, size_t* memid)
{
mi_bitmap_index_t bitmap_index;
void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index);
if (p != NULL) {
*memid = mi_memid_create(arena_index, bitmap_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) {
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index);
*memid = mi_memid_create(arena_index, bitmap_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE));
}
return p;
return NULL;
}
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
@ -140,7 +138,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = MI_MEMID_OS;
*memid = MI_MEMID_OS;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
@ -151,7 +149,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
size <= MI_ARENA_MAX_OBJ_SIZE &&
size >= MI_ARENA_MIN_OBJ_SIZE)
{
const size_t bcount = mi_arena_block_count_of_size(size);
const size_t bcount = mi_block_count_of_size(size);
const int numa_node = _mi_os_numa_node(tld); // current numa node
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
@ -221,7 +219,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
_mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
const size_t blocks = mi_arena_block_count_of_size(size);
const size_t blocks = mi_block_count_of_size(size);
bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);
if (!ones) {
_mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
@ -268,7 +266,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
}
_mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
size_t bcount = mi_arena_block_count_of_size(hsize);
size_t bcount = mi_block_count_of_size(hsize);
size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
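As a usage sketch of the reservation path that feeds these arenas, using the public mi_reserve_huge_os_pages_at API declared in mimalloc.h (the 4 pages, node 0, and 500ms timeout are illustrative values; whether a later large allocation is actually served from the arena depends on the allocator's internal size thresholds):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // Reserve 4 x 1GiB huge OS pages on NUMA node 0, waiting at most 500ms.
  int err = mi_reserve_huge_os_pages_at(4, 0, 500);
  if (err != 0) {
    // the allocator falls back to direct OS allocation if no arena is available
    fprintf(stderr, "huge page reservation failed (%d); using regular OS memory\n", err);
  }
  void* p = mi_malloc(64 * 1024 * 1024);  // a large allocation that may be served from the arena
  mi_free(p);
  return 0;
}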
@ -284,6 +282,8 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
arena->is_zero_init = true;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_map[bcount];
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
if (post > 0) {
// don't use leftover bits at the end
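To make the leftover handling concrete with illustrative numbers: reserving 5 one-GiB huge pages gives hsize = 5 GiB, so bcount = 5 GiB / 32 MiB = 160 blocks, fields = ceil(160 / 64) = 3 bitmap fields on 64-bit, and post = 3*64 - 160 = 32 trailing bits, which are marked as used so those nonexistent blocks are never handed out.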

View File

@ -1,41 +1,30 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This file is meant to be included in other files for efficiency.
It implements a bitmap that can set/reset sequences of bits atomically
and is used to concurrently claim memory ranges.
A bitmap is an array of fields where each field is a machine word (`uintptr_t`)
A current limitation is that the bit sequences cannot cross fields
and that the sequence must be smaller or equal to the bits in a field.
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
#ifndef MI_BITMAP_C
#define MI_BITMAP_C
#include "mimalloc.h"
#include "mimalloc-internal.h"
// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanForward)(&idx, x);
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanReverse)(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
}
#endif
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE)
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
@ -63,14 +52,59 @@ static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx)
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
return ((((uintptr_t)1 << count) - 1) << bitidx);
}
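A small worked example of the mask with illustrative values: for count = 4 and bitidx = 8, ((1 << 4) - 1) << 8 == 0xF << 8 == 0x0F00, i.e. four consecutive set bits starting at bit 8.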
// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`.
// Returns `true` on success.
/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanForward)(&idx, x);
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanReverse)(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
}
#endif
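For example, with the illustrative value x = 0x90 (binary 1001 0000), mi_bsf(x) returns 4, the index of the lowest set bit, and mi_bsr(x) returns 7, the index of the highest set bit; both return 8*MI_INTPTR_SIZE for x == 0.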
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
@ -93,7 +127,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con
while (bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
uintptr_t newmap = map | m;
const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
@ -109,10 +143,10 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= count);
#else
size_t shift = 1;
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
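For readers less familiar with the compare-and-swap retry pattern used above, here is a minimal self-contained sketch in portable C11 atomics; it illustrates the same idea with a hypothetical try_claim_bits helper rather than mimalloc's own mi_atomic_cas_weak wrapper:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Try to atomically claim `count` contiguous bits starting at `bitidx` in one word.
// Preconditions: count >= 1, count < 8*sizeof(uintptr_t), and count + bitidx <= 8*sizeof(uintptr_t).
static bool try_claim_bits(_Atomic uintptr_t* field, size_t bitidx, size_t count) {
  const uintptr_t mask = (((uintptr_t)1 << count) - 1) << bitidx;
  uintptr_t expected = atomic_load_explicit(field, memory_order_relaxed);
  while ((expected & mask) == 0) {                     // all requested bits free?
    if (atomic_compare_exchange_weak(field, &expected, expected | mask)) {
      return true;                                     // claimed them atomically
    }
    // CAS failed (possibly spuriously): `expected` now holds the current value;
    // loop and re-check whether the bits are still free.
  }
  return false;  // some bit in the range is already taken
}

The loop in the diff above goes further than this sketch: on a conflict it advances bitidx past the occupied bits (using mi_bsr when bit scanning is available) and keeps searching within the field instead of giving up.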

View File

@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses in that
object :-( (`malloc-large` tests this). This means we need a cheaper way to
reuse memory.
3. This layer can help with a NUMA aware allocation in the future.
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
@ -47,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
// arena.c
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
@ -58,18 +56,18 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
// Region info is a pointer to the memory region and two bits for
@ -95,7 +93,7 @@ typedef struct mem_region_s {
size_t arena_memid; // if allocated from a (huge page) arena
} mem_region_t;
// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// The region map
static mem_region_t regions[MI_REGION_MAX];
// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks.
@ -173,7 +171,7 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i
bool region_large = allow_large;
bool is_zero = false;
size_t arena_memid = 0;
void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
mi_assert_internal(!(region_large && !allow_large));
if (start == NULL) {
@ -183,35 +181,31 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i
}
// set the newly allocated region
// try to initialize any region up to 4 beyond the current one in
// case multiple threads are doing this concurrently (common at startup)
info = mi_region_info_create(start, region_large, region_commit);
if (mi_atomic_cas_strong(&regions[idx].info, info, 0)) {
// update the region count
regions[idx].arena_memid = arena_memid;
mi_atomic_write(&regions[idx].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0));
mi_atomic_increment(&regions_count);
}
else {
// failed, another thread allocated just before us!
// we assign it to a later slot instead (up to 4 tries).
for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
regions[idx+i].arena_memid = arena_memid;
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0));
mi_atomic_increment(&regions_count);
start = NULL;
break;
}
bool claimed = false;
for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) {
if (!is_zero) {
// set dirty bits before CAS; this might race with a zero block but that is ok.
// (but writing before the CAS prevents a concurrent allocation from assuming it is not dirty)
mi_atomic_write(&regions_dirty[idx+i], MI_BITMAP_FIELD_FULL);
}
if (start != NULL) {
// free it if we didn't succeed to save it to some other region
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
// _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
// claimed!
regions[idx+i].arena_memid = arena_memid;
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
claimed = true;
}
// and continue with the memory at our index
info = mi_atomic_read(&regions[idx].info);
}
if (!claimed) {
// free our OS allocation if we failed to store it in some region
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
}
// continue with the actual info at our index in case another thread was quicker with the allocation
info = mi_atomic_read(&regions[idx].info);
mi_assert_internal(info != 0);
}
mi_assert_internal(info == mi_atomic_read(&regions[idx].info));
mi_assert_internal(info != 0);
@ -290,19 +284,21 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a
int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1;
if (rnode != numa_node) return false;
}
if (mi_unlikely(!(commit || allow_large))) {
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
mi_region_info_t info = mi_atomic_read_relaxed(&regions->info);
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info, &is_large, &is_committed);
bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation?
if (!ok) return false;
}
return true;
if (commit && allow_large) return true; // always ok
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
mi_region_info_t info = mi_atomic_read_relaxed(&regions->info);
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info, &is_large, &is_committed);
// note: we also skip if commit is false and the region is committed,
// that is a bit strong but prevents allocation of eager delayed segments in
// committed memory
bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation?
return ok;
}
// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.

View File

@ -497,8 +497,10 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
{
UNUSED(stats);
#if (MI_SECURE <= 2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;

View File

@ -66,7 +66,9 @@ static void* alloc_items(size_t items, random_t r) {
if (chance(1, r)) items *= 100; // 1% huge objects;
if (items==40) items++; // pthreads uses that size for stack increases
uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
}
return p;
}