Initial commit of a separate memory region layer and improved large OS page support; see 'src/memory.c'

daan 2019-07-02 07:23:24 -07:00
parent d6901558cd
commit 06bcea1761
18 changed files with 693 additions and 297 deletions

View File

@ -15,6 +15,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}")
 set(mi_sources
   src/stats.c
   src/os.c
+  src/memory.c
   src/segment.c
   src/page.c
   src/alloc.c

View File

@ -225,6 +225,7 @@
 <ClCompile Include="..\..\src\alloc.c" />
 <ClCompile Include="..\..\src\heap.c" />
 <ClCompile Include="..\..\src\init.c" />
+<ClCompile Include="..\..\src\memory.c" />
 <ClCompile Include="..\..\src\options.c" />
 <ClCompile Include="..\..\src\os.c" />
 <ClCompile Include="..\..\src\page-queue.c">

View File

@ -58,5 +58,8 @@
 <ClCompile Include="..\..\src\init.c">
   <Filter>Source Files</Filter>
 </ClCompile>
+<ClCompile Include="..\..\src\memory.c">
+  <Filter>Source Files</Filter>
+</ClCompile>
 </ItemGroup>
 </Project>

View File

@ -224,6 +224,7 @@
 <ClCompile Include="..\..\src\alloc.c" />
 <ClCompile Include="..\..\src\heap.c" />
 <ClCompile Include="..\..\src\init.c" />
+<ClCompile Include="..\..\src\memory.c" />
 <ClCompile Include="..\..\src\options.c" />
 <ClCompile Include="..\..\src\page-queue.c">
   <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>

View File

@ -50,6 +50,9 @@
 <ClCompile Include="..\..\src\init.c">
   <Filter>Source Files</Filter>
 </ClCompile>
+<ClCompile Include="..\..\src\memory.c">
+  <Filter>Source Files</Filter>
+</ClCompile>
 </ItemGroup>
 <ItemGroup>
 <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -39,6 +39,15 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e
 // Atomically exchange a value.
 static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange);
+// Atomically read a value
+static inline uintptr_t mi_atomic_read(volatile uintptr_t* p);
+// Atomically read a pointer
+static inline void* mi_atomic_read_ptr(volatile void** p);
+// Atomically write a value
+static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x);
 static inline void mi_atomic_yield(void);
 // Atomically compare and exchange a pointer; returns `true` if successful.
@ -85,6 +94,15 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e
 static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) {
   return (uintptr_t)RC64(_InterlockedExchange)((volatile intptr_t*)p, (intptr_t)exchange);
 }
+static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) {
+  return *p;
+}
+static inline void* mi_atomic_read_ptr(volatile void** p) {
+  return (void*)(*p);
+}
+static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) {
+  *p = x;
+}
 static inline void mi_atomic_yield(void) {
   YieldProcessor();
 }
@ -147,6 +165,18 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch
   MI_USING_STD
   return atomic_exchange_explicit((volatile atomic_uintptr_t*)p, exchange, memory_order_relaxed);
 }
+static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) {
+  MI_USING_STD
+  return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed);
+}
+static inline void* mi_atomic_read_ptr(volatile void** p) {
+  MI_USING_STD
+  return atomic_load_explicit((volatile _Atomic(void*)*)p, memory_order_relaxed);
+}
+static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) {
+  MI_USING_STD
+  return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed);
+}
 #if defined(__cplusplus)
 #include <thread>
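// Illustrative sketch (not part of this header): the relaxed read/write primitives above
// combine with `mi_atomic_compare_exchange` into the lock-free claim loop that `memory.c`
// below uses for its region bitmap. Written here with plain C11 atomics for clarity.
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool try_claim_bits(_Atomic(uintptr_t)* map, uintptr_t mask) {
  uintptr_t old = atomic_load_explicit(map, memory_order_relaxed);         // mi_atomic_read
  do {
    if ((old & mask) != 0) return false;                                   // some bits already claimed
  } while (!atomic_compare_exchange_weak_explicit(map, &old, old | mask,   // mi_atomic_compare_exchange
             memory_order_relaxed, memory_order_relaxed));
  return true;                                                             // caller now owns the bits in `mask`
}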

View File

@ -29,18 +29,21 @@ uintptr_t _mi_ptr_cookie(const void* p);
 uintptr_t _mi_random_shuffle(uintptr_t x);
 uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
-// "os.c"
-bool _mi_os_reset(void* p, size_t size);
-void* _mi_os_alloc(size_t size, mi_stats_t* stats);
-bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize);
-void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
-bool _mi_os_protect(void* addr, size_t size);
-bool _mi_os_unprotect(void* addr, size_t size);
-void _mi_os_init(void); // called from process init
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld);
+// os.c
 size_t _mi_os_page_size(void);
 uintptr_t _mi_align_up(uintptr_t sz, size_t alignment);
+void _mi_os_init(void);                              // called from process init
+void* _mi_os_alloc(size_t size, mi_stats_t* stats);  // to allocate thread local data
+void _mi_os_free(void* p, size_t size, mi_stats_t* stats);  // to free thread local data
+// memory.c
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t* id, mi_os_tld_t* tld);
+void* _mi_mem_alloc(size_t size, size_t* id, mi_os_tld_t* tld);
+void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);
+bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats);
+bool _mi_mem_protect(void* addr, size_t size);
+bool _mi_mem_unprotect(void* addr, size_t size);
-// "segment.c"
+// segment.c
 mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);

View File

@ -89,7 +89,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
 #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
-#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit
+#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1MiB on 64-bit
 #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
@ -215,6 +215,7 @@ typedef struct mi_segment_s {
   size_t segment_size;       // for huge pages this may be different from `MI_SEGMENT_SIZE`
   size_t segment_info_size;  // space we are using from the first page for segment meta-data and possible guard pages.
   uintptr_t cookie;          // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
+  size_t memid;              // id for the os-level memory manager
   // layout like this to optimize access in `mi_free`
   size_t page_shift;         // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
@ -322,12 +323,14 @@ typedef struct mi_stats_s {
   mi_stat_count_t reserved;
   mi_stat_count_t committed;
   mi_stat_count_t reset;
+  mi_stat_count_t page_committed;
   mi_stat_count_t segments_abandoned;
   mi_stat_count_t pages_abandoned;
   mi_stat_count_t pages_extended;
   mi_stat_count_t mmap_calls;
   mi_stat_count_t mmap_right_align;
   mi_stat_count_t mmap_ensure_aligned;
+  mi_stat_count_t commit_calls;
   mi_stat_count_t threads;
   mi_stat_count_t huge;
   mi_stat_count_t malloc;
@ -370,11 +373,13 @@ typedef struct mi_segment_queue_s {
 // Segments thread local data
 typedef struct mi_segments_tld_s {
   mi_segment_queue_t small_free;  // queue of segments with free small pages
+  size_t count;                   // current number of segments
+  size_t peak_count;              // peak number of segments
   size_t current_size;            // current size of all segments
   size_t peak_size;               // peak size of all segments
   size_t cache_count;             // number of segments in the cache
   size_t cache_size;              // total size of all segments in the cache
-  mi_segment_queue_t cache;       // (small) cache of segments for small and large pages (to avoid repeated mmap calls)
+  mi_segment_t* cache;            // (small) cache of segments
   mi_stats_t* stats;              // points to tld stats
 } mi_segments_tld_t;

View File

@ -215,8 +215,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
 typedef enum mi_option_e {
   mi_option_page_reset,
   mi_option_cache_reset,
-  mi_option_pool_commit,
-  mi_option_large_os_pages,
+  mi_option_eager_commit,
+  mi_option_large_os_pages, // implies eager commit
   mi_option_secure,
   mi_option_show_stats,
   mi_option_show_errors,

View File

@ -58,6 +58,7 @@ const mi_page_t _mi_page_empty = {
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
+  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   { 0, 0 } \
   MI_STAT_COUNT_END_NULL()
@ -90,7 +91,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
-  { { NULL, NULL }, 0, 0, 0, 0, {NULL,NULL}, tld_main_stats }, // segments
+  { { NULL, NULL }, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
   { 0, NULL, NULL, 0, tld_main_stats }, // os
   { MI_STATS_NULL }                     // stats
 };

src/memory.c (new file, 349 lines)
View File

@ -0,0 +1,349 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. In contrast to the
rest of mimalloc, this layer uses thread-shared "regions" that are accessed
using atomic operations. We need this layer because:
1. On `sbrk`-like systems (like WebAssembly) we need our own memory maps in
   order to reuse memory.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
   an OS allocation/free is still too high relative to the accesses in that
   object :-( (the `malloc-large` benchmark tests this). This means we need a
   cheaper way to reuse memory.
3. This layer can help with NUMA-aware allocation in the future.
Possible issues:
- (2) can potentially be addressed with a small per-thread cache as well, which
  is much simpler. Generally though that requires shrinking of huge pages, may
  overuse memory per thread, and is not compatible with `sbrk`.
- Since the current regions are per-process, we need atomic operations to
  claim blocks, which may be contended.
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
  linearly. At what point will direct OS calls be faster? Is there a way to
  do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
// Internal OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld);
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 192 bytes for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8)
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAP_FULL UINTPTR_MAX
// A region owns a chunk of MI_REGION_SIZE (256MiB) virtual memory, with
// a bitmap that tracks one in-use bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile uintptr_t map; // in-use bit per MI_SEGMENT_SIZE block
volatile void* start; // start of virtual memory area
} mem_region_t;
// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for numa aware allocation
static mem_region_t regions[MI_REGION_MAX];
static volatile size_t regions_count = 0; // allocated regions
static volatile uintptr_t region_next_idx = 0;
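// Sanity sketch of the sizes these constants imply (illustrative only; this assumes a
// 64-bit build where MI_SEGMENT_SIZE is the usual 4MiB):
_Static_assert(MI_REGION_SIZE == (1ULL << 28), "64 map bits * 4MiB blocks = 256MiB covered per region");
_Static_assert(MI_REGION_MAX_ALLOC_SIZE == (1ULL << 26), "at most 64/4 = 16 blocks, i.e. 64MiB, per allocation");
_Static_assert(MI_REGION_MAX == 1024, "256GiB / 256MiB = 1024 region slots");
// 1024 slots * sizeof(mem_region_t) (two words, 16 bytes) = 16KiB for the whole region map.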
/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
return ((((uintptr_t)1 << blocks) - 1) << bitidx);
}
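// Worked example (illustrative): three blocks starting at bit index 5 give
//   mi_region_block_mask(3, 5) == ((1 << 3) - 1) << 5 == 0b11100000 == 0xE0
// i.e. bits 5, 6 and 7 of a region map mark three contiguous 4MiB blocks as in use.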
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size());
}
/* ----------------------------------------------------------------------------
Commit from a region
-----------------------------------------------------------------------------*/
// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
// Returns `false` on an error (OOM); in that case the claimed blocks are unclaimed again.
// On success, writes the start of the committed memory to `*p` and the block id to `*id`,
// and returns `true`.
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld) {
size_t mask = mi_region_block_mask(blocks,bitidx);
mi_assert_internal(mask != 0);
mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
// ensure the region is reserved
void* start = mi_atomic_read_ptr(&region->start);
if (start == NULL) {
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_commit), tld);
if (start == NULL) {
// failure to allocate from the OS! unclaim the blocks and fail
size_t map;
do {
map = mi_atomic_read(&region->map);
} while (!mi_atomic_compare_exchange(&region->map, map & ~mask, map));
return false;
}
// set the newly allocated region
if (mi_atomic_compare_exchange_ptr(&region->start, start, NULL)) {
// update the region count
mi_atomic_increment(&regions_count);
}
else {
// failed, another thread allocated just before us, free our allocated memory
// TODO: should we keep the allocated memory and assign it to some other region?
_mi_os_free(start, MI_REGION_SIZE, tld->stats);
start = mi_atomic_read_ptr(&region->start);
}
}
// Commit the blocks to memory
mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
if (!mi_option_is_enabled(mi_option_eager_commit)) {
_mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats); // only commit needed size (unless using large OS pages)
}
// and return the allocation
mi_atomic_write(&region_next_idx,idx); // next search from here
*p = blocks_start;
*id = (idx*MI_REGION_MAP_BITS) + bitidx;
return true;
}
// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld) {
mi_assert_internal(p != NULL && id != NULL);
mi_assert_internal(blocks < MI_REGION_MAP_BITS);
const uintptr_t mask = mi_region_block_mask(blocks,0);
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
size_t bitidx = 0;
uintptr_t map;
uintptr_t newmap;
do { // while no atomic claim success and not all bits seen
// find the first free range of bits
map = mi_atomic_read(&region->map);
size_t m = map;
do {
// skip ones
while ((m&1) == 1) { bitidx++; m>>=1; }
// count zeros
mi_assert_internal((m&1)==0);
size_t zeros = 1;
m >>= 1;
while(zeros < blocks && (m&1)==0) { zeros++; m>>=1; }
if (zeros == blocks) break; // found a range that fits
bitidx += zeros;
}
while(bitidx <= bitidx_max);
if (bitidx > bitidx_max) {
return true; // no error, but could not find a range either
}
// try to claim it
mi_assert_internal( (mask << bitidx) >> bitidx == mask ); // no overflow?
mi_assert_internal( (map & (mask << bitidx)) == 0); // fits in zero range
newmap = map | (mask << bitidx);
mi_assert_internal((newmap^map) >> bitidx == mask);
}
while(!mi_atomic_compare_exchange(&region->map, newmap, map));
// success, we claimed the blocks atomically
// now commit the block memory -- this can still fail
return mi_region_commit_blocks(region, idx, bitidx, blocks, size, p, id, tld);
}
// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld)
{
// check if there are available blocks in the region..
mi_assert_internal(idx < MI_REGION_MAX);
mem_region_t* region = &regions[idx];
uintptr_t m = mi_atomic_read(&region->map);
if (m != MI_REGION_MAP_FULL) { // some bits are zero
return mi_region_alloc_blocks(region, idx, blocks, size, p, id, tld);
}
else {
return true; // no error, but no success either
}
}
/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(id != NULL && tld != NULL);
mi_assert_internal(size > 0);
*id = SIZE_MAX;
// use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, true, tld); // round up size
}
// always round size to OS page size multiple (so commit/decommit go over the entire range)
// TODO: use large OS page size here?
size = _mi_align_up(size, _mi_os_page_size());
// calculate the number of needed blocks
size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
// find a range of free blocks
void* p = NULL;
size_t count = mi_atomic_read(&regions_count);
size_t idx = mi_atomic_read(&region_next_idx);
for (size_t visited = 0; visited < count; visited++, idx++) {
if (!mi_region_try_alloc_blocks(idx%count, blocks, size, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
if (p == NULL) {
// no free range in existing regions -- try to extend beyond the count
for (idx = count; idx < MI_REGION_MAX; idx++) {
if (!mi_region_try_alloc_blocks(idx, blocks, size, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
}
if (p == NULL) {
// we could not find a place to allocate, fall back to the os directly
p = _mi_os_alloc_aligned(size, alignment, true, tld);
}
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
return p;
}
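// Worked example of the id encoding above (illustrative, 64 map bits): blocks claimed in
// region idx == 2 at bitidx == 5 get id == 2*64 + 5 == 133; `_mi_mem_free` below recovers
// idx == 133/64 == 2 and bitidx == 133%64 == 5, while id == SIZE_MAX marks a direct OS allocation.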
// Allocate `size` memory. Return non NULL on success, with a given memory `id`.
void* _mi_mem_alloc(size_t size, size_t* id, mi_os_tld_t* tld) {
return _mi_mem_alloc_aligned(size,0,id,tld);
}
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
if (p==NULL) return;
if (size==0) return;
if (id == SIZE_MAX) {
// was a direct OS allocation, pass through
_mi_os_free(p, size, stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
// we can align the size up to page size (as we allocate that way too)
// this ensures we fully commit/decommit/reset
size = _mi_align_up(size, _mi_os_page_size());
size_t idx = (id / MI_REGION_MAP_BITS);
size_t bitidx = (id % MI_REGION_MAP_BITS);
size_t blocks = mi_region_block_count(size);
size_t mask = mi_region_block_mask(blocks, bitidx);
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
mem_region_t* region = &regions[idx];
mi_assert_internal((mi_atomic_read(&region->map) & mask) == mask ); // claimed?
void* start = mi_atomic_read_ptr(&region->start);
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
// decommit (or reset) the blocks to reduce the working set.
// TODO: implement delayed decommit/reset as these calls are too expensive
// if the memory is reused soon.
// reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
if (mi_option_is_enabled(mi_option_eager_commit)) {
// _mi_os_reset(p, size, stats); // 10x slowdown on malloc-large
}
else {
// _mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large
}
// TODO: should we free empty regions?
// this frees up virtual address space which
// might be useful on 32-bit systems?
// and unclaim
uintptr_t map;
uintptr_t newmap;
do {
map = mi_atomic_read(&region->map);
newmap = map & ~mask;
} while (!mi_atomic_compare_exchange(&region->map, newmap, map));
}
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
return _mi_os_reset(p, size, stats);
}
bool _mi_mem_protect(void* p, size_t size) {
return _mi_os_protect(p, size);
}
bool _mi_mem_unprotect(void* p, size_t size) {
return _mi_os_unprotect(p, size);
}
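// Minimal usage sketch of this interface (hypothetical caller; `os_tld` and `stats` stand for
// the thread-local state that real callers such as segment.c already carry around):
static void example_region_roundtrip(mi_os_tld_t* os_tld, mi_stats_t* stats) {
  size_t memid = 0;
  void* p = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_SIZE, &memid, os_tld);  // claim one segment-sized block
  if (p == NULL) return;  // out of memory
  // ... use the block; `memid` travels with it (see the new `mi_segment_t.memid` field) ...
  _mi_mem_free(p, MI_SEGMENT_SIZE, memid, stats);  // pass the same id back so the right region bits are unclaimed
}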

View File

@ -30,8 +30,8 @@ typedef struct mi_option_desc_s {
 static mi_option_desc_t options[_mi_option_last] = {
   { 0, UNINIT, "page_reset" },
   { 0, UNINIT, "cache_reset" },
-  { 0, UNINIT, "pool_commit" },
-  { 0, UNINIT, "large_os_pages" }, // use large OS pages
+  { 1, UNINIT, "eager_commit" },   // on by default as it seems to be faster in general
+  { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
 #if MI_SECURE
   { MI_SECURE, INITIALIZED, "secure" }, // in secure build the environment setting is ignored
 #else

src/os.c (321 changed lines)
View File

@ -12,7 +12,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc-internal.h"
 #include <string.h>  // memset
-#include <stdio.h>   // debug fprintf
 #include <errno.h>
 /* -----------------------------------------------------------
@ -28,15 +27,37 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <unistd.h>  // sysconf
 #endif
+// page size (initialized properly in `os_init`)
+static size_t os_page_size = 4096;
+// minimal allocation granularity
+static size_t os_alloc_granularity = 4096;
 // if non-zero, use large page allocation
 static size_t large_os_page_size = 0;
+// OS (small) page size
+size_t _mi_os_page_size() {
+  return os_page_size;
+}
+// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
+size_t _mi_os_large_page_size() {
+  return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size());
+}
 static bool use_large_os_page(size_t size, size_t alignment) {
   // if we have access, check the size and alignment requirements
   if (large_os_page_size == 0) return false;
   return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0);
 }
+// round to a good allocation size
+static size_t mi_os_good_alloc_size(size_t size, size_t alignment) {
+  UNUSED(alignment);
+  if (size >= (SIZE_MAX - os_alloc_granularity)) return size; // possible overflow?
+  return _mi_align_up(size, os_alloc_granularity);
+}
 #if defined(_WIN32)
 // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
@ -45,11 +66,17 @@ typedef PVOID (*VirtualAlloc2Ptr)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTEN
 static VirtualAlloc2Ptr pVirtualAlloc2 = NULL;
 void _mi_os_init(void) {
-  // Try to get the VirtualAlloc2 function (only supported on Windows 10 and Windows Server 2016)
+  // get the page size
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  if (si.dwPageSize > 0) os_page_size = si.dwPageSize;
+  if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity;
+  // get the VirtualAlloc2 function
   HINSTANCE hDll;
   hDll = LoadLibrary("kernelbase.dll");
   if (hDll!=NULL) {
-    pVirtualAlloc2 = (VirtualAlloc2Ptr)GetProcAddress(hDll, "VirtualAlloc2");
+    // use VirtualAlloc2FromApp as it is available to Windows store apps
+    pVirtualAlloc2 = (VirtualAlloc2Ptr)GetProcAddress(hDll, "VirtualAlloc2FromApp");
     FreeLibrary(hDll);
   }
   // Try to see if large OS pages are supported
@ -86,8 +113,15 @@ void _mi_os_init(void) {
 }
 #else
 void _mi_os_init() {
-  // nothing to do
-  use_large_os_page(0, 0); // dummy call to suppress warnings
+  // get the page size
+  long result = sysconf(_SC_PAGESIZE);
+  if (result > 0) {
+    os_page_size = (size_t)result;
+    os_alloc_granularity = os_page_size;
+  }
+  if (mi_option_is_enabled(mi_option_large_os_pages)) {
+    large_os_page_size = (1UL<<21); // 2MiB
+  }
 }
 #endif
@ -116,26 +150,8 @@ static void* mi_align_down_ptr(void* p, size_t alignment) {
   return (void*)_mi_align_down((uintptr_t)p, alignment);
 }
-static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld);
-// cached OS page size
-size_t _mi_os_page_size(void) {
-  static size_t page_size = 0;
-  if (page_size == 0) {
-#if defined(_WIN32)
-    SYSTEM_INFO si;
-    GetSystemInfo(&si);
-    page_size = (si.dwPageSize > 0 ? si.dwPageSize : 4096);
-#else
-    long result = sysconf(_SC_PAGESIZE);
-    page_size = (result > 0 ? (size_t)result : 4096);
-#endif
-  }
-  return page_size;
-}
-static bool mi_munmap(void* addr, size_t size)
+static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 {
   if (addr == NULL || size == 0) return true;
   bool err = false;
@ -144,6 +160,8 @@ static bool mi_munmap(void* addr, size_t size)
 #else
   err = (munmap(addr, size) == -1);
 #endif
+  _mi_stat_decrease(&stats->committed, size); // TODO: what if never committed?
+  _mi_stat_decrease(&stats->reserved, size);
   if (err) {
 #pragma warning(suppress:4996)
     _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size);
@ -154,16 +172,18 @@ static bool mi_munmap(void* addr, size_t size)
   }
 }
-static void* mi_mmap(void* addr, size_t size, int extra_flags, mi_stats_t* stats) {
+static void* mi_os_mem_alloc(void* addr, size_t size, bool commit, int extra_flags, mi_stats_t* stats) {
   UNUSED(stats);
   if (size == 0) return NULL;
   void* p = NULL;
 #if defined(_WIN32)
+  int flags = MEM_RESERVE | extra_flags;
+  if (commit) flags |= MEM_COMMIT;
   if (use_large_os_page(size, 0)) {
-    p = VirtualAlloc(addr, size, MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE);
+    p = VirtualAlloc(addr, size, MEM_LARGE_PAGES | flags, PAGE_READWRITE);
   }
   if (p == NULL) {
-    p = VirtualAlloc(addr, size, MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE);
+    p = VirtualAlloc(addr, size, flags, PAGE_READWRITE);
   }
 #else
 #if !defined(MAP_ANONYMOUS)
@ -179,19 +199,43 @@ static void* mi_mmap(void* addr, size_t size, int extra_flags, mi_stats_t* stats
     flags |= MAP_FIXED;
 #endif
   }
-  p = mmap(addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0);
-  if (p == MAP_FAILED) p = NULL;
+  if (large_os_page_size > 0 && use_large_os_page(size, 0) && ((uintptr_t)addr % large_os_page_size) == 0) {
+    int lflags = flags;
+    #ifdef MAP_ALIGNED_SUPER
+    lflags |= MAP_ALIGNED_SUPER;
+    #endif
+    #ifdef MAP_HUGETLB
+    lflags |= MAP_HUGETLB;
+    #endif
+    #ifdef MAP_HUGE_2MB
+    lflags |= MAP_HUGE_2MB;
+    #endif
+    if (lflags != flags) {
+      // try large page allocation
+      p = mmap(addr, size, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE), lflags, -1, 0);
+      if (p == MAP_FAILED) p = NULL;
+    }
+  }
+  if (p == NULL) {
+    p = mmap(addr, size, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE), flags, -1, 0);
+    if (p == MAP_FAILED) p = NULL;
+  }
   if (addr != NULL && p != addr) {
-    mi_munmap(p, size);
+    mi_os_mem_free(p, size, stats);
     p = NULL;
   }
 #endif
-  UNUSED(stats);
   mi_assert(p == NULL || (addr == NULL && p != addr) || (addr != NULL && p == addr));
-  if (p != NULL) mi_stat_increase(stats->mmap_calls, 1);
+  if (p != NULL) {
+    mi_stat_increase(stats->mmap_calls, 1);
+    mi_stat_increase(stats->reserved, size);
+    if (commit) mi_stat_increase(stats->committed, size);
+  }
   return p;
 }
-static void* mi_mmap_aligned(size_t size, size_t alignment, mi_stats_t* stats) {
+static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, mi_stats_t* stats) {
   if (alignment < _mi_os_page_size() || ((alignment & (~alignment + 1)) != alignment)) return NULL;
   void* p = NULL;
 #if defined(_WIN32) && defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
@ -202,27 +246,33 @@ static void* mi_mmap_aligned(size_t size, size_t alignment, mi_stats_t* stats) {
     MEM_EXTENDED_PARAMETER param = { 0 };
     param.Type = MemExtendedParameterAddressRequirements;
     param.Pointer = &reqs;
-    DWORD extra_flags = 0;
-    if (use_large_os_page(size, alignment)) extra_flags |= MEM_LARGE_PAGES;
-    p = (*pVirtualAlloc2)(NULL, NULL, size, MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE, &param, 1);
+    DWORD flags = MEM_RESERVE;
+    if (commit) flags |= MEM_COMMIT;
+    if (use_large_os_page(size, alignment)) flags |= MEM_LARGE_PAGES;
+    p = (*pVirtualAlloc2)(NULL, NULL, size, flags, PAGE_READWRITE, &param, 1);
   }
 #elif defined(MAP_ALIGNED)
   // on BSD, use the aligned mmap api
   size_t n = _mi_bsr(alignment);
-  if ((size_t)1 << n == alignment && n >= 12) { // alignment is a power of 2 and >= 4096
-    p = mi_mmap(suggest, size, MAP_ALIGNED(n), tld->stats); // use the NetBSD/freeBSD aligned flags
+  if (((size_t)1 << n) == alignment && n >= 12) { // alignment is a power of 2 and >= 4096
+    p = mi_os_mem_alloc(suggest, size, commit, MAP_ALIGNED(n), tld->stats); // use the NetBSD/freeBSD aligned flags
   }
 #else
   UNUSED(size);
   UNUSED(alignment);
 #endif
+  UNUSED(stats); // if !STATS
   mi_assert(p == NULL || (uintptr_t)p % alignment == 0);
-  if (p != NULL) mi_stat_increase(stats->mmap_calls, 1);
+  if (p != NULL) {
+    mi_stat_increase(stats->mmap_calls, 1);
+    mi_stat_increase(stats->reserved, size);
+    if (commit) mi_stat_increase(stats->committed, size);
+  }
   return p;
 }
-static void* mi_os_page_align_region(void* addr, size_t size, size_t* newsize) {
+// Conservatively OS page align within a given area
+static void* mi_os_page_align_area(void* addr, size_t size, size_t* newsize) {
   mi_assert(addr != NULL && size > 0);
   if (newsize != NULL) *newsize = 0;
   if (size == 0 || addr == NULL) return NULL;
@ -242,16 +292,31 @@ static void* mi_os_page_align_region(void* addr, size_t size, size_t* newsize) {
 // but may be used later again. This will release physical memory
 // pages and reduce swapping while keeping the memory committed.
 // We page align to a conservative area inside the range to reset.
-bool _mi_os_reset(void* addr, size_t size) {
+bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
   // page align conservatively within the range
   size_t csize;
-  void* start = mi_os_page_align_region(addr,size,&csize);
+  void* start = mi_os_page_align_area(addr,size,&csize);
   if (csize==0) return true;
+  UNUSED(stats); // if !STATS
+  mi_stat_increase(stats->reset, csize);
 #if defined(_WIN32)
+  // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory
+  // (but this is for an access pattern that immediately reuses the memory)
+  /*
+  DWORD ok = DiscardVirtualMemory(start, csize);
+  return (ok != 0);
+  */
   void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE);
   mi_assert(p == start);
-  return (p == start);
+  if (p != start) return false;
+  /*
+  // VirtualUnlock removes the memory eagerly from the current working set (which MEM_RESET does lazily on demand)
+  // TODO: put this behind an option?
+  DWORD ok = VirtualUnlock(start, csize);
+  if (ok != 0) return false;
+  */
+  return true;
 #else
 #if defined(MADV_FREE)
   static int advice = MADV_FREE;
@ -276,19 +341,19 @@ bool _mi_os_reset(void* addr, size_t size) {
 static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   // page align conservatively within the range
   size_t csize = 0;
-  void* start = mi_os_page_align_region(addr, size, &csize);
+  void* start = mi_os_page_align_area(addr, size, &csize);
   if (csize==0) return false;
   int err = 0;
 #ifdef _WIN32
   DWORD oldprotect = 0;
   BOOL ok = VirtualProtect(start,csize,protect ? PAGE_NOACCESS : PAGE_READWRITE,&oldprotect);
-  err = (ok ? 0 : -1);
+  err = (ok ? 0 : GetLastError());
 #else
   err = mprotect(start,csize,protect ? PROT_NONE : (PROT_READ|PROT_WRITE));
 #endif
   if (err != 0) {
-    _mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, errno: %i\n", start, csize, errno);
+    _mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err);
   }
   return (err==0);
 }
@ -301,24 +366,48 @@ bool _mi_os_unprotect(void* addr, size_t size) {
   return mi_os_protectx(addr, size, false);
 }
-bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) {
+// Commit/Decommit memory.
+// We page align to a conservative area inside the range to reset.
+static bool mi_os_commitx(void* addr, size_t size, bool commit, mi_stats_t* stats) {
   // page align conservatively within the range
-  mi_assert_internal(oldsize > newsize && p != NULL);
-  if (oldsize < newsize || p==NULL) return false;
-  if (oldsize == newsize) return true;
-  // oldsize and newsize should be page aligned or we cannot shrink precisely
-  void* addr = (uint8_t*)p + newsize;
-  size_t size = 0;
-  void* start = mi_os_page_align_region(addr, oldsize - newsize, &size);
-  if (size==0 || start != addr) return false;
-#ifdef _WIN32
-  // we cannot shrink on windows
-  return false;
+  size_t csize;
+  void* start = mi_os_page_align_area(addr, size, &csize);
+  if (csize == 0) return true;
+  int err = 0;
+  UNUSED(stats); // if !STATS
+  if (commit) {
+    mi_stat_increase(stats->committed, csize);
+    mi_stat_increase(stats->commit_calls,1);
+  }
+  else {
+    mi_stat_decrease(stats->committed, csize);
+  }
+#if defined(_WIN32)
+  if (commit) {
+    void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE);
+    err = (p == start ? 0 : GetLastError());
+  }
+  else {
+    BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT);
+    err = (ok ? 0 : GetLastError());
+  }
 #else
-  return mi_munmap( start, size );
+  err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE));
 #endif
+  if (err != 0) {
+    _mi_warning_message("commit/decommit error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err);
+  }
+  mi_assert_internal(err == 0);
+  return (err == 0);
+}
+bool _mi_os_commit(void* addr, size_t size, mi_stats_t* stats) {
+  return mi_os_commitx(addr, size, true, stats);
+}
+bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) {
+  return mi_os_commitx(addr, size, false, stats);
 }
 /* -----------------------------------------------------------
@ -327,22 +416,21 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) {
 void* _mi_os_alloc(size_t size, mi_stats_t* stats) {
   if (size == 0) return NULL;
-  void* p = mi_mmap(NULL, size, 0, stats);
+  size = mi_os_good_alloc_size(size, 0);
+  void* p = mi_os_mem_alloc(NULL, size, true, 0, stats);
   mi_assert(p!=NULL);
-  if (p != NULL) mi_stat_increase(stats->reserved, size);
   return p;
 }
 void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
   UNUSED(stats);
-  mi_munmap(p, size);
-  mi_stat_decrease(stats->reserved, size);
+  mi_os_mem_free(p, size, stats);
 }
 // Slow but guaranteed way to allocate aligned memory
 // by over-allocating and then reallocating at a fixed aligned
 // address that should be available then.
-static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t trie, mi_stats_t* stats)
+static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, bool commit, size_t trie, mi_stats_t* stats)
 {
   if (trie >= 3) return NULL; // stop recursion (only on Windows)
   size_t alloc_size = size + alignment;
@ -350,28 +438,28 @@ static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t t
   if (alloc_size < size) return NULL;
   // allocate a chunk that includes the alignment
-  void* p = mi_mmap(NULL, alloc_size, 0, stats);
+  void* p = mi_os_mem_alloc(NULL, alloc_size, commit, 0, stats);
   if (p == NULL) return NULL;
   // create an aligned pointer in the allocated area
   void* aligned_p = mi_align_up_ptr(p, alignment);
   mi_assert(aligned_p != NULL);
-#if defined(_WIN32)
   // free it and try to allocate `size` at exactly `aligned_p`
-  // note: this may fail in case another thread happens to VirtualAlloc
+  // note: this may fail in case another thread happens to allocate
   // concurrently at that spot. We try up to 3 times to mitigate this.
-  mi_munmap(p, alloc_size);
-  p = mi_mmap(aligned_p, size, 0, stats);
+  mi_os_mem_free(p, alloc_size, stats);
+  p = mi_os_mem_alloc(aligned_p, size, commit, 0, stats);
   if (p != aligned_p) {
-    if (p != NULL) mi_munmap(p, size);
-    return mi_os_alloc_aligned_ensured(size, alignment, trie++, stats);
+    if (p != NULL) mi_os_mem_free(p, size, stats);
+    return mi_os_alloc_aligned_ensured(size, alignment, commit, trie++, stats);
   }
-#else
+#if 0  // could use this on mmap systems
   // we selectively unmap parts around the over-allocated area.
   size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
   size_t mid_size = _mi_align_up(size, _mi_os_page_size());
   size_t post_size = alloc_size - pre_size - mid_size;
-  if (pre_size > 0) mi_munmap(p, pre_size);
-  if (post_size > 0) mi_munmap((uint8_t*)aligned_p + mid_size, post_size);
+  if (pre_size > 0) mi_os_mem_free(p, pre_size, stats);
+  if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, stats);
 #endif
   mi_assert(((uintptr_t)aligned_p) % alignment == 0);
@ -382,22 +470,21 @@ static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t t
 // Since `mi_mmap` is relatively slow we try to allocate directly at first and
 // hope to get an aligned address; only when that fails we fall back
 // to a guaranteed method by overallocating at first and adjusting.
-// TODO: use VirtualAlloc2 with alignment on Windows 10 / Windows Server 2016.
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld)
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld)
 {
   if (size == 0) return NULL;
-  if (alignment < 1024) return _mi_os_alloc(size, tld->stats);
-  void* p = os_pool_alloc(size,alignment,tld);
-  if (p != NULL) return p;
+  size = mi_os_good_alloc_size(size,alignment);
+  if (alignment < 1024) return mi_os_mem_alloc(NULL, size, commit, 0, tld->stats);
+  // try direct OS aligned allocation; only supported on BSD and Windows 10+
   void* suggest = NULL;
-  p = mi_mmap_aligned(size,alignment,tld->stats);
+  void* p = mi_os_mem_alloc_aligned(size,alignment,commit,tld->stats);
+  // Fall back
   if (p==NULL && (tld->mmap_next_probable % alignment) == 0) {
     // if the next probable address is aligned,
     // then try to just allocate `size` and hope it is aligned...
-    p = mi_mmap(suggest, size, 0, tld->stats);
+    p = mi_os_mem_alloc(suggest, size, commit, 0, tld->stats);
     if (p == NULL) return NULL;
     if (((uintptr_t)p % alignment) == 0) mi_stat_increase(tld->stats->mmap_right_align, 1);
   }
@ -406,75 +493,23 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld)
   if (p==NULL || ((uintptr_t)p % alignment) != 0) {
     // if `p` is not yet aligned after all, free the block and use a slower
     // but guaranteed way to allocate an aligned block
-    if (p != NULL) mi_munmap(p, size);
+    if (p != NULL) mi_os_mem_free(p, size, tld->stats);
     mi_stat_increase( tld->stats->mmap_ensure_aligned, 1);
     //fprintf(stderr, "mimalloc: slow mmap 0x%lx\n", _mi_thread_id());
-    p = mi_os_alloc_aligned_ensured(size, alignment,0,tld->stats);
+    p = mi_os_alloc_aligned_ensured(size, alignment,commit,0,tld->stats);
   }
   if (p != NULL) {
-    mi_stat_increase( tld->stats->reserved, size);
     // next probable address is the page-aligned address just after the newly allocated area.
-    const size_t alloc_align =
-#if defined(_WIN32)
-      64 * 1024; // Windows allocates 64kb aligned
-#else
-      _mi_os_page_size(); // page size on other OS's
-#endif
     size_t probable_size = MI_SEGMENT_SIZE;
     if (tld->mmap_previous > p) {
       // Linux tends to allocate downward
-      tld->mmap_next_probable = _mi_align_down((uintptr_t)p - probable_size, alloc_align); // ((uintptr_t)previous - (uintptr_t)p);
+      tld->mmap_next_probable = _mi_align_down((uintptr_t)p - probable_size, os_alloc_granularity); // ((uintptr_t)previous - (uintptr_t)p);
     }
     else {
       // Otherwise, guess the next address is page aligned `size` from current pointer
-      tld->mmap_next_probable = _mi_align_up((uintptr_t)p + probable_size, alloc_align);
+      tld->mmap_next_probable = _mi_align_up((uintptr_t)p + probable_size, os_alloc_granularity);
     }
     tld->mmap_previous = p;
   }
   return p;
 }
-// Pooled allocation: on 64-bit systems with plenty
-// of virtual addresses, we allocate 10 segments at the
-// time to minimize `mmap` calls and increase aligned
-// allocations. This is only good on systems that
-// do overcommit so we put it behind the `MIMALLOC_POOL_COMMIT` option.
-// For now, we disable it on windows as VirtualFree must
-// be called on the original allocation and cannot be called
-// for individual fragments.
-#if defined(_WIN32) || (MI_INTPTR_SIZE<8)
-static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld) {
-  UNUSED(size);
-  UNUSED(alignment);
-  UNUSED(tld);
-  return NULL;
-}
-#else
-#define MI_POOL_ALIGNMENT MI_SEGMENT_SIZE
-#define MI_POOL_SIZE (10*MI_POOL_ALIGNMENT)
-static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld)
-{
-  if (!mi_option_is_enabled(mi_option_pool_commit)) return NULL;
-  if (alignment != MI_POOL_ALIGNMENT) return NULL;
-  size = _mi_align_up(size,MI_POOL_ALIGNMENT);
-  if (size > MI_POOL_SIZE) return NULL;
-  if (tld->pool_available == 0) {
-    tld->pool = (uint8_t*)mi_os_alloc_aligned_ensured(MI_POOL_SIZE,MI_POOL_ALIGNMENT,0,tld->stats);
-    if (tld->pool == NULL) return NULL;
-    tld->pool_available += MI_POOL_SIZE;
-  }
-  if (size > tld->pool_available) return NULL;
-  void* p = tld->pool;
-  tld->pool_available -= size;
-  tld->pool += size;
-  return p;
-}
-#endif
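// Sketch of the new reserve-then-commit pattern these changes enable (illustrative only;
// `_mi_os_commit` is the internal helper defined above and prototyped in memory.c):
static void* example_reserve_then_commit(size_t size, mi_os_tld_t* tld) {
  void* p = _mi_os_alloc_aligned(size, MI_SEGMENT_SIZE, false, tld);  // commit == false: reserve address space only
  if (p == NULL) return NULL;
  _mi_os_commit(p, _mi_os_page_size(), tld->stats);                   // commit just the first page before first use
  return p;
}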

View File

@ -267,7 +267,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
 static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page->heap == NULL);
   mi_assert_internal(!mi_page_queue_contains(queue, page));
-  mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue)));
+  mi_assert_internal(page->block_size == queue->block_size ||
+                     (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
+                     (page->flags.in_full && mi_page_queue_is_full(queue)));
   page->flags.in_full = mi_page_queue_is_full(queue);
   page->heap = heap;
@ -292,9 +294,11 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(from, page));
   mi_assert_expensive(!mi_page_queue_contains(to, page));
-  mi_assert_internal(page->block_size == to->block_size ||
+  mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) ||
+                     (page->block_size == to->block_size && mi_page_queue_is_full(from)) ||
+                     (page->block_size == from->block_size && mi_page_queue_is_full(to)) ||
                      (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) ||
-                     (page->block_size == from->block_size && mi_page_queue_is_full(to)));
+                     (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to)));
   if (page->prev != NULL) page->prev->next = page->next;
   if (page->next != NULL) page->next->prev = page->prev;

View File

@ -453,7 +453,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
   }
   // enable the new free list
   page->capacity += (uint16_t)extend;
-  mi_stat_increase(stats->committed, extend * page->block_size);
+  mi_stat_increase(stats->page_committed, extend * page->block_size);
 }
/* ----------------------------------------------------------- /* -----------------------------------------------------------

View File

@ -108,19 +108,6 @@ static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment)
   }
 }
-static void mi_segment_queue_insert_before(mi_segment_queue_t* queue, mi_segment_t* elem, mi_segment_t* segment) {
-  mi_assert_expensive(elem==NULL || mi_segment_queue_contains(queue, elem));
-  mi_assert_expensive(segment != NULL && !mi_segment_queue_contains(queue, segment));
-  segment->prev = (elem == NULL ? queue->last : elem->prev);
-  if (segment->prev != NULL) segment->prev->next = segment;
-  else queue->first = segment;
-  segment->next = elem;
-  if (segment->next != NULL) segment->next->prev = segment;
-  else queue->last = segment;
-}
 // Start of the page available memory
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
 {
@ -176,17 +163,17 @@ static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size
 }
-/* -----------------------------------------------------------
+/* ----------------------------------------------------------------------------
   Segment caches
-  We keep a small segment cache per thread to avoid repeated allocation
-  and free in the OS if a program allocates memory and then frees
-  all again repeatedly. (We tried a one-element cache but that
-  proves to be too small for certain workloads).
------------------------------------------------------------ */
+  We keep a small segment cache per thread to increase local
+  reuse and avoid setting/clearing guard pages in secure mode.
+------------------------------------------------------------------------------- */
 static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
   if (segment_size>=0) mi_stat_increase(tld->stats->segments,1);
   else mi_stat_decrease(tld->stats->segments,1);
+  tld->count += (segment_size >= 0 ? 1 : -1);
+  if (tld->count > tld->peak_count) tld->peak_count = tld->count;
   tld->current_size += segment_size;
   if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size;
 }
@@ -194,123 +181,87 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
 
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
   mi_segments_track_size(-((long)segment_size),tld);
-  _mi_os_free(segment, segment_size,tld->stats);
+  if (mi_option_is_enabled(mi_option_secure)) {
+    _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
+  }
+  _mi_mem_free(segment, segment_size, segment->memid, tld->stats);
 }
 
-// The segment cache is limited to be at most 1/8 of the peak size
-// in use (and no more than 32)
-#define MI_SEGMENT_CACHE_MAX (32)
+// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
+// and no more than 4.
+#define MI_SEGMENT_CACHE_MAX (4)
 #define MI_SEGMENT_CACHE_FRACTION (8)
 
-// Get a segment of at least `required` size.
-// If `required == MI_SEGMENT_SIZE` the `segment_size` will match exactly
-static mi_segment_t* _mi_segment_cache_findx(mi_segments_tld_t* tld, size_t required, bool reverse) {
-  mi_assert_internal(required % _mi_os_page_size() == 0);
-  mi_segment_t* segment = (reverse ? tld->cache.last : tld->cache.first);
-  while (segment != NULL) {
-    if (segment->segment_size >= required) {
-      tld->cache_count--;
-      tld->cache_size -= segment->segment_size;
-      mi_segment_queue_remove(&tld->cache, segment);
-      // exact size match?
-      if (required==0 || segment->segment_size == required) {
-        return segment;
-      }
-      // not more than 25% waste and on a huge page segment? (in that case the segment size does not need to match required)
-      else if (required != MI_SEGMENT_SIZE && segment->segment_size - (segment->segment_size/4) <= required) {
-        return segment;
-      }
-      // try to shrink the memory to match exactly
-      else {
-        if (mi_option_is_enabled(mi_option_secure)) {
-          _mi_os_unprotect(segment, segment->segment_size);
-        }
-        if (_mi_os_shrink(segment, segment->segment_size, required)) {
-          tld->current_size -= segment->segment_size;
-          tld->current_size += required;
-          segment->segment_size = required;
-          return segment;
-        }
-        else {
-          // if that all fails, we give up
-          mi_segment_os_free(segment,segment->segment_size,tld);
-          return NULL;
-        }
-      }
-    }
-    segment = (reverse ? segment->prev : segment->next);
-  }
-  return NULL;
-}
-
-static mi_segment_t* mi_segment_cache_find(mi_segments_tld_t* tld, size_t required) {
-  return _mi_segment_cache_findx(tld,required,false);
-}
-
-static mi_segment_t* mi_segment_cache_evict(mi_segments_tld_t* tld) {
-  // TODO: random eviction instead?
-  return _mi_segment_cache_findx(tld, 0, true /* from the end */);
+static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) {
+  if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL;
+  mi_segment_t* segment = tld->cache;
+  if (segment == NULL) return NULL;
+  tld->cache_count--;
+  tld->cache = segment->next;
+  segment->next = NULL;
+  mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
+  return segment;
 }
 
 static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
   if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
-      tld->cache_size*MI_SEGMENT_CACHE_FRACTION < tld->peak_size) return false;
+      tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
+    return false;
+  }
   // take the opportunity to reduce the segment cache if it is too large (now)
-  while (tld->cache_size*MI_SEGMENT_CACHE_FRACTION >= tld->peak_size + 1) {
-    mi_segment_t* segment = mi_segment_cache_evict(tld);
+  // TODO: this never happens as we check against peak usage, should we use current usage instead?
+  while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
+    mi_segment_t* segment = mi_segment_cache_pop(0,tld);
     mi_assert_internal(segment != NULL);
     if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
   }
   return true;
 }
 
-static bool mi_segment_cache_insert(mi_segment_t* segment, mi_segments_tld_t* tld) {
-  mi_assert_internal(segment->next==NULL && segment->prev==NULL);
-  mi_assert_internal(!mi_segment_is_in_free_queue(segment,tld));
-  mi_assert_expensive(!mi_segment_queue_contains(&tld->cache, segment));
-  if (mi_segment_cache_full(tld)) return false;
+static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
+  mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld));
+  mi_assert_internal(segment->next == NULL);
+  if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) {
+    return false;
+  }
+  mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
   if (mi_option_is_enabled(mi_option_cache_reset) && !mi_option_is_enabled(mi_option_page_reset)) {
-    _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size);
+    _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
   }
-  // insert ordered
-  mi_segment_t* seg = tld->cache.first;
-  while (seg != NULL && seg->segment_size < segment->segment_size) {
-    seg = seg->next;
-  }
-  mi_segment_queue_insert_before( &tld->cache, seg, segment );
+  segment->next = tld->cache;
+  tld->cache = segment;
   tld->cache_count++;
-  tld->cache_size += segment->segment_size;
   return true;
 }
 
-// called by ending threads to free cached segments
+// called by threads that are terminating to free cached segments
 void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
   mi_segment_t* segment;
-  while ((segment = mi_segment_cache_find(tld,0)) != NULL) {
-    mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
+  while ((segment = mi_segment_cache_pop(0,tld)) != NULL) {
+    mi_segment_os_free(segment, segment->segment_size, tld);
   }
-  mi_assert_internal(tld->cache_count == 0 && tld->cache_size == 0);
-  mi_assert_internal(mi_segment_queue_is_empty(&tld->cache));
+  mi_assert_internal(tld->cache_count == 0);
+  mi_assert_internal(tld->cache == NULL);
 }
 
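Aside (not part of the diff): the new cache policy above bounds the per-thread cache by both `MI_SEGMENT_CACHE_MAX` and a fraction of the peak segment count, so the effective limit is `min(MI_SEGMENT_CACHE_MAX, 1 + peak_count/MI_SEGMENT_CACHE_FRACTION)`. A minimal standalone sketch of that bound, reusing the constants from the hunk above:

    #include <stdio.h>
    #include <stddef.h>

    #define MI_SEGMENT_CACHE_MAX      (4)
    #define MI_SEGMENT_CACHE_FRACTION (8)

    // effective cache bound implied by mi_segment_cache_full/mi_segment_cache_push
    static size_t cache_limit(size_t peak_count) {
      size_t by_peak = 1 + (peak_count / MI_SEGMENT_CACHE_FRACTION);  // always allow a 1-element cache
      return (by_peak < MI_SEGMENT_CACHE_MAX ? by_peak : MI_SEGMENT_CACHE_MAX);
    }

    int main(void) {
      size_t peaks[] = { 2, 16, 64 };
      for (size_t i = 0; i < sizeof(peaks)/sizeof(peaks[0]); i++) {
        printf("peak %zu segments -> cache at most %zu\n", peaks[i], cache_limit(peaks[i]));
      }
      return 0;
    }

So a thread whose peak was only a couple of segments keeps at most one cached segment, while a heavily allocating thread tops out at four.
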
 /* -----------------------------------------------------------
    Segment allocation
 ----------------------------------------------------------- */
 
 // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
-static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
+static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
 {
   // calculate needed sizes first
   size_t capacity;
   if (page_kind == MI_PAGE_HUGE) {
-    mi_assert_internal(page_shift==MI_SEGMENT_SHIFT && required > 0);
+    mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0);
     capacity = 1;
   }
   else {
-    mi_assert_internal(required==0);
+    mi_assert_internal(required == 0);
     size_t page_size = (size_t)1 << page_shift;
     capacity = MI_SEGMENT_SIZE / page_size;
     mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0);
@@ -318,46 +269,52 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   }
   size_t info_size;
   size_t pre_size;
-  size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size);
+  size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size);
   mi_assert_internal(segment_size >= required);
   size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
 
-  // Allocate the segment
-  mi_segment_t* segment = NULL;
-
-  // try to get it from our caches
-  segment = mi_segment_cache_find(tld,segment_size);
-  mi_assert_internal(segment == NULL ||
-                     (segment_size==MI_SEGMENT_SIZE && segment_size == segment->segment_size) ||
-                     (segment_size!=MI_SEGMENT_SIZE && segment_size <= segment->segment_size));
-  if (segment != NULL && mi_option_is_enabled(mi_option_secure) && (segment->page_kind != page_kind || segment->segment_size != segment_size)) {
-    _mi_os_unprotect(segment,segment->segment_size);
-  }
-
-  // and otherwise allocate it from the OS
-  if (segment == NULL) {
-    segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, os_tld);
-    if (segment == NULL) return NULL;
-    mi_segments_track_size((long)segment_size,tld);
-  }
-
-  mi_assert_internal((uintptr_t)segment % MI_SEGMENT_SIZE == 0);
-
-  memset(segment, 0, info_size);
-  if (mi_option_is_enabled(mi_option_secure)) {
-    // in secure mode, we set up a protected page in between the segment info
-    // and the page data
+  // Try to get it from our thread local cache first
+  bool protection_still_good = false;
+  mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld);
+  if (segment != NULL) {
+    if (mi_option_is_enabled(mi_option_secure)) {
+      if (segment->page_kind != page_kind) {
+        _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs
+      }
+      else {
+        protection_still_good = true; // otherwise, the guard pages are still in place
+      }
+    }
+  }
+  else {
+    // Allocate the segment from the OS
+    size_t memid;
+    segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &memid, os_tld);
+    if (segment == NULL) return NULL; // failed to allocate
+    segment->memid = memid;
+    mi_segments_track_size((long)segment_size, tld);
+  }
+  mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
+
+  // zero the segment info
+  { size_t memid = segment->memid;
+    memset(segment, 0, info_size);
+    segment->memid = memid;
+  }
+
+  if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) {
+    // in secure mode, we set up a protected page in between the segment info and the page data
     mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0);
-    _mi_os_protect( (uint8_t*)segment + info_size, (pre_size - info_size) );
+    _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) );
     size_t os_page_size = _mi_os_page_size();
     if (mi_option_get(mi_option_secure) <= 1) {
       // and protect the last page too
-      _mi_os_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size );
+      _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size );
     }
     else {
       // protect every page
       for (size_t i = 0; i < capacity; i++) {
-        _mi_os_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size );
+        _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size );
       }
     }
   }
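
Aside (not part of the diff): because the region id (`memid`) now lives inside the segment header, the code above has to save and restore it around the `memset` that zeroes the segment info, otherwise `_mi_mem_free` would later receive a zeroed id. A small standalone illustration of that pattern, using a hypothetical stand-in struct rather than mimalloc's real segment layout:

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>

    typedef struct example_segment_s {
      size_t memid;       // region id needed later by _mi_mem_free
      size_t other_info;  // stand-in for the rest of the segment info that must be zeroed
    } example_segment_t;

    int main(void) {
      example_segment_t seg = { 42, 7 };
      // zero the segment info but keep the region id, mirroring mi_segment_alloc above
      { size_t memid = seg.memid;
        memset(&seg, 0, sizeof(seg));
        seg.memid = memid;
      }
      printf("memid=%zu other_info=%zu\n", seg.memid, seg.other_info);  // prints memid=42 other_info=0
      return 0;
    }
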
@@ -372,7 +329,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   for (uint8_t i = 0; i < segment->capacity; i++) {
     segment->pages[i].segment_idx = i;
   }
-  mi_stat_increase(tld->stats->committed, segment->segment_info_size);
+  mi_stat_increase(tld->stats->page_committed, segment->segment_info_size);
   //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment);
   return segment;
 }
@@ -387,6 +344,7 @@ static size_t mi_page_size(const mi_page_t* page) {
 #endif
 
 static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
+  UNUSED(force);
   //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment);
   mi_assert(segment != NULL);
   if (mi_segment_is_in_free_queue(segment,tld)) {
@@ -403,7 +361,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
   mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment));
   mi_assert(segment->next == NULL);
   mi_assert(segment->prev == NULL);
-  mi_stat_decrease( tld->stats->committed, segment->segment_info_size);
+  mi_stat_decrease( tld->stats->page_committed, segment->segment_info_size);
   segment->thread_id = 0;
 
   // update reset memory statistics
@@ -415,7 +373,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
     }
   }
 
-  if (!force && mi_segment_cache_insert(segment, tld)) {
+  if (!force && mi_segment_cache_push(segment, tld)) {
     // it is put in our cache
   }
   else {
@@ -424,9 +382,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
   }
 }
 
 /* -----------------------------------------------------------
   Free page management inside a segment
 ----------------------------------------------------------- */
@@ -461,17 +416,16 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta
   mi_assert_internal(page->segment_in_use);
   mi_assert_internal(mi_page_all_free(page));
   size_t inuse = page->capacity * page->block_size;
-  mi_stat_decrease( stats->committed, inuse);
+  mi_stat_decrease( stats->page_committed, inuse);
   mi_stat_decrease( stats->pages, 1);
 
   // reset the page memory to reduce memory pressure?
   if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
     size_t psize;
     uint8_t* start = _mi_segment_page_start(segment, page, &psize);
-    mi_stat_increase( stats->reset, psize);  // for stats we assume resetting the full page
     page->is_reset = true;
     if (inuse > 0) {
-      _mi_os_reset(start, inuse);
+      _mi_mem_reset(start, psize, stats); // TODO: just `inuse`?
     }
   }
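
Aside (not part of the diff): taken together, the segment code above now routes all of its OS interaction through the new region layer in `memory.c`. The declarations below are only a sketch inferred from the call sites in the hunks above; the return types and parameter names are assumptions, not taken from the actual headers:

    // signatures inferred from call sites in this diff; may differ from the real declarations
    void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t* id, mi_os_tld_t* tld);  // replaces _mi_os_alloc_aligned
    void  _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);                    // replaces _mi_os_free
    void  _mi_mem_reset(void* p, size_t size, mi_stats_t* stats);                              // replaces _mi_os_reset
    void  _mi_mem_protect(void* addr, size_t size);                                            // replaces _mi_os_protect
    void  _mi_mem_unprotect(void* addr, size_t size);                                          // replaces _mi_os_unprotect
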


@@ -94,12 +94,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->reserved, &src->reserved, 1);
   mi_stat_add(&stats->committed, &src->committed, 1);
   mi_stat_add(&stats->reset, &src->reset, 1);
+  mi_stat_add(&stats->page_committed, &src->page_committed, 1);
   mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
   mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
   mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1);
   mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1);
   mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1);
+  mi_stat_add(&stats->commit_calls, &src->commit_calls, 1);
   mi_stat_add(&stats->threads, &src->threads, 1);
   mi_stat_add(&stats->pages_extended, &src->pages_extended, 1);
@@ -226,9 +228,10 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   _mi_fprintf(out, "malloc requested: ");
   mi_print_amount(stats->malloc.allocated, 1, out);
   _mi_fprintf(out, "\n\n");
-  mi_stat_print(&stats->committed, "committed", 1, out);
   mi_stat_print(&stats->reserved, "reserved", 1, out);
+  mi_stat_print(&stats->committed, "committed", 1, out);
   mi_stat_print(&stats->reset, "reset", -1, out);
+  mi_stat_print(&stats->page_committed, "touched", 1, out);
   mi_stat_print(&stats->segments, "segments", -1, out);
   mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
   mi_stat_print(&stats->pages, "pages", -1, out);
@@ -237,6 +240,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stat_print(&stats->mmap_calls, "mmaps", 0, out);
   mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out);
   mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out);
+  mi_stat_print(&stats->commit_calls, "commits", 0, out);
   mi_stat_print(&stats->threads, "threads", 0, out);
   mi_stat_counter_print(&stats->searches, "searches", out);
 #endif


@@ -139,6 +139,8 @@ int main() {
   CHECK("heap_destroy", test_heap1());
   CHECK("heap_delete", test_heap2());
 
+  //mi_stats_print(NULL);
+
   // ---------------------------------------------------
   // Done
   // ---------------------------------------------------[]