mirror of https://github.com/microsoft/mimalloc.git

improve handling of huge pages in the cache
parent 43568fa422
commit cd7aa0ce83

@@ -32,6 +32,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
 // "os.c"
 bool  _mi_os_reset(void* p, size_t size);
 void* _mi_os_alloc(size_t size, mi_stats_t* stats);
+bool  _mi_os_shrink(void* p, size_t oldsize, size_t newsize);
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
 bool  _mi_os_protect(void* addr, size_t size);
 bool  _mi_os_unprotect(void* addr, size_t size);

@@ -370,9 +370,10 @@ typedef struct mi_segment_queue_s {
 // Segments thread local data
 typedef struct mi_segments_tld_s {
   mi_segment_queue_t small_free;  // queue of segments with free small pages
-  size_t count;                   // current number of segments
-  size_t peak;                    // peak number of segments
+  size_t current_size;            // current size of all segments
+  size_t peak_size;               // peak size of all segments
   size_t cache_count;             // number of segments in the cache
+  size_t cache_size;              // total size of all segments in the cache
   mi_segment_queue_t cache;       // (small) cache of segments for small and large pages (to avoid repeated mmap calls)
   mi_stats_t* stats;              // points to tld stats
 } mi_segments_tld_t;

@@ -90,7 +90,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
-  { { NULL, NULL }, 0, 0, 0, {NULL,NULL}, tld_main_stats },    // segments
+  { { NULL, NULL }, 0, 0, 0, 0, {NULL,NULL}, tld_main_stats }, // segments
   { 0, NULL, NULL, 0, tld_main_stats },                        // os
   { MI_STATS_NULL }                                            // stats
 };

src/os.c

@@ -66,9 +66,9 @@ size_t _mi_os_page_size() {
 }


-static void mi_munmap(void* addr, size_t size)
+static bool mi_munmap(void* addr, size_t size)
 {
-  if (addr == NULL || size == 0) return;
+  if (addr == NULL || size == 0) return true;
   bool err = false;
 #if defined(_WIN32)
   err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);

@@ -78,6 +78,10 @@ static void mi_munmap(void* addr, size_t size)
   if (err) {
 #pragma warning(suppress:4996)
     _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size);
+    return false;
   }
+  else {
+    return true;
+  }
 }

@@ -193,6 +197,26 @@ bool _mi_os_unprotect(void* addr, size_t size) {
   return mi_os_protectx(addr, size, false);
 }

+bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) {
+  // page align conservatively within the range
+  mi_assert_internal(oldsize > newsize && p != NULL);
+  if (oldsize < newsize || p==NULL) return false;
+  if (oldsize == newsize) return true;
+
+  // oldsize and newsize should be page aligned or we cannot shrink precisely
+  void* addr = (uint8_t*)p + newsize;
+  size_t size = 0;
+  void* start = mi_os_page_align_region(addr, oldsize - newsize, &size);
+  if (size==0 || start != addr) return false;
+
+#ifdef _WIN32
+  // we cannot shrink on windows
+  return false;
+#else
+  return mi_munmap( start, size );
+#endif
+}
+
 /* -----------------------------------------------------------
   OS allocation using mmap/munmap
 ----------------------------------------------------------- */

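The helper `mi_os_page_align_region` used by `_mi_os_shrink` above is not part of this diff. The following stand-alone sketch (not mimalloc source; the name, signature, and the power-of-two page-size assumption are illustrative) shows the kind of computation such a helper has to do before the tail of an allocation can be handed back to the OS: round the start of the tail up and its end down to page boundaries, so only whole pages are unmapped.

// Illustrative sketch only; not the code referenced in the diff.
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static void* page_align_region(void* addr, size_t size, size_t page_size, size_t* newsize) {
  uintptr_t start  = (uintptr_t)addr;
  uintptr_t end    = start + size;
  uintptr_t astart = (start + page_size - 1) & ~(uintptr_t)(page_size - 1);  // round start up
  uintptr_t aend   = end & ~(uintptr_t)(page_size - 1);                      // round end down
  *newsize = (aend > astart ? (size_t)(aend - astart) : 0);                  // whole pages only
  return (void*)astart;
}

int main(void) {
  size_t sz = 0;
  void* p = page_align_region((void*)0x10010, 3 * 4096, 4096, &sz);
  printf("aligned start %p, unmappable size %zu\n", p, sz);  // prints 0x11000 and 8192
  return 0;
}

This also explains why `_mi_os_shrink` gives up when `start != addr`: if the cut point is not already page aligned, unmapping from the next page boundary would not shrink to exactly `newsize`.
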
@@ -11,23 +11,19 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memset
 #include <stdio.h>

+#define MI_PAGE_HUGE_ALIGN  (256*1024)
+
 /* -----------------------------------------------------------
   Segment allocation
   We allocate pages inside big OS allocated "segments"
-  (2mb on 64-bit). This is to avoid splitting VMA's on Linux
+  (4mb on 64-bit). This is to avoid splitting VMA's on Linux
   and reduce fragmentation on other OS's. Each thread
   owns its own segments.

   Currently we have:
   - small pages (64kb), 32 in one segment
-  - large pages (2mb), 1 in one segment
-  - huge blocks > RC_LARGE_SIZE_MAX (256kb) are directly allocated by the OS
-
-  It might be good to have "medium" pages too (of, say 256kb)
-  to reduce pressure on the virtual address space on 32-bit systems
-  but for now we choose the simpler implementation since this
-  will only be a problem if multiple threads allocate many
-  differently sized objects between 8kb and 2mb which is not common.
+  - large pages (4mb), 1 in one segment
+  - huge blocks > MI_LARGE_SIZE_MAX (512kb) are directly allocated by the OS

   In any case the memory for a segment is virtual and only
   committed on demand (i.e. we are careful to not touch the memory

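As a quick orientation for the size classes that comment describes, here is a toy classifier, not taken from the diff: 64KiB small pages inside a 4MiB segment, one large page spanning a whole segment, and huge blocks above MI_LARGE_SIZE_MAX (512KiB) allocated directly from the OS. The 8KiB small-object cutoff and every name below are illustrative assumptions only.

// Toy sketch; thresholds and names are assumptions, not mimalloc constants.
#include <stddef.h>
#include <stdio.h>

typedef enum { PAGE_SMALL, PAGE_LARGE, PAGE_HUGE } page_kind_t;

static page_kind_t page_kind_for(size_t block_size) {
  if (block_size <= 8 * 1024)         return PAGE_SMALL;  // served from a 64KiB page
  else if (block_size <= 512 * 1024)  return PAGE_LARGE;  // one page spanning the 4MiB segment
  else                                return PAGE_HUGE;   // segment sized to the block itself
}

int main(void) {
  printf("%d %d %d\n", page_kind_for(1024), page_kind_for(100 * 1024), page_kind_for(1024 * 1024));
  return 0;
}
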
@@ -188,39 +184,60 @@ all again repeatedly. (We tried a one-element cache but that
 proves to be too small for certain workloads).
 ----------------------------------------------------------- */

-static void mi_segments_count_add(long inc, mi_segments_tld_t* tld) {
-  if (inc>=0) mi_stat_increase(tld->stats->segments,inc);
-  else mi_stat_decrease(tld->stats->segments,-inc);
-  mi_assert_internal(inc < 0 ? tld->count >= (size_t)(-inc) : tld->count < (SIZE_MAX - inc));
-  mi_assert_internal(tld->peak >= tld->count);
-  tld->count += inc;
-  if (tld->count > tld->peak) tld->peak = tld->count;
+static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
+  if (segment_size>=0) mi_stat_increase(tld->stats->segments,1);
+  else mi_stat_decrease(tld->stats->segments,1);
+  tld->current_size += segment_size;
+  if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size;
 }

-static size_t mi_segments_peak(mi_segments_tld_t* tld) {
-  return tld->peak;
-}
-
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
-  mi_segments_count_add(-1,tld);
+  mi_segments_track_size(-((long)segment_size),tld);
   _mi_os_free(segment, segment_size,tld->stats);
 }

-// The segment cache is limited to be at most 1/6 of the peak
-// number of segments in use (and no more than 32)
+// The segment cache is limited to be at most 1/8 of the peak size
+// in use (and no more than 32)
 #define MI_SEGMENT_CACHE_MAX (32)
-#define MI_SEGMENT_CACHE_FRACTION (6)
+#define MI_SEGMENT_CACHE_FRACTION (8)


 // Get a segment of at least `required` size.
+// If `required == MI_SEGMENT_SIZE` the `segment_size` will match exactly
 static mi_segment_t* _mi_segment_cache_findx(mi_segments_tld_t* tld, size_t required, bool reverse) {
+  mi_assert_internal(required % _mi_os_page_size() == 0);
   mi_segment_t* segment = (reverse ? tld->cache.last : tld->cache.first);
   while (segment != NULL) {
     if (segment->segment_size >= required) {
       tld->cache_count--;
+      tld->cache_size -= segment->segment_size;
       mi_segment_queue_remove(&tld->cache, segment);
-      // TODO: unmap excess memory if larger than N%
-      return segment;
+      // exact size match?
+      if (segment->segment_size == required) {
+        return segment;
+      }
+      // not more than 25% waste and on a huge page segment? (in that case the segment size does not need to match required)
+      else if (required != MI_SEGMENT_SIZE && segment->segment_size - (segment->segment_size/4) <= required) {
+        return segment;
+      }
+      // try to shrink the memory to match exactly
+      else {
+        if (mi_option_is_enabled(mi_option_secure)) {
+          _mi_os_unprotect(segment, segment->segment_size);
+        }
+        if (_mi_os_shrink(segment, segment->segment_size, required)) {
+          tld->current_size -= segment->segment_size;
+          tld->current_size += required;
+          segment->segment_size = required;
+          return segment;
+        }
+        else {
+          // if that all fails, we give up
+          mi_segment_os_free(segment,segment->segment_size,tld);
+          return NULL;
+        }
+      }
     }
     segment = (reverse ? segment->prev : segment->next);
   }

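The reuse rule in `_mi_segment_cache_findx` above can be read in isolation: hand out a cached segment as-is on an exact size match, or for a huge (non-standard-size) request when no more than 25% of the cached segment would be wasted; otherwise shrink it to fit or free it. The following stand-alone distillation is not mimalloc code; the constant and function names are illustrative, with the 4MiB standard segment size taken from the description earlier in the diff.

// Illustrative distillation of the cache-reuse decision; not mimalloc source.
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define SEGMENT_SIZE (4*1024*1024UL)  // the standard (non-huge) segment size

static bool reuse_as_is(size_t have, size_t need) {
  if (have == need) return true;                                   // exact fit
  if (need != SEGMENT_SIZE && have - have/4 <= need) return true;  // huge request, <= 25% waste
  return false;                                                    // otherwise shrink or free
}

int main(void) {
  printf("%d %d %d\n",
         reuse_as_is(SEGMENT_SIZE, SEGMENT_SIZE),      // 1: exact match
         reuse_as_is(8*1024*1024UL, 7*1024*1024UL),    // 1: ~12.5% waste on a huge segment
         reuse_as_is(8*1024*1024UL, 5*1024*1024UL));   // 0: 37.5% waste, must shrink
  return 0;
}
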
@@ -238,12 +255,12 @@ static mi_segment_t* mi_segment_cache_evict(mi_segments_tld_t* tld) {

 static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
   if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
-      tld->cache_count*MI_SEGMENT_CACHE_FRACTION < mi_segments_peak(tld)) return false;
+      tld->cache_size*MI_SEGMENT_CACHE_FRACTION < tld->peak_size) return false;
   // take the opportunity to reduce the segment cache if it is too large (now)
-  while (tld->cache_count*MI_SEGMENT_CACHE_FRACTION >= mi_segments_peak(tld) + 1) {
+  while (tld->cache_size*MI_SEGMENT_CACHE_FRACTION >= tld->peak_size + 1) {
     mi_segment_t* segment = mi_segment_cache_evict(tld);
     mi_assert_internal(segment != NULL);
-    if (segment != NULL) mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
+    if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
   }
   return true;
 }

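The hunk above switches the cache bound from a count of segments (1/6 of the peak count) to a byte size (1/8 of the peak size), which is what makes variable-sized huge segments cacheable without blowing up memory. A stand-alone restatement of that check, with assumed names (not mimalloc's) so the arithmetic is easy to verify: a thread whose peak segment usage was 64MiB may keep at most 8MiB of segments cached, and never more than 32 segments.

// Illustrative restatement of the 1/8-of-peak policy; names are assumptions.
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define CACHE_MAX      (32)
#define CACHE_FRACTION (8)

static bool cache_is_full(size_t cache_count, size_t cache_size, size_t peak_size) {
  // full unless both bounds hold: fewer than 32 segments and less than 1/8 of peak size
  return !(cache_count < CACHE_MAX && cache_size * CACHE_FRACTION < peak_size);
}

int main(void) {
  size_t MiB = 1024 * 1024;
  printf("%d\n", cache_is_full(2, 4 * MiB, 64 * MiB));   // 0: 4MiB cached, bound is 8MiB
  printf("%d\n", cache_is_full(2, 12 * MiB, 64 * MiB));  // 1: over the bound, evict
  return 0;
}
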
@@ -263,6 +280,7 @@ static bool mi_segment_cache_insert(mi_segment_t* segment, mi_segments_tld_t* tl
   }
   mi_segment_queue_insert_before( &tld->cache, seg, segment );
   tld->cache_count++;
+  tld->cache_size += segment->segment_size;
   return true;
 }

@@ -272,7 +290,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
   while ((segment = mi_segment_cache_find(tld,0)) != NULL) {
     mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
   }
-  mi_assert_internal(tld->cache_count == 0);
+  mi_assert_internal(tld->cache_count == 0 && tld->cache_size == 0);
   mi_assert_internal(mi_segment_queue_is_empty(&tld->cache));
 }

@@ -301,7 +319,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   size_t info_size;
   size_t pre_size;
   size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size);
-
+  mi_assert_internal(segment_size >= required);
   size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);

   // Allocate the segment

@@ -309,6 +327,9 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind

   // try to get it from our caches
   segment = mi_segment_cache_find(tld,segment_size);
+  mi_assert_internal(segment == NULL ||
+                     (segment_size==MI_SEGMENT_SIZE && segment_size == segment->segment_size) ||
+                     (segment_size!=MI_SEGMENT_SIZE && segment_size >= segment->segment_size));
   if (segment != NULL && mi_option_is_enabled(mi_option_secure) && (segment->page_kind != page_kind || segment->segment_size != segment_size)) {
     _mi_os_unprotect(segment,segment->segment_size);
   }

@@ -317,7 +338,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   if (segment == NULL) {
     segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, os_tld);
     if (segment == NULL) return NULL;
-    mi_segments_count_add(1,tld);
+    mi_segments_track_size((long)segment_size,tld);
   }

   mi_assert_internal((uintptr_t)segment % MI_SEGMENT_SIZE == 0);

@@ -560,7 +581,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
   mi_atomic_decrement(&abandoned_count);
   segment->thread_id = _mi_thread_id();
   segment->abandoned_next = NULL;
-  mi_segments_count_add(1,tld);
+  mi_segments_track_size((long)segment->segment_size,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
   mi_stat_decrease(tld->stats->segments_abandoned,1);

@@ -645,7 +666,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_

 static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
 {
-  mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
+  mi_segment_t* segment = mi_segment_alloc(_mi_align_up(size, MI_PAGE_HUGE_ALIGN), MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
   if (segment == NULL) return NULL;
   mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
   segment->used = 1;

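The hunk above rounds huge requests up to MI_PAGE_HUGE_ALIGN (256KiB, defined earlier in this commit), so cached huge segments come in a small number of sizes and are more likely to be reusable under the 25%-waste rule. `_mi_align_up` itself is not shown in the diff; the sketch below is a generic power-of-two round-up with assumed names, just to make the arithmetic concrete.

// Illustrative round-up helper; not the mimalloc implementation.
#include <stddef.h>
#include <stdio.h>

#define PAGE_HUGE_ALIGN (256*1024UL)  // mirrors MI_PAGE_HUGE_ALIGN above

static size_t align_up(size_t n, size_t align) {
  // assumes align is a power of two
  return (n + align - 1) & ~(align - 1);
}

int main(void) {
  // e.g. a 1,000,000 byte huge request becomes 1,048,576 bytes (4 * 256KiB)
  printf("%zu\n", align_up(1000000, PAGE_HUGE_ALIGN));
  return 0;
}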