mirror of
https://github.com/microsoft/mimalloc.git
synced 2024-12-28 22:05:40 +08:00
enable more reset delay slots
This commit is contained in:
parent
5e6754f3f7
commit
a0958b2da6
@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
|
||||
// ------------------------------------------------------
|
||||
typedef int64_t mi_msecs_t;
|
||||
|
||||
#define MI_RESET_DELAY_SLOTS (256)
|
||||
|
||||
typedef struct mi_delay_slot_s {
|
||||
mi_msecs_t expire;
|
||||
uint8_t* addr;
|
||||
size_t size;
|
||||
} mi_delay_slot_t;
|
||||
|
||||
#define MI_RESET_DELAY_SLOTS (128)
|
||||
typedef struct mi_delay_slots_s {
|
||||
size_t capacity; // always `MI_RESET_DELAY_SLOTS`
|
||||
size_t count; // current slots used (`<= capacity`)
|
||||
mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS];
|
||||
} mi_delay_slots_t;
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Thread Local data
|
||||
@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s {
|
||||
// OS thread local data
|
||||
typedef struct mi_os_tld_s {
|
||||
size_t region_idx; // start point for next allocation
|
||||
mi_stats_t* stats; // points to tld stats
|
||||
mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS];
|
||||
mi_delay_slots_t* reset_delay; // delay slots for OS reset operations
|
||||
mi_stats_t* stats; // points to tld stats
|
||||
} mi_os_tld_t;
|
||||
|
||||
// Segments thread local data
|
||||
|
@ -100,8 +100,8 @@ static mi_tld_t tld_main = {
|
||||
0, false,
|
||||
&_mi_heap_main,
|
||||
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
|
||||
{ 0, tld_main_stats, {{0,NULL,0}} }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
{ 0, NULL, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
|
||||
mi_heap_t _mi_heap_main = {
|
||||
@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
|
||||
typedef struct mi_thread_data_s {
|
||||
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
||||
mi_tld_t tld;
|
||||
mi_delay_slots_t reset_delay;
|
||||
} mi_thread_data_t;
|
||||
|
||||
// Initialize the thread local default heap, called from `mi_thread_init`
|
||||
@ -211,6 +212,7 @@ static bool _mi_heap_init(void) {
|
||||
}
|
||||
mi_tld_t* tld = &td->tld;
|
||||
mi_heap_t* heap = &td->heap;
|
||||
mi_delay_slots_t* reset_delay = &td->reset_delay;
|
||||
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
|
||||
heap->thread_id = _mi_thread_id();
|
||||
heap->random = _mi_random_init(heap->thread_id);
|
||||
@ -221,6 +223,9 @@ static bool _mi_heap_init(void) {
|
||||
tld->segments.stats = &tld->stats;
|
||||
tld->segments.os = &tld->os;
|
||||
tld->os.stats = &tld->stats;
|
||||
tld->os.reset_delay = reset_delay;
|
||||
memset(reset_delay, 0, sizeof(*reset_delay));
|
||||
reset_delay->capacity = MI_RESET_DELAY_SLOTS;
|
||||
_mi_heap_default = heap;
|
||||
}
|
||||
return false;
|
||||
|
70
src/memory.c
70
src/memory.c
@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
|
||||
|
||||
// local
|
||||
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size);
|
||||
static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size);
|
||||
|
||||
|
||||
// Constants
|
||||
@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
Try to claim blocks in suitable regions
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) {
|
||||
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
|
||||
// initialized at all?
|
||||
mi_region_info_t info = mi_atomic_read_relaxed(®ion->info);
|
||||
if (info==0) return false;
|
||||
@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
|
||||
}
|
||||
|
||||
|
||||
static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
// try all regions for a free slot
|
||||
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
|
||||
@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me
|
||||
for (size_t visited = 0; visited < count; visited++, idx++) {
|
||||
if (idx >= count) idx = 0; // wrap around
|
||||
mem_region_t* r = ®ions[idx];
|
||||
if (mi_region_is_suitable(r, numa_node, commit, allow_large)) {
|
||||
if (mi_region_is_suitable(r, numa_node, allow_large)) {
|
||||
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
|
||||
tld->region_idx = idx; // remember the last found position
|
||||
*region = r;
|
||||
@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
|
||||
mem_region_t* region;
|
||||
mi_bitmap_index_t bit_idx;
|
||||
// first try to claim in existing regions
|
||||
if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) {
|
||||
if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) {
|
||||
// otherwise try to allocate a fresh region
|
||||
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) {
|
||||
// out of regions or memory
|
||||
@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
|
||||
if (p==NULL) return;
|
||||
if (size==0) return;
|
||||
|
||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
||||
mi_delay_remove(tld->reset_delay, p, size);
|
||||
|
||||
size_t arena_memid = 0;
|
||||
mi_bitmap_index_t bit_idx;
|
||||
@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
|
||||
bool is_eager_committed;
|
||||
void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed);
|
||||
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE);
|
||||
mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
|
||||
}
|
||||
// and release
|
||||
@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
|
||||
|
||||
typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg);
|
||||
|
||||
static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
|
||||
static void mi_delay_insert(mi_delay_slots_t* ds,
|
||||
mi_msecs_t delay, uint8_t* addr, size_t size,
|
||||
mi_delay_resolve_fun* resolve, void* arg)
|
||||
{
|
||||
if (delay==0) {
|
||||
if (ds == NULL || delay==0 || addr==NULL || size==0) {
|
||||
resolve(addr, size, arg);
|
||||
return;
|
||||
}
|
||||
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
mi_delay_slot_t* oldest = slots;
|
||||
mi_delay_slot_t* oldest = &ds->slots[0];
|
||||
// walk through all slots, resolving expired ones.
|
||||
// remember the oldest slot to insert the new entry in.
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
mi_delay_slot_t* slot = &slots[i];
|
||||
size_t newcount = 0;
|
||||
for (size_t i = 0; i < ds->count; i++) {
|
||||
mi_delay_slot_t* slot = &ds->slots[i];
|
||||
|
||||
if (slot->expire == 0) {
|
||||
// empty slot
|
||||
@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
|
||||
}
|
||||
else if (oldest->expire > slot->expire) {
|
||||
oldest = slot;
|
||||
newcount = i+1;
|
||||
}
|
||||
else {
|
||||
newcount = i+1;
|
||||
}
|
||||
}
|
||||
ds->count = newcount;
|
||||
if (delay>0) {
|
||||
// not yet registered, use the oldest slot
|
||||
if (oldest->expire > 0) {
|
||||
// not yet registered, use the oldest slot (or a new one if there is space)
|
||||
if (ds->count < ds->capacity) {
|
||||
oldest = &ds->slots[ds->count];
|
||||
ds->count++;
|
||||
}
|
||||
else if (oldest->expire > 0) {
|
||||
resolve(oldest->addr, oldest->size, arg); // evict if not empty
|
||||
}
|
||||
mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count);
|
||||
oldest->expire = now + delay;
|
||||
oldest->addr = addr;
|
||||
oldest->size = size;
|
||||
}
|
||||
}
|
||||
|
||||
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size)
|
||||
static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size)
|
||||
{
|
||||
if (ds == NULL || p==NULL || size==0) return false;
|
||||
|
||||
uint8_t* addr = (uint8_t*)p;
|
||||
bool done = false;
|
||||
// walk through all slots
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
mi_delay_slot_t* slot = &slots[i];
|
||||
size_t newcount = 0;
|
||||
|
||||
// walk through all valid slots
|
||||
for (size_t i = 0; i < ds->count; i++) {
|
||||
mi_delay_slot_t* slot = &ds->slots[i];
|
||||
if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
|
||||
// earlier slot encompasses the area; remove it
|
||||
slot->expire = 0;
|
||||
@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_
|
||||
slot->expire = 0;
|
||||
}
|
||||
else if ((addr <= slot->addr && addr + size > slot->addr) ||
|
||||
(addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
|
||||
// partial overlap, remove slot
|
||||
mi_assert_internal(false);
|
||||
(addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
|
||||
// partial overlap
|
||||
// can happen with a large object spanning onto some partial end block
|
||||
// mi_assert_internal(false);
|
||||
slot->expire = 0;
|
||||
}
|
||||
else {
|
||||
newcount = i + 1;
|
||||
}
|
||||
}
|
||||
ds->count = newcount;
|
||||
return done;
|
||||
}
|
||||
|
||||
@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) {
|
||||
}
|
||||
|
||||
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
|
||||
mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay),
|
||||
mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay),
|
||||
(uint8_t*)p, size, &mi_resolve_reset, tld);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||
if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) {
|
||||
if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) {
|
||||
return _mi_os_unreset(p, size, is_zero, tld->stats);
|
||||
}
|
||||
return true;
|
||||
@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
||||
mi_delay_remove(tld->reset_delay,p, size);
|
||||
return _mi_os_commit(p, size, is_zero, tld->stats);
|
||||
}
|
||||
|
||||
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
|
||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
||||
mi_delay_remove(tld->reset_delay, p, size);
|
||||
return _mi_os_decommit(p, size, tld->stats);
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
|
||||
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
|
||||
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
|
||||
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
|
||||
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
|
||||
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||
{ 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
|
||||
|
@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg
|
||||
_mi_stat_decrease(&tld->stats->pages, 1);
|
||||
|
||||
// reset the page memory to reduce memory pressure?
|
||||
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
|
||||
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset))
|
||||
// && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets
|
||||
{
|
||||
size_t psize;
|
||||
uint8_t* start = _mi_page_start(segment, page, &psize);
|
||||
page->is_reset = true;
|
||||
|
Loading…
x
Reference in New Issue
Block a user