mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-01-14 16:47:59 +08:00
optimize heap walks, by Sam Gross, upstream of python/cpython#114133
This commit is contained in:
parent
855e3b2549
commit
f7fe5bf20e
98
src/heap.c
98
src/heap.c
@ -528,46 +528,83 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
|
||||
}
|
||||
|
||||
|
||||
// Precompute the constants used for repeated division by `divisor` with a
// multiply-and-shift instead of a hardware divide.
//   divisor: the fixed divisor; must be in the range [1, UINT32_MAX].
//   magic:   out; fixed-point multiplier to pass to `mi_fast_divide`.
//   shift:   out; final right-shift to pass to `mi_fast_divide`
//            (the number of significant bits in `divisor - 1`).
static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
  // The bit length must be taken relative to the word that `mi_clz` counts in;
  // a hard-coded 64 would make `shift` 32 too large on 32-bit targets (and the
  // `1 << shift` below undefined).
  // NOTE(review): assumes `mi_clz` counts leading zeros of a uintptr_t-sized
  // word and yields the full width for 0 -- confirm against its definition.
  const size_t word_bits = 8 * sizeof(uintptr_t);
  *shift = word_bits - mi_clz(divisor - 1);
  // magic = floor(2^32 * (2^shift - divisor) / divisor) + 1
  *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}
|
||||
|
||||
// Divide `n` by the divisor that `magic` and `shift` were derived from (see
// `mi_get_fast_divisor`), using one multiplication and two shifts.
// `n` must not exceed UINT32_MAX.
static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
  mi_assert_internal(n <= UINT32_MAX);
  const uint64_t dividend = (uint64_t)n;
  // keep only the upper part of the fixed-point product
  const uint64_t high = (dividend * magic) >> 32;
  return (size_t)((high + dividend) >> shift);
}
|
||||
|
||||
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
|
||||
mi_assert(area != NULL);
|
||||
if (area==NULL) return true;
|
||||
mi_assert(page != NULL);
|
||||
if (page == NULL) return true;
|
||||
|
||||
_mi_page_free_collect(page,true);
|
||||
_mi_page_free_collect(page,true); // collect both thread_delayed and local_free
|
||||
mi_assert_internal(page->local_free == NULL);
|
||||
if (page->used == 0) return true;
|
||||
|
||||
const size_t bsize = mi_page_block_size(page);
|
||||
const size_t ubsize = mi_page_usable_block_size(page); // without padding
|
||||
size_t psize;
|
||||
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
|
||||
size_t psize;
|
||||
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
|
||||
mi_heap_t* const heap = mi_page_heap(page);
|
||||
const size_t bsize = mi_page_block_size(page);
|
||||
const size_t ubsize = mi_page_usable_block_size(page); // without padding
|
||||
|
||||
// optimize page with one block
|
||||
if (page->capacity == 1) {
|
||||
// optimize page with one block
|
||||
mi_assert_internal(page->used == 1 && page->free == NULL);
|
||||
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
|
||||
}
|
||||
mi_assert(bsize <= UINT32_MAX);
|
||||
|
||||
// optimize full pages
|
||||
if (page->used == page->capacity) {
|
||||
uint8_t* block = pstart;
|
||||
for (size_t i = 0; i < page->capacity; i++) {
|
||||
if (!visitor(heap, area, block, ubsize, arg)) return false;
|
||||
block += bsize;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// create a bitmap of free blocks.
|
||||
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
|
||||
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
|
||||
memset(free_map, 0, sizeof(free_map));
|
||||
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
|
||||
const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
|
||||
memset(free_map, 0, bmapsize * sizeof(intptr_t));
|
||||
if (page->capacity % MI_INTPTR_BITS != 0) {
|
||||
// mark left-over bits at the end as free
|
||||
size_t shift = (page->capacity % MI_INTPTR_BITS);
|
||||
uintptr_t mask = (UINTPTR_MAX << shift);
|
||||
free_map[bmapsize - 1] = mask;
|
||||
}
|
||||
|
||||
// fast repeated division by the block size
|
||||
uint64_t magic;
|
||||
size_t shift;
|
||||
mi_get_fast_divisor(bsize, &magic, &shift);
|
||||
|
||||
#if MI_DEBUG>1
|
||||
size_t free_count = 0;
|
||||
#endif
|
||||
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
|
||||
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
|
||||
#if MI_DEBUG>1
|
||||
free_count++;
|
||||
#endif
|
||||
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
|
||||
size_t offset = (uint8_t*)block - pstart;
|
||||
mi_assert_internal(offset % bsize == 0);
|
||||
size_t blockidx = offset / bsize; // Todo: avoid division?
|
||||
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
|
||||
size_t bitidx = (blockidx / sizeof(uintptr_t));
|
||||
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
|
||||
mi_assert_internal(offset <= UINT32_MAX);
|
||||
size_t blockidx = mi_fast_divide(offset, magic, shift);
|
||||
mi_assert_internal(blockidx == offset / bsize);
|
||||
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
|
||||
size_t bitidx = (blockidx / MI_INTPTR_BITS);
|
||||
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
|
||||
free_map[bitidx] |= ((uintptr_t)1 << bit);
|
||||
}
|
||||
mi_assert_internal(page->capacity == (free_count + page->used));
|
||||
@ -576,19 +613,30 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
|
||||
#if MI_DEBUG>1
|
||||
size_t used_count = 0;
|
||||
#endif
|
||||
for (size_t i = 0; i < page->capacity; i++) {
|
||||
size_t bitidx = (i / sizeof(uintptr_t));
|
||||
size_t bit = i - (bitidx * sizeof(uintptr_t));
|
||||
uintptr_t m = free_map[bitidx];
|
||||
if (bit == 0 && m == UINTPTR_MAX) {
|
||||
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
|
||||
uint8_t* block = pstart;
|
||||
for (size_t i = 0; i < bmapsize; i++) {
|
||||
if (free_map[i] == 0) {
|
||||
// every block is in use
|
||||
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
|
||||
#if MI_DEBUG>1
|
||||
used_count++;
|
||||
#endif
|
||||
if (!visitor(heap, area, block, ubsize, arg)) return false;
|
||||
block += bsize;
|
||||
}
|
||||
}
|
||||
else if ((m & ((uintptr_t)1 << bit)) == 0) {
|
||||
#if MI_DEBUG>1
|
||||
used_count++;
|
||||
#endif
|
||||
uint8_t* block = pstart + (i * bsize);
|
||||
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
|
||||
else {
|
||||
// visit the used blocks in the mask
|
||||
uintptr_t m = ~free_map[i];
|
||||
while (m != 0) {
|
||||
#if MI_DEBUG>1
|
||||
used_count++;
|
||||
#endif
|
||||
size_t bitidx = mi_ctz(m);
|
||||
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
|
||||
m &= m - 1; // clear least significant bit
|
||||
}
|
||||
block += bsize * MI_INTPTR_BITS;
|
||||
}
|
||||
}
|
||||
mi_assert_internal(page->used == used_count);
|
||||
|
@ -129,6 +129,16 @@ static void free_items(void* p) {
|
||||
custom_free(p);
|
||||
}
|
||||
|
||||
/*
|
||||
static bool visit_blocks(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
|
||||
(void)(heap); (void)(area);
|
||||
size_t* total = (size_t*)arg;
|
||||
if (block != NULL) {
|
||||
*total += block_size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
static void stress(intptr_t tid) {
|
||||
//bench_start_thread();
|
||||
@ -173,6 +183,10 @@ static void stress(intptr_t tid) {
|
||||
data[data_idx] = q;
|
||||
}
|
||||
}
|
||||
// walk the heap
|
||||
// size_t total = 0;
|
||||
// mi_heap_visit_blocks(mi_heap_get_default(), true, visit_blocks, &total);
|
||||
|
||||
// free everything that is left
|
||||
for (size_t i = 0; i < retain_top; i++) {
|
||||
free_items(retained[i]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user