Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-01-14 16:47:59 +08:00)
improved arena cache

parent 1b158d8e80
commit 37b43e4cea

src/arena.c (258 changed lines)
@@ -127,186 +127,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
 /* -----------------------------------------------------------
   Arena cache
 ----------------------------------------------------------- */
-#define MI_CACHE_MAX      (256)
-#define MI_MAX_NUMA       (4)
+#define MI_CACHE_FIELDS   (8)
+#define MI_CACHE_MAX      (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)   // 512 on 64-bit
 
-#define MI_SLOT_IN_USE    ((void*)1)
 
 typedef struct mi_cache_slot_s {
-  volatile _Atomic(void*) p;
-  volatile size_t         memid;
-  volatile mi_msecs_t     expire;
-  volatile bool           is_committed;
-  volatile bool           is_large;
+  void*      p;
+  size_t     memid;
+  mi_msecs_t expire;
+  bool       is_committed;   // TODO: use bit from p to reduce size?
 } mi_cache_slot_t;
 
-static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX];      // = 0
-static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA];  // = 0
+static mi_cache_slot_t cache[MI_CACHE_MAX];    // = 0
+#define BITS_SET()  (UINTPTR_MAX)
+static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) };   // zero bit = available!
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) };
+static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];   // zero bit = free
 
-typedef union mi_cache_count_u {
-  uintptr_t value;
-  struct {
-    int16_t count;    // at most `count` elements in the cache
-#if MI_INTPTR_SIZE > 4
-    uint32_t epoch;   // each push/pop increase this
-#else
-    uint16_t epoch;
-#endif
-  } x;
-} mi_cache_count_t;
 
 static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
   // only segment blocks
   if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL;
 
-  // set numa range
-  int numa_min = numa_node;
-  int numa_max = numa_min;
-  if (numa_node < 0) {
-    numa_min = 0;
-    numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA;
-  }
-  else {
-    if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
-    numa_min = numa_max = numa_node;
-  }
-
-  // find a free slot
-  mi_cache_slot_t* slot;
-  for (int n = numa_min; n <= numa_max; n++) {
-    mi_cache_count_t top = { 0 };
-    top.value = mi_atomic_read_relaxed(&cache_count[n]);
-    int16_t count = top.x.count;
-    for (int16_t i = count - 1; i >= 0; i--) {
-      slot = &cache[n][i];
-      void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p);
-      if (p == NULL) {
-        if (count > 0) { count = i; }
-      }
-      else if (p > MI_SLOT_IN_USE) {  // not NULL or 1
-        if (count >= 0 && count < top.x.count) {  // new lower bound?
-          mi_cache_count_t newtop = { 0 };
-          newtop.x.count = count;
-          newtop.x.epoch = top.x.epoch + 1;
-          mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value);  // it's fine to not succeed; just causes longer scans
-        }
-        count = -1;  // don't try to set lower bound again
-        if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
-          // claimed
-          if (!*large && slot->is_large) {
-            // back out again
-            mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p);  // make it available again
-          }
-          else {
-            // keep it
-            *memid = slot->memid;
-            *large = slot->is_large;
-            *is_zero = false;
-            bool committed = slot->is_committed;
-            mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL);  // set it free
-            if (*commit && !committed) {
-              bool commit_zero;
-              _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats);
-              *commit = true;
-            }
-            else {
-              *commit = committed;
-            }
-            return p;
-          }
-        }
-      }
-    }
-  }
-  return NULL;
+  // numa node determines start field
+  size_t start_field = 0;
+  if (numa_node > 0) {
+    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
+    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
+  }
+
+  // find an available slot
+  mi_bitmap_index_t bitidx;
+  bool claimed = false;
+  if (*large) {  // large allowed?
+    claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+    if (claimed) *large = true;
+  }
+  if (!claimed) {
+    claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+    if (claimed) *large = false;
+  }
+
+  if (!claimed) return NULL;
+
+  // found a slot
+  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
+  void* p = slot->p;
+  *memid = slot->memid;
+  *is_zero = false;
+  bool committed = slot->is_committed;
+  slot->p = NULL;
+  slot->expire = 0;
+  if (*commit && !committed) {
+    bool commit_zero;
+    _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats);
+    *commit = true;
+  }
+  else {
+    *commit = committed;
+  }
+
+  // mark the slot as free again
+  mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
+  mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
+  return p;
 }
 
 static void mi_cache_purge(mi_os_tld_t* tld) {
-  // TODO: for each numa node instead?
-  // if (mi_option_get(mi_option_arena_reset_delay) == 0) return;
+  UNUSED(tld);
 
   mi_msecs_t now = _mi_clock_now();
-  int numa_node = _mi_os_numa_node(NULL);
-  if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
-  mi_cache_slot_t* slot;
-  int purged = 0;
-  mi_cache_count_t top = { 0 };
-  top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
-  for (int i = 0; i < top.x.count; i++) {
-    slot = &cache[numa_node][i];
-    void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p);
-    if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) {
-      mi_msecs_t expire = slot->expire;
-      if (expire != 0 && now >= expire) {
-        // expired, try to claim it
-        if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
-          // claimed! test again
-          if (slot->is_committed && !slot->is_large && now >= slot->expire) {
-            _mi_abandoned_await_readers();  // wait until safe to decommit
-            _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats);
-            slot->is_committed = false;
-          }
-          // and unclaim again
-          mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p);
-          purged++;
-          if (purged >= 4) break;  // limit to at most 4 decommits per push
-        }
-      }
-    }
-  }
+  size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX);  // random start
+  size_t purged = 0;
+  for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++, idx++) {  // probe just N slots
+    if (idx >= MI_CACHE_MAX) idx = 0;  // wrap
+    mi_cache_slot_t* slot = &cache[idx];
+    if (slot->expire != 0 && now >= slot->expire) {  // racy read
+      // seems expired, first claim it from available
+      purged++;
+      mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
+      if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
+        // was available, we claimed it
+        if (slot->expire != 0 && now >= slot->expire) {  // safe read
+          // still expired, decommit it
+          slot->expire = 0;
+          mi_assert_internal(slot->is_committed && !slot->is_large);
+          _mi_abandoned_await_readers();  // wait until safe to decommit
+          _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats);
+          slot->is_committed = false;
+        }
+        mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
+      }
+      if (purged > 4) break;  // bound to no more than 4 purge tries per push
+    }
+  }
 }
 
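The rewritten purge no longer walks a per-NUMA count; it probes only MI_CACHE_FIELDS slots starting from a pseudo-random index, wraps at MI_CACHE_MAX, and gives up after a few expired entries so each push pays a bounded cost. A minimal standalone sketch of that bounded wrap-around probe (toy names such as SLOT_COUNT and purge_some, and the simplified expiry handling, are assumptions for illustration, not mimalloc source):

// Illustrative sketch: bounded wrap-around probing over a fixed slot array,
// in the spirit of the new mi_cache_purge. Expiry handling is simplified.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define SLOT_COUNT 512   // stands in for MI_CACHE_MAX
#define PROBES     8     // stands in for MI_CACHE_FIELDS probes per call
#define MAX_PURGE  4     // stop after a handful of expired slots

typedef struct { int64_t expire; } slot_t;
static slot_t slots[SLOT_COUNT];

static size_t purge_some(int64_t now, size_t start) {
  size_t purged = 0, idx = start;
  for (size_t visited = 0; visited < PROBES; visited++, idx++) {
    if (idx >= SLOT_COUNT) idx = 0;                      // wrap around
    if (slots[idx].expire != 0 && now >= slots[idx].expire) {
      slots[idx].expire = 0;                             // "decommit" stand-in
      if (++purged > MAX_PURGE) break;                   // bound work per call
    }
  }
  return purged;
}

int main(void) {
  slots[3].expire = 10; slots[5].expire = 10;
  printf("purged %zu slots\n", purge_some(/*now=*/20, /*start=*/2));
  return 0;
}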
 static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld)
 {
-  mi_cache_purge(tld);
 
   // only for segment blocks
   if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
 
-  // try to add it to the cache
+  // numa node determines start field
   int numa_node = _mi_os_numa_node(NULL);
-  if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
-  mi_cache_slot_t* slot;
-  mi_cache_count_t top = { 0 };
-  top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
-  for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) {
-    slot = &cache[numa_node][i];
-    void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p);
-    if (p == NULL) {  // free slot
-      if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) {
-        // claimed!
-        // first try to increase the top bound
-        mi_cache_count_t newtop = { 0 };
-        newtop.x.count = i+1;
-        newtop.x.epoch = top.x.epoch + 1;
-        while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) {
-          top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
-          if (top.x.count > newtop.x.count) break;  // another push max'd it
-          newtop.x.epoch = top.x.epoch + 1;         // otherwise try again
-        }
-        // set the slot
-        slot->expire = 0;
-        slot->is_committed = is_committed;
-        slot->memid = memid;
-        slot->is_large = is_large;
-        if (is_committed) {
-          long delay = mi_option_get(mi_option_arena_reset_delay);
-          if (delay == 0 && !is_large) {
-            _mi_abandoned_await_readers();  // wait until safe to decommit
-            _mi_os_decommit(start, size, tld->stats);
-            slot->is_committed = false;
-          }
-          else {
-            slot->expire = _mi_clock_now() + delay;
-          }
-        }
-        mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start);  // and make it available;
-        return true;
-      }
-    }
-  }
-  return false;
+  size_t start_field = 0;
+  if (numa_node > 0) {
+    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
+    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
+  }
+
+  // purge expired entries
+  mi_cache_purge(tld);
+
+  // find an available slot
+  mi_bitmap_index_t bitidx;
+  bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+  if (!claimed) return false;
+
+  mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx));
+  mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
+
+  // set the slot
+  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
+  slot->p = start;
+  slot->memid = memid;
+  slot->expire = 0;
+  slot->is_committed = is_committed;
+  if (is_committed && !is_large) {
+    long delay = mi_option_get(mi_option_arena_reset_delay);
+    if (delay == 0) {
+      _mi_abandoned_await_readers();  // wait until safe to decommit
+      _mi_os_decommit(start, size, tld->stats);
+      slot->is_committed = false;
+    }
+    else {
+      slot->expire = _mi_clock_now() + delay;
+    }
+  }
+
+  // make it available
+  mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx);
+  return true;
 }
 
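In both mi_cache_pop and mi_cache_push the NUMA node now only chooses which bitmap field the search starts from, and the search wraps around, so a node is biased toward "its" fields without partitioning the cache. A small sketch of that mapping, with an example node count; the helper name and numa_node_count parameter are illustrative stand-ins, not the mimalloc API:

// Illustrative sketch: mapping a NUMA node to a starting bitmap field.
// numa_node_count is a stand-in for _mi_os_numa_node_count().
#include <stddef.h>
#include <stdio.h>

#define MI_CACHE_FIELDS 8

static size_t start_field_for(int numa_node, size_t numa_node_count) {
  size_t start_field = 0;
  if (numa_node > 0 && numa_node_count > 0) {
    start_field = (MI_CACHE_FIELDS / numa_node_count) * (size_t)numa_node;
    if (start_field >= MI_CACHE_FIELDS) start_field = 0;  // clamp, as in the diff
  }
  return start_field;
}

int main(void) {
  // With 2 NUMA nodes and 8 fields: node 0 starts at field 0, node 1 at field 4.
  printf("node 0 -> %zu, node 1 -> %zu\n",
         start_field_for(0, 2), start_field_for(1, 2));
  return 0;
}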
@@ -42,6 +42,11 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx
   return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
 }
 
+// Create a bit index.
+static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) {
+  return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS);
+}
+
 // Get the field index from a bit index.
 static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
   return (bitmap_idx / MI_BITMAP_FIELD_BITS);
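The new mi_bitmap_index_create_from_bit helper simply splits a flat slot index into a field index and a bit offset within that field; with 64-bit fields, flat bit 130 lands in field 2 at offset 2. A quick check of that arithmetic (the 64-bit field width is an assumption for the example):

// Illustrative check of the field/bit split, assuming 64-bit bitmap fields.
#include <stddef.h>
#include <stdio.h>

#define FIELD_BITS 64  // stand-in for MI_BITMAP_FIELD_BITS on 64-bit targets

int main(void) {
  size_t full_bitidx = 130;
  size_t field = full_bitidx / FIELD_BITS;  // -> 2
  size_t bit   = full_bitidx % FIELD_BITS;  // -> 2
  printf("bit %zu -> field %zu, offset %zu\n", full_bitidx, field, bit);
  return 0;
}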
@@ -177,11 +182,13 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
   return false;
 }
 
 // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
 // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
-static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
-  for (size_t idx = 0; idx < bitmap_fields; idx++) {
+static inline bool mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
+  size_t idx = start_field_idx;
+  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
+    if (idx >= bitmap_fields) idx = 0; // wrap
     if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
       return true;
     }
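mi_bitmap_try_find_from_claim generalizes the old search by starting at a caller-chosen field and wrapping around, which is what lets the NUMA start field above bias placement while still letting any thread claim any free slot. A self-contained toy version of that wrap-around zero-bit claim, using C11 atomics and claiming a single bit per call (a simplified sketch, not the mimalloc implementation):

// Toy wrap-around claim of one 0-bit across bitmap fields (C11 atomics).
// Zero = free, one = claimed, mirroring the "find a 0 and set it to 1" contract.
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FIELDS      4
#define FIELD_BITS  (sizeof(uintptr_t)*8)

static _Atomic uintptr_t bitmap[FIELDS];

static bool try_find_from_claim(size_t start_field, size_t* out_idx) {
  size_t idx = start_field;
  for (size_t visited = 0; visited < FIELDS; visited++, idx++) {
    if (idx >= FIELDS) idx = 0;                        // wrap around
    uintptr_t field = atomic_load_explicit(&bitmap[idx], memory_order_relaxed);
    while (field != UINTPTR_MAX) {                     // some 0-bit is left
      size_t bit = 0;
      while ((field >> bit) & 1) bit++;                // lowest 0-bit
      uintptr_t desired = field | ((uintptr_t)1 << bit);
      if (atomic_compare_exchange_weak(&bitmap[idx], &field, desired)) {
        *out_idx = idx*FIELD_BITS + bit;               // flat bit index
        return true;
      }
      // CAS failed: `field` now holds the current value, retry this field
    }
  }
  return false;
}

int main(void) {
  size_t idx;
  if (try_find_from_claim(2, &idx)) printf("claimed bit %zu\n", idx);
  return 0;
}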
@@ -189,6 +196,13 @@ static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fi
   return false;
 }
 
+// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
+// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
+static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) {
+  return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx);
+}
+
 // Set `count` bits at `bitmap_idx` to 0 atomically
 // Returns `true` if all `count` bits were 1 previously.
 static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
@@ -184,7 +184,7 @@ int main() {
   // double_free1();
   // double_free2();
   // corrupt_free();
-  block_overflow1();
+  //block_overflow1();
 
   void* p1 = malloc(78);
   void* p2 = malloc(24);