diff --git a/src/arena.c b/src/arena.c index 55f09a60..c12e60a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,186 +127,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (4) - -#define MI_SLOT_IN_USE ((void*)1) +#define MI_CACHE_FIELDS (8) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit typedef struct mi_cache_slot_s { - volatile _Atomic(void*) p; - volatile size_t memid; - volatile mi_msecs_t expire; - volatile bool is_committed; - volatile bool is_large; + void* p; + size_t memid; + mi_msecs_t expire; + bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 -static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA]; // = 0 +static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +#define BITS_SET() (UINTPTR_MAX) +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! 
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -typedef union mi_cache_count_u { - uintptr_t value; - struct { - int16_t count; // at most `count` elements in the cache -#if MI_INTPTR_SIZE > 4 - uint32_t epoch; // each push/pop increase this -#else - uint16_t epoch; -#endif - } x; -} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // set numa range - int numa_min = numa_node; - int numa_max = numa_min; - if (numa_node < 0) { - numa_min = 0; - numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; - } - else { - if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - numa_min = numa_max = numa_node; + // numa node determines start field + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find a free slot - mi_cache_slot_t* slot; - for (int n = numa_min; n <= numa_max; n++) { - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[n]); - int16_t count = top.x.count; - for (int16_t i = count - 1; i >= 0; i--) { - slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); - if (p == NULL) { - if (count > 0) { count = i; } - } - else if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (count >= 0 && count < top.x.count) { // new lower bound? 
- mi_cache_count_t newtop = { 0 }; - newtop.x.count = count; - newtop.x.epoch = top.x.epoch + 1; - mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans - } - count = -1; // don't try to set lower bound again - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed - if (!*large && slot->is_large) { - // back out again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again - } - else { - // keep it - *memid = slot->memid; - *large = slot->is_large; - *is_zero = false; - bool committed = slot->is_committed; - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; - } - else { - *commit = committed; - } - return p; - } - } - } - } + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; } - return NULL; + if (!claimed) { + claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + bool committed = slot->is_committed; + slot->p = NULL; + slot->expire = 0; + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + + // mark the slot as free again + mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; } static void mi_cache_purge(mi_os_tld_t* tld) { - // TODO: for each numa node instead? - // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; - + UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - int purged = 0; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int i = 0; i < top.x.count; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { - mi_msecs_t expire = slot->expire; - if (expire != 0 && now >= expire) { - // expired, try to claim it - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed! 
test again - if (slot->is_committed && !slot->is_large && now >= slot->expire) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; - } - // and unclaim again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); - purged++; - if (purged >= 4) break; // limit to at most 4 decommits per push + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + if (slot->expire != 0 && now >= slot->expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + if (slot->expire != 0 && now >= slot->expire) { // safe read + // still expired, decommit it + slot->expire = 0; + mi_assert_internal(slot->is_committed); // (no `is_large` check: the field was removed; large slots live in cache_available_large) + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; + } + mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } + if (purged > 4) break; // bound to no more than 4 purge tries per push } } } - static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { - mi_cache_purge(tld); - // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - // try to add it to the cache + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - mi_cache_count_t top = { 0 }; - top.value =
mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { - // claimed! - // first try to increase the top bound - mi_cache_count_t newtop = { 0 }; - newtop.x.count = i+1; - newtop.x.epoch = top.x.epoch + 1; - while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - if (top.x.count > newtop.x.count) break; // another push max'd it - newtop.x.epoch = top.x.epoch + 1; // otherwise try again - } - // set the slot - slot->expire = 0; - slot->is_committed = is_committed; - slot->memid = memid; - slot->is_large = is_large; - if (is_committed) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; - } - } - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; - return true; - } + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; 
+ slot->memid = memid; + slot->expire = 0; + slot->is_committed = is_committed; + if (is_committed && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; } } - return false; + + // make it available + mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index c3813a44..a107545f 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -42,6 +42,11 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx return (idx*MI_BITMAP_FIELD_BITS) + bitidx; } +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { + return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); +} + // Get the field index from a bit index. static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx / MI_BITMAP_FIELD_BITS); @@ -177,11 +182,13 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx return false; } - // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
-static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { - for (size_t idx = 0; idx < bitmap_fields; idx++) { +static inline bool mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -189,6 +196,13 @@ static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fi return false; } + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 1ab11385..3ec02bdf 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -184,7 +184,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - block_overflow1(); + //block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24);