mirror of
https://github.com/microsoft/mimalloc.git
synced 2024-12-26 21:04:27 +08:00
e6c2fd44fc
The pthread slot approach is somewhat buggy (pretty visible with the stress unit test which segfault more or less randomly, but the stats never show up). Using the default approach instead, the test passes eventough it s relatively slow (e.g 1.5 sec on FreeBSD vs 4.5 on DragonFly with same machine).
331 lines
9.5 KiB
C
331 lines
9.5 KiB
C
/* ----------------------------------------------------------------------------
|
|
Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
|
|
This is free software; you can redistribute it and/or modify it under the
|
|
terms of the MIT license.
|
|
-----------------------------------------------------------------------------*/
|
|
|
|
/* This is a stress test for the allocator, using multiple threads and
|
|
transferring objects between threads. It tries to reflect real-world workloads:
|
|
- allocation size is distributed linearly in powers of two
|
|
- with some fraction extra large (and some extra extra large)
|
|
- the allocations are initialized and read again at free
|
|
- pointers transfer between threads
|
|
- threads are terminated and recreated with some objects surviving in between
|
|
- uses deterministic "randomness", but execution can still depend on
|
|
(random) thread scheduling. Do not use this test as a benchmark!
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <mimalloc.h>
|
|
|
|
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
|
|
//
|
|
// argument defaults
|
|
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
|
static int SCALE = 10; // scaling factor
|
|
static int ITER = 50; // N full iterations destructing and re-creating all threads
|
|
|
|
// static int THREADS = 8; // more repeatable if THREADS <= #processors
|
|
// static int SCALE = 100; // scaling factor
|
|
|
|
#define STRESS // undefine for leak test
|
|
|
|
static bool allow_large_objects = true; // allow very large objects?
|
|
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
|
|
|
|
|
|
#ifdef USE_STD_MALLOC
|
|
#define custom_calloc(n,s) calloc(n,s)
|
|
#define custom_realloc(p,s) realloc(p,s)
|
|
#define custom_free(p) free(p)
|
|
#else
|
|
#define custom_calloc(n,s) mi_calloc(n,s)
|
|
#define custom_realloc(p,s) mi_realloc(p,s)
|
|
#define custom_free(p) mi_free(p)
|
|
#endif
|
|
|
|
// transfer pointer between threads
|
|
#define TRANSFERS (1000)
|
|
static volatile void* transfer[TRANSFERS];
|
|
|
|
|
|
#if (UINTPTR_MAX != UINT32_MAX)
|
|
const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
|
|
#else
|
|
const uintptr_t cookie = 0x1ce4e5b9UL;
|
|
#endif
|
|
|
|
static void* atomic_exchange_ptr(volatile void** p, void* newval);
|
|
|
|
typedef uintptr_t* random_t;
|
|
|
|
static uintptr_t pick(random_t r) {
|
|
uintptr_t x = *r;
|
|
#if (UINTPTR_MAX > UINT32_MAX)
|
|
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
|
|
x ^= x >> 30;
|
|
x *= 0xbf58476d1ce4e5b9UL;
|
|
x ^= x >> 27;
|
|
x *= 0x94d049bb133111ebUL;
|
|
x ^= x >> 31;
|
|
#else
|
|
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
|
|
x ^= x >> 16;
|
|
x *= 0x7feb352dUL;
|
|
x ^= x >> 15;
|
|
x *= 0x846ca68bUL;
|
|
x ^= x >> 16;
|
|
#endif
|
|
*r = x;
|
|
return x;
|
|
}
|
|
|
|
static bool chance(size_t perc, random_t r) {
|
|
return (pick(r) % 100 <= perc);
|
|
}
|
|
|
|
static void* alloc_items(size_t items, random_t r) {
|
|
if (chance(1, r)) {
|
|
if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
|
|
else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
|
|
else items *= 100; // 1% large objects;
|
|
}
|
|
if (items == 40) items++; // pthreads uses that size for stack increases
|
|
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
|
|
if (items==0) items = 1;
|
|
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
|
|
if (p != NULL) {
|
|
for (uintptr_t i = 0; i < items; i++) {
|
|
p[i] = (items - i) ^ cookie;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
static void free_items(void* p) {
|
|
if (p != NULL) {
|
|
uintptr_t* q = (uintptr_t*)p;
|
|
uintptr_t items = (q[0] ^ cookie);
|
|
for (uintptr_t i = 0; i < items; i++) {
|
|
if ((q[i] ^ cookie) != items - i) {
|
|
fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
custom_free(p);
|
|
}
|
|
|
|
|
|
static void stress(intptr_t tid) {
|
|
//bench_start_thread();
|
|
uintptr_t r = ((tid + 1) * 43); // rand();
|
|
const size_t max_item_shift = 5; // 128
|
|
const size_t max_item_retained_shift = max_item_shift + 2;
|
|
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
|
|
size_t retain = allocs / 2;
|
|
void** data = NULL;
|
|
size_t data_size = 0;
|
|
size_t data_top = 0;
|
|
void** retained = (void**)custom_calloc(retain,sizeof(void*));
|
|
size_t retain_top = 0;
|
|
|
|
while (allocs > 0 || retain > 0) {
|
|
if (retain == 0 || (chance(50, &r) && allocs > 0)) {
|
|
// 50%+ alloc
|
|
allocs--;
|
|
if (data_top >= data_size) {
|
|
data_size += 100000;
|
|
data = (void**)custom_realloc(data, data_size * sizeof(void*));
|
|
}
|
|
data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
|
|
}
|
|
else {
|
|
// 25% retain
|
|
retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
|
|
retain--;
|
|
}
|
|
if (chance(66, &r) && data_top > 0) {
|
|
// 66% free previous alloc
|
|
size_t idx = pick(&r) % data_top;
|
|
free_items(data[idx]);
|
|
data[idx] = NULL;
|
|
}
|
|
if (chance(25, &r) && data_top > 0) {
|
|
// 25% exchange a local pointer with the (shared) transfer buffer.
|
|
size_t data_idx = pick(&r) % data_top;
|
|
size_t transfer_idx = pick(&r) % TRANSFERS;
|
|
void* p = data[data_idx];
|
|
void* q = atomic_exchange_ptr(&transfer[transfer_idx], p);
|
|
data[data_idx] = q;
|
|
}
|
|
}
|
|
// free everything that is left
|
|
for (size_t i = 0; i < retain_top; i++) {
|
|
free_items(retained[i]);
|
|
}
|
|
for (size_t i = 0; i < data_top; i++) {
|
|
free_items(data[i]);
|
|
}
|
|
custom_free(retained);
|
|
custom_free(data);
|
|
//bench_end_thread();
|
|
}
|
|
|
|
static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
|
|
|
|
static void test_stress(void) {
|
|
uintptr_t r = rand();
|
|
for (int n = 0; n < ITER; n++) {
|
|
run_os_threads(THREADS, &stress);
|
|
for (int i = 0; i < TRANSFERS; i++) {
|
|
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
|
|
void* p = atomic_exchange_ptr(&transfer[i], NULL);
|
|
free_items(p);
|
|
}
|
|
}
|
|
// mi_collect(false);
|
|
#if !defined(NDEBUG) || defined(MI_TSAN)
|
|
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
|
|
#endif
|
|
}
|
|
}
|
|
|
|
#ifndef STRESS
|
|
static void leak(intptr_t tid) {
|
|
uintptr_t r = rand();
|
|
void* p = alloc_items(1 /*pick(&r)%128*/, &r);
|
|
if (chance(50, &r)) {
|
|
intptr_t i = (pick(&r) % TRANSFERS);
|
|
void* q = atomic_exchange_ptr(&transfer[i], p);
|
|
free_items(q);
|
|
}
|
|
}
|
|
|
|
static void test_leak(void) {
|
|
for (int n = 0; n < ITER; n++) {
|
|
run_os_threads(THREADS, &leak);
|
|
mi_collect(false);
|
|
#ifndef NDEBUG
|
|
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
|
|
#endif
|
|
}
|
|
}
|
|
#endif
|
|
|
|
int main(int argc, char** argv) {
|
|
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
|
|
if (argc >= 2) {
|
|
char* end;
|
|
long n = strtol(argv[1], &end, 10);
|
|
if (n > 0) THREADS = n;
|
|
}
|
|
if (argc >= 3) {
|
|
char* end;
|
|
long n = (strtol(argv[2], &end, 10));
|
|
if (n > 0) SCALE = n;
|
|
}
|
|
if (argc >= 4) {
|
|
char* end;
|
|
long n = (strtol(argv[3], &end, 10));
|
|
if (n > 0) ITER = n;
|
|
}
|
|
printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
|
|
//mi_reserve_os_memory(1024*1024*1024ULL, false, true);
|
|
//int res = mi_reserve_huge_os_pages(4,1);
|
|
//printf("(reserve huge: %i\n)", res);
|
|
|
|
//bench_start_program();
|
|
|
|
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
|
|
srand(0x7feb352d);
|
|
// mi_stats_reset();
|
|
#ifdef STRESS
|
|
test_stress();
|
|
#else
|
|
test_leak();
|
|
#endif
|
|
|
|
// mi_collect(true);
|
|
mi_stats_print(NULL);
|
|
//bench_end_program();
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void (*thread_entry_fun)(intptr_t) = &stress;
|
|
|
|
#ifdef _WIN32
|
|
|
|
#include <Windows.h>
|
|
|
|
static DWORD WINAPI thread_entry(LPVOID param) {
|
|
thread_entry_fun((intptr_t)param);
|
|
return 0;
|
|
}
|
|
|
|
static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
|
|
thread_entry_fun = fun;
|
|
DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
|
|
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
|
|
for (uintptr_t i = 0; i < nthreads; i++) {
|
|
thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
|
|
}
|
|
for (size_t i = 0; i < nthreads; i++) {
|
|
WaitForSingleObject(thandles[i], INFINITE);
|
|
}
|
|
for (size_t i = 0; i < nthreads; i++) {
|
|
CloseHandle(thandles[i]);
|
|
}
|
|
custom_free(tids);
|
|
custom_free(thandles);
|
|
}
|
|
|
|
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
|
|
#if (INTPTR_MAX == INT32_MAX)
|
|
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
|
|
#else
|
|
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
|
|
#endif
|
|
}
|
|
#else
|
|
|
|
#include <pthread.h>
|
|
|
|
static void* thread_entry(void* param) {
|
|
thread_entry_fun((uintptr_t)param);
|
|
return NULL;
|
|
}
|
|
|
|
static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
|
|
thread_entry_fun = fun;
|
|
pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
|
|
memset(threads, 0, sizeof(pthread_t) * nthreads);
|
|
//pthread_setconcurrency(nthreads);
|
|
for (size_t i = 0; i < nthreads; i++) {
|
|
pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
|
|
}
|
|
for (size_t i = 0; i < nthreads; i++) {
|
|
pthread_join(threads[i], NULL);
|
|
}
|
|
custom_free(threads);
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
#include <atomic>
|
|
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
|
|
return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
|
|
}
|
|
#else
|
|
#include <stdatomic.h>
|
|
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
|
|
return atomic_exchange((volatile _Atomic(void*)*)p, newval);
|
|
}
|
|
#endif
|
|
|
|
#endif
|