// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#include "config.h"
#include <algorithm>
#include "central_freelist.h"
#include "internal_logging.h"  // for ASSERT, MESSAGE
#include "linked_list.h"       // for SLL_Next, SLL_Push, etc
#include "page_heap.h"         // for PageHeap
#include "static_vars.h"       // for Static

#if defined(__has_builtin)
#if __has_builtin(__builtin_add_overflow)
#define USE_ADD_OVERFLOW
#endif
#endif

using std::min;
using std::max;

namespace tcmalloc {

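// Initialize the central free list for size class |cl|: both span lists
// start out empty, and the transfer cache is sized so that a single size
// class cannot pin much more than about 1MB of idle objects in its slots.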
void CentralFreeList::Init(size_t cl) {
  size_class_ = cl;
  tcmalloc::DLL_Init(&empty_);
  tcmalloc::DLL_Init(&nonempty_);
  num_spans_ = 0;
  counter_ = 0;

  max_cache_size_ = kMaxNumTransferEntries;
#ifdef TCMALLOC_SMALL_BUT_SLOW
  // Disable the transfer cache for the small footprint case.
  cache_size_ = 0;
#else
  cache_size_ = 16;
#endif
  if (cl > 0) {
    // Limit the maximum size of the cache based on the size class. If this
    // is not done, large size class objects will consume a lot of memory if
    // they just sit in the transfer cache.
    int32_t bytes = Static::sizemap()->ByteSizeForClass(cl);
    int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl);

    ASSERT(objs_to_move > 0 && bytes > 0);
    // Limit each size class cache to at most 1MB of objects or one entry,
    // whichever is greater. Total transfer cache memory used across all
    // size classes then can't be greater than approximately
    // 1MB * kMaxNumTransferEntries.
    // min and max are in parens to avoid macro-expansion on windows.
    max_cache_size_ = (min)(max_cache_size_,
                            (max)(1, (1024 * 1024) / (bytes * objs_to_move)));
    cache_size_ = (min)(cache_size_, max_cache_size_);
  }
  used_slots_ = 0;
  ASSERT(cache_size_ <= max_cache_size_);
}

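// Returns every object on the null-terminated singly linked list starting
// at |start| to its owning span via ReleaseToSpans().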
void CentralFreeList::ReleaseListToSpans(void* start) {
  while (start) {
    void *next = SLL_Next(start);
    ReleaseToSpans(start);
    start = next;
  }
}

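// Returns a single object to the span that owns it.  If this was the last
// outstanding object of the span, the whole span is handed back to the
// page heap (the central list lock is dropped around that call).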
void CentralFreeList::ReleaseToSpans(void* object) {
  const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
  Span* span = Static::pageheap()->GetDescriptor(p);
  ASSERT(span != NULL);
  ASSERT(span->refcount > 0);

  // If the span had no free objects, it was parked on the empty_ list;
  // move it back to nonempty_ since we are about to return an object to it.
  if (span->objects == NULL) {
    tcmalloc::DLL_Remove(span);
    tcmalloc::DLL_Prepend(&nonempty_, span);
  }

  // The following check is expensive, so it is disabled by default
  if (false) {
    // Check that object does not occur in list
    int got = 0;
    for (void* p = span->objects; p != NULL; p = *((void**) p)) {
      ASSERT(p != object);
      got++;
    }
    (void)got;
    ASSERT(got + span->refcount ==
           (span->length<<kPageShift) /
           Static::sizemap()->ByteSizeForClass(span->sizeclass));
  }

  counter_++;
  span->refcount--;
  if (span->refcount == 0) {
    counter_ -= ((span->length<<kPageShift) /
                 Static::sizemap()->ByteSizeForClass(span->sizeclass));
    tcmalloc::DLL_Remove(span);
    --num_spans_;

    // Release central list lock while operating on pageheap
    lock_.Unlock();
    Static::pageheap()->Delete(span);
    lock_.Lock();
  } else {
    *(reinterpret_cast<void**>(object)) = span->objects;
    span->objects = object;
  }
}

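// Asks one other size class (chosen round-robin via a racy static counter)
// to shrink its transfer cache by one slot so that this size class can
// grow.  Returns true if the victim actually gave up a slot.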
bool CentralFreeList::EvictRandomSizeClass(
    int locked_size_class, bool force) {
  static int race_counter = 0;
  int t = race_counter++;  // Updated without a lock, but who cares.
  if (t >= Static::num_size_classes()) {
    while (t >= Static::num_size_classes()) {
      t -= Static::num_size_classes();
    }
    race_counter = t;
  }
  ASSERT(t >= 0);
  ASSERT(t < Static::num_size_classes());
  if (t == locked_size_class) return false;
  return Static::central_cache()[t].ShrinkCache(locked_size_class, force);
}

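// Ensures there is room for one more entry in this transfer cache, growing
// it at another size class's expense if necessary.  Returns false if the
// cache is already at its maximum size or no other class could be shrunk.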
bool CentralFreeList::MakeCacheSpace() {
  // Is there room in the cache?
  if (used_slots_ < cache_size_) return true;
  // Can we expand this cache any further?
  if (cache_size_ == max_cache_size_) return false;
  // Ok, we'll try to grab an entry from some other size class.
  if (EvictRandomSizeClass(size_class_, false) ||
      EvictRandomSizeClass(size_class_, true)) {
    // Succeeded in evicting, we're going to make our cache larger.
    // However, we may have dropped and re-acquired the lock in
    // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the
    // cache_size may have changed.  Therefore, check and verify that it is
    // still OK to increase the cache_size.
    if (cache_size_ < max_cache_size_) {
      cache_size_++;
      return true;
    }
  }
  return false;
}

namespace {
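// Scoped helper that temporarily trades one spin lock for another: the
// constructor releases |held| and acquires |temp|, and the destructor
// restores the original state.  This keeps a thread from ever holding two
// central-list locks at the same time.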
class LockInverter {
 private:
  SpinLock *held_, *temp_;
 public:
  inline explicit LockInverter(SpinLock* held, SpinLock *temp)
    : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
  inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
};
}

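// Gives up one transfer cache slot on behalf of |locked_size_class|.  If the
// cache is full and |force| is set, the objects in the sacrificed slot are
// returned to their spans.  Returns true if a slot was released.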
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses
// LockInverter to release one lock and acquire another in scoped-lock
// style, which our current annotation/analysis does not support.
bool CentralFreeList::ShrinkCache(int locked_size_class, bool force)
    NO_THREAD_SAFETY_ANALYSIS {
  // Start with a quick check without taking a lock.
  if (cache_size_ == 0) return false;
  // We don't evict from a full cache unless we are 'forcing'.
  if (force == false && used_slots_ == cache_size_) return false;

  // Grab lock, but first release the other lock held by this thread.  We use
  // the lock inverter to ensure that we never hold two size class locks
  // concurrently.  That can create a deadlock because there is no well
  // defined nesting order.
  LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_);
  ASSERT(used_slots_ <= cache_size_);
  ASSERT(0 <= cache_size_);
  if (cache_size_ == 0) return false;
  if (used_slots_ == cache_size_) {
    if (force == false) return false;
    // ReleaseListToSpans releases the lock, so we have to make all the
    // updates to the central list before calling it.
    cache_size_--;
    used_slots_--;
    ReleaseListToSpans(tc_slots_[used_slots_].head);
    return true;
  }
  cache_size_--;
  return true;
}

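// Inserts the list [start, end] of N objects.  If N matches this size
// class's batch size and a transfer cache slot is available, the whole
// batch is parked there; otherwise the objects go back to their spans.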
void CentralFreeList::InsertRange(void *start, void *end, int N) {
  SpinLockHolder h(&lock_);
  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
      MakeCacheSpace()) {
    int slot = used_slots_++;
    ASSERT(slot >= 0);
    ASSERT(slot < max_cache_size_);
    TCEntry *entry = &tc_slots_[slot];
    entry->head = start;
    entry->tail = end;
    return;
  }
  ReleaseListToSpans(start);
}

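// Fills *start/*end with a list of up to N objects and returns how many
// were actually fetched.  A full batch is taken straight from the transfer
// cache when possible; otherwise objects are carved out of spans.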
int CentralFreeList::RemoveRange(void **start, void **end, int N) {
  ASSERT(N > 0);
  lock_.Lock();
  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
      used_slots_ > 0) {
    int slot = --used_slots_;
    ASSERT(slot >= 0);
    TCEntry *entry = &tc_slots_[slot];
    *start = entry->head;
    *end = entry->tail;
    lock_.Unlock();
    return N;
  }

  int result = 0;
  *start = NULL;
  *end = NULL;
  // TODO: Prefetch multiple TCEntries?
  result = FetchFromOneSpansSafe(N, start, end);
  if (result != 0) {
    while (result < N) {
      int n;
      void* head = NULL;
      void* tail = NULL;
      n = FetchFromOneSpans(N - result, &head, &tail);
      if (!n) break;
      result += n;
      SLL_PushRange(start, head, tail);
    }
  }
  lock_.Unlock();
  return result;
}

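// Like FetchFromOneSpans(), but populates a fresh span from the page heap
// first if no non-empty span is currently available.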
int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) {
  int result = FetchFromOneSpans(N, start, end);
  if (!result) {
    Populate();
    result = FetchFromOneSpans(N, start, end);
  }
  return result;
}

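// Pops up to N objects from the first non-empty span and returns the count.
// *start/*end delimit the resulting list; the span is moved to the empty_
// list if its free list is exhausted.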
int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) {
  if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0;
  Span* span = nonempty_.next;

  ASSERT(span->objects != NULL);

  int result = 0;
  void *prev, *curr;
  curr = span->objects;
  do {
    prev = curr;
    curr = *(reinterpret_cast<void**>(curr));
  } while (++result < N && curr != NULL);

  if (curr == NULL) {
    // Move to empty list
    tcmalloc::DLL_Remove(span);
    tcmalloc::DLL_Prepend(&empty_, span);
  }

  *start = span->objects;
  *end = prev;
  span->objects = curr;
  SLL_SetNext(*end, NULL);
  span->refcount += result;
  counter_ -= result;
  return result;
}

// Fetch memory from the system and add to the central cache freelist.
void CentralFreeList::Populate() {
  // Release central list lock while operating on pageheap
  lock_.Unlock();
  const size_t npages = Static::sizemap()->class_to_pages(size_class_);

  Span* span = Static::pageheap()->NewWithSizeClass(npages, size_class_);
  if (span == nullptr) {
    Log(kLog, __FILE__, __LINE__,
        "tcmalloc: allocation failed", npages << kPageShift);
    lock_.Lock();
    return;
  }
  ASSERT(span->length == npages);
  // Cache sizeclass info eagerly.  Locking is not necessary.
  // (Instead of being eager, we could just replace any stale info
  // about this span, but that seems to be no better in practice.)
  for (int i = 0; i < npages; i++) {
    Static::pageheap()->SetCachedSizeClass(span->start + i, size_class_);
  }

  // Split the block into pieces and add to the free-list
  // TODO: coloring of objects to avoid cache conflicts?
  void** tail = &span->objects;
  char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
  char* limit = ptr + (npages << kPageShift);
  const size_t size = Static::sizemap()->ByteSizeForClass(size_class_);
  int num = 0;

  // Note, when ptr is close to the top of address space, ptr + size
  // might overflow the top of address space before we're able to
  // detect that it exceeded limit. So we need to be careful. See
  // https://github.com/gperftools/gperftools/issues/1323.
  ASSERT(limit - size >= ptr);
  for (;;) {
#ifndef USE_ADD_OVERFLOW
    auto nextptr = reinterpret_cast<char *>(reinterpret_cast<uintptr_t>(ptr) + size);
    if (nextptr < ptr || nextptr > limit) {
      break;
    }
#else
    // Same as above, just helping compiler a bit to produce better code
    uintptr_t nextaddr;
    if (__builtin_add_overflow(reinterpret_cast<uintptr_t>(ptr), size, &nextaddr)) {
      break;
    }
    char* nextptr = reinterpret_cast<char*>(nextaddr);
    if (nextptr > limit) {
      break;
    }
#endif

    // [ptr, ptr+size) bytes are all valid bytes, so append them
    *tail = ptr;
    tail = reinterpret_cast<void**>(ptr);
    num++;
    ptr = nextptr;
  }
  ASSERT(ptr <= limit);
  ASSERT(ptr > limit - size);  // same as ptr + size > limit but avoiding overflow
  *tail = NULL;
  span->refcount = 0;  // No sub-object in use yet

  // Add span to list of non-empty spans
  lock_.Lock();
  tcmalloc::DLL_Prepend(&nonempty_, span);
  ++num_spans_;
  counter_ += num;
}

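// Number of free objects currently held in transfer cache slots.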
int CentralFreeList::tc_length() {
  SpinLockHolder h(&lock_);
  return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_);
}

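// Bytes wasted at the tail of each span because the span size is not an
// exact multiple of the object size, summed over all spans owned here.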
size_t CentralFreeList::OverheadBytes() {
  SpinLockHolder h(&lock_);
  if (size_class_ == 0) {  // 0 holds the 0-sized allocations
    return 0;
  }
  const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_);
  const size_t object_size = Static::sizemap()->class_to_size(size_class_);
  ASSERT(object_size > 0);
  const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size;
  return num_spans_ * overhead_per_span;
}

}  // namespace tcmalloc