// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#include "config.h"
#include <algorithm>
#include "central_freelist.h"
#include "internal_logging.h"  // for ASSERT, MESSAGE
#include "linked_list.h"       // for SLL_Next, SLL_Push, etc
#include "page_heap.h"         // for PageHeap
#include "static_vars.h"       // for Static

#if defined(__has_builtin)
#if __has_builtin(__builtin_add_overflow)
#define USE_ADD_OVERFLOW
#endif
#endif

using std::min;
using std::max;

namespace tcmalloc {

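// Initialize the central free list for size class |cl|: both span lists
// start out empty, and the transfer cache is sized so that a single size
// class cannot pin much more than about 1MB of idle objects in its slots.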
void CentralFreeList::Init(size_t cl) {
  size_class_ = cl;
  tcmalloc::DLL_Init(&empty_);
  tcmalloc::DLL_Init(&nonempty_);
  num_spans_ = 0;
  counter_ = 0;

  max_cache_size_ = kMaxNumTransferEntries;
#ifdef TCMALLOC_SMALL_BUT_SLOW
  // Disable the transfer cache for the small footprint case.
  cache_size_ = 0;
#else
  cache_size_ = 16;
#endif
  if (cl > 0) {
    // Limit the maximum size of the cache based on the size class. If this
    // is not done, large size class objects will consume a lot of memory if
    // they just sit in the transfer cache.
    int32_t bytes = Static::sizemap()->ByteSizeForClass(cl);
    int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl);

    ASSERT(objs_to_move > 0 && bytes > 0);
    // Limit each size class cache to at most 1MB of objects or one entry,
    // whichever is greater. Total transfer cache memory used across all
    // size classes then can't be greater than approximately
    // 1MB * kMaxNumTransferEntries.
    // min and max are in parens to avoid macro-expansion on windows.
    max_cache_size_ = (min)(max_cache_size_,
                            (max)(1, (1024 * 1024) / (bytes * objs_to_move)));
    cache_size_ = (min)(cache_size_, max_cache_size_);
  }
  used_slots_ = 0;
  ASSERT(cache_size_ <= max_cache_size_);
}

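// Returns every object on the null-terminated singly linked list starting
// at |start| to its owning span via ReleaseToSpans().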
void CentralFreeList::ReleaseListToSpans(void* start) {
  while (start) {
    void *next = SLL_Next(start);
    ReleaseToSpans(start);
    start = next;
  }
}

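// Returns a single object to the span that owns it.  If this was the last
// outstanding object of the span, the whole span is handed back to the
// page heap (the central list lock is dropped around that call).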
void CentralFreeList::ReleaseToSpans(void* object) {
  const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
  Span* span = Static::pageheap()->GetDescriptor(p);
  ASSERT(span != NULL);
  ASSERT(span->refcount > 0);

  // If the span had no free objects, it was parked on the empty_ list;
  // move it back to nonempty_ since we are about to return an object to it.
  if (span->objects == NULL) {
    tcmalloc::DLL_Remove(span);
    tcmalloc::DLL_Prepend(&nonempty_, span);
  }

  // The following check is expensive, so it is disabled by default
  if (false) {
    // Check that object does not occur in list
    int got = 0;
    for (void* p = span->objects; p != NULL; p = *((void**) p)) {
      ASSERT(p != object);
      got++;
    }
    (void)got;
    ASSERT(got + span->refcount ==
           (span->length<<kPageShift) /
           Static::sizemap()->ByteSizeForClass(span->sizeclass));
  }

  counter_++;
  span->refcount--;
  if (span->refcount == 0) {
    counter_ -= ((span->length<<kPageShift) /
                 Static::sizemap()->ByteSizeForClass(span->sizeclass));
    tcmalloc::DLL_Remove(span);
    --num_spans_;

    // Release central list lock while operating on pageheap
    lock_.Unlock();
    Static::pageheap()->Delete(span);
    lock_.Lock();
  } else {
    *(reinterpret_cast<void**>(object)) = span->objects;
    span->objects = object;
  }
}

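// Asks one other size class (chosen round-robin via a racy static counter)
// to shrink its transfer cache by one slot so that this size class can
// grow.  Returns true if the victim actually gave up a slot.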
bool CentralFreeList::EvictRandomSizeClass(
    int locked_size_class, bool force) {
  static int race_counter = 0;
  int t = race_counter++;  // Updated without a lock, but who cares.
  if (t >= Static::num_size_classes()) {
    while (t >= Static::num_size_classes()) {
      t -= Static::num_size_classes();
    }
    race_counter = t;
  }
  ASSERT(t >= 0);
  ASSERT(t < Static::num_size_classes());
  if (t == locked_size_class) return false;
  return Static::central_cache()[t].ShrinkCache(locked_size_class, force);
}

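// Ensures there is room for one more entry in this transfer cache, growing
// it at another size class's expense if necessary.  Returns false if the
// cache is already at its maximum size or no other class could be shrunk.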
bool CentralFreeList::MakeCacheSpace() {
  // Is there room in the cache?
  if (used_slots_ < cache_size_) return true;
  // Can we expand this cache any further?
  if (cache_size_ == max_cache_size_) return false;
  // Ok, we'll try to grab an entry from some other size class.
  if (EvictRandomSizeClass(size_class_, false) ||
      EvictRandomSizeClass(size_class_, true)) {
    // Succeeded in evicting, we're going to make our cache larger.
    // However, we may have dropped and re-acquired the lock in
    // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the
    // cache_size may have changed.  Therefore, check and verify that it is
    // still OK to increase the cache_size.
    if (cache_size_ < max_cache_size_) {
      cache_size_++;
      return true;
    }
  }
  return false;
}

namespace {
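// Scoped helper that temporarily trades one spin lock for another: the
// constructor releases |held| and acquires |temp|, and the destructor
// restores the original state.  This keeps a thread from ever holding two
// central-list locks at the same time.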
class LockInverter {
 private:
  SpinLock *held_, *temp_;
 public:
  inline explicit LockInverter(SpinLock* held, SpinLock *temp)
    : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
  inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
};
}

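// Gives up one transfer cache slot on behalf of |locked_size_class|.  If the
// cache is full and |force| is set, the objects in the sacrificed slot are
// returned to their spans.  Returns true if a slot was released.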
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses
// LockInverter to release one lock and acquire another in scoped-lock
// style, which our current annotation/analysis does not support.
bool CentralFreeList::ShrinkCache(int locked_size_class, bool force)
    NO_THREAD_SAFETY_ANALYSIS {
  // Start with a quick check without taking a lock.
  if (cache_size_ == 0) return false;
  // We don't evict from a full cache unless we are 'forcing'.
  if (force == false && used_slots_ == cache_size_) return false;

  // Grab lock, but first release the other lock held by this thread.  We use
  // the lock inverter to ensure that we never hold two size class locks
  // concurrently.  That can create a deadlock because there is no well
  // defined nesting order.
  LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_);
  ASSERT(used_slots_ <= cache_size_);
  ASSERT(0 <= cache_size_);
  if (cache_size_ == 0) return false;
  if (used_slots_ == cache_size_) {
    if (force == false) return false;
    // ReleaseListToSpans releases the lock, so we have to make all the
    // updates to the central list before calling it.
    cache_size_--;
    used_slots_--;
    ReleaseListToSpans(tc_slots_[used_slots_].head);
    return true;
  }
  cache_size_--;
  return true;
}

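// Inserts the list [start, end] of N objects.  If N matches this size
// class's batch size and a transfer cache slot is available, the whole
// batch is parked there; otherwise the objects go back to their spans.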
void CentralFreeList::InsertRange(void *start, void *end, int N) {
  SpinLockHolder h(&lock_);
  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
      MakeCacheSpace()) {
    int slot = used_slots_++;
    ASSERT(slot >= 0);
    ASSERT(slot < max_cache_size_);
    TCEntry *entry = &tc_slots_[slot];
    entry->head = start;
    entry->tail = end;
    return;
  }
  ReleaseListToSpans(start);
}

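// Fills *start/*end with a list of up to N objects and returns how many
// were actually fetched.  A full batch is taken straight from the transfer
// cache when possible; otherwise objects are carved out of spans.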
int CentralFreeList::RemoveRange(void **start, void **end, int N) {
  ASSERT(N > 0);
  lock_.Lock();
  if (N == Static::sizemap()->num_objects_to_move(size_class_) &&
      used_slots_ > 0) {
    int slot = --used_slots_;
    ASSERT(slot >= 0);
    TCEntry *entry = &tc_slots_[slot];
    *start = entry->head;
    *end = entry->tail;
    lock_.Unlock();
    return N;
  }

  int result = 0;
  *start = NULL;
  *end = NULL;
  // TODO: Prefetch multiple TCEntries?
  result = FetchFromOneSpansSafe(N, start, end);
  if (result != 0) {
    while (result < N) {
      int n;
      void* head = NULL;
      void* tail = NULL;
      n = FetchFromOneSpans(N - result, &head, &tail);
      if (!n) break;
      result += n;
      SLL_PushRange(start, head, tail);
    }
  }
  lock_.Unlock();
  return result;
}

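// Like FetchFromOneSpans(), but populates a fresh span from the page heap
// first if no non-empty span is currently available.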
int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) {
  int result = FetchFromOneSpans(N, start, end);
  if (!result) {
    Populate();
    result = FetchFromOneSpans(N, start, end);
  }
  return result;
}

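// Pops up to N objects from the first non-empty span and returns the count.
// *start/*end delimit the resulting list; the span is moved to the empty_
// list if its free list is exhausted.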
int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) {
  if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0;
  Span* span = nonempty_.next;

  ASSERT(span->objects != NULL);

  int result = 0;
  void *prev, *curr;
  curr = span->objects;
  do {
    prev = curr;
    curr = *(reinterpret_cast<void**>(curr));
  } while (++result < N && curr != NULL);

  if (curr == NULL) {
    // Move to empty list
    tcmalloc::DLL_Remove(span);
    tcmalloc::DLL_Prepend(&empty_, span);
  }

  *start = span->objects;
  *end = prev;
  span->objects = curr;
  SLL_SetNext(*end, NULL);
  span->refcount += result;
  counter_ -= result;
  return result;
}

// Fetch memory from the system and add to the central cache freelist.
void CentralFreeList::Populate() {
  // Release central list lock while operating on pageheap
  lock_.Unlock();
  const size_t npages = Static::sizemap()->class_to_pages(size_class_);

  Span* span = Static::pageheap()->NewWithSizeClass(npages, size_class_);
  if (span == nullptr) {
    Log(kLog, __FILE__, __LINE__,
        "tcmalloc: allocation failed", npages << kPageShift);
    lock_.Lock();
    return;
  }
  ASSERT(span->length == npages);
  // Cache sizeclass info eagerly.  Locking is not necessary.
  // (Instead of being eager, we could just replace any stale info
  // about this span, but that seems to be no better in practice.)
  for (int i = 0; i < npages; i++) {
    Static::pageheap()->SetCachedSizeClass(span->start + i, size_class_);
  }

  // Split the block into pieces and add to the free-list
  // TODO: coloring of objects to avoid cache conflicts?
  void** tail = &span->objects;
  char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
  char* limit = ptr + (npages << kPageShift);
  const size_t size = Static::sizemap()->ByteSizeForClass(size_class_);
  int num = 0;

  // Note, when ptr is close to the top of address space, ptr + size
  // might overflow the top of address space before we're able to
  // detect that it exceeded limit. So we need to be careful. See
  // https://github.com/gperftools/gperftools/issues/1323.
  ASSERT(limit - size >= ptr);
  for (;;) {
#ifndef USE_ADD_OVERFLOW
    auto nextptr = reinterpret_cast<char *>(reinterpret_cast<uintptr_t>(ptr) + size);
    if (nextptr < ptr || nextptr > limit) {
      break;
    }
#else
    // Same as above, just helping compiler a bit to produce better code
    uintptr_t nextaddr;
    if (__builtin_add_overflow(reinterpret_cast<uintptr_t>(ptr), size, &nextaddr)) {
      break;
    }
    char* nextptr = reinterpret_cast<char*>(nextaddr);
    if (nextptr > limit) {
      break;
    }
#endif

    // [ptr, ptr+size) bytes are all valid bytes, so append them
    *tail = ptr;
    tail = reinterpret_cast<void**>(ptr);
    num++;
    ptr = nextptr;
  }
  ASSERT(ptr <= limit);
  ASSERT(ptr > limit - size);  // same as ptr + size > limit but avoiding overflow
  *tail = NULL;
  span->refcount = 0;  // No sub-object in use yet

  // Add span to list of non-empty spans
  lock_.Lock();
  tcmalloc::DLL_Prepend(&nonempty_, span);
  ++num_spans_;
  counter_ += num;
}

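// Number of free objects currently held in transfer cache slots.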
int CentralFreeList::tc_length() {
  SpinLockHolder h(&lock_);
  return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_);
}

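// Bytes wasted at the tail of each span because the span size is not an
// exact multiple of the object size, summed over all spans owned here.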
size_t CentralFreeList::OverheadBytes() {
  SpinLockHolder h(&lock_);
  if (size_class_ == 0) {  // 0 holds the 0-sized allocations
    return 0;
  }
  const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_);
  const size_t object_size = Static::sizemap()->class_to_size(size_class_);
  ASSERT(object_size > 0);
  const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size;
  return num_spans_ * overhead_per_span;
}

}  // namespace tcmalloc