Limit the number of read-only files the POSIX Env will have open.

Background compaction can create an unbounded number of
leveldb::RandomAccessFile instances. On 64-bit systems mmap is used and
file descriptors are only used beyond a certain number of mmap's.
32-bit systems to not use mmap at all. leveldb::RandomAccessFile does not
observe Options.max_open_files so compaction could exhaust the file
descriptor limit.

This change uses getrlimit to determine the maximum number of open
files and limits RandomAccessFile to approximately 20% of that value.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=143505556
This commit is contained in:
cmumford 2017-01-03 17:39:49 -08:00 committed by Chris Mumford
parent a2fb086d07
commit 646c3588de
3 changed files with 204 additions and 58 deletions

View File

@ -10,6 +10,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h> #include <sys/types.h>
@ -23,15 +24,70 @@
#include "util/logging.h" #include "util/logging.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
#include "util/posix_logger.h" #include "util/posix_logger.h"
#include "util/env_posix_test_helper.h"
namespace leveldb { namespace leveldb {
namespace { namespace {
static int open_read_only_file_limit = -1;
static int mmap_limit = -1;
static Status IOError(const std::string& context, int err_number) { static Status IOError(const std::string& context, int err_number) {
return Status::IOError(context, strerror(err_number)); return Status::IOError(context, strerror(err_number));
} }
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not end up running out of file descriptors, virtual memory,
// or running into kernel performance problems for very large databases.
class Limiter {
public:
// Limit maximum number of resources to |n|.
Limiter(intptr_t n) {
SetAllowed(n);
}
// If another resource is available, acquire it and return true.
// Else return false.
bool Acquire() {
if (GetAllowed() <= 0) {
return false;
}
MutexLock l(&mu_);
intptr_t x = GetAllowed();
if (x <= 0) {
return false;
} else {
SetAllowed(x - 1);
return true;
}
}
// Release a resource acquired by a previous call to Acquire() that returned
// true.
void Release() {
MutexLock l(&mu_);
SetAllowed(GetAllowed() + 1);
}
private:
port::Mutex mu_;
port::AtomicPointer allowed_;
intptr_t GetAllowed() const {
return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
}
// REQUIRES: mu_ must be held
void SetAllowed(intptr_t v) {
allowed_.Release_Store(reinterpret_cast<void*>(v));
}
Limiter(const Limiter&);
void operator=(const Limiter&);
};
class PosixSequentialFile: public SequentialFile { class PosixSequentialFile: public SequentialFile {
private: private:
std::string filename_; std::string filename_;
@ -69,87 +125,65 @@ class PosixSequentialFile: public SequentialFile {
class PosixRandomAccessFile: public RandomAccessFile { class PosixRandomAccessFile: public RandomAccessFile {
private: private:
std::string filename_; std::string filename_;
bool temporary_fd_; // If true, fd_ is -1 and we open on every read.
int fd_; int fd_;
Limiter* limiter_;
public: public:
PosixRandomAccessFile(const std::string& fname, int fd) PosixRandomAccessFile(const std::string& fname, int fd, Limiter* limiter)
: filename_(fname), fd_(fd) { } : filename_(fname), fd_(fd), limiter_(limiter) {
virtual ~PosixRandomAccessFile() { close(fd_); } temporary_fd_ = !limiter->Acquire();
if (temporary_fd_) {
// Open file on every access.
close(fd_);
fd_ = -1;
}
}
virtual ~PosixRandomAccessFile() {
if (!temporary_fd_) {
close(fd_);
limiter_->Release();
}
}
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const { char* scratch) const {
int fd = fd_;
if (temporary_fd_) {
fd = open(filename_.c_str(), O_RDONLY);
if (fd < 0) {
return IOError(filename_, errno);
}
}
Status s; Status s;
ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset)); ssize_t r = pread(fd, scratch, n, static_cast<off_t>(offset));
*result = Slice(scratch, (r < 0) ? 0 : r); *result = Slice(scratch, (r < 0) ? 0 : r);
if (r < 0) { if (r < 0) {
// An error: return a non-ok status // An error: return a non-ok status
s = IOError(filename_, errno); s = IOError(filename_, errno);
} }
if (temporary_fd_) {
// Close the temporary file descriptor opened earlier.
close(fd);
}
return s; return s;
} }
}; };
// Helper class to limit mmap file usage so that we do not end up
// running out virtual memory or running into kernel performance
// problems for very large databases.
class MmapLimiter {
public:
// Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
MmapLimiter() {
SetAllowed(sizeof(void*) >= 8 ? 1000 : 0);
}
// If another mmap slot is available, acquire it and return true.
// Else return false.
bool Acquire() {
if (GetAllowed() <= 0) {
return false;
}
MutexLock l(&mu_);
intptr_t x = GetAllowed();
if (x <= 0) {
return false;
} else {
SetAllowed(x - 1);
return true;
}
}
// Release a slot acquired by a previous call to Acquire() that returned true.
void Release() {
MutexLock l(&mu_);
SetAllowed(GetAllowed() + 1);
}
private:
port::Mutex mu_;
port::AtomicPointer allowed_;
intptr_t GetAllowed() const {
return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
}
// REQUIRES: mu_ must be held
void SetAllowed(intptr_t v) {
allowed_.Release_Store(reinterpret_cast<void*>(v));
}
MmapLimiter(const MmapLimiter&);
void operator=(const MmapLimiter&);
};
// mmap() based random-access // mmap() based random-access
class PosixMmapReadableFile: public RandomAccessFile { class PosixMmapReadableFile: public RandomAccessFile {
private: private:
std::string filename_; std::string filename_;
void* mmapped_region_; void* mmapped_region_;
size_t length_; size_t length_;
MmapLimiter* limiter_; Limiter* limiter_;
public: public:
// base[0,length-1] contains the mmapped contents of the file. // base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile(const std::string& fname, void* base, size_t length, PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
MmapLimiter* limiter) Limiter* limiter)
: filename_(fname), mmapped_region_(base), length_(length), : filename_(fname), mmapped_region_(base), length_(length),
limiter_(limiter) { limiter_(limiter) {
} }
@ -332,7 +366,7 @@ class PosixEnv : public Env {
mmap_limit_.Release(); mmap_limit_.Release();
} }
} else { } else {
*result = new PosixRandomAccessFile(fname, fd); *result = new PosixRandomAccessFile(fname, fd, &fd_limit_);
} }
return s; return s;
} }
@ -532,10 +566,42 @@ class PosixEnv : public Env {
BGQueue queue_; BGQueue queue_;
PosixLockTable locks_; PosixLockTable locks_;
MmapLimiter mmap_limit_; Limiter mmap_limit_;
Limiter fd_limit_;
}; };
PosixEnv::PosixEnv() : started_bgthread_(false) { // Return the maximum number of concurrent mmaps.
static int MaxMmaps() {
if (mmap_limit >= 0) {
return mmap_limit;
}
// Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
mmap_limit = sizeof(void*) >= 8 ? 1000 : 0;
return mmap_limit;
}
// Return the maximum number of read-only files to keep open.
static intptr_t MaxOpenFiles() {
if (open_read_only_file_limit >= 0) {
return open_read_only_file_limit;
}
struct rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim)) {
// getrlimit failed, fallback to hard-coded default.
open_read_only_file_limit = 50;
} else if (rlim.rlim_cur == RLIM_INFINITY) {
open_read_only_file_limit = std::numeric_limits<int>::max();
} else {
// Allow use of 20% of available file descriptors for read-only files.
open_read_only_file_limit = rlim.rlim_cur / 5;
}
return open_read_only_file_limit;
}
PosixEnv::PosixEnv()
: started_bgthread_(false),
mmap_limit_(MaxMmaps()),
fd_limit_(MaxOpenFiles()) {
PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL)); PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL)); PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
} }
@ -610,6 +676,16 @@ static pthread_once_t once = PTHREAD_ONCE_INIT;
static Env* default_env; static Env* default_env;
static void InitDefaultEnv() { default_env = new PosixEnv; } static void InitDefaultEnv() { default_env = new PosixEnv; }
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
assert(default_env == NULL);
open_read_only_file_limit = limit;
}
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
assert(default_env == NULL);
mmap_limit = limit;
}
Env* Env::Default() { Env* Env::Default() {
pthread_once(&once, InitDefaultEnv); pthread_once(&once, InitDefaultEnv);
return default_env; return default_env;

View File

@ -0,0 +1,28 @@
// Copyright 2017 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_
#define STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_
namespace leveldb {
class EnvPosixTest;
// A helper for the POSIX Env to facilitate testing.
class EnvPosixTestHelper {
private:
friend class EnvPosixTest;
// Set the maximum number of read-only files that will be opened.
// Must be called before creating an Env.
static void SetReadOnlyFDLimit(int limit);
// Set the maximum number of read-only files that will be mapped via mmap.
// Must be called before creating an Env.
static void SetReadOnlyMMapLimit(int limit);
};
} // namespace leveldb
#endif // STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_

View File

@ -6,10 +6,13 @@
#include "port/port.h" #include "port/port.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/env_posix_test_helper.h"
namespace leveldb { namespace leveldb {
static const int kDelayMicros = 100000; static const int kDelayMicros = 100000;
static const int kReadOnlyFileLimit = 4;
static const int kMMapLimit = 4;
class EnvPosixTest { class EnvPosixTest {
private: private:
@ -19,6 +22,11 @@ class EnvPosixTest {
public: public:
Env* env_; Env* env_;
EnvPosixTest() : env_(Env::Default()) { } EnvPosixTest() : env_(Env::Default()) { }
static void SetFileLimits(int read_only_file_limit, int mmap_limit) {
EnvPosixTestHelper::SetReadOnlyFDLimit(read_only_file_limit);
EnvPosixTestHelper::SetReadOnlyMMapLimit(mmap_limit);
}
}; };
static void SetBool(void* ptr) { static void SetBool(void* ptr) {
@ -97,8 +105,42 @@ TEST(EnvPosixTest, StartThread) {
ASSERT_EQ(state.val, 3); ASSERT_EQ(state.val, 3);
} }
TEST(EnvPosixTest, TestOpenOnRead) {
// Write some test data to a single file that will be opened |n| times.
std::string test_dir;
ASSERT_OK(Env::Default()->GetTestDirectory(&test_dir));
std::string test_file = test_dir + "/open_on_read.txt";
FILE* f = fopen(test_file.c_str(), "w");
ASSERT_TRUE(f != NULL);
const char kFileData[] = "abcdefghijklmnopqrstuvwxyz";
fputs(kFileData, f);
fclose(f);
// Open test file some number above the sum of the two limits to force
// open-on-read behavior of POSIX Env leveldb::RandomAccessFile.
const int kNumFiles = kReadOnlyFileLimit + kMMapLimit + 5;
leveldb::RandomAccessFile* files[kNumFiles] = {0};
for (int i = 0; i < kNumFiles; i++) {
ASSERT_OK(Env::Default()->NewRandomAccessFile(test_file, &files[i]));
}
char scratch;
Slice read_result;
for (int i = 0; i < kNumFiles; i++) {
ASSERT_OK(files[i]->Read(i, 1, &read_result, &scratch));
ASSERT_EQ(kFileData[i], read_result[0]);
}
for (int i = 0; i < kNumFiles; i++) {
delete files[i];
}
ASSERT_OK(Env::Default()->DeleteFile(test_file));
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {
// All tests currently run with the same read-only file limits.
leveldb::EnvPosixTest::SetFileLimits(leveldb::kReadOnlyFileLimit,
leveldb::kMMapLimit);
return leveldb::test::RunAllTests(); return leveldb::test::RunAllTests();
} }