Update to leveldb 1.6

Highlights
----------
Mmap at most 1000 files on Posix to improve performance for large databases.
Support for more architectures (thanks to Alexander K.)

Building and porting
--------------------
HP/UX support (issue 126)
AtomicPointer for ia64 (issue 123)
Sparc v9 support (issue 124)
Atomic ops for powerpc
Use -fno-builtin-memcmp only when using g++
Simplify IOS build rules (issue 114)
Use CXXFLAGS instead of CFLAGS when invoking C++ compiler (issue 118)
Fix snappy shared library problem (issue 94)
Fix shared library installation path regression
Endian-ness detection tweak for FreeBSD

Bug fixes
---------
Stop ignoring FLAGS_open_files in db_bench
Make bloom test behavior agnostic to endian-ness

Performance
-----------
Limit number of mmapped files to 1000 to improve perf for large dbs
Do not delay for 1 second on shutdown path (issue 125)

Misc
----
Make InMemoryEnv return a no-op logger
C binding now has a wrapper for free (issue 117)
Add thread-safety annotations
Added an in-process lock table (issue 120)
Make RandomAccessFile and SequentialFile non-copyable
This commit is contained in:
David Grogan 2012-10-12 11:53:12 -07:00
parent dd0d562b4d
commit 946e5b5a4c
25 changed files with 382 additions and 72 deletions

View File

@ -15,7 +15,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode)
#-----------------------------------------------
# detect what platform we're building on
$(shell ./build_detect_platform build_config.mk)
$(shell CC=$(CC) CXX=$(CXX) TARGET_OS=$(TARGET_OS) \
./build_detect_platform build_config.mk ./)
# this file is generated by the previous line to set build flags and sources
include build_config.mk
@ -70,7 +71,7 @@ SHARED = $(SHARED1)
else
# Update db.h if you change these.
SHARED_MAJOR = 1
SHARED_MINOR = 5
SHARED_MINOR = 6
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
@ -82,7 +83,7 @@ $(SHARED2): $(SHARED3)
endif
$(SHARED3):
$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3)
$(CXX) $(SOURCES) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(INSTALL_PATH)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -o $(SHARED3)
endif # PLATFORM_SHARED_EXT
@ -179,14 +180,14 @@ IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBu
.cc.o:
mkdir -p ios-x86/$(dir $@)
$(SIMULATORROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@
$(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
$(DEVICEROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@
lipo ios-x86/$@ ios-arm/$@ -create -output $@
.c.o:
mkdir -p ios-x86/$(dir $@)
$(SIMULATORROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@
$(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
$(DEVICEROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@
lipo ios-x86/$@ ios-arm/$@ -create -output $@

1
TODO
View File

@ -7,6 +7,7 @@ db
within [start_key..end_key]? For Chrome, deletion of obsolete
object stores, etc. can be done in the background anyway, so
probably not that important.
- There have been requests for MultiGet.
After a range is completely deleted, what gets rid of the
corresponding files if we do no future changes to that range. Make

View File

@ -23,8 +23,9 @@
#
OUTPUT=$1
if test -z "$OUTPUT"; then
echo "usage: $0 <output-filename>" >&2
PREFIX=$2
if test -z "$OUTPUT" || test -z "$PREFIX"; then
echo "usage: $0 <output-filename> <directory_prefix>" >&2
exit 1
fi
@ -55,58 +56,72 @@ PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true
# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp
MEMCMP_FLAG=
if [ "$CXX" = "g++" ]; then
# Use libc's memcmp instead of GCC's memcmp. This results in ~40%
# performance improvement on readrandom under gcc 4.4.3 on Linux/x86.
MEMCMP_FLAG="-fno-builtin-memcmp"
fi
case "$TARGET_OS" in
Darwin)
PLATFORM=OS_MACOSX
COMMON_FLAGS="-fno-builtin-memcmp -DOS_MACOSX"
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
PLATFORM_SHARED_EXT=dylib
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name "
PORT_FILE=port/port_posix.cc
;;
Linux)
PLATFORM=OS_LINUX
COMMON_FLAGS="-fno-builtin-memcmp -pthread -DOS_LINUX"
COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
SunOS)
PLATFORM=OS_SOLARIS
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
PLATFORM_LDFLAGS="-lpthread -lrt"
PORT_FILE=port/port_posix.cc
;;
FreeBSD)
PLATFORM=OS_FREEBSD
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
PLATFORM_LDFLAGS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
PLATFORM_LDFLAGS="-lpthread -lgcc_s"
PORT_FILE=port/port_posix.cc
;;
OpenBSD)
PLATFORM=OS_OPENBSD
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
DragonFly)
PLATFORM=OS_DRAGONFLYBSD
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
PLATFORM_LDFLAGS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
OS_ANDROID_CROSSCOMPILE)
PLATFORM=OS_ANDROID
COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
PLATFORM_LDFLAGS="" # All pthread features are in the Android C library
PORT_FILE=port/port_posix.cc
CROSS_COMPILE=true
;;
HP-UX)
PLATFORM=OS_HPUX
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
# man ld: +h internal_name
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
;;
*)
echo "Unknown platform!" >&2
exit 1
@ -116,11 +131,13 @@ esac
# except for the test and benchmark files. By default, find will output a list
# of all files matching either rule, so we need to append -print to make the
# prune take effect.
DIRS="util db table"
DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"
set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune"
PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "`
PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "`
set +f # re-enable globbing
# The sources consist of the portable files, plus the platform-specific port
@ -133,7 +150,7 @@ if [ "$CROSS_COMPILE" = "true" ]; then
true
else
# If -std=c++0x works, use <cstdatomic>. Otherwise use port_posix.h.
$CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null <<EOF
$CXX $CXXFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdatomic>
int main() {}
EOF
@ -146,7 +163,7 @@ EOF
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
$CXX $CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
@ -156,7 +173,7 @@ EOF
fi
# Test whether tcmalloc is available
$CXX $CFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null <<EOF
$CXX $CXXFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null <<EOF
int main() {}
EOF
if [ "$?" = 0 ]; then

View File

@ -578,4 +578,8 @@ void leveldb_env_destroy(leveldb_env_t* env) {
delete env;
}
void leveldb_free(void* ptr) {
free(ptr);
}
} // end extern "C"

View File

@ -204,6 +204,12 @@ int main(int argc, char** argv) {
CheckCondition(err != NULL);
Free(&err);
StartPhase("leveldb_free");
db = leveldb_open(options, dbname, &err);
CheckCondition(err != NULL);
leveldb_free(err);
err = NULL;
StartPhase("open");
leveldb_options_set_create_if_missing(options, 1);
db = leveldb_open(options, dbname, &err);

View File

@ -693,6 +693,7 @@ class Benchmark {
options.create_if_missing = !FLAGS_use_existing_db;
options.block_cache = cache_;
options.write_buffer_size = FLAGS_write_buffer_size;
options.max_open_files = FLAGS_open_files;
options.filter_policy = filter_policy_;
Status s = DB::Open(options, FLAGS_db, &db_);
if (!s.ok()) {

View File

@ -609,7 +609,11 @@ void DBImpl::BackgroundCall() {
assert(bg_compaction_scheduled_);
if (!shutting_down_.Acquire_Load()) {
Status s = BackgroundCompaction();
if (!s.ok()) {
if (s.ok()) {
// Success
} else if (shutting_down_.Acquire_Load()) {
// Error most likely due to shutdown; do not wait
} else {
// Wait a little bit before retrying background compaction in
// case this is an environmental problem and we do not want to
// chew up resources for failed compactions for the duration of

View File

@ -13,6 +13,7 @@
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "port/port.h"
#include "port/thread_annotations.h"
namespace leveldb {
@ -71,7 +72,7 @@ class DBImpl : public DB {
// Recover the descriptor from persistent storage. May do a significant
// amount of work to recover recently logged updates. Any changes to
// be made to the descriptor are added to *edit.
Status Recover(VersionEdit* edit);
Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void MaybeIgnoreError(Status* s) const;
@ -80,27 +81,34 @@ class DBImpl : public DB {
// Compact the in-memory write buffer to disk. Switches to a new
// log-file/memtable and writes a new descriptor iff successful.
Status CompactMemTable();
Status CompactMemTable()
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status RecoverLogFile(uint64_t log_number,
VersionEdit* edit,
SequenceNumber* max_sequence);
SequenceNumber* max_sequence)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
Status MakeRoomForWrite(bool force /* compact even if there is room? */)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
WriteBatch* BuildBatchGroup(Writer** last_writer);
void MaybeScheduleCompaction();
void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
static void BGWork(void* db);
void BackgroundCall();
Status BackgroundCompaction();
void CleanupCompaction(CompactionState* compact);
Status DoCompactionWork(CompactionState* compact);
Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void CleanupCompaction(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status DoCompactionWork(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status OpenCompactionOutputFile(CompactionState* compact);
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
Status InstallCompactionResults(CompactionState* compact);
Status InstallCompactionResults(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Constant after construction
Env* const env_;

View File

@ -1442,6 +1442,12 @@ TEST(DBTest, DBOpen_Options) {
db = NULL;
}
TEST(DBTest, Locking) {
DB* db2 = NULL;
Status s = DB::Open(CurrentOptions(), dbname_, &db2);
ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
}
// Check that number of files does not grow when we are out of space
TEST(DBTest, NoSpace) {
Options options = CurrentOptions();

View File

@ -865,7 +865,7 @@ Status VersionSet::Recover() {
if (edit.has_comparator_ &&
edit.comparator_ != icmp_.user_comparator()->Name()) {
s = Status::InvalidArgument(
edit.comparator_ + "does not match existing comparator ",
edit.comparator_ + " does not match existing comparator ",
icmp_.user_comparator()->Name());
}
}

View File

@ -21,6 +21,7 @@
#include "db/dbformat.h"
#include "db/version_edit.h"
#include "port/port.h"
#include "port/thread_annotations.h"
namespace leveldb {
@ -159,7 +160,8 @@ class VersionSet {
// current version. Will release *mu while actually writing to the file.
// REQUIRES: *mu is held on entry.
// REQUIRES: no other thread concurrently calls LogAndApply()
Status LogAndApply(VersionEdit* edit, port::Mutex* mu);
Status LogAndApply(VersionEdit* edit, port::Mutex* mu)
EXCLUSIVE_LOCKS_REQUIRED(mu);
// Recover the last saved descriptor from persistent storage.
Status Recover();

View File

@ -618,7 +618,7 @@ class Benchmark {
ErrorCheck(status);
// Execute read statement
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW);
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {}
StepErrorCheck(status);
// Reset SQLite statement for another use

View File

@ -408,7 +408,7 @@ The optional <code>FilterPolicy</code> mechanism can be used to reduce
the number of disk reads substantially.
<pre>
leveldb::Options options;
options.filter_policy = NewBloomFilter(10);
options.filter_policy = NewBloomFilterPolicy(10);
leveldb::DB* db;
leveldb::DB::Open(options, "/tmp/testdb", &amp;db);
... use the database ...

View File

@ -221,6 +221,11 @@ class WritableFileImpl : public WritableFile {
FileState* file_;
};
class NoOpLogger : public Logger {
public:
virtual void Logv(const char* format, va_list ap) { }
};
class InMemoryEnv : public EnvWrapper {
public:
explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { }
@ -358,6 +363,11 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
virtual Status NewLogger(const std::string& fname, Logger** result) {
*result = new NoOpLogger;
return Status::OK();
}
private:
// Map from filenames to FileState objects, representing a simple file system.
typedef std::map<std::string, FileState*> FileSystem;

View File

@ -28,6 +28,7 @@
be true on entry:
*errptr == NULL
*errptr points to a malloc()ed null-terminated error message
(On Windows, *errptr must have been malloc()-ed by this library.)
On success, a leveldb routine leaves *errptr unchanged.
On failure, leveldb frees the old value of *errptr and
set *errptr to a malloc()ed error message.
@ -268,6 +269,15 @@ extern void leveldb_cache_destroy(leveldb_cache_t* cache);
extern leveldb_env_t* leveldb_create_default_env();
extern void leveldb_env_destroy(leveldb_env_t*);
/* Utility */
/* Calls free(ptr).
REQUIRES: ptr was malloc()-ed and returned by one of the routines
in this file. Note that in certain cases (typically on Windows), you
may need to call this routine instead of free(ptr) to dispose of
malloc()-ed memory returned by this library. */
extern void leveldb_free(void* ptr);
#ifdef __cplusplus
} /* end extern "C" */
#endif

View File

@ -14,7 +14,7 @@ namespace leveldb {
// Update Makefile if you change these
static const int kMajorVersion = 1;
static const int kMinorVersion = 5;
static const int kMinorVersion = 6;
struct Options;
struct ReadOptions;

View File

@ -175,6 +175,11 @@ class SequentialFile {
//
// REQUIRES: External synchronization
virtual Status Skip(uint64_t n) = 0;
private:
// No copying allowed
SequentialFile(const SequentialFile&);
void operator=(const SequentialFile&);
};
// A file abstraction for randomly reading the contents of a file.
@ -194,6 +199,11 @@ class RandomAccessFile {
// Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
private:
// No copying allowed
RandomAccessFile(const RandomAccessFile&);
void operator=(const RandomAccessFile&);
};
// A file abstraction for sequential writing. The implementation

View File

@ -36,6 +36,8 @@
#define ARCH_CPU_X86_FAMILY 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
#define ARCH_CPU_PPC_FAMILY 1
#endif
namespace leveldb {
@ -91,6 +93,15 @@ inline void MemoryBarrier() {
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// PPC
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
// TODO for some powerpc expert: is there a cheaper suitable variant?
// Perhaps by having separate barriers for acquire and release ops.
asm volatile("sync" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
#endif
// AtomicPointer built using platform-specific MemoryBarrier()
@ -136,6 +147,66 @@ class AtomicPointer {
}
};
// Atomic pointer based on sparc memory barriers
#elif defined(__sparcv9) && defined(__GNUC__)
class AtomicPointer {
private:
void* rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* v) : rep_(v) { }
inline void* Acquire_Load() const {
void* val;
__asm__ __volatile__ (
"ldx [%[rep_]], %[val] \n\t"
"membar #LoadLoad|#LoadStore \n\t"
: [val] "=r" (val)
: [rep_] "r" (&rep_)
: "memory");
return val;
}
inline void Release_Store(void* v) {
__asm__ __volatile__ (
"membar #LoadStore|#StoreStore \n\t"
"stx %[v], [%[rep_]] \n\t"
:
: [rep_] "r" (&rep_), [v] "r" (v)
: "memory");
}
inline void* NoBarrier_Load() const { return rep_; }
inline void NoBarrier_Store(void* v) { rep_ = v; }
};
// Atomic pointer based on ia64 acq/rel
#elif defined(__ia64) && defined(__GNUC__)
class AtomicPointer {
private:
void* rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* v) : rep_(v) { }
inline void* Acquire_Load() const {
void* val ;
__asm__ __volatile__ (
"ld8.acq %[val] = [%[rep_]] \n\t"
: [val] "=r" (val)
: [rep_] "r" (&rep_)
: "memory"
);
return val;
}
inline void Release_Store(void* v) {
__asm__ __volatile__ (
"st8.rel [%[rep_]] = %[v] \n\t"
:
: [rep_] "r" (&rep_), [v] "r" (v)
: "memory"
);
}
inline void* NoBarrier_Load() const { return rep_; }
inline void NoBarrier_Store(void* v) { rep_ = v; }
};
// We have neither MemoryBarrier(), nor <cstdatomic>
#else
#error Please implement AtomicPointer for this platform.
@ -145,6 +216,7 @@ class AtomicPointer {
#undef LEVELDB_HAVE_MEMORY_BARRIER
#undef ARCH_CPU_X86_FAMILY
#undef ARCH_CPU_ARM_FAMILY
#undef ARCH_CPU_PPC_FAMILY
} // namespace port
} // namespace leveldb

View File

@ -21,13 +21,20 @@
#else
#define PLATFORM_IS_LITTLE_ENDIAN false
#endif
#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) ||\
#elif defined(OS_FREEBSD)
#include <sys/types.h>
#include <sys/endian.h>
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
#elif defined(OS_OPENBSD) || defined(OS_NETBSD) ||\
defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID)
#include <sys/types.h>
#include <sys/endian.h>
#elif defined(OS_HPUX)
#define PLATFORM_IS_LITTLE_ENDIAN false
#else
#include <endian.h>
#endif
#include <pthread.h>
#ifdef SNAPPY
#include <snappy.h>
@ -42,7 +49,7 @@
#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\
defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\
defined(OS_ANDROID)
defined(OS_ANDROID) || defined(OS_HPUX)
// Use fread/fwrite/fflush on platforms without _unlocked variants
#define fread_unlocked fread
#define fwrite_unlocked fwrite

59
port/thread_annotations.h Normal file
View File

@ -0,0 +1,59 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H
// Some environments provide custom macros to aid in static thread-safety
// analysis. Provide empty definitions of such macros unless they are already
// defined.
#ifndef EXCLUSIVE_LOCKS_REQUIRED
#define EXCLUSIVE_LOCKS_REQUIRED(...)
#endif
#ifndef SHARED_LOCKS_REQUIRED
#define SHARED_LOCKS_REQUIRED(...)
#endif
#ifndef LOCKS_EXCLUDED
#define LOCKS_EXCLUDED(...)
#endif
#ifndef LOCK_RETURNED
#define LOCK_RETURNED(x)
#endif
#ifndef LOCKABLE
#define LOCKABLE
#endif
#ifndef SCOPED_LOCKABLE
#define SCOPED_LOCKABLE
#endif
#ifndef EXCLUSIVE_LOCK_FUNCTION
#define EXCLUSIVE_LOCK_FUNCTION(...)
#endif
#ifndef SHARED_LOCK_FUNCTION
#define SHARED_LOCK_FUNCTION(...)
#endif
#ifndef EXCLUSIVE_TRYLOCK_FUNCTION
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
#endif
#ifndef SHARED_TRYLOCK_FUNCTION
#define SHARED_TRYLOCK_FUNCTION(...)
#endif
#ifndef UNLOCK_FUNCTION
#define UNLOCK_FUNCTION(...)
#endif
#ifndef NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS
#endif
#endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H

View File

@ -162,8 +162,8 @@ class Block::Iter : public Iterator {
}
virtual void Seek(const Slice& target) {
// Binary search in restart array to find the first restart point
// with a key >= target
// Binary search in restart array to find the last restart point
// with a key < target
uint32_t left = 0;
uint32_t right = num_restarts_ - 1;
while (left < right) {

View File

@ -4,6 +4,7 @@
#include "leveldb/filter_policy.h"
#include "util/coding.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
@ -13,8 +14,8 @@ namespace leveldb {
static const int kVerbose = 1;
static Slice Key(int i, char* buffer) {
memcpy(buffer, &i, sizeof(i));
return Slice(buffer, sizeof(i));
EncodeFixed32(buffer, i);
return Slice(buffer, sizeof(uint32_t));
}
class BloomTest {

View File

@ -7,29 +7,29 @@
namespace leveldb {
void EncodeFixed32(char* buf, uint32_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(buf, &value, sizeof(value));
#else
buf[0] = value & 0xff;
buf[1] = (value >> 8) & 0xff;
buf[2] = (value >> 16) & 0xff;
buf[3] = (value >> 24) & 0xff;
#endif
if (port::kLittleEndian) {
memcpy(buf, &value, sizeof(value));
} else {
buf[0] = value & 0xff;
buf[1] = (value >> 8) & 0xff;
buf[2] = (value >> 16) & 0xff;
buf[3] = (value >> 24) & 0xff;
}
}
void EncodeFixed64(char* buf, uint64_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(buf, &value, sizeof(value));
#else
buf[0] = value & 0xff;
buf[1] = (value >> 8) & 0xff;
buf[2] = (value >> 16) & 0xff;
buf[3] = (value >> 24) & 0xff;
buf[4] = (value >> 32) & 0xff;
buf[5] = (value >> 40) & 0xff;
buf[6] = (value >> 48) & 0xff;
buf[7] = (value >> 56) & 0xff;
#endif
if (port::kLittleEndian) {
memcpy(buf, &value, sizeof(value));
} else {
buf[0] = value & 0xff;
buf[1] = (value >> 8) & 0xff;
buf[2] = (value >> 16) & 0xff;
buf[3] = (value >> 24) & 0xff;
buf[4] = (value >> 32) & 0xff;
buf[5] = (value >> 40) & 0xff;
buf[6] = (value >> 48) & 0xff;
buf[7] = (value >> 56) & 0xff;
}
}
void PutFixed32(std::string* dst, uint32_t value) {

View File

@ -3,6 +3,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <deque>
#include <set>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
@ -23,6 +24,7 @@
#include "leveldb/slice.h"
#include "port/port.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/posix_logger.h"
namespace leveldb {
@ -90,18 +92,75 @@ class PosixRandomAccessFile: public RandomAccessFile {
}
};
// Helper class to limit mmap file usage so that we do not end up
// running out virtual memory or running into kernel performance
// problems for very large databases.
class MmapLimiter {
public:
// Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
MmapLimiter() {
SetAllowed(sizeof(void*) >= 8 ? 1000 : 0);
}
// If another mmap slot is available, acquire it and return true.
// Else return false.
bool Acquire() {
if (GetAllowed() <= 0) {
return false;
}
MutexLock l(&mu_);
intptr_t x = GetAllowed();
if (x <= 0) {
return false;
} else {
SetAllowed(x - 1);
return true;
}
}
// Release a slot acquired by a previous call to Acquire() that returned true.
void Release() {
MutexLock l(&mu_);
SetAllowed(GetAllowed() + 1);
}
private:
port::Mutex mu_;
port::AtomicPointer allowed_;
intptr_t GetAllowed() const {
return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
}
// REQUIRES: mu_ must be held
void SetAllowed(intptr_t v) {
allowed_.Release_Store(reinterpret_cast<void*>(v));
}
MmapLimiter(const MmapLimiter&);
void operator=(const MmapLimiter&);
};
// mmap() based random-access
class PosixMmapReadableFile: public RandomAccessFile {
private:
std::string filename_;
void* mmapped_region_;
size_t length_;
MmapLimiter* limiter_;
public:
// base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
: filename_(fname), mmapped_region_(base), length_(length) { }
virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
MmapLimiter* limiter)
: filename_(fname), mmapped_region_(base), length_(length),
limiter_(limiter) {
}
virtual ~PosixMmapReadableFile() {
munmap(mmapped_region_, length_);
limiter_->Release();
}
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
@ -300,6 +359,25 @@ static int LockOrUnlock(int fd, bool lock) {
class PosixFileLock : public FileLock {
public:
int fd_;
std::string name_;
};
// Set of locked files. We keep a separate set instead of just
// relying on fcntrl(F_SETLK) since fcntl(F_SETLK) does not provide
// any protection against multiple uses from the same process.
class PosixLockTable {
private:
port::Mutex mu_;
std::set<std::string> locked_files_;
public:
bool Insert(const std::string& fname) {
MutexLock l(&mu_);
return locked_files_.insert(fname).second;
}
void Remove(const std::string& fname) {
MutexLock l(&mu_);
locked_files_.erase(fname);
}
};
class PosixEnv : public Env {
@ -329,19 +407,21 @@ class PosixEnv : public Env {
int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) {
s = IOError(fname, errno);
} else if (sizeof(void*) >= 8) {
// Use mmap when virtual address-space is plentiful.
} else if (mmap_limit_.Acquire()) {
uint64_t size;
s = GetFileSize(fname, &size);
if (s.ok()) {
void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
if (base != MAP_FAILED) {
*result = new PosixMmapReadableFile(fname, base, size);
*result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
} else {
s = IOError(fname, errno);
}
}
close(fd);
if (!s.ok()) {
mmap_limit_.Release();
}
} else {
*result = new PosixRandomAccessFile(fname, fd);
}
@ -430,12 +510,17 @@ class PosixEnv : public Env {
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
if (fd < 0) {
result = IOError(fname, errno);
} else if (!locks_.Insert(fname)) {
close(fd);
result = Status::IOError("lock " + fname, "already held by process");
} else if (LockOrUnlock(fd, true) == -1) {
result = IOError("lock " + fname, errno);
close(fd);
locks_.Remove(fname);
} else {
PosixFileLock* my_lock = new PosixFileLock;
my_lock->fd_ = fd;
my_lock->name_ = fname;
*lock = my_lock;
}
return result;
@ -447,6 +532,7 @@ class PosixEnv : public Env {
if (LockOrUnlock(my_lock->fd_, false) == -1) {
result = IOError("unlock", errno);
}
locks_.Remove(my_lock->name_);
close(my_lock->fd_);
delete my_lock;
return result;
@ -523,6 +609,9 @@ class PosixEnv : public Env {
struct BGItem { void* arg; void (*function)(void*); };
typedef std::deque<BGItem> BGQueue;
BGQueue queue_;
PosixLockTable locks_;
MmapLimiter mmap_limit_;
};
PosixEnv::PosixEnv() : page_size_(getpagesize()),

View File

@ -6,6 +6,7 @@
#define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_
#include "port/port.h"
#include "port/thread_annotations.h"
namespace leveldb {
@ -19,12 +20,13 @@ namespace leveldb {
// ... some complex code, possibly with multiple return paths ...
// }
class MutexLock {
class SCOPED_LOCKABLE MutexLock {
public:
explicit MutexLock(port::Mutex *mu) : mu_(mu) {
explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu)
: mu_(mu) {
this->mu_->Lock();
}
~MutexLock() { this->mu_->Unlock(); }
~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); }
private:
port::Mutex *const mu_;