Added bloom filter support.
In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update.
This commit is contained in:
parent
bc1ee4d25e
commit
85584d497e
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,5 +1,8 @@
|
||||
build_config.mk
|
||||
*.a
|
||||
*.o
|
||||
*.dylib*
|
||||
*.so
|
||||
*.so.*
|
||||
*_test
|
||||
db_bench
|
||||
|
14
Makefile
14
Makefile
@ -17,8 +17,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode)
|
||||
#-----------------------------------------------
|
||||
|
||||
# detect what platform we're building on
|
||||
$(shell sh ./build_detect_platform)
|
||||
# this file is generated by build_detect_platform to set build flags and sources
|
||||
$(shell ./build_detect_platform build_config.mk)
|
||||
# this file is generated by the previous line to set build flags and sources
|
||||
include build_config.mk
|
||||
|
||||
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||
@ -34,6 +34,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)
|
||||
|
||||
TESTS = \
|
||||
arena_test \
|
||||
bloom_test \
|
||||
c_test \
|
||||
cache_test \
|
||||
coding_test \
|
||||
@ -43,6 +44,7 @@ TESTS = \
|
||||
dbformat_test \
|
||||
env_test \
|
||||
filename_test \
|
||||
filter_block_test \
|
||||
log_test \
|
||||
memenv_test \
|
||||
skiplist_test \
|
||||
@ -63,7 +65,7 @@ default: all
|
||||
ifneq ($(PLATFORM_SHARED_EXT),)
|
||||
# Update db.h if you change these.
|
||||
SHARED_MAJOR = 1
|
||||
SHARED_MINOR = 3
|
||||
SHARED_MINOR = 4
|
||||
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
||||
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
|
||||
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
|
||||
@ -101,6 +103,9 @@ db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
@ -128,6 +133,9 @@ env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||
|
||||
|
38
build_detect_platform
Normal file → Executable file
38
build_detect_platform
Normal file → Executable file
@ -1,9 +1,9 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Detects OS we're compiling on and generates build_config.mk,
|
||||
# which in turn gets read while processing Makefile.
|
||||
# Detects OS we're compiling on and outputs a file specified by the first
|
||||
# argument, which in turn gets read while processing Makefile.
|
||||
#
|
||||
# build_config.mk will set the following variables:
|
||||
# The output will set the following variables:
|
||||
# PLATFORM_LDFLAGS Linker flags
|
||||
# PLATFORM_SHARED_EXT Extension for shared libraries
|
||||
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
|
||||
@ -13,11 +13,15 @@
|
||||
# -DLEVELDB_PLATFORM_POSIX if cstdatomic is present
|
||||
# -DLEVELDB_PLATFORM_NOATOMIC if it is not
|
||||
|
||||
SCRIPT_DIR=`dirname $0`
|
||||
OUTPUT=$1
|
||||
if test -z "$OUTPUT"; then
|
||||
echo "usage: $0 <output-filename>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Delete existing build_config.mk
|
||||
rm -f build_config.mk
|
||||
touch build_config.mk
|
||||
# Delete existing output, if it exists
|
||||
rm -f $OUTPUT
|
||||
touch $OUTPUT
|
||||
|
||||
if test -z "$CXX"; then
|
||||
CXX=g++
|
||||
@ -96,7 +100,7 @@ esac
|
||||
# except for the test and benchmark files. By default, find will output a list
|
||||
# of all files matching either rule, so we need to append -print to make the
|
||||
# prune take effect.
|
||||
DIRS="$SCRIPT_DIR/util $SCRIPT_DIR/db $SCRIPT_DIR/table"
|
||||
DIRS="util db table"
|
||||
set -f # temporarily disable globbing so that our patterns aren't expanded
|
||||
PRUNE_TEST="-name *test*.cc -prune"
|
||||
PRUNE_BENCH="-name *_bench.cc -prune"
|
||||
@ -105,8 +109,8 @@ set +f # re-enable globbing
|
||||
|
||||
# The sources consist of the portable files, plus the platform-specific port
|
||||
# file.
|
||||
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> build_config.mk
|
||||
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> build_config.mk
|
||||
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
|
||||
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT
|
||||
|
||||
if [ "$PLATFORM" = "OS_ANDROID_CROSSCOMPILE" ]; then
|
||||
# Cross-compiling; do not try any compilation tests.
|
||||
@ -147,10 +151,10 @@ fi
|
||||
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
|
||||
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
|
||||
|
||||
echo "PLATFORM=$PLATFORM" >> build_config.mk
|
||||
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> build_config.mk
|
||||
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> build_config.mk
|
||||
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> build_config.mk
|
||||
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> build_config.mk
|
||||
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> build_config.mk
|
||||
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> build_config.mk
|
||||
echo "PLATFORM=$PLATFORM" >> $OUTPUT
|
||||
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
|
||||
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
|
||||
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
|
||||
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
|
||||
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
|
||||
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
|
||||
|
110
db/c.cc
110
db/c.cc
@ -10,6 +10,7 @@
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/iterator.h"
|
||||
#include "leveldb/options.h"
|
||||
#include "leveldb/status.h"
|
||||
@ -21,8 +22,10 @@ using leveldb::CompressionType;
|
||||
using leveldb::DB;
|
||||
using leveldb::Env;
|
||||
using leveldb::FileLock;
|
||||
using leveldb::FilterPolicy;
|
||||
using leveldb::Iterator;
|
||||
using leveldb::Logger;
|
||||
using leveldb::NewBloomFilterPolicy;
|
||||
using leveldb::NewLRUCache;
|
||||
using leveldb::Options;
|
||||
using leveldb::RandomAccessFile;
|
||||
@ -78,6 +81,47 @@ struct leveldb_comparator_t : public Comparator {
|
||||
virtual void FindShortSuccessor(std::string* key) const { }
|
||||
};
|
||||
|
||||
struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
const char* (*name_)(void*);
|
||||
char* (*create_)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length);
|
||||
unsigned char (*key_match_)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length);
|
||||
|
||||
virtual ~leveldb_filterpolicy_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
virtual const char* Name() const {
|
||||
return (*name_)(state_);
|
||||
}
|
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
std::vector<const char*> key_pointers(n);
|
||||
std::vector<size_t> key_sizes(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
key_pointers[i] = keys[i].data();
|
||||
key_sizes[i] = keys[i].size();
|
||||
}
|
||||
size_t len;
|
||||
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
|
||||
dst->append(filter, len);
|
||||
free(filter);
|
||||
}
|
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
return (*key_match_)(state_, key.data(), key.size(),
|
||||
filter.data(), filter.size());
|
||||
}
|
||||
};
|
||||
|
||||
struct leveldb_env_t {
|
||||
Env* rep;
|
||||
bool is_default;
|
||||
@ -218,6 +262,17 @@ void leveldb_approximate_sizes(
|
||||
delete[] ranges;
|
||||
}
|
||||
|
||||
void leveldb_compact_range(
|
||||
leveldb_t* db,
|
||||
const char* start_key, size_t start_key_len,
|
||||
const char* limit_key, size_t limit_key_len) {
|
||||
Slice a, b;
|
||||
db->rep->CompactRange(
|
||||
// Pass NULL Slice if corresponding "const char*" is NULL
|
||||
(start_key ? (a = Slice(start_key, start_key_len), &a) : NULL),
|
||||
(limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
|
||||
}
|
||||
|
||||
void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
@ -340,6 +395,12 @@ void leveldb_options_set_comparator(
|
||||
opt->rep.comparator = cmp;
|
||||
}
|
||||
|
||||
void leveldb_options_set_filter_policy(
|
||||
leveldb_options_t* opt,
|
||||
leveldb_filterpolicy_t* policy) {
|
||||
opt->rep.filter_policy = policy;
|
||||
}
|
||||
|
||||
void leveldb_options_set_create_if_missing(
|
||||
leveldb_options_t* opt, unsigned char v) {
|
||||
opt->rep.create_if_missing = v;
|
||||
@ -407,6 +468,55 @@ void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
|
||||
delete cmp;
|
||||
}
|
||||
|
||||
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*)) {
|
||||
leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
|
||||
result->state_ = state;
|
||||
result->destructor_ = destructor;
|
||||
result->create_ = create_filter;
|
||||
result->key_match_ = key_may_match;
|
||||
result->name_ = name;
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
|
||||
delete filter;
|
||||
}
|
||||
|
||||
leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
// Make a leveldb_filterpolicy_t, but override all of its methods so
|
||||
// they delegate to a NewBloomFilterPolicy() instead of user
|
||||
// supplied C functions.
|
||||
struct Wrapper : public leveldb_filterpolicy_t {
|
||||
const FilterPolicy* rep_;
|
||||
~Wrapper() { delete rep_; }
|
||||
const char* Name() const { return rep_->Name(); }
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
return rep_->CreateFilter(keys, n, dst);
|
||||
}
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
return rep_->KeyMayMatch(key, filter);
|
||||
}
|
||||
static void DoNothing(void*) { }
|
||||
};
|
||||
Wrapper* wrapper = new Wrapper;
|
||||
wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
|
||||
wrapper->state_ = NULL;
|
||||
wrapper->destructor_ = &Wrapper::DoNothing;
|
||||
return wrapper;
|
||||
}
|
||||
|
||||
leveldb_readoptions_t* leveldb_readoptions_create() {
|
||||
return new leveldb_readoptions_t;
|
||||
}
|
||||
|
77
db/c_test.c
77
db/c_test.c
@ -122,6 +122,31 @@ static const char* CmpName(void* arg) {
|
||||
return "foo";
|
||||
}
|
||||
|
||||
// Custom filter policy
|
||||
static unsigned char fake_filter_result = 1;
|
||||
static void FilterDestroy(void* arg) { }
|
||||
static const char* FilterName(void* arg) {
|
||||
return "TestFilter";
|
||||
}
|
||||
static char* FilterCreate(
|
||||
void* arg,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length) {
|
||||
*filter_length = 4;
|
||||
char* result = malloc(4);
|
||||
memcpy(result, "fake", 4);
|
||||
return result;
|
||||
}
|
||||
unsigned char FilterKeyMatch(
|
||||
void* arg,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length) {
|
||||
CheckCondition(filter_length == 4);
|
||||
CheckCondition(memcmp(filter, "fake", 4) == 0);
|
||||
return fake_filter_result;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
leveldb_t* db;
|
||||
leveldb_comparator_t* cmp;
|
||||
@ -131,6 +156,7 @@ int main(int argc, char** argv) {
|
||||
leveldb_readoptions_t* roptions;
|
||||
leveldb_writeoptions_t* woptions;
|
||||
char* err = NULL;
|
||||
int run = -1;
|
||||
|
||||
snprintf(dbname, sizeof(dbname), "/tmp/leveldb_c_test-%d",
|
||||
((int) geteuid()));
|
||||
@ -180,6 +206,14 @@ int main(int argc, char** argv) {
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactall");
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactrange");
|
||||
leveldb_compact_range(db, "a", 1, "z", 1);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("writebatch");
|
||||
{
|
||||
leveldb_writebatch_t* wb = leveldb_writebatch_create();
|
||||
@ -279,6 +313,49 @@ int main(int argc, char** argv) {
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
leveldb_options_set_create_if_missing(options, 1);
|
||||
leveldb_options_set_error_if_exists(options, 1);
|
||||
}
|
||||
|
||||
StartPhase("filter");
|
||||
for (run = 0; run < 2; run++) {
|
||||
// First run uses custom filter, second run uses bloom filter
|
||||
CheckNoError(err);
|
||||
leveldb_filterpolicy_t* policy;
|
||||
if (run == 0) {
|
||||
policy = leveldb_filterpolicy_create(
|
||||
NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
|
||||
} else {
|
||||
policy = leveldb_filterpolicy_create_bloom(10);
|
||||
}
|
||||
|
||||
// Create new database
|
||||
leveldb_close(db);
|
||||
leveldb_destroy_db(options, dbname, &err);
|
||||
leveldb_options_set_filter_policy(options, policy);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
|
||||
fake_filter_result = 1;
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
CheckGet(db, roptions, "bar", "barvalue");
|
||||
if (phase == 0) {
|
||||
// Must not find value when custom filter returns false
|
||||
fake_filter_result = 0;
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
fake_filter_result = 1;
|
||||
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
CheckGet(db, roptions, "bar", "barvalue");
|
||||
}
|
||||
leveldb_options_set_filter_policy(options, NULL);
|
||||
leveldb_filterpolicy_destroy(policy);
|
||||
}
|
||||
|
||||
StartPhase("cleanup");
|
||||
|
@ -25,15 +25,20 @@
|
||||
// overwrite -- overwrite N values in random key order in async mode
|
||||
// fillsync -- write N/100 values in random key order in sync mode
|
||||
// fill100K -- write N/1000 100K values in random order in async mode
|
||||
// deleteseq -- delete N keys in sequential order
|
||||
// deleterandom -- delete N keys in random order
|
||||
// readseq -- read N times sequentially
|
||||
// readreverse -- read N times in reverse order
|
||||
// readrandom -- read N times in random order
|
||||
// readmissing -- read N missing keys in random order
|
||||
// readhot -- read N times in random order from 1% section of DB
|
||||
// seekrandom -- N random seeks
|
||||
// crc32c -- repeated crc32c of 4K of data
|
||||
// acquireload -- load N*1000 times
|
||||
// Meta operations:
|
||||
// compact -- Compact the entire DB
|
||||
// stats -- Print DB stats
|
||||
// sstables -- Print sstable info
|
||||
// heapprofile -- Dump a heap profile (if supported by this port)
|
||||
static const char* FLAGS_benchmarks =
|
||||
"fillseq,"
|
||||
@ -85,6 +90,10 @@ static int FLAGS_cache_size = -1;
|
||||
// Maximum number of files to keep open at the same time (use default if == 0)
|
||||
static int FLAGS_open_files = 0;
|
||||
|
||||
// Bloom filter bits per key.
|
||||
// Negative means use default settings.
|
||||
static int FLAGS_bloom_bits = -1;
|
||||
|
||||
// If true, do not destroy the existing database. If you set this
|
||||
// flag and also specify a benchmark that wants a fresh database, that
|
||||
// benchmark will fail.
|
||||
@ -293,6 +302,7 @@ struct ThreadState {
|
||||
class Benchmark {
|
||||
private:
|
||||
Cache* cache_;
|
||||
const FilterPolicy* filter_policy_;
|
||||
DB* db_;
|
||||
int num_;
|
||||
int value_size_;
|
||||
@ -378,6 +388,9 @@ class Benchmark {
|
||||
public:
|
||||
Benchmark()
|
||||
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
|
||||
filter_policy_(FLAGS_bloom_bits >= 0
|
||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||
: NULL),
|
||||
db_(NULL),
|
||||
num_(FLAGS_num),
|
||||
value_size_(FLAGS_value_size),
|
||||
@ -399,6 +412,7 @@ class Benchmark {
|
||||
~Benchmark() {
|
||||
delete db_;
|
||||
delete cache_;
|
||||
delete filter_policy_;
|
||||
}
|
||||
|
||||
void Run() {
|
||||
@ -457,11 +471,19 @@ class Benchmark {
|
||||
method = &Benchmark::ReadReverse;
|
||||
} else if (name == Slice("readrandom")) {
|
||||
method = &Benchmark::ReadRandom;
|
||||
} else if (name == Slice("readmissing")) {
|
||||
method = &Benchmark::ReadMissing;
|
||||
} else if (name == Slice("seekrandom")) {
|
||||
method = &Benchmark::SeekRandom;
|
||||
} else if (name == Slice("readhot")) {
|
||||
method = &Benchmark::ReadHot;
|
||||
} else if (name == Slice("readrandomsmall")) {
|
||||
reads_ /= 1000;
|
||||
method = &Benchmark::ReadRandom;
|
||||
} else if (name == Slice("deleteseq")) {
|
||||
method = &Benchmark::DeleteSeq;
|
||||
} else if (name == Slice("deleterandom")) {
|
||||
method = &Benchmark::DeleteRandom;
|
||||
} else if (name == Slice("readwhilewriting")) {
|
||||
num_threads++; // Add extra thread for writing
|
||||
method = &Benchmark::ReadWhileWriting;
|
||||
@ -478,7 +500,9 @@ class Benchmark {
|
||||
} else if (name == Slice("heapprofile")) {
|
||||
HeapProfile();
|
||||
} else if (name == Slice("stats")) {
|
||||
PrintStats();
|
||||
PrintStats("leveldb.stats");
|
||||
} else if (name == Slice("sstables")) {
|
||||
PrintStats("leveldb.sstables");
|
||||
} else {
|
||||
if (name != Slice()) { // No error message for empty name
|
||||
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
|
||||
@ -669,6 +693,7 @@ class Benchmark {
|
||||
options.create_if_missing = !FLAGS_use_existing_db;
|
||||
options.block_cache = cache_;
|
||||
options.write_buffer_size = FLAGS_write_buffer_size;
|
||||
options.filter_policy = filter_policy_;
|
||||
Status s = DB::Open(options, FLAGS_db, &db_);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
|
||||
@ -743,10 +768,28 @@ class Benchmark {
|
||||
void ReadRandom(ThreadState* thread) {
|
||||
ReadOptions options;
|
||||
std::string value;
|
||||
int found = 0;
|
||||
for (int i = 0; i < reads_; i++) {
|
||||
char key[100];
|
||||
const int k = thread->rand.Next() % FLAGS_num;
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
if (db_->Get(options, key, &value).ok()) {
|
||||
found++;
|
||||
}
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
char msg[100];
|
||||
snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
|
||||
thread->stats.AddMessage(msg);
|
||||
}
|
||||
|
||||
void ReadMissing(ThreadState* thread) {
|
||||
ReadOptions options;
|
||||
std::string value;
|
||||
for (int i = 0; i < reads_; i++) {
|
||||
char key[100];
|
||||
const int k = thread->rand.Next() % FLAGS_num;
|
||||
snprintf(key, sizeof(key), "%016d.", k);
|
||||
db_->Get(options, key, &value);
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
@ -765,6 +808,54 @@ class Benchmark {
|
||||
}
|
||||
}
|
||||
|
||||
void SeekRandom(ThreadState* thread) {
|
||||
ReadOptions options;
|
||||
std::string value;
|
||||
int found = 0;
|
||||
for (int i = 0; i < reads_; i++) {
|
||||
Iterator* iter = db_->NewIterator(options);
|
||||
char key[100];
|
||||
const int k = thread->rand.Next() % FLAGS_num;
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
iter->Seek(key);
|
||||
if (iter->Valid() && iter->key() == key) found++;
|
||||
delete iter;
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
char msg[100];
|
||||
snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
|
||||
thread->stats.AddMessage(msg);
|
||||
}
|
||||
|
||||
void DoDelete(ThreadState* thread, bool seq) {
|
||||
RandomGenerator gen;
|
||||
WriteBatch batch;
|
||||
Status s;
|
||||
for (int i = 0; i < num_; i += entries_per_batch_) {
|
||||
batch.Clear();
|
||||
for (int j = 0; j < entries_per_batch_; j++) {
|
||||
const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
|
||||
char key[100];
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
batch.Delete(key);
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
s = db_->Write(write_options_, &batch);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "del error: %s\n", s.ToString().c_str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DeleteSeq(ThreadState* thread) {
|
||||
DoDelete(thread, true);
|
||||
}
|
||||
|
||||
void DeleteRandom(ThreadState* thread) {
|
||||
DoDelete(thread, false);
|
||||
}
|
||||
|
||||
void ReadWhileWriting(ThreadState* thread) {
|
||||
if (thread->tid > 0) {
|
||||
ReadRandom(thread);
|
||||
@ -799,9 +890,9 @@ class Benchmark {
|
||||
db_->CompactRange(NULL, NULL);
|
||||
}
|
||||
|
||||
void PrintStats() {
|
||||
void PrintStats(const char* key) {
|
||||
std::string stats;
|
||||
if (!db_->GetProperty("leveldb.stats", &stats)) {
|
||||
if (!db_->GetProperty(key, &stats)) {
|
||||
stats = "(failed)";
|
||||
}
|
||||
fprintf(stdout, "\n%s\n", stats.c_str());
|
||||
@ -861,6 +952,8 @@ int main(int argc, char** argv) {
|
||||
FLAGS_write_buffer_size = n;
|
||||
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_cache_size = n;
|
||||
} else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_bloom_bits = n;
|
||||
} else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_open_files = n;
|
||||
} else if (strncmp(argv[i], "--db=", 5) == 0) {
|
||||
|
@ -87,12 +87,14 @@ static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
||||
}
|
||||
Options SanitizeOptions(const std::string& dbname,
|
||||
const InternalKeyComparator* icmp,
|
||||
const InternalFilterPolicy* ipolicy,
|
||||
const Options& src) {
|
||||
Options result = src;
|
||||
result.comparator = icmp;
|
||||
ClipToRange(&result.max_open_files, 20, 50000);
|
||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||
result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
|
||||
ClipToRange(&result.max_open_files, 20, 50000);
|
||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||
if (result.info_log == NULL) {
|
||||
// Open a log file in the same directory as the db
|
||||
src.env->CreateDir(dbname); // In case it does not exist
|
||||
@ -112,7 +114,9 @@ Options SanitizeOptions(const std::string& dbname,
|
||||
DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||
: env_(options.env),
|
||||
internal_comparator_(options.comparator),
|
||||
options_(SanitizeOptions(dbname, &internal_comparator_, options)),
|
||||
internal_filter_policy_(options.filter_policy),
|
||||
options_(SanitizeOptions(
|
||||
dbname, &internal_comparator_, &internal_filter_policy_, options)),
|
||||
owns_info_log_(options_.info_log != options.info_log),
|
||||
owns_cache_(options_.block_cache != options.block_cache),
|
||||
dbname_(dbname),
|
||||
|
@ -105,6 +105,7 @@ class DBImpl : public DB {
|
||||
// Constant after construction
|
||||
Env* const env_;
|
||||
const InternalKeyComparator internal_comparator_;
|
||||
const InternalFilterPolicy internal_filter_policy_;
|
||||
const Options options_; // options_.comparator == &internal_comparator_
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
@ -185,6 +186,7 @@ class DBImpl : public DB {
|
||||
// it is not equal to src.info_log.
|
||||
extern Options SanitizeOptions(const std::string& db,
|
||||
const InternalKeyComparator* icmp,
|
||||
const InternalFilterPolicy* ipolicy,
|
||||
const Options& src);
|
||||
|
||||
} // namespace leveldb
|
||||
|
1034
db/db_test.cc
1034
db/db_test.cc
File diff suppressed because it is too large
Load Diff
@ -98,6 +98,26 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||
}
|
||||
}
|
||||
|
||||
const char* InternalFilterPolicy::Name() const {
|
||||
return user_policy_->Name();
|
||||
}
|
||||
|
||||
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
|
||||
std::string* dst) const {
|
||||
// We rely on the fact that the code in table.cc does not mind us
|
||||
// adjusting keys[].
|
||||
Slice* mkey = const_cast<Slice*>(keys);
|
||||
for (int i = 0; i < n; i++) {
|
||||
mkey[i] = ExtractUserKey(keys[i]);
|
||||
// TODO(sanjay): Suppress dups?
|
||||
}
|
||||
user_policy_->CreateFilter(keys, n, dst);
|
||||
}
|
||||
|
||||
bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
|
||||
return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
|
||||
}
|
||||
|
||||
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
|
||||
size_t usize = user_key.size();
|
||||
size_t needed = usize + 13; // A conservative estimate
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <stdio.h>
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/slice.h"
|
||||
#include "leveldb/table_builder.h"
|
||||
#include "util/coding.h"
|
||||
@ -123,6 +124,17 @@ class InternalKeyComparator : public Comparator {
|
||||
int Compare(const InternalKey& a, const InternalKey& b) const;
|
||||
};
|
||||
|
||||
// Filter policy wrapper that converts from internal keys to user keys
|
||||
class InternalFilterPolicy : public FilterPolicy {
|
||||
private:
|
||||
const FilterPolicy* const user_policy_;
|
||||
public:
|
||||
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
|
||||
virtual const char* Name() const;
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
|
||||
};
|
||||
|
||||
// Modules in this directory should keep internal keys wrapped inside
|
||||
// the following class instead of plain strings so that we do not
|
||||
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
||||
|
@ -48,7 +48,8 @@ class Repairer {
|
||||
: dbname_(dbname),
|
||||
env_(options.env),
|
||||
icmp_(options.comparator),
|
||||
options_(SanitizeOptions(dbname, &icmp_, options)),
|
||||
ipolicy_(options.filter_policy),
|
||||
options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
|
||||
owns_info_log_(options_.info_log != options.info_log),
|
||||
owns_cache_(options_.block_cache != options.block_cache),
|
||||
next_file_number_(1) {
|
||||
@ -99,6 +100,7 @@ class Repairer {
|
||||
std::string const dbname_;
|
||||
Env* const env_;
|
||||
InternalKeyComparator const icmp_;
|
||||
InternalFilterPolicy const ipolicy_;
|
||||
Options const options_;
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
|
@ -42,23 +42,18 @@ TableCache::~TableCache() {
|
||||
delete cache_;
|
||||
}
|
||||
|
||||
Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
Table** tableptr) {
|
||||
if (tableptr != NULL) {
|
||||
*tableptr = NULL;
|
||||
}
|
||||
|
||||
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||
Cache::Handle** handle) {
|
||||
Status s;
|
||||
char buf[sizeof(file_number)];
|
||||
EncodeFixed64(buf, file_number);
|
||||
Slice key(buf, sizeof(buf));
|
||||
Cache::Handle* handle = cache_->Lookup(key);
|
||||
if (handle == NULL) {
|
||||
*handle = cache_->Lookup(key);
|
||||
if (*handle == NULL) {
|
||||
std::string fname = TableFileName(dbname_, file_number);
|
||||
RandomAccessFile* file = NULL;
|
||||
Table* table = NULL;
|
||||
Status s = env_->NewRandomAccessFile(fname, &file);
|
||||
s = env_->NewRandomAccessFile(fname, &file);
|
||||
if (s.ok()) {
|
||||
s = Table::Open(*options_, file, file_size, &table);
|
||||
}
|
||||
@ -68,13 +63,28 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
delete file;
|
||||
// We do not cache error results so that if the error is transient,
|
||||
// or somebody repairs the file, we recover automatically.
|
||||
return NewErrorIterator(s);
|
||||
} else {
|
||||
TableAndFile* tf = new TableAndFile;
|
||||
tf->file = file;
|
||||
tf->table = table;
|
||||
*handle = cache_->Insert(key, tf, 1, &DeleteEntry);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
TableAndFile* tf = new TableAndFile;
|
||||
tf->file = file;
|
||||
tf->table = table;
|
||||
handle = cache_->Insert(key, tf, 1, &DeleteEntry);
|
||||
Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
Table** tableptr) {
|
||||
if (tableptr != NULL) {
|
||||
*tableptr = NULL;
|
||||
}
|
||||
|
||||
Cache::Handle* handle = NULL;
|
||||
Status s = FindTable(file_number, file_size, &handle);
|
||||
if (!s.ok()) {
|
||||
return NewErrorIterator(s);
|
||||
}
|
||||
|
||||
Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||
@ -86,6 +96,22 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
return result;
|
||||
}
|
||||
|
||||
Status TableCache::Get(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
void (*saver)(void*, const Slice&, const Slice&)) {
|
||||
Cache::Handle* handle = NULL;
|
||||
Status s = FindTable(file_number, file_size, &handle);
|
||||
if (s.ok()) {
|
||||
Table* t = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||
s = t->InternalGet(options, k, arg, saver);
|
||||
cache_->Release(handle);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
void TableCache::Evict(uint64_t file_number) {
|
||||
char buf[sizeof(file_number)];
|
||||
EncodeFixed64(buf, file_number);
|
||||
|
@ -35,6 +35,15 @@ class TableCache {
|
||||
uint64_t file_size,
|
||||
Table** tableptr = NULL);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
// call (*handle_result)(arg, found_key, found_value).
|
||||
Status Get(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
void (*handle_result)(void*, const Slice&, const Slice&));
|
||||
|
||||
// Evict any entry for the specified file number
|
||||
void Evict(uint64_t file_number);
|
||||
|
||||
@ -43,6 +52,8 @@ class TableCache {
|
||||
const std::string dbname_;
|
||||
const Options* options_;
|
||||
Cache* cache_;
|
||||
|
||||
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -255,35 +255,34 @@ void Version::AddIterators(const ReadOptions& options,
|
||||
}
|
||||
}
|
||||
|
||||
// If "*iter" points at a value or deletion for user_key, store
|
||||
// either the value, or a NotFound error and return true.
|
||||
// Else return false.
|
||||
static bool GetValue(const Comparator* cmp,
|
||||
Iterator* iter, const Slice& user_key,
|
||||
std::string* value,
|
||||
Status* s) {
|
||||
if (!iter->Valid()) {
|
||||
return false;
|
||||
}
|
||||
// Callback from TableCache::Get()
|
||||
namespace {
|
||||
enum SaverState {
|
||||
kNotFound,
|
||||
kFound,
|
||||
kDeleted,
|
||||
kCorrupt,
|
||||
};
|
||||
struct Saver {
|
||||
SaverState state;
|
||||
const Comparator* ucmp;
|
||||
Slice user_key;
|
||||
std::string* value;
|
||||
};
|
||||
}
|
||||
static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
|
||||
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||
ParsedInternalKey parsed_key;
|
||||
if (!ParseInternalKey(iter->key(), &parsed_key)) {
|
||||
*s = Status::Corruption("corrupted key for ", user_key);
|
||||
return true;
|
||||
}
|
||||
if (cmp->Compare(parsed_key.user_key, user_key) != 0) {
|
||||
return false;
|
||||
}
|
||||
switch (parsed_key.type) {
|
||||
case kTypeDeletion:
|
||||
*s = Status::NotFound(Slice()); // Use an empty error message for speed
|
||||
break;
|
||||
case kTypeValue: {
|
||||
Slice v = iter->value();
|
||||
value->assign(v.data(), v.size());
|
||||
break;
|
||||
if (!ParseInternalKey(ikey, &parsed_key)) {
|
||||
s->state = kCorrupt;
|
||||
} else {
|
||||
if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) {
|
||||
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
|
||||
if (s->state == kFound) {
|
||||
s->value->assign(v.data(), v.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
||||
@ -361,21 +360,27 @@ Status Version::Get(const ReadOptions& options,
|
||||
last_file_read = f;
|
||||
last_file_read_level = level;
|
||||
|
||||
Iterator* iter = vset_->table_cache_->NewIterator(
|
||||
options,
|
||||
f->number,
|
||||
f->file_size);
|
||||
iter->Seek(ikey);
|
||||
const bool done = GetValue(ucmp, iter, user_key, value, &s);
|
||||
if (!iter->status().ok()) {
|
||||
s = iter->status();
|
||||
delete iter;
|
||||
Saver saver;
|
||||
saver.state = kNotFound;
|
||||
saver.ucmp = ucmp;
|
||||
saver.user_key = user_key;
|
||||
saver.value = value;
|
||||
s = vset_->table_cache_->Get(options, f->number, f->file_size,
|
||||
ikey, &saver, SaveValue);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
} else {
|
||||
delete iter;
|
||||
if (done) {
|
||||
}
|
||||
switch (saver.state) {
|
||||
case kNotFound:
|
||||
break; // Keep searching in other files
|
||||
case kFound:
|
||||
return s;
|
||||
case kDeleted:
|
||||
s = Status::NotFound(Slice()); // Use empty error message for speed
|
||||
return s;
|
||||
case kCorrupt:
|
||||
s = Status::Corruption("corrupted key for ", user_key);
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -400,6 +400,69 @@ We might want to prefix <code>filename</code> keys with one letter (say '/') and
|
||||
over just the metadata do not force us to fetch and cache bulky file
|
||||
contents.
|
||||
<p>
|
||||
<h2>Filters</h2>
|
||||
<p>
|
||||
Because of the way <code>leveldb</code> data is organized on disk,
|
||||
a single <code>Get()</code> call may involve multiple reads from disk.
|
||||
The optional <code>FilterPolicy</code> mechanism can be used to reduce
|
||||
the number of disk reads substantially.
|
||||
<pre>
|
||||
leveldb::Options options;
|
||||
options.filter_policy = NewBloomFilter(10);
|
||||
leveldb::DB* db;
|
||||
leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
... use the database ...
|
||||
delete db;
|
||||
delete options.filter_policy;
|
||||
</pre>
|
||||
The preceding code associates a
|
||||
<a href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</a>
|
||||
based filtering policy with the database. Bloom filter based
|
||||
filtering relies on keeping some number of bits of data in memory per
|
||||
key (in this case 10 bits per key since that is the argument we passed
|
||||
to NewBloomFilter). This filter will reduce the number of unnecessary
|
||||
disk reads needed for <code>Get()</code> calls by a factor of
|
||||
approximately a 100. Increasing the bits per key will lead to a
|
||||
larger reduction at the cost of more memory usage. We recommend that
|
||||
applications whose working set does not fit in memory and that do a
|
||||
lot of random reads set a filter policy.
|
||||
<p>
|
||||
If you are using a custom comparator, you should ensure that the filter
|
||||
policy you are using is compatible with your comparator. For example,
|
||||
consider a comparator that ignores trailing spaces when comparing keys.
|
||||
<code>NewBloomFilter</code> must not be used with such a comparator.
|
||||
Instead, the application should provide a custom filter policy that
|
||||
also ignores trailing spaces. For example:
|
||||
<pre>
|
||||
class CustomFilterPolicy : public leveldb::FilterPolicy {
|
||||
private:
|
||||
FilterPolicy* builtin_policy_;
|
||||
public:
|
||||
CustomFilterPolicy() : builtin_policy_(NewBloomFilter(10)) { }
|
||||
~CustomFilterPolicy() { delete builtin_policy_; }
|
||||
|
||||
const char* Name() const { return "IgnoreTrailingSpacesFilter"; }
|
||||
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
// Use builtin bloom filter code after removing trailing spaces
|
||||
std::vector<Slice> trimmed(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
trimmed[i] = RemoveTrailingSpaces(keys[i]);
|
||||
}
|
||||
return builtin_policy_->CreateFilter(&trimmed[i], n, dst);
|
||||
}
|
||||
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
// Use builtin bloom filter code after removing trailing spaces
|
||||
return builtin_policy_->KeyMayMatch(RemoveTrailingSpaces(key), filter);
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
Advanced applications may provide a filter policy that does not use
|
||||
a bloom filter but uses some other mechanism for summarizing a set
|
||||
of keys. See <code>leveldb/filter_policy.h</code> for detail.
|
||||
<p>
|
||||
<h1>Checksums</h1>
|
||||
<p>
|
||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
||||
|
@ -47,6 +47,47 @@ the BlockHandle of the metaindex and index blocks as well as a magic number.
|
||||
// (40==2*BlockHandle::kMaxEncodedLength)
|
||||
magic: fixed64; // == 0xdb4775248b80fb57
|
||||
|
||||
"filter" Meta Block
|
||||
-------------------
|
||||
|
||||
If a "FilterPolicy" was specified when the database was opened, a
|
||||
filter block is stored in each table. The "metaindex" block contains
|
||||
an entry that maps from "filter.<N>" to the BlockHandle for the filter
|
||||
block where "<N>" is the string returned by the filter policy's
|
||||
"Name()" method.
|
||||
|
||||
The filter block stores a sequence of filters, where filter i contains
|
||||
the output of FilterPolicy::CreateFilter() on all keys that are stored
|
||||
in a block whose file offset falls within the range
|
||||
|
||||
[ i*base ... (i+1)*base-1 ]
|
||||
|
||||
Currently, "base" is 2KB. So for example, if blocks X and Y start in
|
||||
the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be
|
||||
converted to a filter by calling FilterPolicy::CreateFilter(), and the
|
||||
resulting filter will be stored as the first filter in the filter
|
||||
block.
|
||||
|
||||
The filter block is formatted as follows:
|
||||
|
||||
[filter 0]
|
||||
[filter 1]
|
||||
[filter 2]
|
||||
...
|
||||
[filter N-1]
|
||||
|
||||
[offset of filter 0] : 4 bytes
|
||||
[offset of filter 1] : 4 bytes
|
||||
[offset of filter 2] : 4 bytes
|
||||
...
|
||||
[offset of filter N-1] : 4 bytes
|
||||
|
||||
[offset of beginning of offset array] : 4 bytes
|
||||
lg(base) : 1 byte
|
||||
|
||||
The offset array at the end of the filter block allows efficient
|
||||
mapping from a data block offset to the corresponding filter.
|
||||
|
||||
"stats" Meta Block
|
||||
------------------
|
||||
|
||||
|
@ -55,6 +55,7 @@ typedef struct leveldb_cache_t leveldb_cache_t;
|
||||
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
||||
typedef struct leveldb_env_t leveldb_env_t;
|
||||
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
||||
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
|
||||
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
||||
typedef struct leveldb_logger_t leveldb_logger_t;
|
||||
typedef struct leveldb_options_t leveldb_options_t;
|
||||
@ -127,6 +128,11 @@ extern void leveldb_approximate_sizes(
|
||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||
uint64_t* sizes);
|
||||
|
||||
extern void leveldb_compact_range(
|
||||
leveldb_t* db,
|
||||
const char* start_key, size_t start_key_len,
|
||||
const char* limit_key, size_t limit_key_len);
|
||||
|
||||
/* Management operations */
|
||||
|
||||
extern void leveldb_destroy_db(
|
||||
@ -177,6 +183,9 @@ extern void leveldb_options_destroy(leveldb_options_t*);
|
||||
extern void leveldb_options_set_comparator(
|
||||
leveldb_options_t*,
|
||||
leveldb_comparator_t*);
|
||||
extern void leveldb_options_set_filter_policy(
|
||||
leveldb_options_t*,
|
||||
leveldb_filterpolicy_t*);
|
||||
extern void leveldb_options_set_create_if_missing(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_error_if_exists(
|
||||
@ -209,6 +218,26 @@ extern leveldb_comparator_t* leveldb_comparator_create(
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
|
||||
|
||||
/* Filter policy */
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
|
||||
int bits_per_key);
|
||||
|
||||
/* Read options */
|
||||
|
||||
extern leveldb_readoptions_t* leveldb_readoptions_create();
|
||||
|
@ -14,7 +14,7 @@ namespace leveldb {
|
||||
|
||||
// Update Makefile if you change these
|
||||
static const int kMajorVersion = 1;
|
||||
static const int kMinorVersion = 3;
|
||||
static const int kMinorVersion = 4;
|
||||
|
||||
struct Options;
|
||||
struct ReadOptions;
|
||||
|
70
include/leveldb/filter_policy.h
Normal file
70
include/leveldb/filter_policy.h
Normal file
@ -0,0 +1,70 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// A database can be configured with a custom FilterPolicy object.
|
||||
// This object is responsible for creating a small filter from a set
|
||||
// of keys. These filters are stored in leveldb and are consulted
|
||||
// automatically by leveldb to decide whether or not to read some
|
||||
// information from disk. In many cases, a filter can cut down the
|
||||
// number of disk seeks form a handful to a single disk seek per
|
||||
// DB::Get() call.
|
||||
//
|
||||
// Most people will want to use the builtin bloom filter support (see
|
||||
// NewBloomFilterPolicy() below).
|
||||
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice;
|
||||
|
||||
class FilterPolicy {
|
||||
public:
|
||||
virtual ~FilterPolicy();
|
||||
|
||||
// Return the name of this policy. Note that if the filter encoding
|
||||
// changes in an incompatible way, the name returned by this method
|
||||
// must be changed. Otherwise, old incompatible filters may be
|
||||
// passed to methods of this type.
|
||||
virtual const char* Name() const = 0;
|
||||
|
||||
// keys[0,n-1] contains a list of keys (potentially with duplicates)
|
||||
// that are ordered according to the user supplied comparator.
|
||||
// Append a filter that summarizes keys[0,n-1] to *dst.
|
||||
//
|
||||
// Warning: do not change the initial contents of *dst. Instead,
|
||||
// append the newly constructed filter to *dst.
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
|
||||
const = 0;
|
||||
|
||||
// "filter" contains the data appended by a preceding call to
|
||||
// CreateFilter() on this class. This method must return true if
|
||||
// the key was in the list of keys passed to CreateFilter().
|
||||
// This method may return true or false if the key was not on the
|
||||
// list, but it should aim to return false with a high probability.
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
||||
};
|
||||
|
||||
// Return a new filter policy that uses a bloom filter with approximately
|
||||
// the specified number of bits per key. A good value for bits_per_key
|
||||
// is 10, which yields a filter with ~ 1% false positive rate.
|
||||
//
|
||||
// Callers must delete the result after any database that is using the
|
||||
// result has been closed.
|
||||
//
|
||||
// Note: if you are using a custom comparator that ignores some parts
|
||||
// of the keys being compared, you must not use NewBloomFilterPolicy()
|
||||
// and must provide your own FilterPolicy that also ignores the
|
||||
// corresponding parts of the keys. For example, if the comparator
|
||||
// ignores trailing spaces, it would be incorrect to use a
|
||||
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
|
||||
// trailing spaces in keys.
|
||||
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
|
||||
|
||||
}
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
@ -12,6 +12,7 @@ namespace leveldb {
|
||||
class Cache;
|
||||
class Comparator;
|
||||
class Env;
|
||||
class FilterPolicy;
|
||||
class Logger;
|
||||
class Snapshot;
|
||||
|
||||
@ -127,6 +128,13 @@ struct Options {
|
||||
// efficiently detect that and will switch to uncompressed mode.
|
||||
CompressionType compression;
|
||||
|
||||
// If non-NULL, use the specified filter policy to reduce disk reads.
|
||||
// Many applications will benefit from passing the result of
|
||||
// NewBloomFilterPolicy() here.
|
||||
//
|
||||
// Default: NULL
|
||||
const FilterPolicy* filter_policy;
|
||||
|
||||
// Create an Options object with default values for all fields.
|
||||
Options();
|
||||
};
|
||||
|
@ -12,9 +12,11 @@ namespace leveldb {
|
||||
|
||||
class Block;
|
||||
class BlockHandle;
|
||||
class Footer;
|
||||
struct Options;
|
||||
class RandomAccessFile;
|
||||
struct ReadOptions;
|
||||
class TableCache;
|
||||
|
||||
// A Table is a sorted map from strings to strings. Tables are
|
||||
// immutable and persistent. A Table may be safely accessed from
|
||||
@ -60,6 +62,19 @@ class Table {
|
||||
explicit Table(Rep* rep) { rep_ = rep; }
|
||||
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);
|
||||
|
||||
// Calls (*handle_result)(arg, ...) with the entry found after a call
|
||||
// to Seek(key). May not make such a call if filter policy says
|
||||
// that key is not present.
|
||||
friend class TableCache;
|
||||
Status InternalGet(
|
||||
const ReadOptions&, const Slice& key,
|
||||
void* arg,
|
||||
void (*handle_result)(void* arg, const Slice& k, const Slice& v));
|
||||
|
||||
|
||||
void ReadMeta(const Footer& footer);
|
||||
void ReadFilter(const Slice& filter_handle_value);
|
||||
|
||||
// No copying allowed
|
||||
Table(const Table&);
|
||||
void operator=(const Table&);
|
||||
|
@ -77,6 +77,7 @@ class TableBuilder {
|
||||
private:
|
||||
bool ok() const { return status().ok(); }
|
||||
void WriteBlock(BlockBuilder* block, BlockHandle* handle);
|
||||
void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle);
|
||||
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
|
@ -78,6 +78,9 @@ class CondVar {
|
||||
// On ARM chipsets <V6, 0xffff0fa0 is the hard coded address of a
|
||||
// memory barrier function provided by the kernel.
|
||||
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
|
||||
// TODO(user): ATTRIBUTE_WEAK is undefined, so this fails to build on
|
||||
// non-ARMV6_OR_7. We may be able to replace it with __attribute__((weak)) for
|
||||
// older ARM builds, but x86 builds will require a different memory barrier.
|
||||
LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
|
||||
(LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
|
||||
#endif
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "leveldb/comparator.h"
|
||||
#include "table/format.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/logging.h"
|
||||
|
||||
@ -19,10 +20,10 @@ inline uint32_t Block::NumRestarts() const {
|
||||
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
||||
}
|
||||
|
||||
Block::Block(const char* data, size_t size, bool take_ownership)
|
||||
: data_(data),
|
||||
size_(size),
|
||||
owned_(take_ownership) {
|
||||
Block::Block(const BlockContents& contents)
|
||||
: data_(contents.data.data()),
|
||||
size_(contents.data.size()),
|
||||
owned_(contents.heap_allocated) {
|
||||
if (size_ < sizeof(uint32_t)) {
|
||||
size_ = 0; // Error marker
|
||||
} else {
|
||||
|
@ -11,14 +11,13 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
struct BlockContents;
|
||||
class Comparator;
|
||||
|
||||
class Block {
|
||||
public:
|
||||
// Initialize the block with the specified contents.
|
||||
// Takes ownership of data[] and will delete[] it when done iff
|
||||
// "take_ownership is true.
|
||||
Block(const char* data, size_t size, bool take_ownership);
|
||||
explicit Block(const BlockContents& contents);
|
||||
|
||||
~Block();
|
||||
|
||||
|
111
table/filter_block.cc
Normal file
111
table/filter_block.cc
Normal file
@ -0,0 +1,111 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "table/filter_block.h"
|
||||
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// See doc/table_format.txt for an explanation of the filter block format.
|
||||
|
||||
// Generate new filter every 2KB of data
|
||||
static const size_t kFilterBaseLg = 11;
|
||||
static const size_t kFilterBase = 1 << kFilterBaseLg;
|
||||
|
||||
FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
|
||||
: policy_(policy) {
|
||||
}
|
||||
|
||||
void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
||||
uint64_t filter_index = (block_offset / kFilterBase);
|
||||
assert(filter_index >= filter_offsets_.size());
|
||||
while (filter_index > filter_offsets_.size()) {
|
||||
GenerateFilter();
|
||||
}
|
||||
}
|
||||
|
||||
void FilterBlockBuilder::AddKey(const Slice& key) {
|
||||
Slice k = key;
|
||||
start_.push_back(keys_.size());
|
||||
keys_.append(k.data(), k.size());
|
||||
}
|
||||
|
||||
Slice FilterBlockBuilder::Finish() {
|
||||
if (!start_.empty()) {
|
||||
GenerateFilter();
|
||||
}
|
||||
|
||||
// Append array of per-filter offsets
|
||||
const uint32_t array_offset = result_.size();
|
||||
for (size_t i = 0; i < filter_offsets_.size(); i++) {
|
||||
PutFixed32(&result_, filter_offsets_[i]);
|
||||
}
|
||||
|
||||
PutFixed32(&result_, array_offset);
|
||||
result_.push_back(kFilterBaseLg); // Save encoding parameter in result
|
||||
return Slice(result_);
|
||||
}
|
||||
|
||||
void FilterBlockBuilder::GenerateFilter() {
|
||||
const size_t num_keys = start_.size();
|
||||
if (num_keys == 0) {
|
||||
// Fast path if there are no keys for this filter
|
||||
filter_offsets_.push_back(result_.size());
|
||||
return;
|
||||
}
|
||||
|
||||
// Make list of keys from flattened key structure
|
||||
start_.push_back(keys_.size()); // Simplify length computation
|
||||
tmp_keys_.resize(num_keys);
|
||||
for (size_t i = 0; i < num_keys; i++) {
|
||||
const char* base = keys_.data() + start_[i];
|
||||
size_t length = start_[i+1] - start_[i];
|
||||
tmp_keys_[i] = Slice(base, length);
|
||||
}
|
||||
|
||||
// Generate filter for current set of keys and append to result_.
|
||||
filter_offsets_.push_back(result_.size());
|
||||
policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
|
||||
|
||||
tmp_keys_.clear();
|
||||
keys_.clear();
|
||||
start_.clear();
|
||||
}
|
||||
|
||||
FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
|
||||
const Slice& contents)
|
||||
: policy_(policy),
|
||||
data_(NULL),
|
||||
offset_(NULL),
|
||||
num_(0),
|
||||
base_lg_(0) {
|
||||
size_t n = contents.size();
|
||||
if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
|
||||
base_lg_ = contents[n-1];
|
||||
uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
|
||||
if (last_word > n - 5) return;
|
||||
data_ = contents.data();
|
||||
offset_ = data_ + last_word;
|
||||
num_ = (n - 5 - last_word) / 4;
|
||||
}
|
||||
|
||||
bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
|
||||
uint64_t index = block_offset >> base_lg_;
|
||||
if (index < num_) {
|
||||
uint32_t start = DecodeFixed32(offset_ + index*4);
|
||||
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
|
||||
if (start <= limit && limit <= (offset_ - data_)) {
|
||||
Slice filter = Slice(data_ + start, limit - start);
|
||||
return policy_->KeyMayMatch(key, filter);
|
||||
} else if (start == limit) {
|
||||
// Empty filters do not match any keys
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true; // Errors are treated as potential matches
|
||||
}
|
||||
|
||||
}
|
68
table/filter_block.h
Normal file
68
table/filter_block.h
Normal file
@ -0,0 +1,68 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// A filter block is stored near the end of a Table file. It contains
|
||||
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||
// into a single filter block.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
||||
#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "leveldb/slice.h"
|
||||
#include "util/hash.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class FilterPolicy;
|
||||
|
||||
// A FilterBlockBuilder is used to construct all of the filters for a
|
||||
// particular Table. It generates a single string which is stored as
|
||||
// a special block in the Table.
|
||||
//
|
||||
// The sequence of calls to FilterBlockBuilder must match the regexp:
|
||||
// (StartBlock AddKey*)* Finish
|
||||
class FilterBlockBuilder {
|
||||
public:
|
||||
explicit FilterBlockBuilder(const FilterPolicy*);
|
||||
|
||||
void StartBlock(uint64_t block_offset);
|
||||
void AddKey(const Slice& key);
|
||||
Slice Finish();
|
||||
|
||||
private:
|
||||
void GenerateFilter();
|
||||
|
||||
const FilterPolicy* policy_;
|
||||
std::string keys_; // Flattened key contents
|
||||
std::vector<size_t> start_; // Starting index in keys_ of each key
|
||||
std::string result_; // Filter data computed so far
|
||||
std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument
|
||||
std::vector<uint32_t> filter_offsets_;
|
||||
|
||||
// No copying allowed
|
||||
FilterBlockBuilder(const FilterBlockBuilder&);
|
||||
void operator=(const FilterBlockBuilder&);
|
||||
};
|
||||
|
||||
class FilterBlockReader {
|
||||
public:
|
||||
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||
FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
|
||||
bool KeyMayMatch(uint64_t block_offset, const Slice& key);
|
||||
|
||||
private:
|
||||
const FilterPolicy* policy_;
|
||||
const char* data_; // Pointer to filter data (at block-start)
|
||||
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||
size_t num_; // Number of entries in offset array
|
||||
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
128
table/filter_block_test.cc
Normal file
128
table/filter_block_test.cc
Normal file
@ -0,0 +1,128 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "table/filter_block.h"
|
||||
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/hash.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// For testing: emit an array with one hash value per key
|
||||
class TestHashFilter : public FilterPolicy {
|
||||
public:
|
||||
virtual const char* Name() const {
|
||||
return "TestHashFilter";
|
||||
}
|
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
for (int i = 0; i < n; i++) {
|
||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
||||
PutFixed32(dst, h);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
uint32_t h = Hash(key.data(), key.size(), 1);
|
||||
for (int i = 0; i + 4 <= filter.size(); i += 4) {
|
||||
if (h == DecodeFixed32(filter.data() + i)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class FilterBlockTest {
|
||||
public:
|
||||
TestHashFilter policy_;
|
||||
};
|
||||
|
||||
TEST(FilterBlockTest, EmptyBuilder) {
|
||||
FilterBlockBuilder builder(&policy_);
|
||||
Slice block = builder.Finish();
|
||||
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
|
||||
FilterBlockReader reader(&policy_, block);
|
||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
|
||||
}
|
||||
|
||||
TEST(FilterBlockTest, SingleChunk) {
|
||||
FilterBlockBuilder builder(&policy_);
|
||||
builder.StartBlock(100);
|
||||
builder.AddKey("foo");
|
||||
builder.AddKey("bar");
|
||||
builder.AddKey("box");
|
||||
builder.StartBlock(200);
|
||||
builder.AddKey("box");
|
||||
builder.StartBlock(300);
|
||||
builder.AddKey("hello");
|
||||
Slice block = builder.Finish();
|
||||
FilterBlockReader reader(&policy_, block);
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "hello"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "missing"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "other"));
|
||||
}
|
||||
|
||||
TEST(FilterBlockTest, MultiChunk) {
|
||||
FilterBlockBuilder builder(&policy_);
|
||||
|
||||
// First filter
|
||||
builder.StartBlock(0);
|
||||
builder.AddKey("foo");
|
||||
builder.StartBlock(2000);
|
||||
builder.AddKey("bar");
|
||||
|
||||
// Second filter
|
||||
builder.StartBlock(3100);
|
||||
builder.AddKey("box");
|
||||
|
||||
// Third filter is empty
|
||||
|
||||
// Last filter
|
||||
builder.StartBlock(9000);
|
||||
builder.AddKey("box");
|
||||
builder.AddKey("hello");
|
||||
|
||||
Slice block = builder.Finish();
|
||||
FilterBlockReader reader(&policy_, block);
|
||||
|
||||
// Check first filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));
|
||||
|
||||
// Check second filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello"));
|
||||
|
||||
// Check third filter (empty)
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "box"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello"));
|
||||
|
||||
// Check last filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
|
||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
@ -66,10 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
|
||||
Status ReadBlock(RandomAccessFile* file,
|
||||
const ReadOptions& options,
|
||||
const BlockHandle& handle,
|
||||
Block** block,
|
||||
bool* may_cache) {
|
||||
*block = NULL;
|
||||
*may_cache = false;
|
||||
BlockContents* result) {
|
||||
result->data = Slice();
|
||||
result->cachable = false;
|
||||
result->heap_allocated = false;
|
||||
|
||||
// Read the block contents as well as the type/crc footer.
|
||||
// See table_builder.cc for the code that built this structure.
|
||||
@ -105,11 +105,13 @@ Status ReadBlock(RandomAccessFile* file,
|
||||
// Use it directly under the assumption that it will be live
|
||||
// while the file is open.
|
||||
delete[] buf;
|
||||
*block = new Block(data, n, false /* do not take ownership */);
|
||||
*may_cache = false; // Do not double-cache
|
||||
result->data = Slice(data, n);
|
||||
result->heap_allocated = false;
|
||||
result->cachable = false; // Do not double-cache
|
||||
} else {
|
||||
*block = new Block(buf, n, true /* take ownership */);
|
||||
*may_cache = true;
|
||||
result->data = Slice(buf, n);
|
||||
result->heap_allocated = true;
|
||||
result->cachable = true;
|
||||
}
|
||||
|
||||
// Ok
|
||||
@ -127,8 +129,9 @@ Status ReadBlock(RandomAccessFile* file,
|
||||
return Status::Corruption("corrupted compressed block contents");
|
||||
}
|
||||
delete[] buf;
|
||||
*block = new Block(ubuf, ulength, true /* take ownership */);
|
||||
*may_cache = true;
|
||||
result->data = Slice(ubuf, ulength);
|
||||
result->heap_allocated = true;
|
||||
result->cachable = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -83,16 +83,18 @@ static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull;
|
||||
// 1-byte type + 32-bit crc
|
||||
static const size_t kBlockTrailerSize = 5;
|
||||
|
||||
// Read the block identified by "handle" from "file". On success,
|
||||
// store a pointer to the heap-allocated result in *block and return
|
||||
// OK. On failure store NULL in *block and return non-OK.
|
||||
// On success, stores true in *may_cache if the result may be
|
||||
// cached, false if it must not be cached.
|
||||
struct BlockContents {
|
||||
Slice data; // Actual contents of data
|
||||
bool cachable; // True iff data can be cached
|
||||
bool heap_allocated; // True iff caller should delete[] data.data()
|
||||
};
|
||||
|
||||
// Read the block identified by "handle" from "file". On failure
|
||||
// return non-OK. On success fill *result and return OK.
|
||||
extern Status ReadBlock(RandomAccessFile* file,
|
||||
const ReadOptions& options,
|
||||
const BlockHandle& handle,
|
||||
Block** block,
|
||||
bool* may_cache);
|
||||
BlockContents* result);
|
||||
|
||||
// Implementation details follow. Clients should ignore,
|
||||
|
||||
|
116
table/table.cc
116
table/table.cc
@ -5,8 +5,12 @@
|
||||
#include "leveldb/table.h"
|
||||
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/options.h"
|
||||
#include "table/block.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/format.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
#include "util/coding.h"
|
||||
@ -15,6 +19,8 @@ namespace leveldb {
|
||||
|
||||
struct Table::Rep {
|
||||
~Rep() {
|
||||
delete filter;
|
||||
delete [] filter_data;
|
||||
delete index_block;
|
||||
}
|
||||
|
||||
@ -22,6 +28,8 @@ struct Table::Rep {
|
||||
Status status;
|
||||
RandomAccessFile* file;
|
||||
uint64_t cache_id;
|
||||
FilterBlockReader* filter;
|
||||
const char* filter_data;
|
||||
|
||||
BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
|
||||
Block* index_block;
|
||||
@ -47,11 +55,13 @@ Status Table::Open(const Options& options,
|
||||
if (!s.ok()) return s;
|
||||
|
||||
// Read the index block
|
||||
BlockContents contents;
|
||||
Block* index_block = NULL;
|
||||
if (s.ok()) {
|
||||
bool may_cache; // Ignored result
|
||||
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
|
||||
&may_cache);
|
||||
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents);
|
||||
if (s.ok()) {
|
||||
index_block = new Block(contents);
|
||||
}
|
||||
}
|
||||
|
||||
if (s.ok()) {
|
||||
@ -63,7 +73,10 @@ Status Table::Open(const Options& options,
|
||||
rep->metaindex_handle = footer.metaindex_handle();
|
||||
rep->index_block = index_block;
|
||||
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
|
||||
rep->filter_data = NULL;
|
||||
rep->filter = NULL;
|
||||
*table = new Table(rep);
|
||||
(*table)->ReadMeta(footer);
|
||||
} else {
|
||||
if (index_block) delete index_block;
|
||||
}
|
||||
@ -71,6 +84,52 @@ Status Table::Open(const Options& options,
|
||||
return s;
|
||||
}
|
||||
|
||||
void Table::ReadMeta(const Footer& footer) {
|
||||
if (rep_->options.filter_policy == NULL) {
|
||||
return; // Do not need any metadata
|
||||
}
|
||||
|
||||
// TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
|
||||
// it is an empty block.
|
||||
ReadOptions opt;
|
||||
BlockContents contents;
|
||||
if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
|
||||
// Do not propagate errors since meta info is not needed for operation
|
||||
return;
|
||||
}
|
||||
Block* meta = new Block(contents);
|
||||
|
||||
Iterator* iter = meta->NewIterator(BytewiseComparator());
|
||||
std::string key = "filter.";
|
||||
key.append(rep_->options.filter_policy->Name());
|
||||
iter->Seek(key);
|
||||
if (iter->Valid() && iter->key() == Slice(key)) {
|
||||
ReadFilter(iter->value());
|
||||
}
|
||||
delete iter;
|
||||
delete meta;
|
||||
}
|
||||
|
||||
void Table::ReadFilter(const Slice& filter_handle_value) {
|
||||
Slice v = filter_handle_value;
|
||||
BlockHandle filter_handle;
|
||||
if (!filter_handle.DecodeFrom(&v).ok()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// We might want to unify with ReadBlock() if we start
|
||||
// requiring checksum verification in Table::Open.
|
||||
ReadOptions opt;
|
||||
BlockContents block;
|
||||
if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
|
||||
return;
|
||||
}
|
||||
if (block.heap_allocated) {
|
||||
rep_->filter_data = block.data.data(); // Will need to delete later
|
||||
}
|
||||
rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
|
||||
}
|
||||
|
||||
Table::~Table() {
|
||||
delete rep_;
|
||||
}
|
||||
@ -107,7 +166,7 @@ Iterator* Table::BlockReader(void* arg,
|
||||
// can add more features in the future.
|
||||
|
||||
if (s.ok()) {
|
||||
bool may_cache;
|
||||
BlockContents contents;
|
||||
if (block_cache != NULL) {
|
||||
char cache_key_buffer[16];
|
||||
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
|
||||
@ -117,14 +176,20 @@ Iterator* Table::BlockReader(void* arg,
|
||||
if (cache_handle != NULL) {
|
||||
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
|
||||
} else {
|
||||
s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
|
||||
if (s.ok() && may_cache && options.fill_cache) {
|
||||
cache_handle = block_cache->Insert(
|
||||
key, block, block->size(), &DeleteCachedBlock);
|
||||
s = ReadBlock(table->rep_->file, options, handle, &contents);
|
||||
if (s.ok()) {
|
||||
block = new Block(contents);
|
||||
if (contents.cachable && options.fill_cache) {
|
||||
cache_handle = block_cache->Insert(
|
||||
key, block, block->size(), &DeleteCachedBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
|
||||
s = ReadBlock(table->rep_->file, options, handle, &contents);
|
||||
if (s.ok()) {
|
||||
block = new Block(contents);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -148,6 +213,39 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
|
||||
&Table::BlockReader, const_cast<Table*>(this), options);
|
||||
}
|
||||
|
||||
Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
||||
void* arg,
|
||||
void (*saver)(void*, const Slice&, const Slice&)) {
|
||||
Status s;
|
||||
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
||||
iiter->Seek(k);
|
||||
if (iiter->Valid()) {
|
||||
Slice handle_value = iiter->value();
|
||||
FilterBlockReader* filter = rep_->filter;
|
||||
BlockHandle handle;
|
||||
if (filter != NULL &&
|
||||
handle.DecodeFrom(&handle_value).ok() &&
|
||||
!filter->KeyMayMatch(handle.offset(), k)) {
|
||||
// Not found
|
||||
} else {
|
||||
Slice handle = iiter->value();
|
||||
Iterator* block_iter = BlockReader(this, options, iiter->value());
|
||||
block_iter->Seek(k);
|
||||
if (block_iter->Valid()) {
|
||||
(*saver)(arg, block_iter->key(), block_iter->value());
|
||||
}
|
||||
s = block_iter->status();
|
||||
delete block_iter;
|
||||
}
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = iiter->status();
|
||||
}
|
||||
delete iiter;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
|
||||
Iterator* index_iter =
|
||||
rep_->index_block->NewIterator(rep_->options.comparator);
|
||||
|
@ -5,14 +5,15 @@
|
||||
#include "leveldb/table_builder.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/options.h"
|
||||
#include "table/block_builder.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/format.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
#include "util/logging.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
@ -27,6 +28,7 @@ struct TableBuilder::Rep {
|
||||
std::string last_key;
|
||||
int64_t num_entries;
|
||||
bool closed; // Either Finish() or Abandon() has been called.
|
||||
FilterBlockBuilder* filter_block;
|
||||
|
||||
// We do not emit the index entry for a block until we have seen the
|
||||
// first key for the next data block. This allows us to use shorter
|
||||
@ -51,6 +53,8 @@ struct TableBuilder::Rep {
|
||||
index_block(&index_block_options),
|
||||
num_entries(0),
|
||||
closed(false),
|
||||
filter_block(opt.filter_policy == NULL ? NULL
|
||||
: new FilterBlockBuilder(opt.filter_policy)),
|
||||
pending_index_entry(false) {
|
||||
index_block_options.block_restart_interval = 1;
|
||||
}
|
||||
@ -58,10 +62,14 @@ struct TableBuilder::Rep {
|
||||
|
||||
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
|
||||
: rep_(new Rep(options, file)) {
|
||||
if (rep_->filter_block != NULL) {
|
||||
rep_->filter_block->StartBlock(0);
|
||||
}
|
||||
}
|
||||
|
||||
TableBuilder::~TableBuilder() {
|
||||
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
|
||||
delete rep_->filter_block;
|
||||
delete rep_;
|
||||
}
|
||||
|
||||
@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
r->pending_index_entry = false;
|
||||
}
|
||||
|
||||
if (r->filter_block != NULL) {
|
||||
r->filter_block->AddKey(key);
|
||||
}
|
||||
|
||||
r->last_key.assign(key.data(), key.size());
|
||||
r->num_entries++;
|
||||
r->data_block.Add(key, value);
|
||||
@ -119,6 +131,9 @@ void TableBuilder::Flush() {
|
||||
r->pending_index_entry = true;
|
||||
r->status = r->file->Flush();
|
||||
}
|
||||
if (r->filter_block != NULL) {
|
||||
r->filter_block->StartBlock(r->offset);
|
||||
}
|
||||
}
|
||||
|
||||
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||
@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
WriteRawBlock(block_contents, type, handle);
|
||||
r->compressed_output.clear();
|
||||
block->Reset();
|
||||
}
|
||||
|
||||
void TableBuilder::WriteRawBlock(const Slice& block_contents,
|
||||
CompressionType type,
|
||||
BlockHandle* handle) {
|
||||
Rep* r = rep_;
|
||||
handle->set_offset(r->offset);
|
||||
handle->set_size(block_contents.size());
|
||||
r->status = r->file->Append(block_contents);
|
||||
@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||
r->offset += block_contents.size() + kBlockTrailerSize;
|
||||
}
|
||||
}
|
||||
r->compressed_output.clear();
|
||||
block->Reset();
|
||||
}
|
||||
|
||||
Status TableBuilder::status() const {
|
||||
@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
|
||||
Flush();
|
||||
assert(!r->closed);
|
||||
r->closed = true;
|
||||
BlockHandle metaindex_block_handle;
|
||||
BlockHandle index_block_handle;
|
||||
|
||||
BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
|
||||
|
||||
// Write filter block
|
||||
if (ok() && r->filter_block != NULL) {
|
||||
WriteRawBlock(r->filter_block->Finish(), kNoCompression,
|
||||
&filter_block_handle);
|
||||
}
|
||||
|
||||
// Write metaindex block
|
||||
if (ok()) {
|
||||
BlockBuilder meta_index_block(&r->options);
|
||||
if (r->filter_block != NULL) {
|
||||
// Add mapping from "filter.Name" to location of filter data
|
||||
std::string key = "filter.";
|
||||
key.append(r->options.filter_policy->Name());
|
||||
std::string handle_encoding;
|
||||
filter_block_handle.EncodeTo(&handle_encoding);
|
||||
meta_index_block.Add(key, handle_encoding);
|
||||
}
|
||||
|
||||
// TODO(postrelease): Add stats and other meta blocks
|
||||
WriteBlock(&meta_index_block, &metaindex_block_handle);
|
||||
}
|
||||
|
||||
// Write index block
|
||||
if (ok()) {
|
||||
if (r->pending_index_entry) {
|
||||
r->options.comparator->FindShortSuccessor(&r->last_key);
|
||||
@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
|
||||
}
|
||||
WriteBlock(&r->index_block, &index_block_handle);
|
||||
}
|
||||
|
||||
// Write footer
|
||||
if (ok()) {
|
||||
Footer footer;
|
||||
footer.set_metaindex_handle(metaindex_block_handle);
|
||||
|
@ -168,8 +168,6 @@ class Constructor {
|
||||
// Construct the data structure from the data in "data"
|
||||
virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
|
||||
|
||||
virtual size_t NumBytes() const = 0;
|
||||
|
||||
virtual Iterator* NewIterator() const = 0;
|
||||
|
||||
virtual const KVMap& data() { return data_; }
|
||||
@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
|
||||
explicit BlockConstructor(const Comparator* cmp)
|
||||
: Constructor(cmp),
|
||||
comparator_(cmp),
|
||||
block_size_(-1),
|
||||
block_(NULL) { }
|
||||
~BlockConstructor() {
|
||||
delete block_;
|
||||
@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
|
||||
builder.Add(it->first, it->second);
|
||||
}
|
||||
// Open the block
|
||||
Slice block_data = builder.Finish();
|
||||
block_size_ = block_data.size();
|
||||
char* block_data_copy = new char[block_size_];
|
||||
memcpy(block_data_copy, block_data.data(), block_size_);
|
||||
block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
|
||||
data_ = builder.Finish().ToString();
|
||||
BlockContents contents;
|
||||
contents.data = data_;
|
||||
contents.cachable = false;
|
||||
contents.heap_allocated = false;
|
||||
block_ = new Block(contents);
|
||||
return Status::OK();
|
||||
}
|
||||
virtual size_t NumBytes() const { return block_size_; }
|
||||
|
||||
virtual Iterator* NewIterator() const {
|
||||
return block_->NewIterator(comparator_);
|
||||
}
|
||||
|
||||
private:
|
||||
const Comparator* comparator_;
|
||||
int block_size_;
|
||||
std::string data_;
|
||||
Block* block_;
|
||||
|
||||
BlockConstructor();
|
||||
@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
|
||||
table_options.comparator = options.comparator;
|
||||
return Table::Open(table_options, source_, sink.contents().size(), &table_);
|
||||
}
|
||||
virtual size_t NumBytes() const { return source_->Size(); }
|
||||
|
||||
virtual Iterator* NewIterator() const {
|
||||
return table_->NewIterator(ReadOptions());
|
||||
@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
virtual size_t NumBytes() const {
|
||||
return memtable_->ApproximateMemoryUsage();
|
||||
}
|
||||
|
||||
virtual Iterator* NewIterator() const {
|
||||
return new KeyConvertingIterator(memtable_->NewIterator());
|
||||
}
|
||||
@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
virtual size_t NumBytes() const {
|
||||
Range r("", "\xff\xff");
|
||||
uint64_t size;
|
||||
db_->GetApproximateSizes(&r, 1, &size);
|
||||
return size;
|
||||
}
|
||||
|
||||
virtual Iterator* NewIterator() const {
|
||||
return db_->NewIterator(ReadOptions());
|
||||
}
|
||||
@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
|
||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
|
||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
|
||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
|
||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
|
||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
|
||||
|
||||
}
|
||||
|
||||
|
95
util/bloom.cc
Normal file
95
util/bloom.cc
Normal file
@ -0,0 +1,95 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/filter_policy.h"
|
||||
|
||||
#include "leveldb/slice.h"
|
||||
#include "util/hash.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
namespace {
|
||||
static uint32_t BloomHash(const Slice& key) {
|
||||
return Hash(key.data(), key.size(), 0xbc9f1d34);
|
||||
}
|
||||
|
||||
class BloomFilterPolicy : public FilterPolicy {
|
||||
private:
|
||||
size_t bits_per_key_;
|
||||
size_t k_;
|
||||
|
||||
public:
|
||||
explicit BloomFilterPolicy(int bits_per_key)
|
||||
: bits_per_key_(bits_per_key) {
|
||||
// We intentionally round down to reduce probing cost a little bit
|
||||
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
|
||||
if (k_ < 1) k_ = 1;
|
||||
if (k_ > 30) k_ = 30;
|
||||
}
|
||||
|
||||
virtual const char* Name() const {
|
||||
return "leveldb.BuiltinBloomFilter";
|
||||
}
|
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
// Compute bloom filter size (in both bits and bytes)
|
||||
size_t bits = n * bits_per_key_;
|
||||
|
||||
// For small n, we can see a very high false positive rate. Fix it
|
||||
// by enforcing a minimum bloom filter length.
|
||||
if (bits < 64) bits = 64;
|
||||
|
||||
size_t bytes = (bits + 7) / 8;
|
||||
bits = bytes * 8;
|
||||
|
||||
const size_t init_size = dst->size();
|
||||
dst->resize(init_size + bytes, 0);
|
||||
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
||||
char* array = &(*dst)[init_size];
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
// Use double-hashing to generate a sequence of hash values.
|
||||
// See analysis in [Kirsch,Mitzenmacher 2006].
|
||||
uint32_t h = BloomHash(keys[i]);
|
||||
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||
for (size_t j = 0; j < k_; j++) {
|
||||
const uint32_t bitpos = h % bits;
|
||||
array[bitpos/8] |= (1 << (bitpos % 8));
|
||||
h += delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
||||
const size_t len = bloom_filter.size();
|
||||
if (len < 2) return false;
|
||||
|
||||
const char* array = bloom_filter.data();
|
||||
const size_t bits = (len - 1) * 8;
|
||||
|
||||
// Use the encoded k so that we can read filters generated by
|
||||
// bloom filters created using different parameters.
|
||||
const size_t k = array[len-1];
|
||||
if (k > 30) {
|
||||
// Reserved for potentially new encodings for short bloom filters.
|
||||
// Consider it a match.
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t h = BloomHash(key);
|
||||
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||
for (size_t j = 0; j < k; j++) {
|
||||
const uint32_t bitpos = h % bits;
|
||||
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
|
||||
h += delta;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
|
||||
return new BloomFilterPolicy(bits_per_key);
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
159
util/bloom_test.cc
Normal file
159
util/bloom_test.cc
Normal file
@ -0,0 +1,159 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/filter_policy.h"
|
||||
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
static const int kVerbose = 1;
|
||||
|
||||
static Slice Key(int i, char* buffer) {
|
||||
memcpy(buffer, &i, sizeof(i));
|
||||
return Slice(buffer, sizeof(i));
|
||||
}
|
||||
|
||||
class BloomTest {
|
||||
private:
|
||||
const FilterPolicy* policy_;
|
||||
std::string filter_;
|
||||
std::vector<std::string> keys_;
|
||||
|
||||
public:
|
||||
BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
|
||||
|
||||
~BloomTest() {
|
||||
delete policy_;
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
keys_.clear();
|
||||
filter_.clear();
|
||||
}
|
||||
|
||||
void Add(const Slice& s) {
|
||||
keys_.push_back(s.ToString());
|
||||
}
|
||||
|
||||
void Build() {
|
||||
std::vector<Slice> key_slices;
|
||||
for (size_t i = 0; i < keys_.size(); i++) {
|
||||
key_slices.push_back(Slice(keys_[i]));
|
||||
}
|
||||
filter_.clear();
|
||||
policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
|
||||
keys_.clear();
|
||||
if (kVerbose >= 2) DumpFilter();
|
||||
}
|
||||
|
||||
size_t FilterSize() const {
|
||||
return filter_.size();
|
||||
}
|
||||
|
||||
void DumpFilter() {
|
||||
fprintf(stderr, "F(");
|
||||
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
||||
const unsigned int c = static_cast<unsigned int>(filter_[i]);
|
||||
for (int j = 0; j < 8; j++) {
|
||||
fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
|
||||
}
|
||||
}
|
||||
fprintf(stderr, ")\n");
|
||||
}
|
||||
|
||||
bool Matches(const Slice& s) {
|
||||
if (!keys_.empty()) {
|
||||
Build();
|
||||
}
|
||||
return policy_->KeyMayMatch(s, filter_);
|
||||
}
|
||||
|
||||
double FalsePositiveRate() {
|
||||
char buffer[sizeof(int)];
|
||||
int result = 0;
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
if (Matches(Key(i + 1000000000, buffer))) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
return result / 10000.0;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(BloomTest, EmptyFilter) {
|
||||
ASSERT_TRUE(! Matches("hello"));
|
||||
ASSERT_TRUE(! Matches("world"));
|
||||
}
|
||||
|
||||
TEST(BloomTest, Small) {
|
||||
Add("hello");
|
||||
Add("world");
|
||||
ASSERT_TRUE(Matches("hello"));
|
||||
ASSERT_TRUE(Matches("world"));
|
||||
ASSERT_TRUE(! Matches("x"));
|
||||
ASSERT_TRUE(! Matches("foo"));
|
||||
}
|
||||
|
||||
static int NextLength(int length) {
|
||||
if (length < 10) {
|
||||
length += 1;
|
||||
} else if (length < 100) {
|
||||
length += 10;
|
||||
} else if (length < 1000) {
|
||||
length += 100;
|
||||
} else {
|
||||
length += 1000;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
TEST(BloomTest, VaryingLengths) {
|
||||
char buffer[sizeof(int)];
|
||||
|
||||
// Count number of filters that significantly exceed the false positive rate
|
||||
int mediocre_filters = 0;
|
||||
int good_filters = 0;
|
||||
|
||||
for (int length = 1; length <= 10000; length = NextLength(length)) {
|
||||
Reset();
|
||||
for (int i = 0; i < length; i++) {
|
||||
Add(Key(i, buffer));
|
||||
}
|
||||
Build();
|
||||
|
||||
ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
|
||||
|
||||
// All added keys must match
|
||||
for (int i = 0; i < length; i++) {
|
||||
ASSERT_TRUE(Matches(Key(i, buffer)))
|
||||
<< "Length " << length << "; key " << i;
|
||||
}
|
||||
|
||||
// Check false positive rate
|
||||
double rate = FalsePositiveRate();
|
||||
if (kVerbose >= 1) {
|
||||
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
|
||||
rate*100.0, length, static_cast<int>(FilterSize()));
|
||||
}
|
||||
ASSERT_LE(rate, 0.02); // Must not be over 2%
|
||||
if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
|
||||
else good_filters++;
|
||||
}
|
||||
if (kVerbose >= 1) {
|
||||
fprintf(stderr, "Filters: %d good, %d mediocre\n",
|
||||
good_filters, mediocre_filters);
|
||||
}
|
||||
ASSERT_LE(mediocre_filters, good_filters/5);
|
||||
}
|
||||
|
||||
// Different bits-per-byte
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
11
util/filter_policy.cc
Normal file
11
util/filter_policy.cc
Normal file
@ -0,0 +1,11 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/filter_policy.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
FilterPolicy::~FilterPolicy() { }
|
||||
|
||||
} // namespace leveldb
|
@ -21,7 +21,8 @@ Options::Options()
|
||||
block_cache(NULL),
|
||||
block_size(4096),
|
||||
block_restart_interval(16),
|
||||
compression(kSnappyCompression) {
|
||||
compression(kSnappyCompression),
|
||||
filter_policy(NULL) {
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user