803d69203a
Changes are: * Update version number to 1.18 * Replace the basic fprintf call with a call to fwrite in order to work around the apparent compiler optimization/rewrite failure that we are seeing with the new toolchain/iOS SDKs provided with Xcode6 and iOS8. * Fix ALL the header guards. * Createed a README.md with the LevelDB project description. * A new CONTRIBUTING file. * Don't implicitly convert uint64_t to size_t or int. Either preserve it as uint64_t, or explicitly cast. This fixes MSVC warnings about possible value truncation when compiling this code in Chromium. * Added a DumpFile() library function that encapsulates the guts of the "leveldbutil dump" command. This will allow clients to dump data to their log files instead of stdout. It will also allow clients to supply their own environment. * leveldb: Remove unused function 'ConsumeChar'. * leveldbutil: Remove unused member variables from WriteBatchItemPrinter. * OpenBSD, NetBSD and DragonflyBSD have _LITTLE_ENDIAN, so define PLATFORM_IS_LITTLE_ENDIAN like on FreeBSD. This fixes: * issue #143 * issue #198 * issue #249 * Switch from <cstdatomic> to <atomic>. The former never made it into the standard and doesn't exist in modern gcc versions at all. The later contains everything that leveldb was using from the former. This problem was noticed when porting to Portable Native Client where no memory barrier is defined. The fact that <cstdatomic> is missing normally goes unnoticed since memory barriers are defined for most architectures. * Make Hash() treat its input as unsigned. Before this change LevelDB files from platforms with different signedness of char were not compatible. This change fixes: issue #243 * Verify checksums of index/meta/filter blocks when paranoid_checks set. * Invoke all tools for iOS with xcrun. (This was causing problems with the new XCode 5.1.1 image on pulse.) * include <sys/stat.h> only once, and fix the following linter warning: "Found C system header after C++ system header" * When encountering a corrupted table file, return Status::Corruption instead of Status::InvalidArgument. * Support cygwin as build platform, patch is from https://code.google.com/p/leveldb/issues/detail?id=188 * Fix typo, merge patch from https://code.google.com/p/leveldb/issues/detail?id=159 * Fix typos and comments, and address the following two issues: * issue #166 * issue #241 * Add missing db synchronize after "fillseq" in the benchmark. * Removed unused variable in SeekRandom: value (issue #201)
96 lines
2.9 KiB
C++
96 lines
2.9 KiB
C++
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "leveldb/filter_policy.h"
|
|
|
|
#include "leveldb/slice.h"
|
|
#include "util/hash.h"
|
|
|
|
namespace leveldb {
|
|
|
|
namespace {
|
|
static uint32_t BloomHash(const Slice& key) {
|
|
return Hash(key.data(), key.size(), 0xbc9f1d34);
|
|
}
|
|
|
|
class BloomFilterPolicy : public FilterPolicy {
|
|
private:
|
|
size_t bits_per_key_;
|
|
size_t k_;
|
|
|
|
public:
|
|
explicit BloomFilterPolicy(int bits_per_key)
|
|
: bits_per_key_(bits_per_key) {
|
|
// We intentionally round down to reduce probing cost a little bit
|
|
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
|
|
if (k_ < 1) k_ = 1;
|
|
if (k_ > 30) k_ = 30;
|
|
}
|
|
|
|
virtual const char* Name() const {
|
|
return "leveldb.BuiltinBloomFilter2";
|
|
}
|
|
|
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
|
// Compute bloom filter size (in both bits and bytes)
|
|
size_t bits = n * bits_per_key_;
|
|
|
|
// For small n, we can see a very high false positive rate. Fix it
|
|
// by enforcing a minimum bloom filter length.
|
|
if (bits < 64) bits = 64;
|
|
|
|
size_t bytes = (bits + 7) / 8;
|
|
bits = bytes * 8;
|
|
|
|
const size_t init_size = dst->size();
|
|
dst->resize(init_size + bytes, 0);
|
|
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
|
char* array = &(*dst)[init_size];
|
|
for (size_t i = 0; i < n; i++) {
|
|
// Use double-hashing to generate a sequence of hash values.
|
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
|
uint32_t h = BloomHash(keys[i]);
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
for (size_t j = 0; j < k_; j++) {
|
|
const uint32_t bitpos = h % bits;
|
|
array[bitpos/8] |= (1 << (bitpos % 8));
|
|
h += delta;
|
|
}
|
|
}
|
|
}
|
|
|
|
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
|
const size_t len = bloom_filter.size();
|
|
if (len < 2) return false;
|
|
|
|
const char* array = bloom_filter.data();
|
|
const size_t bits = (len - 1) * 8;
|
|
|
|
// Use the encoded k so that we can read filters generated by
|
|
// bloom filters created using different parameters.
|
|
const size_t k = array[len-1];
|
|
if (k > 30) {
|
|
// Reserved for potentially new encodings for short bloom filters.
|
|
// Consider it a match.
|
|
return true;
|
|
}
|
|
|
|
uint32_t h = BloomHash(key);
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
for (size_t j = 0; j < k; j++) {
|
|
const uint32_t bitpos = h % bits;
|
|
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
|
|
h += delta;
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
}
|
|
|
|
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
|
|
return new BloomFilterPolicy(bits_per_key);
|
|
}
|
|
|
|
} // namespace leveldb
|