leveldb/util/bloom.cc

// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/filter_policy.h"

#include "leveldb/slice.h"
#include "util/hash.h"

namespace leveldb {

namespace {
static uint32_t BloomHash(const Slice& key) {
  return Hash(key.data(), key.size(), 0xbc9f1d34);
}

class BloomFilterPolicy : public FilterPolicy {
 public:
  explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
    // We intentionally round down to reduce probing cost a little bit
    k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
    if (k_ < 1) k_ = 1;
    if (k_ > 30) k_ = 30;
  }

  virtual const char* Name() const { return "leveldb.BuiltinBloomFilter2"; }

  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
    // Compute bloom filter size (in both bits and bytes)
    size_t bits = n * bits_per_key_;

    // For small n, we can see a very high false positive rate.  Fix it
    // by enforcing a minimum bloom filter length.
    if (bits < 64) bits = 64;

    size_t bytes = (bits + 7) / 8;
    bits = bytes * 8;

    const size_t init_size = dst->size();
    dst->resize(init_size + bytes, 0);
    dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
    char* array = &(*dst)[init_size];
    for (int i = 0; i < n; i++) {
      // Use double-hashing to generate a sequence of hash values.
      // See analysis in [Kirsch,Mitzenmacher 2006].
      uint32_t h = BloomHash(keys[i]);
      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
      for (size_t j = 0; j < k_; j++) {
        const uint32_t bitpos = h % bits;
        array[bitpos / 8] |= (1 << (bitpos % 8));
        h += delta;
      }
    }
  }

  virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
    const size_t len = bloom_filter.size();
    if (len < 2) return false;

    const char* array = bloom_filter.data();
    const size_t bits = (len - 1) * 8;

    // Use the encoded k so that we can read filters generated by
    // bloom filters created using different parameters.
    const size_t k = array[len - 1];
    if (k > 30) {
      // Reserved for potentially new encodings for short bloom filters.
      // Consider it a match.
      return true;
    }

    uint32_t h = BloomHash(key);
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (size_t j = 0; j < k; j++) {
      const uint32_t bitpos = h % bits;
      if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
      h += delta;
    }
    return true;
  }

 private:
  size_t bits_per_key_;
  size_t k_;
};
}  // namespace

const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  return new BloomFilterPolicy(bits_per_key);
}

}  // namespace leveldb
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`// Copyright (c) 2012 The LevelDB Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file. See the AUTHORS file for names of contributors.`

			`#include "leveldb/filter_policy.h"`

			`#include "leveldb/slice.h"`
			`#include "util/hash.h"`

			`namespace leveldb {`

			`namespace {`
			`static uint32_t BloomHash(const Slice& key) {`
			`return Hash(key.data(), key.size(), 0xbc9f1d34);`
			`}`

			`class BloomFilterPolicy : public FilterPolicy {`
			`public:`
Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`// We intentionally round down to reduce probing cost a little bit`
			`k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)`
			`if (k_ < 1) k_ = 1;`
			`if (k_ > 30) k_ = 30;`
			`}`

Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`virtual const char* Name() const { return "leveldb.BuiltinBloomFilter2"; }`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00
			`virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {`
			`// Compute bloom filter size (in both bits and bytes)`
			`size_t bits = n * bits_per_key_;`

			`// For small n, we can see a very high false positive rate. Fix it`
			`// by enforcing a minimum bloom filter length.`
			`if (bits < 64) bits = 64;`

			`size_t bytes = (bits + 7) / 8;`
			`bits = bytes * 8;`

			`const size_t init_size = dst->size();`
			`dst->resize(init_size + bytes, 0);`
			`dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter`
			`char* array = &(*dst)[init_size];`
Fix size_t/int comparison/conversion issues in leveldb. The create function took \|num_keys\| as an int, but callers and implementers wanted it to function as a size_t (e.g. passing std::vector::size() in, passing it to vector constructors as a size arg, indexing containers by it, etc.). This resulted in implicit conversions between the two types as well as warnings (found with Chromium's external copy of these sources, built with MSVC) about signed vs. unsigned comparisons. The leveldb sources were already widely using size_t elsewhere, e.g. for key and filter lengths, so using size_t here is not inconsistent with the existing code. However, it does change the public C API. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=101074871 2015-08-20 07:40:51 +08:00			`for (int i = 0; i < n; i++) {`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`// Use double-hashing to generate a sequence of hash values.`
			`// See analysis in [Kirsch,Mitzenmacher 2006].`
			`uint32_t h = BloomHash(keys[i]);`
			`const uint32_t delta = (h >> 17) \| (h << 15); // Rotate right 17 bits`
			`for (size_t j = 0; j < k_; j++) {`
			`const uint32_t bitpos = h % bits;`
Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`array[bitpos / 8] \|= (1 << (bitpos % 8));`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`h += delta;`
			`}`
			`}`
			`}`

			`virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {`
			`const size_t len = bloom_filter.size();`
			`if (len < 2) return false;`

			`const char* array = bloom_filter.data();`
			`const size_t bits = (len - 1) * 8;`

			`// Use the encoded k so that we can read filters generated by`
			`// bloom filters created using different parameters.`
Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`const size_t k = array[len - 1];`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`if (k > 30) {`
			`// Reserved for potentially new encodings for short bloom filters.`
			`// Consider it a match.`
			`return true;`
			`}`

			`uint32_t h = BloomHash(key);`
			`const uint32_t delta = (h >> 17) \| (h << 15); // Rotate right 17 bits`
			`for (size_t j = 0; j < k; j++) {`
			`const uint32_t bitpos = h % bits;`
Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`h += delta;`
			`}`
			`return true;`
			`}`
Correct class/structure declaration order. 1. Correct the class/struct declaration order to be IAW the Google C++ style guide[1]. 2. For non-copyable classes, switched from non-implemented private methods to explicitly deleted[2] methods. 3. Minor const and member initialization fixes. [1] https://google.github.io/styleguide/cppguide.html#Declaration_Order [2] http://eel.is/c++draft/dcl.fct.def.delete PiperOrigin-RevId: 246521844 2019-05-04 00:31:18 +08:00
			`private:`
			`size_t bits_per_key_;`
			`size_t k_;`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00			`};`
Format all files IAW the Google C++ Style Guide. Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy. PiperOrigin-RevId: 246350737 2019-05-03 02:01:00 +08:00			`} // namespace`
Added bloom filter support. In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update. 2012-04-17 23:36:46 +08:00
			`const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {`
			`return new BloomFilterPolicy(bits_per_key);`
			`}`

			`} // namespace leveldb`