leveldb/table/block_builder.cc
jorlow@chromium.org f67e15e50f Initial checkin.
git-svn-id: https://leveldb.googlecode.com/svn/trunk@2 62dab493-f737-651d-591e-8d6aee1b9529
2011-03-18 22:37:00 +00:00

110 lines
3.5 KiB
C++

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// BlockBuilder generates blocks where keys are prefix-compressed:
//
// When we store a key, we drop the prefix shared with the previous
// string. This helps reduce the space requirement significantly.
// Furthermore, once every K keys, we do not apply the prefix
// compression and store the entire key. We call this a "restart
// point". The tail end of the block stores the offsets of all of the
// restart points, and can be used to do a binary search when looking
// for a particular key. Values are stored as-is (without compression)
// immediately following the corresponding key.
//
// An entry for a particular key-value pair has the form:
// shared_bytes: varint32
// unshared_bytes: varint32
// value_length: varint32
// key_delta: char[unshared_bytes]
// value: char[value_length]
// shared_bytes == 0 for restart points.
//
// The trailer of the block has the form:
// restarts: uint32[num_restarts]
// num_restarts: uint32
// restarts[i] contains the offset within the block of the ith restart point.
#include "table/block_builder.h"
#include <algorithm>
#include <assert.h>
#include "include/comparator.h"
#include "include/table_builder.h"
#include "util/coding.h"
namespace leveldb {
BlockBuilder::BlockBuilder(const Options* options)
: options_(options),
restarts_(),
counter_(0),
finished_(false) {
assert(options->block_restart_interval >= 1);
restarts_.push_back(0); // First restart point is at offset 0
}
void BlockBuilder::Reset() {
buffer_.clear();
restarts_.clear();
restarts_.push_back(0); // First restart point is at offset 0
counter_ = 0;
finished_ = false;
last_key_.clear();
}
size_t BlockBuilder::CurrentSizeEstimate() const {
return (buffer_.size() + // Raw data buffer
restarts_.size() * sizeof(uint32_t) + // Restart array
sizeof(uint32_t)); // Restart array length
}
Slice BlockBuilder::Finish() {
// Append restart array
for (int i = 0; i < restarts_.size(); i++) {
PutFixed32(&buffer_, restarts_[i]);
}
PutFixed32(&buffer_, restarts_.size());
finished_ = true;
return Slice(buffer_);
}
void BlockBuilder::Add(const Slice& key, const Slice& value) {
Slice last_key_piece(last_key_);
assert(!finished_);
assert(counter_ <= options_->block_restart_interval);
assert(buffer_.empty() // No values yet?
|| options_->comparator->Compare(key, last_key_piece) > 0);
size_t shared = 0;
if (counter_ < options_->block_restart_interval) {
// See how much sharing to do with previous string
const size_t min_length = std::min(last_key_piece.size(), key.size());
while ((shared < min_length) && (last_key_[shared] == key[shared])) {
shared++;
}
} else {
// Restart compression
restarts_.push_back(buffer_.size());
counter_ = 0;
}
const size_t non_shared = key.size() - shared;
// Add "<shared><non_shared><value_size>" to buffer_
PutVarint32(&buffer_, shared);
PutVarint32(&buffer_, non_shared);
PutVarint32(&buffer_, value.size());
// Add string delta to buffer_ followed by value
buffer_.append(key.data() + shared, non_shared);
buffer_.append(value.data(), value.size());
// Update state
last_key_.resize(shared);
last_key_.append(key.data() + shared, non_shared);
assert(Slice(last_key_) == key);
counter_++;
}
}