@20776309
* env_chromium.cc should not export symbols. * Fix MSVC warnings. * Removed large value support. * Fix broken reference to documentation file git-svn-id: https://leveldb.googlecode.com/svn/trunk@24 62dab493-f737-651d-591e-8d6aee1b9529
This commit is contained in:
parent
69c6d38342
commit
ba6dac0e80
5
Makefile
5
Makefile
@ -27,7 +27,6 @@ LIBOBJECTS = \
|
|||||||
./db/version_set.o \
|
./db/version_set.o \
|
||||||
./db/write_batch.o \
|
./db/write_batch.o \
|
||||||
./port/port_posix.o \
|
./port/port_posix.o \
|
||||||
./port/sha1_portable.o \
|
|
||||||
./table/block.o \
|
./table/block.o \
|
||||||
./table/block_builder.o \
|
./table/block_builder.o \
|
||||||
./table/format.o \
|
./table/format.o \
|
||||||
@ -63,7 +62,6 @@ TESTS = \
|
|||||||
env_test \
|
env_test \
|
||||||
filename_test \
|
filename_test \
|
||||||
log_test \
|
log_test \
|
||||||
sha1_test \
|
|
||||||
skiplist_test \
|
skiplist_test \
|
||||||
table_test \
|
table_test \
|
||||||
version_edit_test \
|
version_edit_test \
|
||||||
@ -115,9 +113,6 @@ log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
|||||||
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
$(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||||
|
|
||||||
sha1_test: port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
|
||||||
$(CC) $(LDFLAGS) port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
|
||||||
|
|
||||||
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
$(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||||
|
|
||||||
|
4
README
4
README
@ -2,10 +2,10 @@ leveldb: A key-value store
|
|||||||
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
|
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
|
||||||
|
|
||||||
The code under this directory implements a system for maintaining a
|
The code under this directory implements a system for maintaining a
|
||||||
persistent key/value store.
|
persistent key/value store.
|
||||||
|
|
||||||
See doc/index.html for more explanation.
|
See doc/index.html for more explanation.
|
||||||
See doc/db_layout.txt for a brief overview of the implementation.
|
See doc/impl.html for a brief overview of the implementation.
|
||||||
|
|
||||||
The public interface is in include/*.h. Callers should not include or
|
The public interface is in include/*.h. Callers should not include or
|
||||||
rely on the details of any other header files in this package. Those
|
rely on the details of any other header files in this package. Those
|
||||||
|
4
TODO
4
TODO
@ -8,7 +8,7 @@ db
|
|||||||
object stores, etc. can be done in the background anyway, so
|
object stores, etc. can be done in the background anyway, so
|
||||||
probably not that important.
|
probably not that important.
|
||||||
|
|
||||||
api changes?
|
api changes:
|
||||||
- Efficient large value reading and writing
|
- Make it wrappable
|
||||||
|
|
||||||
Faster Get implementation
|
Faster Get implementation
|
||||||
|
@ -38,15 +38,6 @@ Status BuildTable(const std::string& dbname,
|
|||||||
for (; iter->Valid(); iter->Next()) {
|
for (; iter->Valid(); iter->Next()) {
|
||||||
Slice key = iter->key();
|
Slice key = iter->key();
|
||||||
meta->largest.DecodeFrom(key);
|
meta->largest.DecodeFrom(key);
|
||||||
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
|
||||||
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
|
||||||
s = Status::Corruption("invalid indirect reference hash value (L0)");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
edit->AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
|
||||||
meta->number,
|
|
||||||
iter->key());
|
|
||||||
}
|
|
||||||
builder->Add(key, iter->value());
|
builder->Add(key, iter->value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,9 +20,9 @@ class VersionEdit;
|
|||||||
// Build a Table file from the contents of *iter. The generated file
|
// Build a Table file from the contents of *iter. The generated file
|
||||||
// will be named according to meta->number. On success, the rest of
|
// will be named according to meta->number. On success, the rest of
|
||||||
// *meta will be filled with metadata about the generated table, and
|
// *meta will be filled with metadata about the generated table, and
|
||||||
// large value refs and the added file information will be added to
|
// the file information will be added to *edit. If no data is present
|
||||||
// *edit. If no data is present in *iter, meta->file_size will be set
|
// in *iter, meta->file_size will be set to zero, and no Table file
|
||||||
// to zero, and no Table file will be produced.
|
// will be produced.
|
||||||
extern Status BuildTable(const std::string& dbname,
|
extern Status BuildTable(const std::string& dbname,
|
||||||
Env* env,
|
Env* env,
|
||||||
const Options& options,
|
const Options& options,
|
||||||
|
@ -121,11 +121,10 @@ class CorruptionTest {
|
|||||||
std::vector<std::string> filenames;
|
std::vector<std::string> filenames;
|
||||||
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
LargeValueRef large_ref;
|
|
||||||
FileType type;
|
FileType type;
|
||||||
std::vector<std::string> candidates;
|
std::vector<std::string> candidates;
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
if (ParseFileName(filenames[i], &number, &type) &&
|
||||||
type == filetype) {
|
type == filetype) {
|
||||||
candidates.push_back(dbname_ + "/" + filenames[i]);
|
candidates.push_back(dbname_ + "/" + filenames[i]);
|
||||||
}
|
}
|
||||||
@ -276,29 +275,6 @@ TEST(CorruptionTest, SequenceNumberRecovery) {
|
|||||||
ASSERT_EQ("v6", v);
|
ASSERT_EQ("v6", v);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CorruptionTest, LargeValueRecovery) {
|
|
||||||
Options options;
|
|
||||||
options.large_value_threshold = 10000;
|
|
||||||
Reopen(&options);
|
|
||||||
|
|
||||||
Random rnd(301);
|
|
||||||
std::string big;
|
|
||||||
ASSERT_OK(db_->Put(WriteOptions(),
|
|
||||||
"foo", test::RandomString(&rnd, 100000, &big)));
|
|
||||||
std::string v;
|
|
||||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
|
||||||
ASSERT_EQ(big, v);
|
|
||||||
|
|
||||||
RepairDB();
|
|
||||||
Reopen();
|
|
||||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
|
||||||
ASSERT_EQ(big, v);
|
|
||||||
|
|
||||||
Reopen();
|
|
||||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
|
||||||
ASSERT_EQ(big, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(CorruptionTest, CorruptedDescriptor) {
|
TEST(CorruptionTest, CorruptedDescriptor) {
|
||||||
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
||||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
||||||
|
@ -28,7 +28,6 @@
|
|||||||
// readreverse -- read N values in reverse order
|
// readreverse -- read N values in reverse order
|
||||||
// readrandom -- read N values in random order
|
// readrandom -- read N values in random order
|
||||||
// crc32c -- repeated crc32c of 4K of data
|
// crc32c -- repeated crc32c of 4K of data
|
||||||
// sha1 -- repeated SHA1 computation over 4K of data
|
|
||||||
// Meta operations:
|
// Meta operations:
|
||||||
// compact -- Compact the entire DB
|
// compact -- Compact the entire DB
|
||||||
// stats -- Print DB stats
|
// stats -- Print DB stats
|
||||||
@ -48,7 +47,6 @@ static const char* FLAGS_benchmarks =
|
|||||||
"readreverse,"
|
"readreverse,"
|
||||||
"fill100K,"
|
"fill100K,"
|
||||||
"crc32c,"
|
"crc32c,"
|
||||||
"sha1,"
|
|
||||||
"snappycomp,"
|
"snappycomp,"
|
||||||
"snappyuncomp,"
|
"snappyuncomp,"
|
||||||
;
|
;
|
||||||
@ -366,8 +364,6 @@ class Benchmark {
|
|||||||
Compact();
|
Compact();
|
||||||
} else if (name == Slice("crc32c")) {
|
} else if (name == Slice("crc32c")) {
|
||||||
Crc32c(4096, "(4K per op)");
|
Crc32c(4096, "(4K per op)");
|
||||||
} else if (name == Slice("sha1")) {
|
|
||||||
SHA1(4096, "(4K per op)");
|
|
||||||
} else if (name == Slice("snappycomp")) {
|
} else if (name == Slice("snappycomp")) {
|
||||||
SnappyCompress();
|
SnappyCompress();
|
||||||
} else if (name == Slice("snappyuncomp")) {
|
} else if (name == Slice("snappyuncomp")) {
|
||||||
@ -406,24 +402,6 @@ class Benchmark {
|
|||||||
message_ = label;
|
message_ = label;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SHA1(int size, const char* label) {
|
|
||||||
// SHA1 about 100MB of data total
|
|
||||||
std::string data(size, 'x');
|
|
||||||
int64_t bytes = 0;
|
|
||||||
char sha1[20];
|
|
||||||
while (bytes < 100 * 1048576) {
|
|
||||||
port::SHA1_Hash(data.data(), size, sha1);
|
|
||||||
FinishedSingleOp();
|
|
||||||
bytes += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print so result is not dead
|
|
||||||
fprintf(stderr, "... sha1=%02x...\r", static_cast<unsigned int>(sha1[0]));
|
|
||||||
|
|
||||||
bytes_ = bytes;
|
|
||||||
message_ = label;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SnappyCompress() {
|
void SnappyCompress() {
|
||||||
Slice input = gen_.Generate(Options().block_size);
|
Slice input = gen_.Generate(Options().block_size);
|
||||||
int64_t bytes = 0;
|
int64_t bytes = 0;
|
||||||
|
215
db/db_impl.cc
215
db/db_impl.cc
@ -81,8 +81,8 @@ class NullWritableFile : public WritableFile {
|
|||||||
// Fix user-supplied options to be reasonable
|
// Fix user-supplied options to be reasonable
|
||||||
template <class T,class V>
|
template <class T,class V>
|
||||||
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
||||||
if (*ptr > maxvalue) *ptr = maxvalue;
|
if (static_cast<V>(*ptr) > maxvalue) *ptr = maxvalue;
|
||||||
if (*ptr < minvalue) *ptr = minvalue;
|
if (static_cast<V>(*ptr) < minvalue) *ptr = minvalue;
|
||||||
}
|
}
|
||||||
Options SanitizeOptions(const std::string& dbname,
|
Options SanitizeOptions(const std::string& dbname,
|
||||||
const InternalKeyComparator* icmp,
|
const InternalKeyComparator* icmp,
|
||||||
@ -91,7 +91,6 @@ Options SanitizeOptions(const std::string& dbname,
|
|||||||
result.comparator = icmp;
|
result.comparator = icmp;
|
||||||
ClipToRange(&result.max_open_files, 20, 50000);
|
ClipToRange(&result.max_open_files, 20, 50000);
|
||||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||||
ClipToRange(&result.large_value_threshold, 16<<10, 1<<30);
|
|
||||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||||
if (result.info_log == NULL) {
|
if (result.info_log == NULL) {
|
||||||
// Open a log file in the same directory as the db
|
// Open a log file in the same directory as the db
|
||||||
@ -213,15 +212,12 @@ void DBImpl::DeleteObsoleteFiles() {
|
|||||||
std::set<uint64_t> live = pending_outputs_;
|
std::set<uint64_t> live = pending_outputs_;
|
||||||
versions_->AddLiveFiles(&live);
|
versions_->AddLiveFiles(&live);
|
||||||
|
|
||||||
versions_->CleanupLargeValueRefs(live);
|
|
||||||
|
|
||||||
std::vector<std::string> filenames;
|
std::vector<std::string> filenames;
|
||||||
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
LargeValueRef large_ref;
|
|
||||||
FileType type;
|
FileType type;
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
for (size_t i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
if (ParseFileName(filenames[i], &number, &type)) {
|
||||||
bool keep = true;
|
bool keep = true;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case kLogFile:
|
case kLogFile:
|
||||||
@ -241,9 +237,6 @@ void DBImpl::DeleteObsoleteFiles() {
|
|||||||
// be recorded in pending_outputs_, which is inserted into "live"
|
// be recorded in pending_outputs_, which is inserted into "live"
|
||||||
keep = (live.find(number) != live.end());
|
keep = (live.find(number) != live.end());
|
||||||
break;
|
break;
|
||||||
case kLargeValueFile:
|
|
||||||
keep = versions_->LargeValueIsLive(large_ref);
|
|
||||||
break;
|
|
||||||
case kCurrentFile:
|
case kCurrentFile:
|
||||||
case kDBLockFile:
|
case kDBLockFile:
|
||||||
case kInfoLogFile:
|
case kInfoLogFile:
|
||||||
@ -599,7 +592,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) {
|
|||||||
assert(compact->outfile == NULL);
|
assert(compact->outfile == NULL);
|
||||||
}
|
}
|
||||||
delete compact->outfile;
|
delete compact->outfile;
|
||||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||||
const CompactionState::Output& out = compact->outputs[i];
|
const CompactionState::Output& out = compact->outputs[i];
|
||||||
pending_outputs_.erase(out.number);
|
pending_outputs_.erase(out.number);
|
||||||
}
|
}
|
||||||
@ -695,7 +688,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
|||||||
// Add compaction outputs
|
// Add compaction outputs
|
||||||
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
||||||
const int level = compact->compaction->level();
|
const int level = compact->compaction->level();
|
||||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||||
const CompactionState::Output& out = compact->outputs[i];
|
const CompactionState::Output& out = compact->outputs[i];
|
||||||
compact->compaction->edit()->AddFile(
|
compact->compaction->edit()->AddFile(
|
||||||
level + 1,
|
level + 1,
|
||||||
@ -710,7 +703,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
|||||||
DeleteObsoleteFiles();
|
DeleteObsoleteFiles();
|
||||||
} else {
|
} else {
|
||||||
// Discard any files we may have created during this failed compaction
|
// Discard any files we may have created during this failed compaction
|
||||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||||
env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
|
env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -811,7 +804,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|||||||
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
||||||
"%d smallest_snapshot: %d",
|
"%d smallest_snapshot: %d",
|
||||||
ikey.user_key.ToString().c_str(),
|
ikey.user_key.ToString().c_str(),
|
||||||
(int)ikey.sequence, ikey.type, kTypeLargeValueRef, drop,
|
(int)ikey.sequence, ikey.type, kTypeValue, drop,
|
||||||
compact->compaction->IsBaseLevelForKey(ikey.user_key),
|
compact->compaction->IsBaseLevelForKey(ikey.user_key),
|
||||||
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
|
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
|
||||||
#endif
|
#endif
|
||||||
@ -828,26 +821,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|||||||
compact->current_output()->smallest.DecodeFrom(key);
|
compact->current_output()->smallest.DecodeFrom(key);
|
||||||
}
|
}
|
||||||
compact->current_output()->largest.DecodeFrom(key);
|
compact->current_output()->largest.DecodeFrom(key);
|
||||||
|
compact->builder->Add(key, input->value());
|
||||||
if (ikey.type == kTypeLargeValueRef) {
|
|
||||||
if (input->value().size() != LargeValueRef::ByteSize()) {
|
|
||||||
if (options_.paranoid_checks) {
|
|
||||||
status = Status::Corruption("invalid large value ref");
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
Log(env_, options_.info_log,
|
|
||||||
"compaction found invalid large value ref");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
compact->compaction->edit()->AddLargeValueRef(
|
|
||||||
LargeValueRef::FromRef(input->value()),
|
|
||||||
compact->current_output()->number,
|
|
||||||
input->key());
|
|
||||||
compact->builder->Add(key, input->value());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
compact->builder->Add(key, input->value());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close output file if it is big enough
|
// Close output file if it is big enough
|
||||||
if (compact->builder->FileSize() >=
|
if (compact->builder->FileSize() >=
|
||||||
@ -881,7 +855,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|||||||
stats.bytes_read += compact->compaction->input(which, i)->file_size;
|
stats.bytes_read += compact->compaction->input(which, i)->file_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||||
stats.bytes_written += compact->outputs[i].file_size;
|
stats.bytes_written += compact->outputs[i].file_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -985,40 +959,27 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
|||||||
|
|
||||||
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
||||||
Status status;
|
Status status;
|
||||||
|
MutexLock l(&mutex_);
|
||||||
|
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
||||||
|
uint64_t last_sequence = versions_->LastSequence();
|
||||||
|
if (status.ok()) {
|
||||||
|
WriteBatchInternal::SetSequence(updates, last_sequence + 1);
|
||||||
|
last_sequence += WriteBatchInternal::Count(updates);
|
||||||
|
versions_->SetLastSequence(last_sequence);
|
||||||
|
|
||||||
WriteBatch* final = NULL;
|
// Add to log and apply to memtable
|
||||||
{
|
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
||||||
MutexLock l(&mutex_);
|
if (status.ok() && options.sync) {
|
||||||
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
status = logfile_->Sync();
|
||||||
|
|
||||||
uint64_t last_sequence = versions_->LastSequence();
|
|
||||||
if (status.ok()) {
|
|
||||||
status = HandleLargeValues(last_sequence + 1, updates, &final);
|
|
||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
WriteBatchInternal::SetSequence(final, last_sequence + 1);
|
status = WriteBatchInternal::InsertInto(updates, mem_);
|
||||||
last_sequence += WriteBatchInternal::Count(final);
|
|
||||||
versions_->SetLastSequence(last_sequence);
|
|
||||||
|
|
||||||
// Add to log and apply to memtable
|
|
||||||
status = log_->AddRecord(WriteBatchInternal::Contents(final));
|
|
||||||
if (status.ok() && options.sync) {
|
|
||||||
status = logfile_->Sync();
|
|
||||||
}
|
|
||||||
if (status.ok()) {
|
|
||||||
status = WriteBatchInternal::InsertInto(final, mem_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options.post_write_snapshot != NULL) {
|
|
||||||
*options.post_write_snapshot =
|
|
||||||
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (final != updates) {
|
if (options.post_write_snapshot != NULL) {
|
||||||
delete final;
|
*options.post_write_snapshot =
|
||||||
|
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1070,124 +1031,6 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DBImpl::HasLargeValues(const WriteBatch& batch) const {
|
|
||||||
if (WriteBatchInternal::ByteSize(&batch) >= options_.large_value_threshold) {
|
|
||||||
for (WriteBatchInternal::Iterator it(batch); !it.Done(); it.Next()) {
|
|
||||||
if (it.op() == kTypeValue &&
|
|
||||||
it.value().size() >= options_.large_value_threshold) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given "raw_value", determines the appropriate compression format to use
|
|
||||||
// and stores the data that should be written to the large value file in
|
|
||||||
// "*file_bytes", and sets "*ref" to the appropriate large value reference.
|
|
||||||
// May use "*scratch" as backing store for "*file_bytes".
|
|
||||||
void DBImpl::MaybeCompressLargeValue(
|
|
||||||
const Slice& raw_value,
|
|
||||||
Slice* file_bytes,
|
|
||||||
std::string* scratch,
|
|
||||||
LargeValueRef* ref) {
|
|
||||||
switch (options_.compression) {
|
|
||||||
case kSnappyCompression: {
|
|
||||||
if (port::Snappy_Compress(raw_value.data(), raw_value.size(), scratch) &&
|
|
||||||
(scratch->size() < (raw_value.size() / 8) * 7)) {
|
|
||||||
*file_bytes = *scratch;
|
|
||||||
*ref = LargeValueRef::Make(raw_value, kSnappyCompression);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Less than 12.5% compression: just leave as uncompressed data
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case kNoCompression:
|
|
||||||
// Use default code outside of switch
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Store as uncompressed data
|
|
||||||
*file_bytes = raw_value;
|
|
||||||
*ref = LargeValueRef::Make(raw_value, kNoCompression);
|
|
||||||
}
|
|
||||||
|
|
||||||
Status DBImpl::HandleLargeValues(SequenceNumber assigned_seq,
|
|
||||||
WriteBatch* updates,
|
|
||||||
WriteBatch** final) {
|
|
||||||
if (!HasLargeValues(*updates)) {
|
|
||||||
// Fast path: no large values found
|
|
||||||
*final = updates;
|
|
||||||
} else {
|
|
||||||
// Copy *updates to a new WriteBatch, replacing the references to
|
|
||||||
*final = new WriteBatch;
|
|
||||||
SequenceNumber seq = assigned_seq;
|
|
||||||
for (WriteBatchInternal::Iterator it(*updates); !it.Done(); it.Next()) {
|
|
||||||
switch (it.op()) {
|
|
||||||
case kTypeValue:
|
|
||||||
if (it.value().size() < options_.large_value_threshold) {
|
|
||||||
(*final)->Put(it.key(), it.value());
|
|
||||||
} else {
|
|
||||||
std::string scratch;
|
|
||||||
Slice file_bytes;
|
|
||||||
LargeValueRef large_ref;
|
|
||||||
MaybeCompressLargeValue(
|
|
||||||
it.value(), &file_bytes, &scratch, &large_ref);
|
|
||||||
InternalKey ikey(it.key(), seq, kTypeLargeValueRef);
|
|
||||||
if (versions_->RegisterLargeValueRef(
|
|
||||||
large_ref, versions_->LogNumber(), ikey)) {
|
|
||||||
// TODO(opt): avoid holding the lock here (but be careful about
|
|
||||||
// another thread doing a Write and switching logs or
|
|
||||||
// having us get a different "assigned_seq" value).
|
|
||||||
|
|
||||||
uint64_t tmp_number = versions_->NewFileNumber();
|
|
||||||
pending_outputs_.insert(tmp_number);
|
|
||||||
std::string tmp = TempFileName(dbname_, tmp_number);
|
|
||||||
WritableFile* file;
|
|
||||||
Status s = env_->NewWritableFile(tmp, &file);
|
|
||||||
if (!s.ok()) {
|
|
||||||
return s; // Caller will delete *final
|
|
||||||
}
|
|
||||||
|
|
||||||
file->Append(file_bytes);
|
|
||||||
|
|
||||||
s = file->Close();
|
|
||||||
delete file;
|
|
||||||
|
|
||||||
if (s.ok()) {
|
|
||||||
const std::string fname =
|
|
||||||
LargeValueFileName(dbname_, large_ref);
|
|
||||||
s = env_->RenameFile(tmp, fname);
|
|
||||||
} else {
|
|
||||||
Log(env_, options_.info_log, "Write large value: %s",
|
|
||||||
s.ToString().c_str());
|
|
||||||
}
|
|
||||||
pending_outputs_.erase(tmp_number);
|
|
||||||
|
|
||||||
if (!s.ok()) {
|
|
||||||
env_->DeleteFile(tmp); // Cleanup; intentionally ignoring error
|
|
||||||
return s; // Caller will delete *final
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put an indirect reference in the write batch in place
|
|
||||||
// of large value
|
|
||||||
WriteBatchInternal::PutLargeValueRef(*final, it.key(), large_ref);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case kTypeLargeValueRef:
|
|
||||||
return Status::Corruption("Corrupted write batch");
|
|
||||||
break;
|
|
||||||
case kTypeDeletion:
|
|
||||||
(*final)->Delete(it.key());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
seq = seq + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||||
value->clear();
|
value->clear();
|
||||||
|
|
||||||
@ -1205,7 +1048,8 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
|||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
char buf[100];
|
char buf[100];
|
||||||
snprintf(buf, sizeof(buf), "%d", versions_->NumLevelFiles(level));
|
snprintf(buf, sizeof(buf), "%d",
|
||||||
|
versions_->NumLevelFiles(static_cast<int>(level)));
|
||||||
*value = buf;
|
*value = buf;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1325,10 +1169,9 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
|
|||||||
Status result = env->LockFile(LockFileName(dbname), &lock);
|
Status result = env->LockFile(LockFileName(dbname), &lock);
|
||||||
if (result.ok()) {
|
if (result.ok()) {
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
LargeValueRef large_ref;
|
|
||||||
FileType type;
|
FileType type;
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
for (size_t i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
if (ParseFileName(filenames[i], &number, &type)) {
|
||||||
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
||||||
if (result.ok() && !del.ok()) {
|
if (result.ok() && !del.ok()) {
|
||||||
result = del;
|
result = del;
|
||||||
|
23
db/db_impl.h
23
db/db_impl.h
@ -92,29 +92,6 @@ class DBImpl : public DB {
|
|||||||
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
||||||
|
|
||||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
||||||
bool HasLargeValues(const WriteBatch& batch) const;
|
|
||||||
|
|
||||||
// Process data in "*updates" and return a status. "assigned_seq"
|
|
||||||
// is the sequence number assigned to the first mod in "*updates".
|
|
||||||
// If no large values are encountered, "*final" is set to "updates".
|
|
||||||
// If large values were encountered, registers the references of the
|
|
||||||
// large values with the VersionSet, writes the large values to
|
|
||||||
// files (if appropriate), and allocates a new WriteBatch with the
|
|
||||||
// large values replaced with indirect references and stores a
|
|
||||||
// pointer to the new WriteBatch in *final. If *final != updates on
|
|
||||||
// return, then the client should delete *final when no longer
|
|
||||||
// needed. Returns OK on success, and an appropriate error
|
|
||||||
// otherwise.
|
|
||||||
Status HandleLargeValues(SequenceNumber assigned_seq,
|
|
||||||
WriteBatch* updates,
|
|
||||||
WriteBatch** final);
|
|
||||||
|
|
||||||
// Helper routine for HandleLargeValues
|
|
||||||
void MaybeCompressLargeValue(
|
|
||||||
const Slice& raw_value,
|
|
||||||
Slice* file_bytes,
|
|
||||||
std::string* scratch,
|
|
||||||
LargeValueRef* ref);
|
|
||||||
|
|
||||||
struct CompactionState;
|
struct CompactionState;
|
||||||
|
|
||||||
|
101
db/db_iter.cc
101
db/db_iter.cc
@ -53,13 +53,11 @@ class DBIter: public Iterator {
|
|||||||
user_comparator_(cmp),
|
user_comparator_(cmp),
|
||||||
iter_(iter),
|
iter_(iter),
|
||||||
sequence_(s),
|
sequence_(s),
|
||||||
large_(NULL),
|
|
||||||
direction_(kForward),
|
direction_(kForward),
|
||||||
valid_(false) {
|
valid_(false) {
|
||||||
}
|
}
|
||||||
virtual ~DBIter() {
|
virtual ~DBIter() {
|
||||||
delete iter_;
|
delete iter_;
|
||||||
delete large_;
|
|
||||||
}
|
}
|
||||||
virtual bool Valid() const { return valid_; }
|
virtual bool Valid() const { return valid_; }
|
||||||
virtual Slice key() const {
|
virtual Slice key() const {
|
||||||
@ -68,20 +66,10 @@ class DBIter: public Iterator {
|
|||||||
}
|
}
|
||||||
virtual Slice value() const {
|
virtual Slice value() const {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
Slice raw_value = (direction_ == kForward) ? iter_->value() : saved_value_;
|
return (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||||
if (large_ == NULL) {
|
|
||||||
return raw_value;
|
|
||||||
} else {
|
|
||||||
MutexLock l(&large_->mutex);
|
|
||||||
if (!large_->produced) {
|
|
||||||
ReadIndirectValue(raw_value);
|
|
||||||
}
|
|
||||||
return large_->value;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
virtual Status status() const {
|
virtual Status status() const {
|
||||||
if (status_.ok()) {
|
if (status_.ok()) {
|
||||||
if (large_ != NULL && !large_->status.ok()) return large_->status;
|
|
||||||
return iter_->status();
|
return iter_->status();
|
||||||
} else {
|
} else {
|
||||||
return status_;
|
return status_;
|
||||||
@ -95,29 +83,14 @@ class DBIter: public Iterator {
|
|||||||
virtual void SeekToLast();
|
virtual void SeekToLast();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Large {
|
|
||||||
port::Mutex mutex;
|
|
||||||
std::string value;
|
|
||||||
bool produced;
|
|
||||||
Status status;
|
|
||||||
};
|
|
||||||
|
|
||||||
void FindNextUserEntry(bool skipping, std::string* skip);
|
void FindNextUserEntry(bool skipping, std::string* skip);
|
||||||
void FindPrevUserEntry();
|
void FindPrevUserEntry();
|
||||||
bool ParseKey(ParsedInternalKey* key);
|
bool ParseKey(ParsedInternalKey* key);
|
||||||
void ReadIndirectValue(Slice ref) const;
|
|
||||||
|
|
||||||
inline void SaveKey(const Slice& k, std::string* dst) {
|
inline void SaveKey(const Slice& k, std::string* dst) {
|
||||||
dst->assign(k.data(), k.size());
|
dst->assign(k.data(), k.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void ForgetLargeValue() {
|
|
||||||
if (large_ != NULL) {
|
|
||||||
delete large_;
|
|
||||||
large_ = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ClearSavedValue() {
|
inline void ClearSavedValue() {
|
||||||
if (saved_value_.capacity() > 1048576) {
|
if (saved_value_.capacity() > 1048576) {
|
||||||
std::string empty;
|
std::string empty;
|
||||||
@ -136,7 +109,6 @@ class DBIter: public Iterator {
|
|||||||
Status status_;
|
Status status_;
|
||||||
std::string saved_key_; // == current key when direction_==kReverse
|
std::string saved_key_; // == current key when direction_==kReverse
|
||||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||||
Large* large_; // Non-NULL if value is an indirect reference
|
|
||||||
Direction direction_;
|
Direction direction_;
|
||||||
bool valid_;
|
bool valid_;
|
||||||
|
|
||||||
@ -156,7 +128,6 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
|
|||||||
|
|
||||||
void DBIter::Next() {
|
void DBIter::Next() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
ForgetLargeValue();
|
|
||||||
|
|
||||||
if (direction_ == kReverse) { // Switch directions?
|
if (direction_ == kReverse) { // Switch directions?
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
@ -185,7 +156,6 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
|||||||
// Loop until we hit an acceptable entry to yield
|
// Loop until we hit an acceptable entry to yield
|
||||||
assert(iter_->Valid());
|
assert(iter_->Valid());
|
||||||
assert(direction_ == kForward);
|
assert(direction_ == kForward);
|
||||||
assert(large_ == NULL);
|
|
||||||
do {
|
do {
|
||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
||||||
@ -197,17 +167,12 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
|||||||
skipping = true;
|
skipping = true;
|
||||||
break;
|
break;
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
case kTypeLargeValueRef:
|
|
||||||
if (skipping &&
|
if (skipping &&
|
||||||
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
||||||
// Entry hidden
|
// Entry hidden
|
||||||
} else {
|
} else {
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
saved_key_.clear();
|
saved_key_.clear();
|
||||||
if (ikey.type == kTypeLargeValueRef) {
|
|
||||||
large_ = new Large;
|
|
||||||
large_->produced = false;
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -221,7 +186,6 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
|||||||
|
|
||||||
void DBIter::Prev() {
|
void DBIter::Prev() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
ForgetLargeValue();
|
|
||||||
|
|
||||||
if (direction_ == kForward) { // Switch directions?
|
if (direction_ == kForward) { // Switch directions?
|
||||||
// iter_ is pointing at the current entry. Scan backwards until
|
// iter_ is pointing at the current entry. Scan backwards until
|
||||||
@ -249,7 +213,6 @@ void DBIter::Prev() {
|
|||||||
|
|
||||||
void DBIter::FindPrevUserEntry() {
|
void DBIter::FindPrevUserEntry() {
|
||||||
assert(direction_ == kReverse);
|
assert(direction_ == kReverse);
|
||||||
assert(large_ == NULL);
|
|
||||||
|
|
||||||
ValueType value_type = kTypeDeletion;
|
ValueType value_type = kTypeDeletion;
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
@ -286,16 +249,11 @@ void DBIter::FindPrevUserEntry() {
|
|||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
} else {
|
} else {
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
if (value_type == kTypeLargeValueRef) {
|
|
||||||
large_ = new Large;
|
|
||||||
large_->produced = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBIter::Seek(const Slice& target) {
|
void DBIter::Seek(const Slice& target) {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
ForgetLargeValue();
|
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
saved_key_.clear();
|
saved_key_.clear();
|
||||||
AppendInternalKey(
|
AppendInternalKey(
|
||||||
@ -310,7 +268,6 @@ void DBIter::Seek(const Slice& target) {
|
|||||||
|
|
||||||
void DBIter::SeekToFirst() {
|
void DBIter::SeekToFirst() {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
ForgetLargeValue();
|
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
iter_->SeekToFirst();
|
iter_->SeekToFirst();
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
@ -322,67 +279,11 @@ void DBIter::SeekToFirst() {
|
|||||||
|
|
||||||
void DBIter::SeekToLast() {
|
void DBIter::SeekToLast() {
|
||||||
direction_ = kReverse;
|
direction_ = kReverse;
|
||||||
ForgetLargeValue();
|
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
iter_->SeekToLast();
|
iter_->SeekToLast();
|
||||||
FindPrevUserEntry();
|
FindPrevUserEntry();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBIter::ReadIndirectValue(Slice ref) const {
|
|
||||||
assert(!large_->produced);
|
|
||||||
large_->produced = true;
|
|
||||||
LargeValueRef large_ref;
|
|
||||||
if (ref.size() != LargeValueRef::ByteSize()) {
|
|
||||||
large_->status = Status::Corruption("malformed large value reference");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
memcpy(large_ref.data, ref.data(), LargeValueRef::ByteSize());
|
|
||||||
std::string fname = LargeValueFileName(*dbname_, large_ref);
|
|
||||||
RandomAccessFile* file;
|
|
||||||
Status s = env_->NewRandomAccessFile(fname, &file);
|
|
||||||
uint64_t file_size = 0;
|
|
||||||
if (s.ok()) {
|
|
||||||
s = env_->GetFileSize(fname, &file_size);
|
|
||||||
}
|
|
||||||
if (s.ok()) {
|
|
||||||
uint64_t value_size = large_ref.ValueSize();
|
|
||||||
large_->value.resize(value_size);
|
|
||||||
Slice result;
|
|
||||||
s = file->Read(0, file_size, &result,
|
|
||||||
const_cast<char*>(large_->value.data()));
|
|
||||||
if (s.ok()) {
|
|
||||||
if (result.size() == file_size) {
|
|
||||||
switch (large_ref.compression_type()) {
|
|
||||||
case kNoCompression: {
|
|
||||||
if (result.data() != large_->value.data()) {
|
|
||||||
large_->value.assign(result.data(), result.size());
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case kSnappyCompression: {
|
|
||||||
std::string uncompressed;
|
|
||||||
if (port::Snappy_Uncompress(result.data(), result.size(),
|
|
||||||
&uncompressed) &&
|
|
||||||
uncompressed.size() == large_ref.ValueSize()) {
|
|
||||||
swap(uncompressed, large_->value);
|
|
||||||
} else {
|
|
||||||
s = Status::Corruption(
|
|
||||||
"Unable to read entire compressed large value file");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
s = Status::Corruption("Unable to read entire large value file");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
delete file; // Ignore errors on closing
|
|
||||||
}
|
|
||||||
if (!s.ok()) {
|
|
||||||
large_->value.clear();
|
|
||||||
large_->status = s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
Iterator* NewDBIterator(
|
Iterator* NewDBIterator(
|
||||||
|
253
db/db_test.cc
253
db/db_test.cc
@ -119,9 +119,6 @@ class DBTest {
|
|||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
result += iter->value().ToString();
|
result += iter->value().ToString();
|
||||||
break;
|
break;
|
||||||
case kTypeLargeValueRef:
|
|
||||||
result += "LARGEVALUE(" + EscapeString(iter->value()) + ")";
|
|
||||||
break;
|
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
result += "DEL";
|
result += "DEL";
|
||||||
break;
|
break;
|
||||||
@ -153,26 +150,6 @@ class DBTest {
|
|||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<LargeValueRef> LargeValueFiles() const {
|
|
||||||
// Return the set of large value files that exist in the database
|
|
||||||
std::vector<std::string> filenames;
|
|
||||||
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
|
||||||
uint64_t number;
|
|
||||||
LargeValueRef large_ref;
|
|
||||||
FileType type;
|
|
||||||
std::set<LargeValueRef> live;
|
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
|
||||||
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
|
||||||
type == kLargeValueFile) {
|
|
||||||
fprintf(stderr, " live: %s\n",
|
|
||||||
LargeValueRefToFilenameString(large_ref).c_str());
|
|
||||||
live.insert(large_ref);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(stderr, "Found %d live large value files\n", (int)live.size());
|
|
||||||
return live;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Compact(const Slice& start, const Slice& limit) {
|
void Compact(const Slice& start, const Slice& limit) {
|
||||||
dbfull()->TEST_CompactMemTable();
|
dbfull()->TEST_CompactMemTable();
|
||||||
int max_level_with_files = 1;
|
int max_level_with_files = 1;
|
||||||
@ -471,7 +448,6 @@ TEST(DBTest, MinorCompactionsHappen) {
|
|||||||
TEST(DBTest, RecoverWithLargeLog) {
|
TEST(DBTest, RecoverWithLargeLog) {
|
||||||
{
|
{
|
||||||
Options options;
|
Options options;
|
||||||
options.large_value_threshold = 1048576;
|
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
ASSERT_OK(Put("big1", std::string(200000, '1')));
|
ASSERT_OK(Put("big1", std::string(200000, '1')));
|
||||||
ASSERT_OK(Put("big2", std::string(200000, '2')));
|
ASSERT_OK(Put("big2", std::string(200000, '2')));
|
||||||
@ -484,7 +460,6 @@ TEST(DBTest, RecoverWithLargeLog) {
|
|||||||
// we flush table files in the middle of a large log file.
|
// we flush table files in the middle of a large log file.
|
||||||
Options options;
|
Options options;
|
||||||
options.write_buffer_size = 100000;
|
options.write_buffer_size = 100000;
|
||||||
options.large_value_threshold = 1048576;
|
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
ASSERT_EQ(NumTableFilesAtLevel(0), 3);
|
ASSERT_EQ(NumTableFilesAtLevel(0), 3);
|
||||||
ASSERT_EQ(std::string(200000, '1'), Get("big1"));
|
ASSERT_EQ(std::string(200000, '1'), Get("big1"));
|
||||||
@ -497,7 +472,6 @@ TEST(DBTest, RecoverWithLargeLog) {
|
|||||||
TEST(DBTest, CompactionsGenerateMultipleFiles) {
|
TEST(DBTest, CompactionsGenerateMultipleFiles) {
|
||||||
Options options;
|
Options options;
|
||||||
options.write_buffer_size = 100000000; // Large write buffer
|
options.write_buffer_size = 100000000; // Large write buffer
|
||||||
options.large_value_threshold = 1048576;
|
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
|
|
||||||
Random rnd(301);
|
Random rnd(301);
|
||||||
@ -570,65 +544,53 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, ApproximateSizes) {
|
TEST(DBTest, ApproximateSizes) {
|
||||||
for (int test = 0; test < 2; test++) {
|
Options options;
|
||||||
// test==0: default large_value_threshold
|
options.write_buffer_size = 100000000; // Large write buffer
|
||||||
// test==1: 1 MB large_value_threshold
|
options.compression = kNoCompression;
|
||||||
Options options;
|
DestroyAndReopen();
|
||||||
options.large_value_threshold = (test == 0) ? 65536 : 1048576;
|
|
||||||
options.write_buffer_size = 100000000; // Large write buffer
|
|
||||||
options.compression = kNoCompression;
|
|
||||||
DestroyAndReopen();
|
|
||||||
|
|
||||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||||
|
Reopen(&options);
|
||||||
|
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||||
|
|
||||||
|
// Write 8MB (80 values, each 100K)
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||||
|
const int N = 80;
|
||||||
|
Random rnd(301);
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 0 because GetApproximateSizes() does not account for memtable space
|
||||||
|
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
||||||
|
|
||||||
|
// Check sizes across recovery by reopening a few times
|
||||||
|
for (int run = 0; run < 3; run++) {
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
|
||||||
|
|
||||||
// Write 8MB (80 values, each 100K)
|
for (int compact_start = 0; compact_start < N; compact_start += 10) {
|
||||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
for (int i = 0; i < N; i += 10) {
|
||||||
const int N = 80;
|
ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
|
||||||
Random rnd(301);
|
ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
|
||||||
for (int i = 0; i < N; i++) {
|
100000 * (i+1), 100000 * (i+1) + 10000));
|
||||||
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
|
||||||
}
|
100000 * 10, 100000 * 10 + 10000));
|
||||||
if (test == 1) {
|
|
||||||
// 0 because GetApproximateSizes() does not account for memtable space for
|
|
||||||
// non-large values
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
|
||||||
} else {
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(50)), 100000*50, 100000*50 + 10000));
|
|
||||||
ASSERT_TRUE(Between(Size(Key(20), Key(30)),
|
|
||||||
100000*10, 100000*10 + 10000));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check sizes across recovery by reopening a few times
|
|
||||||
for (int run = 0; run < 3; run++) {
|
|
||||||
Reopen(&options);
|
|
||||||
|
|
||||||
for (int compact_start = 0; compact_start < N; compact_start += 10) {
|
|
||||||
for (int i = 0; i < N; i += 10) {
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
|
|
||||||
100000 * (i+1), 100000 * (i+1) + 10000));
|
|
||||||
ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
|
|
||||||
100000 * 10, 100000 * 10 + 10000));
|
|
||||||
}
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
|
|
||||||
ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
|
|
||||||
|
|
||||||
dbfull()->TEST_CompactRange(0,
|
|
||||||
Key(compact_start),
|
|
||||||
Key(compact_start + 9));
|
|
||||||
}
|
}
|
||||||
|
ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
|
||||||
|
ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
|
||||||
|
|
||||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
dbfull()->TEST_CompactRange(0,
|
||||||
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
Key(compact_start),
|
||||||
|
Key(compact_start + 9));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||||
|
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
|
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
|
||||||
Options options;
|
Options options;
|
||||||
options.large_value_threshold = 65536;
|
|
||||||
options.compression = kNoCompression;
|
options.compression = kNoCompression;
|
||||||
Reopen();
|
Reopen();
|
||||||
|
|
||||||
@ -801,146 +763,6 @@ TEST(DBTest, ComparatorCheck) {
|
|||||||
<< s.ToString();
|
<< s.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool LargeValuesOK(DBTest* db,
|
|
||||||
const std::set<LargeValueRef>& expected) {
|
|
||||||
std::set<LargeValueRef> actual = db->LargeValueFiles();
|
|
||||||
if (actual.size() != expected.size()) {
|
|
||||||
fprintf(stderr, "Sets differ in size: %d vs %d\n",
|
|
||||||
(int)actual.size(), (int)expected.size());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (std::set<LargeValueRef>::const_iterator it = expected.begin();
|
|
||||||
it != expected.end();
|
|
||||||
++it) {
|
|
||||||
if (actual.count(*it) != 1) {
|
|
||||||
fprintf(stderr, " key '%s' not found in actual set\n",
|
|
||||||
LargeValueRefToFilenameString(*it).c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(DBTest, LargeValues1) {
|
|
||||||
Options options;
|
|
||||||
options.large_value_threshold = 10000;
|
|
||||||
Reopen(&options);
|
|
||||||
|
|
||||||
Random rnd(301);
|
|
||||||
|
|
||||||
std::string big1;
|
|
||||||
test::CompressibleString(&rnd, 1.0, 100000, &big1); // Not compressible
|
|
||||||
std::set<LargeValueRef> expected;
|
|
||||||
|
|
||||||
ASSERT_OK(Put("big1", big1));
|
|
||||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(Delete("big1"));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
|
||||||
// No handling of deletion markers on memtable compactions, so big1 remains
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
dbfull()->TEST_CompactRange(0, "", "z");
|
|
||||||
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool SnappyCompressionSupported() {
|
|
||||||
std::string out;
|
|
||||||
Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
|
||||||
return port::Snappy_Compress(in.data(), in.size(), &out);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(DBTest, LargeValues2) {
|
|
||||||
Options options;
|
|
||||||
options.large_value_threshold = 10000;
|
|
||||||
Reopen(&options);
|
|
||||||
|
|
||||||
Random rnd(301);
|
|
||||||
|
|
||||||
std::string big1, big2;
|
|
||||||
test::CompressibleString(&rnd, 1.0, 20000, &big1); // Not compressible
|
|
||||||
test::CompressibleString(&rnd, 0.6, 40000, &big2); // Compressible
|
|
||||||
std::set<LargeValueRef> expected;
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(Put("big1", big1));
|
|
||||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
|
||||||
ASSERT_EQ(big1, Get("big1"));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(Put("big2", big2));
|
|
||||||
ASSERT_EQ(big2, Get("big2"));
|
|
||||||
if (SnappyCompressionSupported()) {
|
|
||||||
expected.insert(LargeValueRef::Make(big2, kSnappyCompression));
|
|
||||||
} else {
|
|
||||||
expected.insert(LargeValueRef::Make(big2, kNoCompression));
|
|
||||||
}
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
dbfull()->TEST_CompactRange(0, "", "z");
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(Put("big2", big2));
|
|
||||||
ASSERT_OK(Put("big2_b", big2));
|
|
||||||
ASSERT_EQ(big1, Get("big1"));
|
|
||||||
ASSERT_EQ(big2, Get("big2"));
|
|
||||||
ASSERT_EQ(big2, Get("big2_b"));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(Delete("big1"));
|
|
||||||
ASSERT_EQ("NOT_FOUND", Get("big1"));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
dbfull()->TEST_CompactRange(0, "", "z");
|
|
||||||
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
dbfull()->TEST_CompactRange(1, "", "z");
|
|
||||||
|
|
||||||
ASSERT_OK(Delete("big2"));
|
|
||||||
ASSERT_EQ("NOT_FOUND", Get("big2"));
|
|
||||||
ASSERT_EQ(big2, Get("big2_b"));
|
|
||||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
dbfull()->TEST_CompactRange(0, "", "z");
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
|
|
||||||
// Make sure the large value refs survive a reload and compactions after
|
|
||||||
// the reload.
|
|
||||||
Reopen();
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
ASSERT_OK(Put("foo", "bar"));
|
|
||||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
|
||||||
dbfull()->TEST_CompactRange(0, "", "z");
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(DBTest, LargeValues3) {
|
|
||||||
// Make sure we don't compress values if
|
|
||||||
Options options;
|
|
||||||
options.large_value_threshold = 10000;
|
|
||||||
options.compression = kNoCompression;
|
|
||||||
Reopen(&options);
|
|
||||||
|
|
||||||
Random rnd(301);
|
|
||||||
|
|
||||||
std::string big1 = std::string(100000, 'x'); // Very compressible
|
|
||||||
std::set<LargeValueRef> expected;
|
|
||||||
|
|
||||||
ASSERT_OK(Put("big1", big1));
|
|
||||||
ASSERT_EQ(big1, Get("big1"));
|
|
||||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
|
||||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TEST(DBTest, DBOpen_Options) {
|
TEST(DBTest, DBOpen_Options) {
|
||||||
std::string dbname = test::TmpDir() + "/db_options_test";
|
std::string dbname = test::TmpDir() + "/db_options_test";
|
||||||
DestroyDB(dbname, Options());
|
DestroyDB(dbname, Options());
|
||||||
@ -1025,9 +847,6 @@ class ModelDB: public DB {
|
|||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
map_[it.key().ToString()] = it.value().ToString();
|
map_[it.key().ToString()] = it.value().ToString();
|
||||||
break;
|
break;
|
||||||
case kTypeLargeValueRef:
|
|
||||||
assert(false); // Should not occur
|
|
||||||
break;
|
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
map_.erase(it.key().ToString());
|
map_.erase(it.key().ToString());
|
||||||
break;
|
break;
|
||||||
|
@ -84,69 +84,4 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LargeValueRef LargeValueRef::Make(const Slice& value, CompressionType ctype) {
|
|
||||||
LargeValueRef result;
|
|
||||||
port::SHA1_Hash(value.data(), value.size(), &result.data[0]);
|
|
||||||
EncodeFixed64(&result.data[20], value.size());
|
|
||||||
result.data[28] = static_cast<unsigned char>(ctype);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string LargeValueRefToFilenameString(const LargeValueRef& h) {
|
|
||||||
assert(sizeof(h.data) == LargeValueRef::ByteSize());
|
|
||||||
assert(sizeof(h.data) == 29); // So we can hardcode the array size of buf
|
|
||||||
static const char tohex[] = "0123456789abcdef";
|
|
||||||
char buf[20*2];
|
|
||||||
for (int i = 0; i < 20; i++) {
|
|
||||||
buf[2*i] = tohex[(h.data[i] >> 4) & 0xf];
|
|
||||||
buf[2*i+1] = tohex[h.data[i] & 0xf];
|
|
||||||
}
|
|
||||||
std::string result = std::string(buf, sizeof(buf));
|
|
||||||
result += "-";
|
|
||||||
result += NumberToString(h.ValueSize());
|
|
||||||
result += "-";
|
|
||||||
result += NumberToString(static_cast<uint64_t>(h.compression_type()));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint32_t hexvalue(char c) {
|
|
||||||
if (c >= '0' && c <= '9') {
|
|
||||||
return c - '0';
|
|
||||||
} else if (c >= 'A' && c <= 'F') {
|
|
||||||
return 10 + c - 'A';
|
|
||||||
} else {
|
|
||||||
assert(c >= 'a' && c <= 'f');
|
|
||||||
return 10 + c - 'a';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) {
|
|
||||||
Slice in = s;
|
|
||||||
if (in.size() < 40) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < 20; i++) {
|
|
||||||
if (!isxdigit(in[i*2]) || !isxdigit(in[i*2+1])) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
unsigned char c = (hexvalue(in[i*2])<<4) | hexvalue(in[i*2+1]);
|
|
||||||
h->data[i] = c;
|
|
||||||
}
|
|
||||||
in.remove_prefix(40);
|
|
||||||
uint64_t value_size, ctype;
|
|
||||||
|
|
||||||
if (ConsumeChar(&in, '-') &&
|
|
||||||
ConsumeDecimalNumber(&in, &value_size) &&
|
|
||||||
ConsumeChar(&in, '-') &&
|
|
||||||
ConsumeDecimalNumber(&in, &ctype) &&
|
|
||||||
in.empty() &&
|
|
||||||
(ctype <= kSnappyCompression)) {
|
|
||||||
EncodeFixed64(&h->data[20], value_size);
|
|
||||||
h->data[28] = static_cast<unsigned char>(ctype);
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -29,7 +29,6 @@ class InternalKey;
|
|||||||
enum ValueType {
|
enum ValueType {
|
||||||
kTypeDeletion = 0x0,
|
kTypeDeletion = 0x0,
|
||||||
kTypeValue = 0x1,
|
kTypeValue = 0x1,
|
||||||
kTypeLargeValueRef = 0x2,
|
|
||||||
};
|
};
|
||||||
// kValueTypeForSeek defines the ValueType that should be passed when
|
// kValueTypeForSeek defines the ValueType that should be passed when
|
||||||
// constructing a ParsedInternalKey object for seeking to a particular
|
// constructing a ParsedInternalKey object for seeking to a particular
|
||||||
@ -37,7 +36,7 @@ enum ValueType {
|
|||||||
// and the value type is embedded as the low 8 bits in the sequence
|
// and the value type is embedded as the low 8 bits in the sequence
|
||||||
// number in internal keys, we need to use the highest-numbered
|
// number in internal keys, we need to use the highest-numbered
|
||||||
// ValueType, not the lowest).
|
// ValueType, not the lowest).
|
||||||
static const ValueType kValueTypeForSeek = kTypeLargeValueRef;
|
static const ValueType kValueTypeForSeek = kTypeValue;
|
||||||
|
|
||||||
typedef uint64_t SequenceNumber;
|
typedef uint64_t SequenceNumber;
|
||||||
|
|
||||||
@ -139,54 +138,6 @@ inline int InternalKeyComparator::Compare(
|
|||||||
return Compare(a.Encode(), b.Encode());
|
return Compare(a.Encode(), b.Encode());
|
||||||
}
|
}
|
||||||
|
|
||||||
// LargeValueRef is a 160-bit hash value (20 bytes), plus an 8 byte
|
|
||||||
// uncompressed size, and a 1 byte CompressionType code. An
|
|
||||||
// encoded form of it is embedded in the filenames of large value
|
|
||||||
// files stored in the database, and the raw binary form is stored as
|
|
||||||
// the iter->value() result for values of type kTypeLargeValueRef in
|
|
||||||
// the table and log files that make up the database.
|
|
||||||
struct LargeValueRef {
|
|
||||||
char data[29];
|
|
||||||
|
|
||||||
// Initialize a large value ref for the given data
|
|
||||||
static LargeValueRef Make(const Slice& data,
|
|
||||||
CompressionType compression_type);
|
|
||||||
|
|
||||||
// Initialize a large value ref from a serialized, 29-byte reference value
|
|
||||||
static LargeValueRef FromRef(const Slice& ref) {
|
|
||||||
LargeValueRef result;
|
|
||||||
assert(ref.size() == sizeof(result.data));
|
|
||||||
memcpy(result.data, ref.data(), sizeof(result.data));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the number of bytes in a LargeValueRef (not the
|
|
||||||
// number of bytes in the value referenced).
|
|
||||||
static size_t ByteSize() { return sizeof(LargeValueRef().data); }
|
|
||||||
|
|
||||||
// Return the number of bytes in the value referenced by "*this".
|
|
||||||
uint64_t ValueSize() const { return DecodeFixed64(&data[20]); }
|
|
||||||
|
|
||||||
CompressionType compression_type() const {
|
|
||||||
return static_cast<CompressionType>(data[28]);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const LargeValueRef& b) const {
|
|
||||||
return memcmp(data, b.data, sizeof(data)) == 0;
|
|
||||||
}
|
|
||||||
bool operator<(const LargeValueRef& b) const {
|
|
||||||
return memcmp(data, b.data, sizeof(data)) < 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Convert the large value ref to a human-readable string suitable
|
|
||||||
// for embedding in a large value filename.
|
|
||||||
extern std::string LargeValueRefToFilenameString(const LargeValueRef& h);
|
|
||||||
|
|
||||||
// Parse the large value filename string in "input" and store it in
|
|
||||||
// "*h". If successful, returns true. Otherwise returns false.
|
|
||||||
extern bool FilenameStringToLargeValueRef(const Slice& in, LargeValueRef* ref);
|
|
||||||
|
|
||||||
inline bool ParseInternalKey(const Slice& internal_key,
|
inline bool ParseInternalKey(const Slice& internal_key,
|
||||||
ParsedInternalKey* result) {
|
ParsedInternalKey* result) {
|
||||||
const size_t n = internal_key.size();
|
const size_t n = internal_key.size();
|
||||||
@ -196,7 +147,7 @@ inline bool ParseInternalKey(const Slice& internal_key,
|
|||||||
result->sequence = num >> 8;
|
result->sequence = num >> 8;
|
||||||
result->type = static_cast<ValueType>(c);
|
result->type = static_cast<ValueType>(c);
|
||||||
result->user_key = Slice(internal_key.data(), n - 8);
|
result->user_key = Slice(internal_key.data(), n - 8);
|
||||||
return (c <= static_cast<unsigned char>(kTypeLargeValueRef));
|
return (c <= static_cast<unsigned char>(kTypeValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -76,9 +76,6 @@ TEST(FormatTest, InternalKeyShortSeparator) {
|
|||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
Shorten(IKey("foo", 100, kTypeValue),
|
Shorten(IKey("foo", 100, kTypeValue),
|
||||||
IKey("foo", 100, kTypeDeletion)));
|
IKey("foo", 100, kTypeDeletion)));
|
||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
|
||||||
Shorten(IKey("foo", 100, kTypeValue),
|
|
||||||
IKey("foo", 100, kTypeLargeValueRef)));
|
|
||||||
|
|
||||||
// When user keys are misordered
|
// When user keys are misordered
|
||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
@ -108,18 +105,6 @@ TEST(FormatTest, InternalKeyShortestSuccessor) {
|
|||||||
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FormatTest, SHA1) {
|
|
||||||
// Check that we are computing the same value as sha1.
|
|
||||||
// Note that the last two numbers are the length of the input and the
|
|
||||||
// compression type.
|
|
||||||
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-0", // SHA1, uncompr
|
|
||||||
LargeValueRefToFilenameString(
|
|
||||||
LargeValueRef::Make("hello", kNoCompression)));
|
|
||||||
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1", // SHA1, lwcompr
|
|
||||||
LargeValueRefToFilenameString(
|
|
||||||
LargeValueRef::Make("hello", kSnappyCompression)));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -30,14 +30,6 @@ std::string TableFileName(const std::string& name, uint64_t number) {
|
|||||||
return MakeFileName(name, number, "sst");
|
return MakeFileName(name, number, "sst");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string LargeValueFileName(const std::string& name,
|
|
||||||
const LargeValueRef& large_ref) {
|
|
||||||
std::string result = name + "/";
|
|
||||||
result += LargeValueRefToFilenameString(large_ref);
|
|
||||||
result += ".val";
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
||||||
assert(number > 0);
|
assert(number > 0);
|
||||||
char buf[100];
|
char buf[100];
|
||||||
@ -75,11 +67,9 @@ std::string OldInfoLogFileName(const std::string& dbname) {
|
|||||||
// dbname/LOG
|
// dbname/LOG
|
||||||
// dbname/LOG.old
|
// dbname/LOG.old
|
||||||
// dbname/MANIFEST-[0-9]+
|
// dbname/MANIFEST-[0-9]+
|
||||||
// dbname/[0-9a-f]{20}-[0-9]+-[0-9]+.val
|
|
||||||
// dbname/[0-9]+.(log|sst)
|
// dbname/[0-9]+.(log|sst)
|
||||||
bool ParseFileName(const std::string& fname,
|
bool ParseFileName(const std::string& fname,
|
||||||
uint64_t* number,
|
uint64_t* number,
|
||||||
LargeValueRef* large_ref,
|
|
||||||
FileType* type) {
|
FileType* type) {
|
||||||
Slice rest(fname);
|
Slice rest(fname);
|
||||||
if (rest == "CURRENT") {
|
if (rest == "CURRENT") {
|
||||||
@ -91,15 +81,6 @@ bool ParseFileName(const std::string& fname,
|
|||||||
} else if (rest == "LOG" || rest == "LOG.old") {
|
} else if (rest == "LOG" || rest == "LOG.old") {
|
||||||
*number = 0;
|
*number = 0;
|
||||||
*type = kInfoLogFile;
|
*type = kInfoLogFile;
|
||||||
} else if (rest.size() >= 4 &&
|
|
||||||
Slice(rest.data() + rest.size() - 4, 4) == ".val") {
|
|
||||||
LargeValueRef h;
|
|
||||||
if (!FilenameStringToLargeValueRef(Slice(rest.data(), rest.size() - 4),
|
|
||||||
&h)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*large_ref = h;
|
|
||||||
*type = kLargeValueFile;
|
|
||||||
} else if (rest.starts_with("MANIFEST-")) {
|
} else if (rest.starts_with("MANIFEST-")) {
|
||||||
rest.remove_prefix(strlen("MANIFEST-"));
|
rest.remove_prefix(strlen("MANIFEST-"));
|
||||||
uint64_t num;
|
uint64_t num;
|
||||||
|
@ -16,13 +16,11 @@
|
|||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
class Env;
|
class Env;
|
||||||
struct LargeValueRef;
|
|
||||||
|
|
||||||
enum FileType {
|
enum FileType {
|
||||||
kLogFile,
|
kLogFile,
|
||||||
kDBLockFile,
|
kDBLockFile,
|
||||||
kTableFile,
|
kTableFile,
|
||||||
kLargeValueFile,
|
|
||||||
kDescriptorFile,
|
kDescriptorFile,
|
||||||
kCurrentFile,
|
kCurrentFile,
|
||||||
kTempFile,
|
kTempFile,
|
||||||
@ -39,12 +37,6 @@ extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
|||||||
// "dbname".
|
// "dbname".
|
||||||
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||||
|
|
||||||
// Return the name of the large value file with the specified large
|
|
||||||
// value reference in the db named by "dbname". The result will be
|
|
||||||
// prefixed with "dbname".
|
|
||||||
extern std::string LargeValueFileName(const std::string& dbname,
|
|
||||||
const LargeValueRef& large_ref);
|
|
||||||
|
|
||||||
// Return the name of the descriptor file for the db named by
|
// Return the name of the descriptor file for the db named by
|
||||||
// "dbname" and the specified incarnation number. The result will be
|
// "dbname" and the specified incarnation number. The result will be
|
||||||
// prefixed with "dbname".
|
// prefixed with "dbname".
|
||||||
@ -71,14 +63,10 @@ extern std::string InfoLogFileName(const std::string& dbname);
|
|||||||
extern std::string OldInfoLogFileName(const std::string& dbname);
|
extern std::string OldInfoLogFileName(const std::string& dbname);
|
||||||
|
|
||||||
// If filename is a leveldb file, store the type of the file in *type.
|
// If filename is a leveldb file, store the type of the file in *type.
|
||||||
// If *type is kLargeValueFile, then the large value reference data
|
// The number encoded in the filename is stored in *number. If the
|
||||||
// from the filename is stored in "*large_ref. For all other types of
|
// filename was successfully parsed, returns true. Else return false.
|
||||||
// files, the number encoded in the filename is stored in *number. If
|
|
||||||
// the filename was successfully parsed, returns true. Else return
|
|
||||||
// false.
|
|
||||||
extern bool ParseFileName(const std::string& filename,
|
extern bool ParseFileName(const std::string& filename,
|
||||||
uint64_t* number,
|
uint64_t* number,
|
||||||
LargeValueRef* large_ref,
|
|
||||||
FileType* type);
|
FileType* type);
|
||||||
|
|
||||||
// Make the CURRENT file point to the descriptor file with the
|
// Make the CURRENT file point to the descriptor file with the
|
||||||
|
@ -17,42 +17,29 @@ TEST(FileNameTest, Parse) {
|
|||||||
Slice db;
|
Slice db;
|
||||||
FileType type;
|
FileType type;
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
LargeValueRef large_ref;
|
|
||||||
|
|
||||||
// Successful parses
|
// Successful parses
|
||||||
static struct {
|
static struct {
|
||||||
const char* fname;
|
const char* fname;
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
const char* large_ref;
|
|
||||||
FileType type;
|
FileType type;
|
||||||
} cases[] = {
|
} cases[] = {
|
||||||
{ "100.log", 100, "", kLogFile },
|
{ "100.log", 100, kLogFile },
|
||||||
{ "0.log", 0, "", kLogFile },
|
{ "0.log", 0, kLogFile },
|
||||||
{ "0.sst", 0, "", kTableFile },
|
{ "0.sst", 0, kTableFile },
|
||||||
{ "CURRENT", 0, "", kCurrentFile },
|
{ "CURRENT", 0, kCurrentFile },
|
||||||
{ "LOCK", 0, "", kDBLockFile },
|
{ "LOCK", 0, kDBLockFile },
|
||||||
{ "MANIFEST-2", 2, "", kDescriptorFile },
|
{ "MANIFEST-2", 2, kDescriptorFile },
|
||||||
{ "MANIFEST-7", 7, "", kDescriptorFile },
|
{ "MANIFEST-7", 7, kDescriptorFile },
|
||||||
{ "LOG", 0, "", kInfoLogFile },
|
{ "LOG", 0, kInfoLogFile },
|
||||||
{ "LOG.old", 0, "", kInfoLogFile },
|
{ "LOG.old", 0, kInfoLogFile },
|
||||||
{ "18446744073709551615.log", 18446744073709551615ull, "",
|
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
|
||||||
kLogFile },
|
|
||||||
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0.val", 0,
|
|
||||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0", kLargeValueFile },
|
|
||||||
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0.val", 0,
|
|
||||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0",
|
|
||||||
kLargeValueFile },
|
|
||||||
};
|
};
|
||||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||||
std::string f = cases[i].fname;
|
std::string f = cases[i].fname;
|
||||||
ASSERT_TRUE(ParseFileName(f, &number, &large_ref, &type)) << f;
|
ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
|
||||||
ASSERT_EQ(cases[i].type, type) << f;
|
ASSERT_EQ(cases[i].type, type) << f;
|
||||||
if (type == kLargeValueFile) {
|
ASSERT_EQ(cases[i].number, number) << f;
|
||||||
ASSERT_EQ(cases[i].large_ref, LargeValueRefToFilenameString(large_ref))
|
|
||||||
<< f;
|
|
||||||
} else {
|
|
||||||
ASSERT_EQ(cases[i].number, number) << f;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Errors
|
// Errors
|
||||||
@ -78,75 +65,54 @@ TEST(FileNameTest, Parse) {
|
|||||||
"184467440737095516150.log",
|
"184467440737095516150.log",
|
||||||
"100",
|
"100",
|
||||||
"100.",
|
"100.",
|
||||||
"100.lop",
|
"100.lop"
|
||||||
"100.val",
|
};
|
||||||
".val",
|
|
||||||
"123456789012345678901234567890123456789-12340.val",
|
|
||||||
"1234567890123456789012345678901234567-123-0.val",
|
|
||||||
"12345678901234567890123456789012345678902-100-1-.val",
|
|
||||||
// Overflow on value size
|
|
||||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000000000000000000-1.val",
|
|
||||||
// '03.val' is a bad compression type
|
|
||||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000-3.val" };
|
|
||||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||||
std::string f = errors[i];
|
std::string f = errors[i];
|
||||||
ASSERT_TRUE(!ParseFileName(f, &number, &large_ref, &type)) << f;
|
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(FileNameTest, Construction) {
|
TEST(FileNameTest, Construction) {
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
FileType type;
|
FileType type;
|
||||||
LargeValueRef large_ref;
|
|
||||||
std::string fname;
|
std::string fname;
|
||||||
|
|
||||||
fname = CurrentFileName("foo");
|
fname = CurrentFileName("foo");
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(0, number);
|
ASSERT_EQ(0, number);
|
||||||
ASSERT_EQ(kCurrentFile, type);
|
ASSERT_EQ(kCurrentFile, type);
|
||||||
|
|
||||||
fname = LockFileName("foo");
|
fname = LockFileName("foo");
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(0, number);
|
ASSERT_EQ(0, number);
|
||||||
ASSERT_EQ(kDBLockFile, type);
|
ASSERT_EQ(kDBLockFile, type);
|
||||||
|
|
||||||
fname = LogFileName("foo", 192);
|
fname = LogFileName("foo", 192);
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(192, number);
|
ASSERT_EQ(192, number);
|
||||||
ASSERT_EQ(kLogFile, type);
|
ASSERT_EQ(kLogFile, type);
|
||||||
|
|
||||||
fname = TableFileName("bar", 200);
|
fname = TableFileName("bar", 200);
|
||||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(200, number);
|
ASSERT_EQ(200, number);
|
||||||
ASSERT_EQ(kTableFile, type);
|
ASSERT_EQ(kTableFile, type);
|
||||||
|
|
||||||
fname = DescriptorFileName("bar", 100);
|
fname = DescriptorFileName("bar", 100);
|
||||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(100, number);
|
ASSERT_EQ(100, number);
|
||||||
ASSERT_EQ(kDescriptorFile, type);
|
ASSERT_EQ(kDescriptorFile, type);
|
||||||
|
|
||||||
fname = TempFileName("tmp", 999);
|
fname = TempFileName("tmp", 999);
|
||||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||||
ASSERT_EQ(999, number);
|
ASSERT_EQ(999, number);
|
||||||
ASSERT_EQ(kTempFile, type);
|
ASSERT_EQ(kTempFile, type);
|
||||||
|
|
||||||
for (int i = 0; i <= kSnappyCompression; i++) {
|
|
||||||
CompressionType ctype = static_cast<CompressionType>(i);
|
|
||||||
std::string value = "abcdef";
|
|
||||||
LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype);
|
|
||||||
fname = LargeValueFileName("tmp", real_large_ref);
|
|
||||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
|
||||||
ASSERT_TRUE(real_large_ref == large_ref);
|
|
||||||
ASSERT_EQ(kLargeValueFile, type);
|
|
||||||
ASSERT_EQ(large_ref.compression_type(), ctype);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -46,9 +46,9 @@ Status Writer::AddRecord(const Slice& slice) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Invariant: we never leave < kHeaderSize bytes in a block.
|
// Invariant: we never leave < kHeaderSize bytes in a block.
|
||||||
const int avail = kBlockSize - block_offset_ - kHeaderSize;
|
assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
|
||||||
assert(avail >= 0);
|
|
||||||
|
|
||||||
|
const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
|
||||||
const size_t fragment_length = (left < avail) ? left : avail;
|
const size_t fragment_length = (left < avail) ? left : avail;
|
||||||
|
|
||||||
RecordType type;
|
RecordType type;
|
||||||
|
40
db/repair.cc
40
db/repair.cc
@ -6,8 +6,7 @@
|
|||||||
// (1) Any log files are first converted to tables
|
// (1) Any log files are first converted to tables
|
||||||
// (2) We scan every table to compute
|
// (2) We scan every table to compute
|
||||||
// (a) smallest/largest for the table
|
// (a) smallest/largest for the table
|
||||||
// (b) large value refs from the table
|
// (b) largest sequence number in the table
|
||||||
// (c) largest sequence number in the table
|
|
||||||
// (3) We generate descriptor contents:
|
// (3) We generate descriptor contents:
|
||||||
// - log number is set to zero
|
// - log number is set to zero
|
||||||
// - next-file-number is set to 1 + largest file number we found
|
// - next-file-number is set to 1 + largest file number we found
|
||||||
@ -22,9 +21,8 @@
|
|||||||
// (c) For each table: if it overlaps earlier table, place in level-0,
|
// (c) For each table: if it overlaps earlier table, place in level-0,
|
||||||
// else place in level-M.
|
// else place in level-M.
|
||||||
// Possible optimization 2:
|
// Possible optimization 2:
|
||||||
// Store per-table metadata (smallest, largest, largest-seq#,
|
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
||||||
// large-value-refs, ...) in the table's meta section to speed up
|
// in the table's meta section to speed up ScanTable.
|
||||||
// ScanTable.
|
|
||||||
|
|
||||||
#include "db/builder.h"
|
#include "db/builder.h"
|
||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
@ -73,7 +71,7 @@ class Repairer {
|
|||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
unsigned long long bytes = 0;
|
unsigned long long bytes = 0;
|
||||||
for (int i = 0; i < tables_.size(); i++) {
|
for (size_t i = 0; i < tables_.size(); i++) {
|
||||||
bytes += tables_[i].meta.file_size;
|
bytes += tables_[i].meta.file_size;
|
||||||
}
|
}
|
||||||
Log(env_, options_.info_log,
|
Log(env_, options_.info_log,
|
||||||
@ -119,13 +117,10 @@ class Repairer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
LargeValueRef large_ref;
|
|
||||||
FileType type;
|
FileType type;
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
for (size_t i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
if (ParseFileName(filenames[i], &number, &type)) {
|
||||||
if (type == kLargeValueFile) {
|
if (type == kDescriptorFile) {
|
||||||
// Will be picked up when we process a Table that points to it
|
|
||||||
} else if (type == kDescriptorFile) {
|
|
||||||
manifests_.push_back(filenames[i]);
|
manifests_.push_back(filenames[i]);
|
||||||
} else {
|
} else {
|
||||||
if (number + 1 > next_file_number_) {
|
if (number + 1 > next_file_number_) {
|
||||||
@ -145,7 +140,7 @@ class Repairer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ConvertLogFilesToTables() {
|
void ConvertLogFilesToTables() {
|
||||||
for (int i = 0; i < logs_.size(); i++) {
|
for (size_t i = 0; i < logs_.size(); i++) {
|
||||||
std::string logname = LogFileName(dbname_, logs_[i]);
|
std::string logname = LogFileName(dbname_, logs_[i]);
|
||||||
Status status = ConvertLogToTable(logs_[i]);
|
Status status = ConvertLogToTable(logs_[i]);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
@ -239,7 +234,7 @@ class Repairer {
|
|||||||
|
|
||||||
void ExtractMetaData() {
|
void ExtractMetaData() {
|
||||||
std::vector<TableInfo> kept;
|
std::vector<TableInfo> kept;
|
||||||
for (int i = 0; i < table_numbers_.size(); i++) {
|
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
||||||
TableInfo t;
|
TableInfo t;
|
||||||
t.meta.number = table_numbers_[i];
|
t.meta.number = table_numbers_[i];
|
||||||
Status status = ScanTable(&t);
|
Status status = ScanTable(&t);
|
||||||
@ -283,17 +278,6 @@ class Repairer {
|
|||||||
if (parsed.sequence > t->max_sequence) {
|
if (parsed.sequence > t->max_sequence) {
|
||||||
t->max_sequence = parsed.sequence;
|
t->max_sequence = parsed.sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
|
||||||
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
|
||||||
Log(env_, options_.info_log, "Table #%llu: bad large value ref",
|
|
||||||
(unsigned long long) t->meta.number);
|
|
||||||
} else {
|
|
||||||
edit_.AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
|
||||||
t->meta.number,
|
|
||||||
key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!iter->status().ok()) {
|
if (!iter->status().ok()) {
|
||||||
status = iter->status();
|
status = iter->status();
|
||||||
@ -316,7 +300,7 @@ class Repairer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SequenceNumber max_sequence = 0;
|
SequenceNumber max_sequence = 0;
|
||||||
for (int i = 0; i < tables_.size(); i++) {
|
for (size_t i = 0; i < tables_.size(); i++) {
|
||||||
if (max_sequence < tables_[i].max_sequence) {
|
if (max_sequence < tables_[i].max_sequence) {
|
||||||
max_sequence = tables_[i].max_sequence;
|
max_sequence = tables_[i].max_sequence;
|
||||||
}
|
}
|
||||||
@ -327,7 +311,7 @@ class Repairer {
|
|||||||
edit_.SetNextFile(next_file_number_);
|
edit_.SetNextFile(next_file_number_);
|
||||||
edit_.SetLastSequence(max_sequence);
|
edit_.SetLastSequence(max_sequence);
|
||||||
|
|
||||||
for (int i = 0; i < tables_.size(); i++) {
|
for (size_t i = 0; i < tables_.size(); i++) {
|
||||||
// TODO(opt): separate out into multiple levels
|
// TODO(opt): separate out into multiple levels
|
||||||
const TableInfo& t = tables_[i];
|
const TableInfo& t = tables_[i];
|
||||||
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
||||||
@ -351,7 +335,7 @@ class Repairer {
|
|||||||
env_->DeleteFile(tmp);
|
env_->DeleteFile(tmp);
|
||||||
} else {
|
} else {
|
||||||
// Discard older manifests
|
// Discard older manifests
|
||||||
for (int i = 0; i < manifests_.size(); i++) {
|
for (size_t i = 0; i < manifests_.size(); i++) {
|
||||||
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ enum Tag {
|
|||||||
kCompactPointer = 5,
|
kCompactPointer = 5,
|
||||||
kDeletedFile = 6,
|
kDeletedFile = 6,
|
||||||
kNewFile = 7,
|
kNewFile = 7,
|
||||||
kLargeValueRef = 8,
|
// 8 was used for large value refs
|
||||||
kPrevLogNumber = 9,
|
kPrevLogNumber = 9,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -36,7 +36,6 @@ void VersionEdit::Clear() {
|
|||||||
has_last_sequence_ = false;
|
has_last_sequence_ = false;
|
||||||
deleted_files_.clear();
|
deleted_files_.clear();
|
||||||
new_files_.clear();
|
new_files_.clear();
|
||||||
large_refs_added_.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VersionEdit::EncodeTo(std::string* dst) const {
|
void VersionEdit::EncodeTo(std::string* dst) const {
|
||||||
@ -61,7 +60,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
|||||||
PutVarint64(dst, last_sequence_);
|
PutVarint64(dst, last_sequence_);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < compact_pointers_.size(); i++) {
|
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||||
PutVarint32(dst, kCompactPointer);
|
PutVarint32(dst, kCompactPointer);
|
||||||
PutVarint32(dst, compact_pointers_[i].first); // level
|
PutVarint32(dst, compact_pointers_[i].first); // level
|
||||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
||||||
@ -75,7 +74,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
|||||||
PutVarint64(dst, iter->second); // file number
|
PutVarint64(dst, iter->second); // file number
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < new_files_.size(); i++) {
|
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||||
const FileMetaData& f = new_files_[i].second;
|
const FileMetaData& f = new_files_[i].second;
|
||||||
PutVarint32(dst, kNewFile);
|
PutVarint32(dst, kNewFile);
|
||||||
PutVarint32(dst, new_files_[i].first); // level
|
PutVarint32(dst, new_files_[i].first); // level
|
||||||
@ -84,15 +83,6 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
|||||||
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
||||||
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < large_refs_added_.size(); i++) {
|
|
||||||
const VersionEdit::Large& l = large_refs_added_[i];
|
|
||||||
PutVarint32(dst, kLargeValueRef);
|
|
||||||
PutLengthPrefixedSlice(dst,
|
|
||||||
Slice(l.large_ref.data, LargeValueRef::ByteSize()));
|
|
||||||
PutVarint64(dst, l.fnum);
|
|
||||||
PutLengthPrefixedSlice(dst, l.internal_key.Encode());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
||||||
@ -127,7 +117,6 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
|||||||
uint64_t number;
|
uint64_t number;
|
||||||
FileMetaData f;
|
FileMetaData f;
|
||||||
Slice str;
|
Slice str;
|
||||||
Large large;
|
|
||||||
InternalKey key;
|
InternalKey key;
|
||||||
|
|
||||||
while (msg == NULL && GetVarint32(&input, &tag)) {
|
while (msg == NULL && GetVarint32(&input, &tag)) {
|
||||||
@ -203,18 +192,6 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case kLargeValueRef:
|
|
||||||
if (GetLengthPrefixedSlice(&input, &str) &&
|
|
||||||
(str.size() == LargeValueRef::ByteSize()) &&
|
|
||||||
GetVarint64(&input, &large.fnum) &&
|
|
||||||
GetInternalKey(&input, &large.internal_key)) {
|
|
||||||
large.large_ref = LargeValueRef::FromRef(str);
|
|
||||||
large_refs_added_.push_back(large);
|
|
||||||
} else {
|
|
||||||
msg = "large ref";
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
msg = "unknown tag";
|
msg = "unknown tag";
|
||||||
break;
|
break;
|
||||||
@ -255,7 +232,7 @@ std::string VersionEdit::DebugString() const {
|
|||||||
r.append("\n LastSeq: ");
|
r.append("\n LastSeq: ");
|
||||||
AppendNumberTo(&r, last_sequence_);
|
AppendNumberTo(&r, last_sequence_);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < compact_pointers_.size(); i++) {
|
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||||
r.append("\n CompactPointer: ");
|
r.append("\n CompactPointer: ");
|
||||||
AppendNumberTo(&r, compact_pointers_[i].first);
|
AppendNumberTo(&r, compact_pointers_[i].first);
|
||||||
r.append(" '");
|
r.append(" '");
|
||||||
@ -270,7 +247,7 @@ std::string VersionEdit::DebugString() const {
|
|||||||
r.append(" ");
|
r.append(" ");
|
||||||
AppendNumberTo(&r, iter->second);
|
AppendNumberTo(&r, iter->second);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < new_files_.size(); i++) {
|
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||||
const FileMetaData& f = new_files_[i].second;
|
const FileMetaData& f = new_files_[i].second;
|
||||||
r.append("\n AddFile: ");
|
r.append("\n AddFile: ");
|
||||||
AppendNumberTo(&r, new_files_[i].first);
|
AppendNumberTo(&r, new_files_[i].first);
|
||||||
@ -284,16 +261,6 @@ std::string VersionEdit::DebugString() const {
|
|||||||
AppendEscapedStringTo(&r, f.largest.Encode());
|
AppendEscapedStringTo(&r, f.largest.Encode());
|
||||||
r.append("'");
|
r.append("'");
|
||||||
}
|
}
|
||||||
for (int i = 0; i < large_refs_added_.size(); i++) {
|
|
||||||
const VersionEdit::Large& l = large_refs_added_[i];
|
|
||||||
r.append("\n LargeRef: ");
|
|
||||||
AppendNumberTo(&r, l.fnum);
|
|
||||||
r.append(" ");
|
|
||||||
r.append(LargeValueRefToFilenameString(l.large_ref));
|
|
||||||
r.append(" '");
|
|
||||||
AppendEscapedStringTo(&r, l.internal_key.Encode());
|
|
||||||
r.append("'");
|
|
||||||
}
|
|
||||||
r.append("\n}\n");
|
r.append("\n}\n");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -75,18 +75,6 @@ class VersionEdit {
|
|||||||
deleted_files_.insert(std::make_pair(level, file));
|
deleted_files_.insert(std::make_pair(level, file));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record that a large value with the specified large_ref was
|
|
||||||
// written to the output file numbered "fnum"
|
|
||||||
void AddLargeValueRef(const LargeValueRef& large_ref,
|
|
||||||
uint64_t fnum,
|
|
||||||
const Slice& internal_key) {
|
|
||||||
large_refs_added_.resize(large_refs_added_.size() + 1);
|
|
||||||
Large* large = &(large_refs_added_.back());
|
|
||||||
large->large_ref = large_ref;
|
|
||||||
large->fnum = fnum;
|
|
||||||
large->internal_key.DecodeFrom(internal_key);
|
|
||||||
}
|
|
||||||
|
|
||||||
void EncodeTo(std::string* dst) const;
|
void EncodeTo(std::string* dst) const;
|
||||||
Status DecodeFrom(const Slice& src);
|
Status DecodeFrom(const Slice& src);
|
||||||
|
|
||||||
@ -111,12 +99,6 @@ class VersionEdit {
|
|||||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
||||||
DeletedFileSet deleted_files_;
|
DeletedFileSet deleted_files_;
|
||||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||||
struct Large {
|
|
||||||
LargeValueRef large_ref;
|
|
||||||
uint64_t fnum;
|
|
||||||
InternalKey internal_key;
|
|
||||||
};
|
|
||||||
std::vector<Large> large_refs_added_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -26,13 +26,9 @@ TEST(VersionEditTest, EncodeDecode) {
|
|||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
TestEncodeDecode(edit);
|
TestEncodeDecode(edit);
|
||||||
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
||||||
InternalKey("foo", kBig + 500 + i, kTypeLargeValueRef),
|
InternalKey("foo", kBig + 500 + i, kTypeValue),
|
||||||
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
|
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
|
||||||
edit.DeleteFile(4, kBig + 700 + i);
|
edit.DeleteFile(4, kBig + 700 + i);
|
||||||
edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression),
|
|
||||||
kBig + 800 + i, "foobar");
|
|
||||||
edit.AddLargeValueRef(LargeValueRef::Make("big2", kSnappyCompression),
|
|
||||||
kBig + 801 + i, "baz");
|
|
||||||
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ std::string IntSetToString(const std::set<uint64_t>& s) {
|
|||||||
Version::~Version() {
|
Version::~Version() {
|
||||||
assert(refs_ == 0);
|
assert(refs_ == 0);
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
for (int i = 0; i < files_[level].size(); i++) {
|
for (size_t i = 0; i < files_[level].size(); i++) {
|
||||||
FileMetaData* f = files_[level][i];
|
FileMetaData* f = files_[level][i];
|
||||||
assert(f->refs >= 0);
|
assert(f->refs >= 0);
|
||||||
f->refs--;
|
f->refs--;
|
||||||
@ -134,7 +134,7 @@ class Version::LevelFileNumIterator : public Iterator {
|
|||||||
private:
|
private:
|
||||||
const InternalKeyComparator icmp_;
|
const InternalKeyComparator icmp_;
|
||||||
const std::vector<FileMetaData*>* const flist_;
|
const std::vector<FileMetaData*>* const flist_;
|
||||||
int index_;
|
uint32_t index_;
|
||||||
|
|
||||||
// Backing store for value(). Holds the file number and size.
|
// Backing store for value(). Holds the file number and size.
|
||||||
mutable char value_buf_[16];
|
mutable char value_buf_[16];
|
||||||
@ -164,7 +164,7 @@ Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
|
|||||||
void Version::AddIterators(const ReadOptions& options,
|
void Version::AddIterators(const ReadOptions& options,
|
||||||
std::vector<Iterator*>* iters) {
|
std::vector<Iterator*>* iters) {
|
||||||
// Merge all level zero files together since they may overlap
|
// Merge all level zero files together since they may overlap
|
||||||
for (int i = 0; i < files_[0].size(); i++) {
|
for (size_t i = 0; i < files_[0].size(); i++) {
|
||||||
iters->push_back(
|
iters->push_back(
|
||||||
vset_->table_cache_->NewIterator(
|
vset_->table_cache_->NewIterator(
|
||||||
options, files_[0][i]->number, files_[0][i]->file_size));
|
options, files_[0][i]->number, files_[0][i]->file_size));
|
||||||
@ -201,7 +201,7 @@ std::string Version::DebugString() const {
|
|||||||
AppendNumberTo(&r, level);
|
AppendNumberTo(&r, level);
|
||||||
r.push_back(':');
|
r.push_back(':');
|
||||||
const std::vector<FileMetaData*>& files = files_[level];
|
const std::vector<FileMetaData*>& files = files_[level];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
r.push_back(' ');
|
r.push_back(' ');
|
||||||
AppendNumberTo(&r, files[i]->number);
|
AppendNumberTo(&r, files[i]->number);
|
||||||
r.push_back(':');
|
r.push_back(':');
|
||||||
@ -232,7 +232,7 @@ class VersionSet::Builder {
|
|||||||
: vset_(vset) {
|
: vset_(vset) {
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = base->files_[level];
|
const std::vector<FileMetaData*>& files = base->files_[level];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
FileMetaData* f = files[i];
|
FileMetaData* f = files[i];
|
||||||
f->refs++;
|
f->refs++;
|
||||||
files_[level].insert(std::make_pair(f->number, f));
|
files_[level].insert(std::make_pair(f->number, f));
|
||||||
@ -258,7 +258,7 @@ class VersionSet::Builder {
|
|||||||
// Apply all of the edits in *edit to the current state.
|
// Apply all of the edits in *edit to the current state.
|
||||||
void Apply(VersionEdit* edit) {
|
void Apply(VersionEdit* edit) {
|
||||||
// Update compaction pointers
|
// Update compaction pointers
|
||||||
for (int i = 0; i < edit->compact_pointers_.size(); i++) {
|
for (size_t i = 0; i < edit->compact_pointers_.size(); i++) {
|
||||||
const int level = edit->compact_pointers_[i].first;
|
const int level = edit->compact_pointers_[i].first;
|
||||||
vset_->compact_pointer_[level] =
|
vset_->compact_pointer_[level] =
|
||||||
edit->compact_pointers_[i].second.Encode().ToString();
|
edit->compact_pointers_[i].second.Encode().ToString();
|
||||||
@ -284,19 +284,13 @@ class VersionSet::Builder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add new files
|
// Add new files
|
||||||
for (int i = 0; i < edit->new_files_.size(); i++) {
|
for (size_t i = 0; i < edit->new_files_.size(); i++) {
|
||||||
const int level = edit->new_files_[i].first;
|
const int level = edit->new_files_[i].first;
|
||||||
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
||||||
f->refs = 1;
|
f->refs = 1;
|
||||||
assert(files_[level].count(f->number) == 0);
|
assert(files_[level].count(f->number) == 0);
|
||||||
files_[level].insert(std::make_pair(f->number, f));
|
files_[level].insert(std::make_pair(f->number, f));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add large value refs
|
|
||||||
for (int i = 0; i < edit->large_refs_added_.size(); i++) {
|
|
||||||
const VersionEdit::Large& l = edit->large_refs_added_[i];
|
|
||||||
vset_->RegisterLargeValueRef(l.large_ref, l.fnum, l.internal_key);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the current state in *v.
|
// Save the current state in *v.
|
||||||
@ -545,7 +539,7 @@ Status VersionSet::Recover() {
|
|||||||
|
|
||||||
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||||
int64_t sum = 0;
|
int64_t sum = 0;
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
sum += files[i]->file_size;
|
sum += files[i]->file_size;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
@ -610,25 +604,12 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
|
|||||||
// Save files
|
// Save files
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = current_->files_[level];
|
const std::vector<FileMetaData*>& files = current_->files_[level];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
const FileMetaData* f = files[i];
|
const FileMetaData* f = files[i];
|
||||||
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
|
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save large value refs
|
|
||||||
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
|
||||||
it != large_value_refs_.end();
|
|
||||||
++it) {
|
|
||||||
const LargeValueRef& ref = it->first;
|
|
||||||
const LargeReferencesSet& pointers = it->second;
|
|
||||||
for (LargeReferencesSet::const_iterator j = pointers.begin();
|
|
||||||
j != pointers.end();
|
|
||||||
++j) {
|
|
||||||
edit.AddLargeValueRef(ref, j->first, j->second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string record;
|
std::string record;
|
||||||
edit.EncodeTo(&record);
|
edit.EncodeTo(&record);
|
||||||
return log->AddRecord(record);
|
return log->AddRecord(record);
|
||||||
@ -651,7 +632,7 @@ Status VersionSet::SortLevel(Version* v, uint64_t level) {
|
|||||||
|
|
||||||
if (result.ok() && level > 0) {
|
if (result.ok() && level > 0) {
|
||||||
// There should be no overlap
|
// There should be no overlap
|
||||||
for (int i = 1; i < v->files_[level].size(); i++) {
|
for (size_t i = 1; i < v->files_[level].size(); i++) {
|
||||||
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
||||||
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
||||||
if (icmp_.Compare(prev_end, this_begin) >= 0) {
|
if (icmp_.Compare(prev_end, this_begin) >= 0) {
|
||||||
@ -676,7 +657,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
|||||||
uint64_t result = 0;
|
uint64_t result = 0;
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
|
if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
|
||||||
// Entire file is before "ikey", so just add the file size
|
// Entire file is before "ikey", so just add the file size
|
||||||
result += files[i]->file_size;
|
result += files[i]->file_size;
|
||||||
@ -701,83 +682,9 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add in large value files which are references from internal keys
|
|
||||||
// stored in the table files
|
|
||||||
//
|
|
||||||
// TODO(opt): this is O(# large values in db). If this becomes too slow,
|
|
||||||
// we could store an auxiliary data structure indexed by internal key
|
|
||||||
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
|
||||||
it != large_value_refs_.end();
|
|
||||||
++it) {
|
|
||||||
const LargeValueRef& lref = it->first;
|
|
||||||
for (LargeReferencesSet::const_iterator it2 = it->second.begin();
|
|
||||||
it2 != it->second.end();
|
|
||||||
++it2) {
|
|
||||||
if (icmp_.Compare(it2->second, ikey.Encode()) <= 0) {
|
|
||||||
// Internal key for large value is before our key of interest
|
|
||||||
result += lref.ValueSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VersionSet::RegisterLargeValueRef(const LargeValueRef& large_ref,
|
|
||||||
uint64_t fnum,
|
|
||||||
const InternalKey& internal_key) {
|
|
||||||
LargeReferencesSet* refs = &large_value_refs_[large_ref];
|
|
||||||
bool is_first = refs->empty();
|
|
||||||
refs->insert(make_pair(fnum, internal_key.Encode().ToString()));
|
|
||||||
return is_first;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VersionSet::CleanupLargeValueRefs(const std::set<uint64_t>& live_tables) {
|
|
||||||
for (LargeValueMap::iterator it = large_value_refs_.begin();
|
|
||||||
it != large_value_refs_.end();
|
|
||||||
) {
|
|
||||||
LargeReferencesSet* refs = &it->second;
|
|
||||||
for (LargeReferencesSet::iterator ref_it = refs->begin();
|
|
||||||
ref_it != refs->end();
|
|
||||||
) {
|
|
||||||
if (ref_it->first != log_number_ && // Not in log file
|
|
||||||
ref_it->first != prev_log_number_ && // Not in prev log
|
|
||||||
live_tables.count(ref_it->first) == 0) { // Not in a live table
|
|
||||||
// No longer live: erase
|
|
||||||
LargeReferencesSet::iterator to_erase = ref_it;
|
|
||||||
++ref_it;
|
|
||||||
refs->erase(to_erase);
|
|
||||||
} else {
|
|
||||||
// Still live: leave this reference alone
|
|
||||||
++ref_it;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (refs->empty()) {
|
|
||||||
// No longer any live references to this large value: remove from
|
|
||||||
// large_value_refs
|
|
||||||
Log(env_, options_->info_log, "large value is dead: '%s'",
|
|
||||||
LargeValueRefToFilenameString(it->first).c_str());
|
|
||||||
LargeValueMap::iterator to_erase = it;
|
|
||||||
++it;
|
|
||||||
large_value_refs_.erase(to_erase);
|
|
||||||
} else {
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VersionSet::LargeValueIsLive(const LargeValueRef& large_ref) {
|
|
||||||
LargeValueMap::iterator it = large_value_refs_.find(large_ref);
|
|
||||||
if (it == large_value_refs_.end()) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
assert(!it->second.empty());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void VersionSet::MaybeDeleteOldVersions() {
|
void VersionSet::MaybeDeleteOldVersions() {
|
||||||
// Note: it is important to delete versions in order since a newer
|
// Note: it is important to delete versions in order since a newer
|
||||||
// version with zero refs may be holding a pointer to a memtable
|
// version with zero refs may be holding a pointer to a memtable
|
||||||
@ -793,7 +700,7 @@ void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
|
|||||||
for (Version* v = oldest_; v != NULL; v = v->next_) {
|
for (Version* v = oldest_; v != NULL; v = v->next_) {
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
live->insert(files[i]->number);
|
live->insert(files[i]->number);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -810,7 +717,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
|||||||
int64_t result = 0;
|
int64_t result = 0;
|
||||||
std::vector<FileMetaData*> overlaps;
|
std::vector<FileMetaData*> overlaps;
|
||||||
for (int level = 0; level < config::kNumLevels - 1; level++) {
|
for (int level = 0; level < config::kNumLevels - 1; level++) {
|
||||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||||
const FileMetaData* f = current_->files_[level][i];
|
const FileMetaData* f = current_->files_[level][i];
|
||||||
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
|
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
|
||||||
const int64_t sum = TotalFileSize(overlaps);
|
const int64_t sum = TotalFileSize(overlaps);
|
||||||
@ -832,7 +739,7 @@ void VersionSet::GetOverlappingInputs(
|
|||||||
Slice user_begin = begin.user_key();
|
Slice user_begin = begin.user_key();
|
||||||
Slice user_end = end.user_key();
|
Slice user_end = end.user_key();
|
||||||
const Comparator* user_cmp = icmp_.user_comparator();
|
const Comparator* user_cmp = icmp_.user_comparator();
|
||||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||||
FileMetaData* f = current_->files_[level][i];
|
FileMetaData* f = current_->files_[level][i];
|
||||||
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
||||||
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
||||||
@ -852,7 +759,7 @@ void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
|
|||||||
assert(!inputs.empty());
|
assert(!inputs.empty());
|
||||||
smallest->Clear();
|
smallest->Clear();
|
||||||
largest->Clear();
|
largest->Clear();
|
||||||
for (int i = 0; i < inputs.size(); i++) {
|
for (size_t i = 0; i < inputs.size(); i++) {
|
||||||
FileMetaData* f = inputs[i];
|
FileMetaData* f = inputs[i];
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
*smallest = f->smallest;
|
*smallest = f->smallest;
|
||||||
@ -895,7 +802,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
|||||||
if (!c->inputs_[which].empty()) {
|
if (!c->inputs_[which].empty()) {
|
||||||
if (c->level() + which == 0) {
|
if (c->level() + which == 0) {
|
||||||
const std::vector<FileMetaData*>& files = c->inputs_[which];
|
const std::vector<FileMetaData*>& files = c->inputs_[which];
|
||||||
for (int i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
list[num++] = table_cache_->NewIterator(
|
list[num++] = table_cache_->NewIterator(
|
||||||
options, files[i]->number, files[i]->file_size);
|
options, files[i]->number, files[i]->file_size);
|
||||||
}
|
}
|
||||||
@ -927,7 +834,7 @@ Compaction* VersionSet::PickCompaction() {
|
|||||||
c->input_version_->Ref();
|
c->input_version_->Ref();
|
||||||
|
|
||||||
// Pick the first file that comes after compact_pointer_[level]
|
// Pick the first file that comes after compact_pointer_[level]
|
||||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||||
FileMetaData* f = current_->files_[level][i];
|
FileMetaData* f = current_->files_[level][i];
|
||||||
if (compact_pointer_[level].empty() ||
|
if (compact_pointer_[level].empty() ||
|
||||||
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
||||||
@ -1062,7 +969,7 @@ bool Compaction::IsTrivialMove() const {
|
|||||||
|
|
||||||
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
||||||
for (int which = 0; which < 2; which++) {
|
for (int which = 0; which < 2; which++) {
|
||||||
for (int i = 0; i < inputs_[which].size(); i++) {
|
for (size_t i = 0; i < inputs_[which].size(); i++) {
|
||||||
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
|
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -171,22 +171,6 @@ class VersionSet {
|
|||||||
// "key" as of version "v".
|
// "key" as of version "v".
|
||||||
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
||||||
|
|
||||||
// Register a reference to a large value with the specified
|
|
||||||
// large_ref from the specified file number. Returns "true" if this
|
|
||||||
// is the first recorded reference to the "large_ref" value in the
|
|
||||||
// database, and false otherwise.
|
|
||||||
bool RegisterLargeValueRef(const LargeValueRef& large_ref,
|
|
||||||
uint64_t filenum,
|
|
||||||
const InternalKey& internal_key);
|
|
||||||
|
|
||||||
// Cleanup the large value reference state by eliminating any
|
|
||||||
// references from files that are not includes in either "live_tables"
|
|
||||||
// or the current log.
|
|
||||||
void CleanupLargeValueRefs(const std::set<uint64_t>& live_tables);
|
|
||||||
|
|
||||||
// Returns true if a large value with the given reference is live.
|
|
||||||
bool LargeValueIsLive(const LargeValueRef& large_ref);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Builder;
|
class Builder;
|
||||||
|
|
||||||
@ -237,14 +221,6 @@ class VersionSet {
|
|||||||
Version* current_; // Pointer to the last (newest) list entry
|
Version* current_; // Pointer to the last (newest) list entry
|
||||||
Version* oldest_; // Pointer to the first (oldest) list entry
|
Version* oldest_; // Pointer to the first (oldest) list entry
|
||||||
|
|
||||||
// Map from large value reference to the set of <file numbers,internal_key>
|
|
||||||
// values containing references to the value. We keep the
|
|
||||||
// internal key as a std::string rather than as an InternalKey because
|
|
||||||
// we want to be able to easily use a set.
|
|
||||||
typedef std::set<std::pair<uint64_t, std::string> > LargeReferencesSet;
|
|
||||||
typedef std::map<LargeValueRef, LargeReferencesSet> LargeValueMap;
|
|
||||||
LargeValueMap large_value_refs_;
|
|
||||||
|
|
||||||
// Per-level key at which the next compaction at that level should start.
|
// Per-level key at which the next compaction at that level should start.
|
||||||
// Either an empty string, or a valid InternalKey.
|
// Either an empty string, or a valid InternalKey.
|
||||||
std::string compact_pointer_[config::kNumLevels];
|
std::string compact_pointer_[config::kNumLevels];
|
||||||
@ -313,7 +289,7 @@ class Compaction {
|
|||||||
// State used to check for number of of overlapping grandparent files
|
// State used to check for number of of overlapping grandparent files
|
||||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||||
std::vector<FileMetaData*> grandparents_;
|
std::vector<FileMetaData*> grandparents_;
|
||||||
int grandparent_index_; // Index in grandparent_starts_
|
size_t grandparent_index_; // Index in grandparent_starts_
|
||||||
bool seen_key_; // Some output key has been seen
|
bool seen_key_; // Some output key has been seen
|
||||||
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
||||||
// and grandparent files
|
// and grandparent files
|
||||||
@ -324,7 +300,7 @@ class Compaction {
|
|||||||
// is that we are positioned at one of the file ranges for each
|
// is that we are positioned at one of the file ranges for each
|
||||||
// higher level than the ones involved in this compaction (i.e. for
|
// higher level than the ones involved in this compaction (i.e. for
|
||||||
// all L >= level_ + 2).
|
// all L >= level_ + 2).
|
||||||
int level_ptrs_[config::kNumLevels];
|
size_t level_ptrs_[config::kNumLevels];
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
// data: record[count]
|
// data: record[count]
|
||||||
// record :=
|
// record :=
|
||||||
// kTypeValue varstring varstring |
|
// kTypeValue varstring varstring |
|
||||||
// kTypeLargeValueRef varstring varstring |
|
|
||||||
// kTypeDeletion varstring
|
// kTypeDeletion varstring
|
||||||
// varstring :=
|
// varstring :=
|
||||||
// len: varint32
|
// len: varint32
|
||||||
@ -58,16 +57,6 @@ void WriteBatch::Put(const Slice& key, const Slice& value) {
|
|||||||
PutLengthPrefixedSlice(&rep_, value);
|
PutLengthPrefixedSlice(&rep_, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WriteBatchInternal::PutLargeValueRef(WriteBatch* b,
|
|
||||||
const Slice& key,
|
|
||||||
const LargeValueRef& large_ref) {
|
|
||||||
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
|
|
||||||
b->rep_.push_back(static_cast<char>(kTypeLargeValueRef));
|
|
||||||
PutLengthPrefixedSlice(&b->rep_, key);
|
|
||||||
PutLengthPrefixedSlice(&b->rep_,
|
|
||||||
Slice(large_ref.data, sizeof(large_ref.data)));
|
|
||||||
}
|
|
||||||
|
|
||||||
void WriteBatch::Delete(const Slice& key) {
|
void WriteBatch::Delete(const Slice& key) {
|
||||||
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
||||||
rep_.push_back(static_cast<char>(kTypeDeletion));
|
rep_.push_back(static_cast<char>(kTypeDeletion));
|
||||||
@ -87,10 +76,6 @@ Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
|||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value());
|
memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value());
|
||||||
break;
|
break;
|
||||||
case kTypeLargeValueRef:
|
|
||||||
memtable->Add(it.sequence_number(), kTypeLargeValueRef,
|
|
||||||
it.key(), it.value());
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
found++;
|
found++;
|
||||||
}
|
}
|
||||||
@ -134,7 +119,6 @@ void WriteBatchInternal::Iterator::GetNextEntry() {
|
|||||||
input_.remove_prefix(1);
|
input_.remove_prefix(1);
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
case kTypeLargeValueRef:
|
|
||||||
if (GetLengthPrefixedSlice(&input_, &key_) &&
|
if (GetLengthPrefixedSlice(&input_, &key_) &&
|
||||||
GetLengthPrefixedSlice(&input_, &value_)) {
|
GetLengthPrefixedSlice(&input_, &value_)) {
|
||||||
op_ = static_cast<ValueType>(tag);
|
op_ = static_cast<ValueType>(tag);
|
||||||
|
@ -13,10 +13,6 @@ namespace leveldb {
|
|||||||
// WriteBatch that we don't want in the public WriteBatch interface.
|
// WriteBatch that we don't want in the public WriteBatch interface.
|
||||||
class WriteBatchInternal {
|
class WriteBatchInternal {
|
||||||
public:
|
public:
|
||||||
static void PutLargeValueRef(WriteBatch* batch,
|
|
||||||
const Slice& key,
|
|
||||||
const LargeValueRef& large_ref);
|
|
||||||
|
|
||||||
// Return the number of entries in the batch.
|
// Return the number of entries in the batch.
|
||||||
static int Count(const WriteBatch* batch);
|
static int Count(const WriteBatch* batch);
|
||||||
|
|
||||||
|
@ -29,13 +29,6 @@ static std::string PrintContents(WriteBatch* b) {
|
|||||||
state.append(iter->value().ToString());
|
state.append(iter->value().ToString());
|
||||||
state.append(")");
|
state.append(")");
|
||||||
break;
|
break;
|
||||||
case kTypeLargeValueRef:
|
|
||||||
state.append("PutRef(");
|
|
||||||
state.append(ikey.user_key.ToString());
|
|
||||||
state.append(", ");
|
|
||||||
state.append(iter->value().ToString());
|
|
||||||
state.append(")");
|
|
||||||
break;
|
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
state.append("Delete(");
|
state.append("Delete(");
|
||||||
state.append(ikey.user_key.ToString());
|
state.append(ikey.user_key.ToString());
|
||||||
@ -74,22 +67,6 @@ TEST(WriteBatchTest, Multiple) {
|
|||||||
PrintContents(&batch));
|
PrintContents(&batch));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(WriteBatchTest, PutIndirect) {
|
|
||||||
WriteBatch batch;
|
|
||||||
batch.Put(Slice("baz"), Slice("boo"));
|
|
||||||
LargeValueRef h;
|
|
||||||
for (int i = 0; i < LargeValueRef::ByteSize(); i++) {
|
|
||||||
h.data[i] = (i < 20) ? 'a' : 'b';
|
|
||||||
}
|
|
||||||
WriteBatchInternal::PutLargeValueRef(&batch, Slice("foo"), h);
|
|
||||||
WriteBatchInternal::SetSequence(&batch, 100);
|
|
||||||
ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
|
|
||||||
ASSERT_EQ(2, WriteBatchInternal::Count(&batch));
|
|
||||||
ASSERT_EQ("Put(baz, boo)@100"
|
|
||||||
"PutRef(foo, aaaaaaaaaaaaaaaaaaaabbbbbbbbb)@101",
|
|
||||||
PrintContents(&batch));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(WriteBatchTest, Corruption) {
|
TEST(WriteBatchTest, Corruption) {
|
||||||
WriteBatch batch;
|
WriteBatch batch;
|
||||||
batch.Put(Slice("foo"), Slice("bar"));
|
batch.Put(Slice("foo"), Slice("bar"));
|
||||||
|
@ -57,15 +57,6 @@ These merges have the effect of gradually migrating new updates from
|
|||||||
the young level to the largest level using only bulk reads and writes
|
the young level to the largest level using only bulk reads and writes
|
||||||
(i.e., minimizing expensive seeks).
|
(i.e., minimizing expensive seeks).
|
||||||
|
|
||||||
<h2>Large value files</h2>
|
|
||||||
<p>
|
|
||||||
Each large value (greater than 64KB by default) is placed in a large
|
|
||||||
value file (*.val) of its own. An entry is maintained in the log
|
|
||||||
and/or sorted tables that maps from the corresponding key to the
|
|
||||||
name of this large value file. The name of the large value file
|
|
||||||
is derived from a SHA1 hash of the value and its length so that
|
|
||||||
identical values share the same file.
|
|
||||||
<p>
|
|
||||||
<h2>Manifest</h2>
|
<h2>Manifest</h2>
|
||||||
<p>
|
<p>
|
||||||
A MANIFEST file lists the set of sorted tables that make up each
|
A MANIFEST file lists the set of sorted tables that make up each
|
||||||
@ -220,9 +211,7 @@ So maybe even the sharding is not necessary on modern filesystems?
|
|||||||
compaction and at the end of recovery. It finds the names of all
|
compaction and at the end of recovery. It finds the names of all
|
||||||
files in the database. It deletes all log files that are not the
|
files in the database. It deletes all log files that are not the
|
||||||
current log file. It deletes all table files that are not referenced
|
current log file. It deletes all table files that are not referenced
|
||||||
from some level and are not the output of an active compaction. It
|
from some level and are not the output of an active compaction.
|
||||||
deletes all large value files that are not referenced from any live
|
|
||||||
table or log file.
|
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -412,17 +412,6 @@ We might want to prefix <code>filename</code> keys with one letter (say '/') and
|
|||||||
over just the metadata do not force us to fetch and cache bulky file
|
over just the metadata do not force us to fetch and cache bulky file
|
||||||
contents.
|
contents.
|
||||||
<p>
|
<p>
|
||||||
<h2>Large Values</h2>
|
|
||||||
<p>
|
|
||||||
<code>leveldb</code> has special treatment of large values (by default, a value
|
|
||||||
of length greater than or equal to 64K is considered large, though a
|
|
||||||
field in Options can be used to adjust this threshold). Each such
|
|
||||||
large value is placed in a separate operating system file, and the
|
|
||||||
normal database blocks just contain pointers to such files.
|
|
||||||
<p>
|
|
||||||
Furthermore, if the same large value occurs multiple times in a single
|
|
||||||
database, it will be stored just once.
|
|
||||||
<p>
|
|
||||||
<h1>Checksums</h1>
|
<h1>Checksums</h1>
|
||||||
<p>
|
<p>
|
||||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
||||||
|
@ -86,16 +86,6 @@ struct Options {
|
|||||||
// Default: 1000
|
// Default: 1000
|
||||||
int max_open_files;
|
int max_open_files;
|
||||||
|
|
||||||
// Handle values larger than "large_value_threshold" bytes
|
|
||||||
// specially, by writing them into their own files (to avoid
|
|
||||||
// compaction overhead) and doing content-based elimination of
|
|
||||||
// duplicate values to save space.
|
|
||||||
//
|
|
||||||
// We recommend against changing this value.
|
|
||||||
//
|
|
||||||
// Default: 64K
|
|
||||||
size_t large_value_threshold;
|
|
||||||
|
|
||||||
// Control over blocks (user data is stored in a set of blocks, and
|
// Control over blocks (user data is stored in a set of blocks, and
|
||||||
// a block is the unit of reading from disk).
|
// a block is the unit of reading from disk).
|
||||||
|
|
||||||
@ -110,7 +100,7 @@ struct Options {
|
|||||||
// compression is enabled. This parameter can be changed dynamically.
|
// compression is enabled. This parameter can be changed dynamically.
|
||||||
//
|
//
|
||||||
// Default: 4K
|
// Default: 4K
|
||||||
int block_size;
|
size_t block_size;
|
||||||
|
|
||||||
// Number of keys between restart points for delta encoding of keys.
|
// Number of keys between restart points for delta encoding of keys.
|
||||||
// This parameter can be changed dynamically. Most clients should
|
// This parameter can be changed dynamically. Most clients should
|
||||||
|
12
leveldb.gyp
12
leveldb.gyp
@ -96,8 +96,6 @@
|
|||||||
'port/port_example.h',
|
'port/port_example.h',
|
||||||
'port/port_posix.cc',
|
'port/port_posix.cc',
|
||||||
'port/port_posix.h',
|
'port/port_posix.h',
|
||||||
'port/sha1_portable.cc',
|
|
||||||
'port/sha1_portable.h',
|
|
||||||
'table/block.cc',
|
'table/block.cc',
|
||||||
'table/block.h',
|
'table/block.h',
|
||||||
'table/block_builder.cc',
|
'table/block_builder.cc',
|
||||||
@ -267,16 +265,6 @@
|
|||||||
'db/log_test.cc',
|
'db/log_test.cc',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'target_name': 'leveldb_sha1_test',
|
|
||||||
'type': 'executable',
|
|
||||||
'dependencies': [
|
|
||||||
'leveldb_testutil',
|
|
||||||
],
|
|
||||||
'sources': [
|
|
||||||
'port/sha1_test.cc',
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'target_name': 'leveldb_skiplist_test',
|
'target_name': 'leveldb_skiplist_test',
|
||||||
'type': 'executable',
|
'type': 'executable',
|
||||||
|
@ -10,7 +10,6 @@
|
|||||||
#include <endian.h>
|
#include <endian.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <sha1.h>
|
|
||||||
#include <cstdatomic>
|
#include <cstdatomic>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
@ -134,13 +133,6 @@ inline bool Snappy_Uncompress(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
|
||||||
SHA1_CTX sha1_ctx;
|
|
||||||
SHA1Init(&sha1_ctx);
|
|
||||||
SHA1Update(&sha1_ctx, (const u_char*)data, len);
|
|
||||||
SHA1Final((u_char*)hash_array, &sha1_ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint64_t ThreadIdentifier() {
|
inline uint64_t ThreadIdentifier() {
|
||||||
pthread_t tid = pthread_self();
|
pthread_t tid = pthread_self();
|
||||||
uint64_t r = 0;
|
uint64_t r = 0;
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
#include "base/atomicops.h"
|
#include "base/atomicops.h"
|
||||||
#include "base/basictypes.h"
|
#include "base/basictypes.h"
|
||||||
#include "base/logging.h"
|
#include "base/logging.h"
|
||||||
#include "base/sha1.h"
|
|
||||||
#include "base/synchronization/condition_variable.h"
|
#include "base/synchronization/condition_variable.h"
|
||||||
#include "base/synchronization/lock.h"
|
#include "base/synchronization/lock.h"
|
||||||
|
|
||||||
@ -83,12 +82,6 @@ class AtomicPointer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
|
||||||
return ::base::SHA1HashBytes(reinterpret_cast<const unsigned char*>(data),
|
|
||||||
len,
|
|
||||||
reinterpret_cast<unsigned char*>(hash_array));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Snappy_Compress(const char* input, size_t input_length,
|
bool Snappy_Compress(const char* input, size_t input_length,
|
||||||
std::string* output);
|
std::string* output);
|
||||||
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
||||||
|
@ -89,11 +89,6 @@ class AtomicPointer {
|
|||||||
void NoBarrier_Store(void* v);
|
void NoBarrier_Store(void* v);
|
||||||
};
|
};
|
||||||
|
|
||||||
// ------------------ Checksumming -------------------
|
|
||||||
|
|
||||||
// Store a 160-bit hash of "data[0..len-1]" in "hash_array[0]..hash_array[19]"
|
|
||||||
extern void SHA1_Hash(const char* data, size_t len, char* hash_array);
|
|
||||||
|
|
||||||
// ------------------ Compression -------------------
|
// ------------------ Compression -------------------
|
||||||
|
|
||||||
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <cstdatomic>
|
#include <cstdatomic>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "port/sha1_portable.h"
|
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
namespace port {
|
namespace port {
|
||||||
@ -73,10 +72,6 @@ class AtomicPointer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
|
||||||
SHA1_Hash_Portable(data, len, hash_array);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(gabor): Implement actual compress
|
// TODO(gabor): Implement actual compress
|
||||||
inline bool Snappy_Compress(const char* input, size_t input_length,
|
inline bool Snappy_Compress(const char* input, size_t input_length,
|
||||||
std::string* output) {
|
std::string* output) {
|
||||||
|
@ -62,7 +62,9 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
|||||||
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (limit - p < (*non_shared + *value_length)) return NULL;
|
if (static_cast<uint32>(limit - p) < (*non_shared + *value_length)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
|
|||||||
|
|
||||||
Slice BlockBuilder::Finish() {
|
Slice BlockBuilder::Finish() {
|
||||||
// Append restart array
|
// Append restart array
|
||||||
for (int i = 0; i < restarts_.size(); i++) {
|
for (size_t i = 0; i < restarts_.size(); i++) {
|
||||||
PutFixed32(&buffer_, restarts_[i]);
|
PutFixed32(&buffer_, restarts_[i]);
|
||||||
}
|
}
|
||||||
PutFixed32(&buffer_, restarts_.size());
|
PutFixed32(&buffer_, restarts_.size());
|
||||||
|
@ -36,7 +36,7 @@ void Footer::EncodeTo(std::string* dst) const {
|
|||||||
metaindex_handle_.EncodeTo(dst);
|
metaindex_handle_.EncodeTo(dst);
|
||||||
index_handle_.EncodeTo(dst);
|
index_handle_.EncodeTo(dst);
|
||||||
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
|
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
|
||||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber));
|
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
|
||||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
|
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
|
||||||
assert(dst->size() == original_size + kEncodedLength);
|
assert(dst->size() == original_size + kEncodedLength);
|
||||||
}
|
}
|
||||||
@ -71,7 +71,7 @@ Status ReadBlock(RandomAccessFile* file,
|
|||||||
|
|
||||||
// Read the block contents as well as the type/crc footer.
|
// Read the block contents as well as the type/crc footer.
|
||||||
// See table_builder.cc for the code that built this structure.
|
// See table_builder.cc for the code that built this structure.
|
||||||
size_t n = handle.size();
|
size_t n = static_cast<size_t>(handle.size());
|
||||||
char* buf = new char[n + kBlockTrailerSize];
|
char* buf = new char[n + kBlockTrailerSize];
|
||||||
Slice contents;
|
Slice contents;
|
||||||
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
|
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
|
||||||
|
@ -16,7 +16,7 @@ Arena::Arena() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Arena::~Arena() {
|
Arena::~Arena() {
|
||||||
for (int i = 0; i < blocks_.size(); i++) {
|
for (size_t i = 0; i < blocks_.size(); i++) {
|
||||||
delete[] blocks_[i];
|
delete[] blocks_[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,7 @@ char* EncodeVarint64(char* dst, uint64_t v) {
|
|||||||
*(ptr++) = (v & (B-1)) | B;
|
*(ptr++) = (v & (B-1)) | B;
|
||||||
v >>= 7;
|
v >>= 7;
|
||||||
}
|
}
|
||||||
*(ptr++) = v;
|
*(ptr++) = static_cast<unsigned char>(v);
|
||||||
return reinterpret_cast<char*>(ptr);
|
return reinterpret_cast<char*>(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ class BytewiseComparatorImpl : public Comparator {
|
|||||||
virtual void FindShortSuccessor(std::string* key) const {
|
virtual void FindShortSuccessor(std::string* key) const {
|
||||||
// Find first character that can be incremented
|
// Find first character that can be incremented
|
||||||
size_t n = key->size();
|
size_t n = key->size();
|
||||||
for (int i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
const uint8_t byte = (*key)[i];
|
const uint8_t byte = (*key)[i];
|
||||||
if (byte != static_cast<uint8_t>(0xff)) {
|
if (byte != static_cast<uint8_t>(0xff)) {
|
||||||
(*key)[i] = byte + 1;
|
(*key)[i] = byte + 1;
|
||||||
|
@ -20,7 +20,7 @@ void AppendNumberTo(std::string* str, uint64_t num) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AppendEscapedStringTo(std::string* str, const Slice& value) {
|
void AppendEscapedStringTo(std::string* str, const Slice& value) {
|
||||||
for (int i = 0; i < value.size(); i++) {
|
for (size_t i = 0; i < value.size(); i++) {
|
||||||
char c = value[i];
|
char c = value[i];
|
||||||
if (c >= ' ' && c <= '~') {
|
if (c >= ' ' && c <= '~') {
|
||||||
str->push_back(c);
|
str->push_back(c);
|
||||||
|
@ -18,7 +18,6 @@ Options::Options()
|
|||||||
info_log(NULL),
|
info_log(NULL),
|
||||||
write_buffer_size(4<<20),
|
write_buffer_size(4<<20),
|
||||||
max_open_files(1000),
|
max_open_files(1000),
|
||||||
large_value_threshold(65536),
|
|
||||||
block_cache(NULL),
|
block_cache(NULL),
|
||||||
block_size(4096),
|
block_size(4096),
|
||||||
block_restart_interval(16),
|
block_restart_interval(16),
|
||||||
|
@ -29,7 +29,7 @@ class Random {
|
|||||||
uint64_t product = seed_ * A;
|
uint64_t product = seed_ * A;
|
||||||
|
|
||||||
// Compute (product % M) using the fact that ((x << 31) % M) == x.
|
// Compute (product % M) using the fact that ((x << 31) % M) == x.
|
||||||
seed_ = (product >> 31) + (product & M);
|
seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
|
||||||
// The first reduction may overflow by 1 bit, so we may need to
|
// The first reduction may overflow by 1 bit, so we may need to
|
||||||
// repeat. mod == M is not possible; using > allows the faster
|
// repeat. mod == M is not possible; using > allows the faster
|
||||||
// sign-bit-based test.
|
// sign-bit-based test.
|
||||||
|
Loading…
Reference in New Issue
Block a user