diff --git a/Makefile b/Makefile index ab5ed83..748d122 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ TESTS = \ env_test \ filename_test \ filter_block_test \ + issue178_test \ log_test \ memenv_test \ skiplist_test \ @@ -69,7 +70,7 @@ SHARED = $(SHARED1) else # Update db.h if you change these. SHARED_MAJOR = 1 -SHARED_MINOR = 10 +SHARED_MINOR = 11 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) SHARED2 = $(SHARED1).$(SHARED_MAJOR) SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) @@ -146,6 +147,9 @@ filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) +issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) + log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) diff --git a/db/db_impl.cc b/db/db_impl.cc index 5c30d6f..395d317 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -35,6 +35,8 @@ namespace leveldb { +const int kNumNonTableCacheFiles = 10; + // Information kept for every waiting writer struct DBImpl::Writer { Status status; @@ -92,9 +94,9 @@ Options SanitizeOptions(const std::string& dbname, Options result = src; result.comparator = icmp; result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL; - ClipToRange(&result.max_open_files, 20, 50000); - ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); - ClipToRange(&result.block_size, 1<<10, 4<<20); + ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); + ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); + ClipToRange(&result.block_size, 1<<10, 4<<20); if (result.info_log == NULL) { // Open a log file in the same directory as the db src.env->CreateDir(dbname); // In case it does not exist @@ -130,12 +132,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) log_(NULL), tmp_batch_(new WriteBatch), bg_compaction_scheduled_(false), - manual_compaction_(NULL) { + manual_compaction_(NULL), + consecutive_compaction_errors_(0) { mem_->Ref(); has_imm_.Release_Store(NULL); // Reserve ten files or so for other uses and give the rest to TableCache. - const int table_cache_size = options.max_open_files - 10; + const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles; table_cache_ = new TableCache(dbname_, &options_, table_cache_size); versions_ = new VersionSet(dbname_, &options_, table_cache_, @@ -619,6 +622,7 @@ void DBImpl::BackgroundCall() { Status s = BackgroundCompaction(); if (s.ok()) { // Success + consecutive_compaction_errors_ = 0; } else if (shutting_down_.Acquire_Load()) { // Error most likely due to shutdown; do not wait } else { @@ -630,7 +634,12 @@ void DBImpl::BackgroundCall() { Log(options_.info_log, "Waiting after background compaction error: %s", s.ToString().c_str()); mutex_.Unlock(); - env_->SleepForMicroseconds(1000000); + ++consecutive_compaction_errors_; + int seconds_to_sleep = 1; + for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) { + seconds_to_sleep *= 2; + } + env_->SleepForMicroseconds(seconds_to_sleep * 1000000); mutex_.Lock(); } } diff --git a/db/db_impl.h b/db/db_impl.h index bd29dd8..3c8d711 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -163,6 +163,7 @@ class DBImpl : public DB { // Have we encountered a background error in paranoid mode? Status bg_error_; + int consecutive_compaction_errors_; // Per level compaction stats. stats_[level] stores the stats for // compactions that produced data for the specified "level". diff --git a/db/db_test.cc b/db/db_test.cc index 2f51296..49aae04 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -33,8 +33,11 @@ class AtomicCounter { public: AtomicCounter() : count_(0) { } void Increment() { + IncrementBy(1); + } + void IncrementBy(int count) { MutexLock l(&mu_); - count_++; + count_ += count; } int Read() { MutexLock l(&mu_); @@ -45,6 +48,10 @@ class AtomicCounter { count_ = 0; } }; + +void DelayMilliseconds(int millis) { + Env::Default()->SleepForMicroseconds(millis * 1000); +} } // Special Env used to delay background operations @@ -69,6 +76,7 @@ class SpecialEnv : public EnvWrapper { AtomicCounter random_read_counter_; AtomicCounter sleep_counter_; + AtomicCounter sleep_time_counter_; explicit SpecialEnv(Env* base) : EnvWrapper(base) { delay_sstable_sync_.Release_Store(NULL); @@ -103,7 +111,7 @@ class SpecialEnv : public EnvWrapper { Status Flush() { return base_->Flush(); } Status Sync() { while (env_->delay_sstable_sync_.Acquire_Load() != NULL) { - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } return base_->Sync(); } @@ -174,8 +182,9 @@ class SpecialEnv : public EnvWrapper { virtual void SleepForMicroseconds(int micros) { sleep_counter_.Increment(); - target()->SleepForMicroseconds(micros); + sleep_time_counter_.IncrementBy(micros); } + }; class DBTest { @@ -625,7 +634,7 @@ TEST(DBTest, GetEncountersEmptyLevel) { } // Step 4: Wait for compaction to finish - env_->SleepForMicroseconds(1000000); + DelayMilliseconds(1000); ASSERT_EQ(NumTableFilesAtLevel(0), 0); } while (ChangeOptions()); @@ -1309,7 +1318,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_a) { Reopen(); Reopen(); ASSERT_EQ("(a->v)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish ASSERT_EQ("(a->v)", Contents()); } @@ -1325,7 +1334,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) { Put("",""); Reopen(); Put("",""); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish Reopen(); Put("d","dv"); Reopen(); @@ -1335,7 +1344,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) { Delete("b"); Reopen(); ASSERT_EQ("(->)(c->cv)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish + DelayMilliseconds(1000); // Wait for compaction to finish ASSERT_EQ("(->)(c->cv)", Contents()); } @@ -1520,6 +1529,30 @@ TEST(DBTest, NoSpace) { ASSERT_GE(env_->sleep_counter_.Read(), 5); } +TEST(DBTest, ExponentialBackoff) { + Options options = CurrentOptions(); + options.env = env_; + Reopen(&options); + + ASSERT_OK(Put("foo", "v1")); + ASSERT_EQ("v1", Get("foo")); + Compact("a", "z"); + env_->non_writable_.Release_Store(env_); // Force errors for new files + env_->sleep_counter_.Reset(); + env_->sleep_time_counter_.Reset(); + for (int i = 0; i < 5; i++) { + dbfull()->TEST_CompactRange(2, NULL, NULL); + } + env_->non_writable_.Release_Store(NULL); + + // Wait for compaction to finish + DelayMilliseconds(1000); + + ASSERT_GE(env_->sleep_counter_.Read(), 5); + ASSERT_LT(env_->sleep_counter_.Read(), 10); + ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6); +} + TEST(DBTest, NonWritableFileSystem) { Options options = CurrentOptions(); options.write_buffer_size = 1000; @@ -1533,7 +1566,7 @@ TEST(DBTest, NonWritableFileSystem) { fprintf(stderr, "iter %d; errors %d\n", i, errors); if (!Put("foo", big).ok()) { errors++; - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } } ASSERT_GT(errors, 0); @@ -1589,6 +1622,7 @@ TEST(DBTest, MissingSSTFile) { dbfull()->TEST_CompactMemTable(); ASSERT_EQ("bar", Get("foo")); + Close(); ASSERT_TRUE(DeleteAnSSTFile()); Options options = CurrentOptions(); options.paranoid_checks = true; @@ -1742,13 +1776,13 @@ TEST(DBTest, MultiThreaded) { } // Let them run for a while - env_->SleepForMicroseconds(kTestSeconds * 1000000); + DelayMilliseconds(kTestSeconds * 1000); // Stop the threads and wait for them to finish mt.stop.Release_Store(&mt); for (int id = 0; id < kNumThreads; id++) { while (mt.thread_done[id].Acquire_Load() == NULL) { - env_->SleepForMicroseconds(100000); + DelayMilliseconds(100); } } } while (ChangeOptions()); diff --git a/db/filename_test.cc b/db/filename_test.cc index 47353d6..5a26da4 100644 --- a/db/filename_test.cc +++ b/db/filename_test.cc @@ -70,7 +70,7 @@ TEST(FileNameTest, Parse) { for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { std::string f = errors[i]; ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; - }; + } } TEST(FileNameTest, Construction) { diff --git a/db/version_set.cc b/db/version_set.cc index 7d0a5de..4fd1dde 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1331,14 +1331,19 @@ Compaction* VersionSet::CompactRange( } // Avoid compacting too much in one shot in case the range is large. - const uint64_t limit = MaxFileSizeForLevel(level); - uint64_t total = 0; - for (size_t i = 0; i < inputs.size(); i++) { - uint64_t s = inputs[i]->file_size; - total += s; - if (total >= limit) { - inputs.resize(i + 1); - break; + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. + if (level > 0) { + const uint64_t limit = MaxFileSizeForLevel(level); + uint64_t total = 0; + for (size_t i = 0; i < inputs.size(); i++) { + uint64_t s = inputs[i]->file_size; + total += s; + if (total >= limit) { + inputs.resize(i + 1); + break; + } } } diff --git a/include/leveldb/db.h b/include/leveldb/db.h index a37c097..27ea76e 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -14,7 +14,7 @@ namespace leveldb { // Update Makefile if you change these static const int kMajorVersion = 1; -static const int kMinorVersion = 10; +static const int kMinorVersion = 11; struct Options; struct ReadOptions; diff --git a/issues/issue178_test.cc b/issues/issue178_test.cc new file mode 100644 index 0000000..c0a21f0 --- /dev/null +++ b/issues/issue178_test.cc @@ -0,0 +1,88 @@ +// Test for issue 178: a manual compaction causes deleted data to reappear. +#include +#include +#include + +#include "leveldb/db.h" +#include "leveldb/write_batch.h" +#include "util/testharness.h" + +namespace { + +const int kNumKeys = 1100000; + +std::string Key1(int i) { + char buf[100]; + snprintf(buf, sizeof(buf), "my_key_%d", i); + return buf; +} + +std::string Key2(int i) { + return Key1(i) + "_xxx"; +} + +class Issue178 { }; + +TEST(Issue178, Test) { + // Get rid of any state from an old run. + std::string dbpath = leveldb::test::TmpDir() + "/leveldb_cbug_test"; + DestroyDB(dbpath, leveldb::Options()); + + // Open database. Disable compression since it affects the creation + // of layers and the code below is trying to test against a very + // specific scenario. + leveldb::DB* db; + leveldb::Options db_options; + db_options.create_if_missing = true; + db_options.compression = leveldb::kNoCompression; + ASSERT_OK(leveldb::DB::Open(db_options, dbpath, &db)); + + // create first key range + leveldb::WriteBatch batch; + for (size_t i = 0; i < kNumKeys; i++) { + batch.Put(Key1(i), "value for range 1 key"); + } + ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); + + // create second key range + batch.Clear(); + for (size_t i = 0; i < kNumKeys; i++) { + batch.Put(Key2(i), "value for range 2 key"); + } + ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); + + // delete second key range + batch.Clear(); + for (size_t i = 0; i < kNumKeys; i++) { + batch.Delete(Key2(i)); + } + ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); + + // compact database + std::string start_key = Key1(0); + std::string end_key = Key1(kNumKeys - 1); + leveldb::Slice least(start_key.data(), start_key.size()); + leveldb::Slice greatest(end_key.data(), end_key.size()); + + // commenting out the line below causes the example to work correctly + db->CompactRange(&least, &greatest); + + // count the keys + leveldb::Iterator* iter = db->NewIterator(leveldb::ReadOptions()); + size_t num_keys = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + num_keys++; + } + delete iter; + ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys"; + + // close database + delete db; + DestroyDB(dbpath, leveldb::Options()); +} + +} // anonymous namespace + +int main(int argc, char** argv) { + return leveldb::test::RunAllTests(); +} diff --git a/util/coding_test.cc b/util/coding_test.cc index 2c52b17..fb5726e 100644 --- a/util/coding_test.cc +++ b/util/coding_test.cc @@ -109,7 +109,7 @@ TEST(Coding, Varint64) { values.push_back(power); values.push_back(power-1); values.push_back(power+1); - }; + } std::string s; for (int i = 0; i < values.size(); i++) { diff --git a/util/env_posix.cc b/util/env_posix.cc index bb58c1d..a3f197d 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -466,7 +466,7 @@ class PosixEnv : public Env { result = IOError(fname, errno); } return result; - }; + } virtual Status CreateDir(const std::string& name) { Status result; @@ -474,7 +474,7 @@ class PosixEnv : public Env { result = IOError(name, errno); } return result; - }; + } virtual Status DeleteDir(const std::string& name) { Status result; @@ -482,7 +482,7 @@ class PosixEnv : public Env { result = IOError(name, errno); } return result; - }; + } virtual Status GetFileSize(const std::string& fname, uint64_t* size) { Status s;