2011-03-19 06:37:00 +08:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/db_impl.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
2019-03-12 04:04:53 +08:00
|
|
|
#include <atomic>
|
2020-04-30 03:59:39 +08:00
|
|
|
#include <cstdint>
|
|
|
|
#include <cstdio>
|
2011-03-19 06:37:00 +08:00
|
|
|
#include <set>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2018-03-17 01:06:35 +08:00
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
#include "db/builder.h"
|
|
|
|
#include "db/db_iter.h"
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "db/filename.h"
|
|
|
|
#include "db/log_reader.h"
|
|
|
|
#include "db/log_writer.h"
|
|
|
|
#include "db/memtable.h"
|
|
|
|
#include "db/table_cache.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
2011-03-31 02:35:40 +08:00
|
|
|
#include "leveldb/db.h"
|
|
|
|
#include "leveldb/env.h"
|
|
|
|
#include "leveldb/status.h"
|
|
|
|
#include "leveldb/table.h"
|
|
|
|
#include "leveldb/table_builder.h"
|
2011-03-19 06:37:00 +08:00
|
|
|
#include "port/port.h"
|
|
|
|
#include "table/block.h"
|
|
|
|
#include "table/merger.h"
|
|
|
|
#include "table/two_level_iterator.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/mutexlock.h"
|
|
|
|
|
|
|
|
namespace leveldb {
|
|
|
|
|
2023-04-20 06:42:11 +08:00
|
|
|
// NOTE(review): file-scope using-directive pulls every name from the `path`
// namespace into `leveldb` for this translation unit.  Acceptable in a .cc
// file, but prefer qualified uses (e.g. path::PathFactory) to avoid
// accidental name collisions — confirm nothing in `path` shadows leveldb
// names before relying on this.
using namespace path;

// Number of open file handles reserved for uses other than the table cache
// (log file, MANIFEST, CURRENT, LOCK, info log, ...).  Used by
// TableCacheSize() below and as the lower-bound slack in SanitizeOptions().
const int kNumNonTableCacheFiles = 10;
|
|
|
|
|
2012-03-09 08:23:21 +08:00
|
|
|
// Information kept for every waiting writer
|
|
|
|
struct DBImpl::Writer {
|
2019-05-04 00:31:18 +08:00
|
|
|
explicit Writer(port::Mutex* mu)
|
|
|
|
: batch(nullptr), sync(false), done(false), cv(mu) {}
|
|
|
|
|
2012-03-09 08:23:21 +08:00
|
|
|
Status status;
|
|
|
|
WriteBatch* batch;
|
|
|
|
bool sync;
|
|
|
|
bool done;
|
|
|
|
port::CondVar cv;
|
|
|
|
};
|
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
// Per-compaction bookkeeping: the compaction being executed, the table
// files it has produced so far, and the output file currently open.
struct DBImpl::CompactionState {
  // One table file produced by the compaction.
  struct Output {
    uint64_t number;
    uint64_t file_size;
    InternalKey smallest, largest;
  };

  explicit CompactionState(Compaction* c) : compaction(c) {}

  // The output currently being generated (the most recently opened one).
  Output* current_output() { return &outputs.back(); }

  Compaction* const compaction;

  // Sequence numbers < smallest_snapshot are not significant since we
  // will never have to service a snapshot below smallest_snapshot.
  // Therefore if we have seen a sequence number S <= smallest_snapshot,
  // we can drop all entries for the same key with sequence numbers < S.
  SequenceNumber smallest_snapshot = 0;

  std::vector<Output> outputs;

  // State kept for the output file being generated.
  WritableFile* outfile = nullptr;
  TableBuilder* builder = nullptr;

  uint64_t total_bytes = 0;
};
|
|
|
|
|
|
|
|
// Fix user-supplied options to be reasonable
|
2018-03-17 01:06:35 +08:00
|
|
|
// Clamp *ptr into [minvalue, maxvalue], comparing in V's domain.
// Assumes minvalue <= maxvalue; *ptr is overwritten only when it is
// outside the range.
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  const V value = static_cast<V>(*ptr);
  if (value > maxvalue) {
    *ptr = maxvalue;
  } else if (value < minvalue) {
    *ptr = minvalue;
  }
}
|
|
|
|
// Produce a sanitized copy of `src` suitable for internal use:
//  - swaps in the internal (DB-aware) comparator and filter policy,
//  - clamps tunables into supported ranges,
//  - supplies a default info log and block cache when the caller gave none.
// The returned Options may therefore own resources (info_log, block_cache)
// that differ from `src`; DBImpl records that via its owns_* flags.
Options SanitizeOptions(const std::string& dbname,
                        const InternalKeyComparator* icmp,
                        const InternalFilterPolicy* ipolicy,
                        const Options& src) {
  Options result = src;
  result.comparator = icmp;
  // Only wrap the filter policy if the user actually supplied one.
  result.filter_policy = (src.filter_policy != nullptr) ? ipolicy : nullptr;
  ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000);
  ClipToRange(&result.write_buffer_size, 64 << 10, 1 << 30);
  ClipToRange(&result.max_file_size, 1 << 20, 1 << 30);
  ClipToRange(&result.block_size, 1 << 10, 4 << 20);
  if (result.info_log == nullptr) {
    // Open a log file in the same directory as the db
    src.env->CreateDir(dbname);  // In case it does not exist
    // Roll any existing info log aside so the new one starts fresh.
    src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
    Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log);
    if (!s.ok()) {
      // No place suitable for logging
      result.info_log = nullptr;
    }
  }
  if (result.block_cache == nullptr) {
    // Default block cache: 8 MB LRU.
    result.block_cache = NewLRUCache(8 << 20);
  }
  return result;
}
|
|
|
|
|
2018-03-17 01:06:35 +08:00
|
|
|
static int TableCacheSize(const Options& sanitized_options) {
|
|
|
|
// Reserve ten files or so for other uses and give the rest to TableCache.
|
|
|
|
return sanitized_options.max_open_files - kNumNonTableCacheFiles;
|
|
|
|
}
|
|
|
|
|
2013-08-22 02:12:47 +08:00
|
|
|
// Construct the DBImpl.  SanitizeOptions() may substitute its own
// info_log/block_cache for the caller's; the owns_* flags record whether
// the substitution happened so ~DBImpl() knows what to delete.  Note the
// member-initializer ordering: options_ is built from internal_comparator_
// and internal_filter_policy_, which must therefore be declared (and
// initialized) before it.
DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
    : env_(raw_options.env),
      internal_comparator_(raw_options.comparator),
      internal_filter_policy_(raw_options.filter_policy),
      options_(SanitizeOptions(dbname, &internal_comparator_,
                               &internal_filter_policy_, raw_options)),
      // True iff SanitizeOptions replaced the caller's logger/cache.
      owns_info_log_(options_.info_log != raw_options.info_log),
      owns_cache_(options_.block_cache != raw_options.block_cache),
      dbname_(dbname),
      // Heap-allocated; released in ~DBImpl().
      path_(PathFactory::Create(dbname)),
      table_cache_(new TableCache(dbname_, options_, TableCacheSize(options_))),
      db_lock_(nullptr),
      shutting_down_(false),
      background_work_finished_signal_(&mutex_),
      mem_(nullptr),
      imm_(nullptr),
      has_imm_(false),
      logfile_(nullptr),
      logfile_number_(0),
      log_(nullptr),
      seed_(0),
      tmp_batch_(new WriteBatch),
      background_compaction_scheduled_(false),
      manual_compaction_(nullptr),
      versions_(new VersionSet(dbname_, &options_, table_cache_,
                               &internal_comparator_)) {}
|
2011-03-19 06:37:00 +08:00
|
|
|
|
|
|
|
// Tear down the DB: stop background work, release the file lock, then
// delete owned state.  Teardown order matters: background compaction must
// be fully drained (under mutex_) before any member it might touch is
// destroyed, and versions_/memtables go before the caches they reference.
DBImpl::~DBImpl() {
  // Wait for background work to finish.
  mutex_.Lock();
  shutting_down_.store(true, std::memory_order_release);
  while (background_compaction_scheduled_) {
    background_work_finished_signal_.Wait();
  }
  mutex_.Unlock();

  if (db_lock_ != nullptr) {
    env_->UnlockFile(db_lock_);
  }

  delete versions_;
  if (mem_ != nullptr) mem_->Unref();
  if (imm_ != nullptr) imm_->Unref();
  delete tmp_batch_;
  delete log_;
  delete logfile_;
  delete table_cache_;

  // Only delete the logger/cache if SanitizeOptions() created them for us
  // (otherwise they are owned by the caller).
  if (owns_info_log_) {
    delete options_.info_log;
  }
  if (owns_cache_) {
    delete options_.block_cache;
  }
  // delete of a null pointer is a no-op, so no guard is needed here.
  delete path_;
}
|
|
|
|
|
|
|
|
// Create a brand-new database: write descriptor file MANIFEST-000001
// containing an empty VersionEdit, then install it via CURRENT.  The DB
// only becomes visible once CURRENT is written; on any earlier failure the
// partially-written manifest is removed so a retry starts clean.
Status DBImpl::NewDB() {
  VersionEdit new_db;
  new_db.SetComparatorName(user_comparator()->Name());
  new_db.SetLogNumber(0);
  // File number 1 is taken by the manifest created below, so the next
  // available number is 2.
  new_db.SetNextFile(2);
  new_db.SetLastSequence(0);

  const std::string manifest = DescriptorFileName(dbname_, 1);
  WritableFile* file;
  Status s = env_->NewWritableFile(manifest, &file);
  if (!s.ok()) {
    return s;
  }
  {
    // Scope the log::Writer so it is destroyed before `file` is deleted.
    log::Writer log(file);
    std::string record;
    new_db.EncodeTo(&record);
    s = log.AddRecord(record);
    if (s.ok()) {
      // Sync before Close so the record is durable before we point
      // CURRENT at this manifest.
      s = file->Sync();
    }
    if (s.ok()) {
      s = file->Close();
    }
  }
  delete file;
  if (s.ok()) {
    // Make "CURRENT" file that points to the new manifest file.
    s = SetCurrentFile(env_, dbname_, 1);
  } else {
    // Roll back: remove the unreferenced manifest.
    env_->RemoveFile(manifest);
  }
  return s;
}
|
|
|
|
|
|
|
|
// Downgrade a recoverable error to OK unless paranoid_checks is enabled.
// When *s is already OK, or paranoid_checks is set, the status is left
// untouched; otherwise the error is logged and cleared.
void DBImpl::MaybeIgnoreError(Status* s) const {
  if (!s->ok() && !options_.paranoid_checks) {
    Log(options_.info_log, "Ignoring error %s", s->ToString().c_str());
    *s = Status::OK();
  }
}
|
|
|
|
|
Add Env::Remove{File,Dir} which obsolete Env::Delete{File,Dir}.
The "DeleteFile" method name causes pain for Windows developers, because
<windows.h> #defines a DeleteFile macro to DeleteFileW or DeleteFileA.
Current code uses workarounds, like #undefining DeleteFile everywhere an
Env is declared, implemented, or used.
This CL removes the need for workarounds by renaming Env::DeleteFile to
Env::RemoveFile. For consistency, Env::DeleteDir is also renamed to
Env::RemoveDir. A few internal methods are also renamed for consistency.
Software that supports Windows is expected to migrate any Env
implementations and usage to Remove{File,Dir}, and never use the name
Env::Delete{File,Dir} in its code.
The renaming is done in a backwards-compatible way, at the risk of
making it slightly more difficult to build a new correct Env
implementation. The backwards compatibility is achieved using the
following hacks:
1) Env::Remove{File,Dir} methods are added, with a default
implementation that calls into Env::Delete{File,Dir}. This makes old
Env implementations compatible with code that calls into the updated
API.
2) The Env::Delete{File,Dir} methods are no longer pure virtuals.
Instead, they gain a default implementation that calls into
Env::Remove{File,Dir}. This makes updated Env implementations
compatible with code that calls into the old API.
The cost of this approach is that it's possible to write an Env without
overriding either Rename{File,Dir} or Delete{File,Dir}, without getting
a compiler warning. However, attempting to run the test suite will
immediately fail with an infinite call stack ending in
{Remove,Delete}{File,Dir}, making developers aware of the problem.
PiperOrigin-RevId: 288710907
2020-01-09 01:14:53 +08:00
|
|
|
// Garbage-collect files in the DB directory that are no longer referenced
// by any live version or pending output.  Must be called with mutex_ held;
// the actual file deletion happens with the mutex temporarily released.
void DBImpl::RemoveObsoleteFiles() {
  mutex_.AssertHeld();

  if (!bg_error_.ok()) {
    // After a background error, we don't know whether a new version may
    // or may not have been committed, so we cannot safely garbage collect.
    return;
  }

  // Make a set of all of the live files
  std::set<uint64_t> live = pending_outputs_;
  versions_->AddLiveFiles(&live);

  std::vector<std::string> filenames;
  env_->GetChildren(dbname_, &filenames);  // Ignoring errors on purpose
  uint64_t number;
  FileType type;
  std::vector<std::string> files_to_delete;
  for (std::string& filename : filenames) {
    if (ParseFileName(filename, &number, &type)) {
      bool keep = true;
      switch (type) {
        case kLogFile:
          // Keep the current log and (for old DBs) the previous log.
          keep = ((number >= versions_->LogNumber()) ||
                  (number == versions_->PrevLogNumber()));
          break;
        case kDescriptorFile:
          // Keep my manifest file, and any newer incarnations'
          // (in case there is a race that allows other incarnations)
          keep = (number >= versions_->ManifestFileNumber());
          break;
        case kTableFile:
          keep = (live.find(number) != live.end());
          break;
        case kTempFile:
          // Any temp files that are currently being written to must
          // be recorded in pending_outputs_, which is inserted into "live"
          keep = (live.find(number) != live.end());
          break;
        case kCurrentFile:
        case kDBLockFile:
        case kInfoLogFile:
          // Singleton metadata files are never garbage.
          keep = true;
          break;
      }

      if (!keep) {
        files_to_delete.push_back(std::move(filename));
        if (type == kTableFile) {
          // Drop any cached handle so the OS file can actually go away.
          table_cache_->Evict(number);
        }
        Log(options_.info_log, "Delete type=%d #%lld\n", static_cast<int>(type),
            static_cast<unsigned long long>(number));
      }
    }
  }

  // While deleting all files unblock other threads. All files being deleted
  // have unique names which will not collide with newly created files and
  // are therefore safe to delete while allowing other threads to proceed.
  mutex_.Unlock();
  for (const std::string& filename : files_to_delete) {
    env_->RemoveFile(dbname_ + "/" + filename);
  }
  mutex_.Lock();
}
|
|
|
|
|
2019-05-03 02:01:00 +08:00
|
|
|
// Recover persistent state at DB open: acquire the file lock, create the
// DB if needed (or fail per the create/error options), replay the manifest
// via versions_->Recover(), verify no expected files are missing, then
// replay all log files newer than the manifest.  *save_manifest is set if
// anything was recovered that must be written back to a new manifest.
// Requires mutex_ held.
Status DBImpl::Recover(VersionEdit* edit, bool* save_manifest) {
  mutex_.AssertHeld();

  // Ignore error from CreateDir since the creation of the DB is
  // committed only when the descriptor is created, and this directory
  // may already exist from a previous failed creation attempt.
  env_->CreateDir(path_->Name());
  assert(db_lock_ == nullptr);
  Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
  if (!s.ok()) {
    return s;
  }

  // CURRENT is the commit point of DB creation: absent means "no DB".
  if (!env_->FileExists(CurrentFileName(dbname_))) {
    if (options_.create_if_missing) {
      Log(options_.info_log, "Creating DB %s since it was missing.",
          dbname_.c_str());
      s = NewDB();
      if (!s.ok()) {
        return s;
      }
    } else {
      return Status::InvalidArgument(
          dbname_, "does not exist (create_if_missing is false)");
    }
  } else {
    if (options_.error_if_exists) {
      return Status::InvalidArgument(dbname_,
                                     "exists (error_if_exists is true)");
    }
  }

  s = versions_->Recover(save_manifest);
  if (!s.ok()) {
    return s;
  }
  SequenceNumber max_sequence(0);

  // Recover from all newer log files than the ones named in the
  // descriptor (new log files may have been added by the previous
  // incarnation without registering them in the descriptor).
  //
  // Note that PrevLogNumber() is no longer used, but we pay
  // attention to it in case we are recovering a database
  // produced by an older version of leveldb.
  const uint64_t min_log = versions_->LogNumber();
  const uint64_t prev_log = versions_->PrevLogNumber();
  std::vector<std::string> filenames;
  s = env_->GetChildren(dbname_, &filenames);
  if (!s.ok()) {
    return s;
  }
  // Start from every file the manifest says is live; anything we do not
  // find on disk is a corruption.
  std::set<uint64_t> expected;
  versions_->AddLiveFiles(&expected);
  uint64_t number;
  FileType type;
  std::vector<uint64_t> logs;
  for (size_t i = 0; i < filenames.size(); i++) {
    if (ParseFileName(filenames[i], &number, &type)) {
      expected.erase(number);
      if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
        logs.push_back(number);
    }
  }
  if (!expected.empty()) {
    char buf[50];
    std::snprintf(buf, sizeof(buf), "%d missing files; e.g.",
                  static_cast<int>(expected.size()));
    return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
  }

  // Recover in the order in which the logs were generated
  std::sort(logs.begin(), logs.end());
  for (size_t i = 0; i < logs.size(); i++) {
    s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit,
                       &max_sequence);
    if (!s.ok()) {
      return s;
    }

    // The previous incarnation may not have written any MANIFEST
    // records after allocating this log number. So we manually
    // update the file number allocation counter in VersionSet.
    versions_->MarkFileNumberUsed(logs[i]);
  }

  if (versions_->LastSequence() < max_sequence) {
    versions_->SetLastSequence(max_sequence);
  }

  return Status::OK();
}
|
|
|
|
|
2014-12-12 00:13:18 +08:00
|
|
|
// Replay a single write-ahead log into a fresh memtable, flushing to a
// level-0 table (via WriteLevel0Table) whenever the memtable fills.
// Updates *max_sequence with the highest sequence number seen.  If this is
// the last log, nothing was flushed, and reuse_logs is set, the log file
// and its memtable are adopted as the live log_/mem_ instead of being
// flushed.  *save_manifest is set whenever a table was written.
// Requires mutex_ held.
Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log,
                              bool* save_manifest, VersionEdit* edit,
                              SequenceNumber* max_sequence) {
  // Forwards log-corruption reports into the info log; also latches the
  // first error into `status` when paranoid_checks is on.
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    Logger* info_log;
    const char* fname;
    Status* status;  // null if options_.paranoid_checks==false
    void Corruption(size_t bytes, const Status& s) override {
      Log(info_log, "%s%s: dropping %d bytes; %s",
          (this->status == nullptr ? "(ignoring error) " : ""), fname,
          static_cast<int>(bytes), s.ToString().c_str());
      if (this->status != nullptr && this->status->ok()) *this->status = s;
    }
  };

  mutex_.AssertHeld();

  // Open the log file
  std::string fname = LogFileName(dbname_, log_number);
  SequentialFile* file;
  Status status = env_->NewSequentialFile(fname, &file);
  if (!status.ok()) {
    MaybeIgnoreError(&status);
    return status;
  }

  // Create the log reader.
  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = options_.info_log;
  reporter.fname = fname.c_str();
  reporter.status = (options_.paranoid_checks ? &status : nullptr);
  // We intentionally make log::Reader do checksumming even if
  // paranoid_checks==false so that corruptions cause entire commits
  // to be skipped instead of propagating bad information (like overly
  // large sequence numbers).
  log::Reader reader(file, &reporter, true /*checksum*/, 0 /*initial_offset*/);
  Log(options_.info_log, "Recovering log #%llu",
      (unsigned long long)log_number);

  // Read all the records and add to a memtable
  std::string scratch;
  Slice record;
  WriteBatch batch;
  int compactions = 0;
  MemTable* mem = nullptr;
  while (reader.ReadRecord(&record, &scratch) && status.ok()) {
    // A valid batch has at least a 12-byte header (8-byte sequence
    // number + 4-byte count); anything smaller is corrupt.
    if (record.size() < 12) {
      reporter.Corruption(record.size(),
                          Status::Corruption("log record too small"));
      continue;
    }
    WriteBatchInternal::SetContents(&batch, record);

    // Lazily create the memtable on first record.
    if (mem == nullptr) {
      mem = new MemTable(internal_comparator_);
      mem->Ref();
    }
    status = WriteBatchInternal::InsertInto(&batch, mem);
    MaybeIgnoreError(&status);
    if (!status.ok()) {
      break;
    }
    const SequenceNumber last_seq = WriteBatchInternal::Sequence(&batch) +
                                    WriteBatchInternal::Count(&batch) - 1;
    if (last_seq > *max_sequence) {
      *max_sequence = last_seq;
    }

    // Memtable is full: flush it to a level-0 table and start a new one.
    if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
      compactions++;
      *save_manifest = true;
      status = WriteLevel0Table(mem, edit, nullptr);
      mem->Unref();
      mem = nullptr;
      if (!status.ok()) {
        // Reflect errors immediately so that conditions like full
        // file-systems cause the DB::Open() to fail.
        break;
      }
    }
  }

  delete file;

  // See if we should keep reusing the last log file.
  if (status.ok() && options_.reuse_logs && last_log && compactions == 0) {
    assert(logfile_ == nullptr);
    assert(log_ == nullptr);
    assert(mem_ == nullptr);
    uint64_t lfile_size;
    if (env_->GetFileSize(fname, &lfile_size).ok() &&
        env_->NewAppendableFile(fname, &logfile_).ok()) {
      Log(options_.info_log, "Reusing old log %s \n", fname.c_str());
      // Continue appending where the old log left off.
      log_ = new log::Writer(logfile_, lfile_size);
      logfile_number_ = log_number;
      if (mem != nullptr) {
        mem_ = mem;
        mem = nullptr;
      } else {
        // mem can be nullptr if lognum exists but was empty.
        mem_ = new MemTable(internal_comparator_);
        mem_->Ref();
      }
    }
  }

  if (mem != nullptr) {
    // mem did not get reused; compact it.
    if (status.ok()) {
      *save_manifest = true;
      status = WriteLevel0Table(mem, edit, nullptr);
    }
    mem->Unref();
  }

  return status;
}
|
|
|
|
|
2011-06-22 10:36:45 +08:00
|
|
|
// Flush the contents of `mem` to a new table file and record it in *edit.
// If `base` is non-null the output may be pushed to a deeper level chosen
// by PickLevelForMemTableOutput.  The mutex is released while BuildTable
// does the actual I/O; pending_outputs_ protects the new file number from
// being garbage-collected during that window.  Requires mutex_ held.
Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
                                Version* base) {
  mutex_.AssertHeld();
  const uint64_t start_micros = env_->NowMicros();
  FileMetaData meta;
  meta.number = versions_->NewFileNumber();
  // Guard the file number against RemoveObsoleteFiles while unlocked.
  pending_outputs_.insert(meta.number);
  Iterator* iter = mem->NewIterator();
  Log(options_.info_log, "Level-0 table #%llu: started",
      (unsigned long long)meta.number);

  Status s;
  {
    // Drop the mutex around the (slow) table build; memtable iteration
    // and file writes do not need it.
    mutex_.Unlock();
    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
    mutex_.Lock();
  }

  Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s",
      (unsigned long long)meta.number, (unsigned long long)meta.file_size,
      s.ToString().c_str());
  delete iter;
  pending_outputs_.erase(meta.number);

  // Note that if file_size is zero, the file has been deleted and
  // should not be added to the manifest.
  int level = 0;
  if (s.ok() && meta.file_size > 0) {
    const Slice min_user_key = meta.smallest.user_key();
    const Slice max_user_key = meta.largest.user_key();
    if (base != nullptr) {
      level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
    }
    edit->AddFile(level, meta.number, meta.file_size, meta.smallest,
                  meta.largest);
  }

  // Charge the work to whichever level received the output.
  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros;
  stats.bytes_written = meta.file_size;
  stats_[level].Add(stats);
  return s;
}
|
|
|
|
|
2013-12-11 02:36:31 +08:00
|
|
|
// Convert the immutable memtable (imm_) into a new on-disk table, apply the
// resulting version edit, and release the memtable on success.
// REQUIRES: mutex_ is held; imm_ is non-null.
void DBImpl::CompactMemTable() {
  mutex_.AssertHeld();
  assert(imm_ != nullptr);

  // Save the contents of the memtable as a new Table
  VersionEdit edit;
  // Pin the current version so WriteLevel0Table can pick an output level
  // against a stable view of the LSM tree.
  Version* base = versions_->current();
  base->Ref();
  Status s = WriteLevel0Table(imm_, &edit, base);
  base->Unref();

  // If the DB started shutting down while the table was being written,
  // abandon the result rather than installing it.
  if (s.ok() && shutting_down_.load(std::memory_order_acquire)) {
    s = Status::IOError("Deleting DB during memtable compaction");
  }

  // Replace immutable memtable with the generated Table
  if (s.ok()) {
    edit.SetPrevLogNumber(0);
    edit.SetLogNumber(logfile_number_);  // Earlier logs no longer needed
    s = versions_->LogAndApply(&edit, &mutex_);
  }

  if (s.ok()) {
    // Commit to the new state
    imm_->Unref();
    imm_ = nullptr;
    // release pairs with the relaxed/acquire loads done by readers of
    // has_imm_ outside the mutex.
    has_imm_.store(false, std::memory_order_release);
    RemoveObsoleteFiles();
  } else {
    // Keep imm_ around; a later background pass will retry the compaction.
    RecordBackgroundError(s);
  }
}
|
|
|
|
|
2011-10-06 07:30:28 +08:00
|
|
|
void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
|
|
|
|
int max_level_with_files = 1;
|
|
|
|
{
|
|
|
|
MutexLock l(&mutex_);
|
|
|
|
Version* base = versions_->current();
|
|
|
|
for (int level = 1; level < config::kNumLevels; level++) {
|
|
|
|
if (base->OverlapInLevel(level, begin, end)) {
|
|
|
|
max_level_with_files = level;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-03-17 01:06:35 +08:00
|
|
|
TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
|
2011-10-06 07:30:28 +08:00
|
|
|
for (int level = 0; level < max_level_with_files; level++) {
|
|
|
|
TEST_CompactRange(level, begin, end);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-17 01:06:35 +08:00
|
|
|
// Test hook: synchronously compact the given level over [*begin, *end].
// Registers a ManualCompaction request and waits until the background
// thread finishes it (or the DB shuts down / hits a background error).
void DBImpl::TEST_CompactRange(int level, const Slice* begin,
                               const Slice* end) {
  assert(level >= 0);
  assert(level + 1 < config::kNumLevels);

  // Storage for the internal-key forms of the user-key bounds; must outlive
  // the wait loop below since `manual` points into it.
  InternalKey begin_storage, end_storage;

  ManualCompaction manual;
  manual.level = level;
  manual.done = false;
  if (begin == nullptr) {
    manual.begin = nullptr;
  } else {
    // kMaxSequenceNumber + kValueTypeForSeek makes this the smallest
    // internal key for the user key *begin.
    begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
    manual.begin = &begin_storage;
  }
  if (end == nullptr) {
    manual.end = nullptr;
  } else {
    // Sequence 0 / type 0 makes this the largest internal key for *end.
    end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
    manual.end = &end_storage;
  }

  MutexLock l(&mutex_);
  while (!manual.done && !shutting_down_.load(std::memory_order_acquire) &&
         bg_error_.ok()) {
    if (manual_compaction_ == nullptr) {  // Idle
      manual_compaction_ = &manual;
      MaybeScheduleCompaction();
    } else {  // Running either my compaction or another compaction.
      background_work_finished_signal_.Wait();
    }
  }
  if (manual_compaction_ == &manual) {
    // Cancel my manual compaction since we aborted early for some reason.
    manual_compaction_ = nullptr;
  }
}
|
|
|
|
|
|
|
|
// Test hook: flush the current memtable and block until the resulting
// level-0 compaction finishes or a background error is recorded.
Status DBImpl::TEST_CompactMemTable() {
  // nullptr batch means just wait for earlier writes to be done
  Status status = Write(WriteOptions(), nullptr);
  if (!status.ok()) {
    return status;
  }
  // Wait until the compaction completes
  MutexLock lock(&mutex_);
  while (imm_ != nullptr && bg_error_.ok()) {
    background_work_finished_signal_.Wait();
  }
  if (imm_ != nullptr) {
    // The wait loop exited because of a background error; report it.
    status = bg_error_;
  }
  return status;
}
|
|
|
|
|
2013-12-11 02:36:31 +08:00
|
|
|
void DBImpl::RecordBackgroundError(const Status& s) {
|
|
|
|
mutex_.AssertHeld();
|
|
|
|
if (bg_error_.ok()) {
|
|
|
|
bg_error_ = s;
|
2018-03-17 01:06:35 +08:00
|
|
|
background_work_finished_signal_.SignalAll();
|
2013-12-11 02:36:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
void DBImpl::MaybeScheduleCompaction() {
|
|
|
|
mutex_.AssertHeld();
|
2018-03-17 01:06:35 +08:00
|
|
|
if (background_compaction_scheduled_) {
|
2011-03-19 06:37:00 +08:00
|
|
|
// Already scheduled
|
2019-03-12 04:04:53 +08:00
|
|
|
} else if (shutting_down_.load(std::memory_order_acquire)) {
|
2011-03-19 06:37:00 +08:00
|
|
|
// DB is being deleted; no more background compactions
|
2013-12-11 02:36:31 +08:00
|
|
|
} else if (!bg_error_.ok()) {
|
|
|
|
// Already got an error; no more changes
|
2019-05-03 02:01:00 +08:00
|
|
|
} else if (imm_ == nullptr && manual_compaction_ == nullptr &&
|
2011-06-07 22:40:26 +08:00
|
|
|
!versions_->NeedsCompaction()) {
|
2011-03-19 06:37:00 +08:00
|
|
|
// No work to be done
|
|
|
|
} else {
|
2018-03-17 01:06:35 +08:00
|
|
|
background_compaction_scheduled_ = true;
|
2011-03-19 06:37:00 +08:00
|
|
|
env_->Schedule(&DBImpl::BGWork, this);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Trampoline passed to Env::Schedule; `db` is the DBImpl that queued the
// work item.
void DBImpl::BGWork(void* db) {
  static_cast<DBImpl*>(db)->BackgroundCall();
}
|
|
|
|
|
|
|
|
void DBImpl::BackgroundCall() {
|
|
|
|
MutexLock l(&mutex_);
|
2018-03-17 01:06:35 +08:00
|
|
|
assert(background_compaction_scheduled_);
|
2019-03-12 04:04:53 +08:00
|
|
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
2013-12-11 02:36:31 +08:00
|
|
|
// No more background work when shutting down.
|
|
|
|
} else if (!bg_error_.ok()) {
|
|
|
|
// No more background work after a background error.
|
|
|
|
} else {
|
|
|
|
BackgroundCompaction();
|
2011-03-19 06:37:00 +08:00
|
|
|
}
|
2012-05-31 00:45:46 +08:00
|
|
|
|
2018-03-17 01:06:35 +08:00
|
|
|
background_compaction_scheduled_ = false;
|
2011-03-19 06:37:00 +08:00
|
|
|
|
|
|
|
// Previous compaction may have produced too many files in a level,
|
|
|
|
// so reschedule another compaction if needed.
|
|
|
|
MaybeScheduleCompaction();
|
2018-03-17 01:06:35 +08:00
|
|
|
background_work_finished_signal_.SignalAll();
|
2011-03-19 06:37:00 +08:00
|
|
|
}
|
|
|
|
|
2013-12-11 02:36:31 +08:00
|
|
|
// Perform one unit of background compaction work: flush imm_ if present,
// otherwise run a manual or automatically-picked table compaction.
// REQUIRES: mutex_ is held.
void DBImpl::BackgroundCompaction() {
  mutex_.AssertHeld();

  // The immutable memtable always has priority over table compactions.
  if (imm_ != nullptr) {
    CompactMemTable();
    return;
  }

  Compaction* c;
  bool is_manual = (manual_compaction_ != nullptr);
  InternalKey manual_end;
  if (is_manual) {
    ManualCompaction* m = manual_compaction_;
    c = versions_->CompactRange(m->level, m->begin, m->end);
    m->done = (c == nullptr);
    if (c != nullptr) {
      // Largest key that this pass will cover; used below to resume the
      // manual compaction where this pass stops.
      manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
    }
    Log(options_.info_log,
        "Manual compaction at level-%d from %s .. %s; will stop at %s\n",
        m->level, (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
        (m->end ? m->end->DebugString().c_str() : "(end)"),
        (m->done ? "(end)" : manual_end.DebugString().c_str()));
  } else {
    c = versions_->PickCompaction();
  }

  Status status;
  if (c == nullptr) {
    // Nothing to do
  } else if (!is_manual && c->IsTrivialMove()) {
    // Move file to next level: no merging needed, just re-point the file
    // in the version edit.
    assert(c->num_input_files(0) == 1);
    FileMetaData* f = c->input(0, 0);
    c->edit()->RemoveFile(c->level(), f->number);
    c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->smallest,
                       f->largest);
    status = versions_->LogAndApply(c->edit(), &mutex_);
    if (!status.ok()) {
      RecordBackgroundError(status);
    }
    VersionSet::LevelSummaryStorage tmp;
    Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
        static_cast<unsigned long long>(f->number), c->level() + 1,
        static_cast<unsigned long long>(f->file_size),
        status.ToString().c_str(), versions_->LevelSummary(&tmp));
  } else {
    // Full merging compaction.
    CompactionState* compact = new CompactionState(c);
    status = DoCompactionWork(compact);
    if (!status.ok()) {
      RecordBackgroundError(status);
    }
    CleanupCompaction(compact);
    c->ReleaseInputs();
    RemoveObsoleteFiles();
  }
  delete c;

  if (status.ok()) {
    // Done
  } else if (shutting_down_.load(std::memory_order_acquire)) {
    // Ignore compaction errors found during shutting down
  } else {
    Log(options_.info_log, "Compaction error: %s", status.ToString().c_str());
  }

  if (is_manual) {
    ManualCompaction* m = manual_compaction_;
    if (!status.ok()) {
      // Mark it done to keep the waiting caller from retrying forever.
      m->done = true;
    }
    if (!m->done) {
      // We only compacted part of the requested range.  Update *m
      // to the range that is left to be compacted.
      m->tmp_storage = manual_end;
      m->begin = &m->tmp_storage;
    }
    manual_compaction_ = nullptr;
  }
}
|
|
|
|
|
|
|
|
// Release all state owned by a finished or aborted compaction and make its
// output file numbers garbage-collectible again.
// REQUIRES: mutex_ is held.
void DBImpl::CleanupCompaction(CompactionState* compact) {
  mutex_.AssertHeld();
  TableBuilder* builder = compact->builder;
  if (builder != nullptr) {
    // May happen if we get a shutdown call in the middle of compaction
    builder->Abandon();
    delete builder;
  } else {
    assert(compact->outfile == nullptr);
  }
  delete compact->outfile;
  for (const CompactionState::Output& out : compact->outputs) {
    pending_outputs_.erase(out.number);
  }
  delete compact;
}
|
|
|
|
|
|
|
|
// Allocate a new output table for the compaction: reserve a file number
// (protected from deletion via pending_outputs_) and open a TableBuilder
// on a fresh writable file.
Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
  assert(compact != nullptr);
  assert(compact->builder == nullptr);
  uint64_t file_number;
  {
    // Called with the mutex released (from DoCompactionWork), so briefly
    // re-acquire it to touch versions_ and pending_outputs_.
    mutex_.Lock();
    file_number = versions_->NewFileNumber();
    pending_outputs_.insert(file_number);
    CompactionState::Output out;
    out.number = file_number;
    out.smallest.Clear();
    out.largest.Clear();
    compact->outputs.push_back(out);
    mutex_.Unlock();
  }

  // Make the output file
  std::string fname = TableFileName(dbname_, file_number);
  Status s = env_->NewWritableFile(fname, &compact->outfile);
  if (s.ok()) {
    compact->builder = new TableBuilder(options_, compact->outfile);
  }
  return s;
}
|
|
|
|
|
|
|
|
// Finalize the current compaction output table: finish (or abandon) the
// builder, sync and close the file, and sanity-check that the generated
// table is readable.  `input` supplies any pending iterator error.
Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
                                          Iterator* input) {
  assert(compact != nullptr);
  assert(compact->outfile != nullptr);
  assert(compact->builder != nullptr);

  const uint64_t output_number = compact->current_output()->number;
  assert(output_number != 0);

  // Check for iterator errors
  Status s = input->status();
  const uint64_t current_entries = compact->builder->NumEntries();
  if (s.ok()) {
    s = compact->builder->Finish();
  } else {
    // Input was already broken; don't write a table from partial data.
    compact->builder->Abandon();
  }
  const uint64_t current_bytes = compact->builder->FileSize();
  compact->current_output()->file_size = current_bytes;
  compact->total_bytes += current_bytes;
  delete compact->builder;
  compact->builder = nullptr;

  // Finish and check for file errors
  if (s.ok()) {
    s = compact->outfile->Sync();
  }
  if (s.ok()) {
    s = compact->outfile->Close();
  }
  delete compact->outfile;
  compact->outfile = nullptr;

  if (s.ok() && current_entries > 0) {
    // Verify that the table is usable
    Iterator* iter =
        table_cache_->NewIterator(ReadOptions(), output_number, current_bytes);
    s = iter->status();
    delete iter;
    if (s.ok()) {
      Log(options_.info_log, "Generated table #%llu@%d: %lld keys, %lld bytes",
          (unsigned long long)output_number, compact->compaction->level(),
          (unsigned long long)current_entries,
          (unsigned long long)current_bytes);
    }
  }
  return s;
}
|
|
|
|
|
|
|
|
// Record a finished compaction in the version set: delete the inputs and
// add every output table at level + 1, then log-and-apply the edit.
// REQUIRES: mutex_ is held.
Status DBImpl::InstallCompactionResults(CompactionState* compact) {
  mutex_.AssertHeld();
  Compaction* const c = compact->compaction;
  Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
      c->num_input_files(0), c->level(), c->num_input_files(1),
      c->level() + 1, static_cast<long long>(compact->total_bytes));

  // Add compaction outputs
  c->AddInputDeletions(c->edit());
  const int level = c->level();
  for (const CompactionState::Output& out : compact->outputs) {
    c->edit()->AddFile(level + 1, out.number, out.file_size, out.smallest,
                       out.largest);
  }
  return versions_->LogAndApply(c->edit(), &mutex_);
}
|
|
|
|
|
|
|
|
// Merge the compaction's input tables into a set of output tables, dropping
// overwritten values and obsolete deletion markers that no live snapshot can
// observe.  Entered and exited with mutex_ held; the mutex is released for
// the bulk of the merge loop.
Status DBImpl::DoCompactionWork(CompactionState* compact) {
  const uint64_t start_micros = env_->NowMicros();
  int64_t imm_micros = 0;  // Micros spent doing imm_ compactions

  Log(options_.info_log, "Compacting %d@%d + %d@%d files",
      compact->compaction->num_input_files(0), compact->compaction->level(),
      compact->compaction->num_input_files(1),
      compact->compaction->level() + 1);

  assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
  assert(compact->builder == nullptr);
  assert(compact->outfile == nullptr);
  // Entries at or below smallest_snapshot may be dropped if shadowed; with
  // no snapshots, everything up to the latest sequence is fair game.
  if (snapshots_.empty()) {
    compact->smallest_snapshot = versions_->LastSequence();
  } else {
    compact->smallest_snapshot = snapshots_.oldest()->sequence_number();
  }

  Iterator* input = versions_->MakeInputIterator(compact->compaction);

  // Release mutex while we're actually doing the compaction work
  mutex_.Unlock();

  input->SeekToFirst();
  Status status;
  ParsedInternalKey ikey;
  std::string current_user_key;
  bool has_current_user_key = false;
  last_sequence_for_key = kMaxSequenceNumber;
  while (input->Valid() && !shutting_down_.load(std::memory_order_acquire)) {
    // Prioritize immutable compaction work
    if (has_imm_.load(std::memory_order_relaxed)) {
      const uint64_t imm_start = env_->NowMicros();
      mutex_.Lock();
      if (imm_ != nullptr) {
        CompactMemTable();
        // Wake up MakeRoomForWrite() if necessary.
        background_work_finished_signal_.SignalAll();
      }
      mutex_.Unlock();
      // Time spent on the memtable flush is accounted separately so it does
      // not inflate this compaction's stats.
      imm_micros += (env_->NowMicros() - imm_start);
    }

    Slice key = input->key();
    // Cut the output early if extending it would create too much overlap
    // with grandparent files.
    if (compact->compaction->ShouldStopBefore(key) &&
        compact->builder != nullptr) {
      status = FinishCompactionOutputFile(compact, input);
      if (!status.ok()) {
        break;
      }
    }

    // Handle key/value, add to state, etc.
    bool drop = false;
    if (!ParseInternalKey(key, &ikey)) {
      // Do not hide error keys
      current_user_key.clear();
      has_current_user_key = false;
      last_sequence_for_key = kMaxSequenceNumber;
    } else {
      if (!has_current_user_key ||
          user_comparator()->Compare(ikey.user_key, Slice(current_user_key)) !=
              0) {
        // First occurrence of this user key
        current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
        has_current_user_key = true;
        last_sequence_for_key = kMaxSequenceNumber;
      }

      if (last_sequence_for_key <= compact->smallest_snapshot) {
        // Hidden by an newer entry for same user key
        drop = true;  // (A)
      } else if (ikey.type == kTypeDeletion &&
                 ikey.sequence <= compact->smallest_snapshot &&
                 compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
        // For this user key:
        // (1) there is no data in higher levels
        // (2) data in lower levels will have larger sequence numbers
        // (3) data in layers that are being compacted here and have
        //     smaller sequence numbers will be dropped in the next
        //     few iterations of this loop (by rule (A) above).
        // Therefore this deletion marker is obsolete and can be dropped.
        drop = true;
      }

      last_sequence_for_key = ikey.sequence;
    }
#if 0
    Log(options_.info_log,
        "  Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
        "%d smallest_snapshot: %d",
        ikey.user_key.ToString().c_str(),
        (int)ikey.sequence, ikey.type, kTypeValue, drop,
        compact->compaction->IsBaseLevelForKey(ikey.user_key),
        (int)last_sequence_for_key, (int)compact->smallest_snapshot);
#endif

    if (!drop) {
      // Open output file if necessary
      if (compact->builder == nullptr) {
        status = OpenCompactionOutputFile(compact);
        if (!status.ok()) {
          break;
        }
      }
      if (compact->builder->NumEntries() == 0) {
        compact->current_output()->smallest.DecodeFrom(key);
      }
      compact->current_output()->largest.DecodeFrom(key);
      compact->builder->Add(key, input->value());

      // Close output file if it is big enough
      if (compact->builder->FileSize() >=
          compact->compaction->MaxOutputFileSize()) {
        status = FinishCompactionOutputFile(compact, input);
        if (!status.ok()) {
          break;
        }
      }
    }

    input->Next();
  }

  if (status.ok() && shutting_down_.load(std::memory_order_acquire)) {
    status = Status::IOError("Deleting DB during compaction");
  }
  if (status.ok() && compact->builder != nullptr) {
    status = FinishCompactionOutputFile(compact, input);
  }
  if (status.ok()) {
    status = input->status();
  }
  delete input;
  input = nullptr;

  // Aggregate I/O statistics for the level being written to.
  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros - imm_micros;
  for (int which = 0; which < 2; which++) {
    for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
      stats.bytes_read += compact->compaction->input(which, i)->file_size;
    }
  }
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    stats.bytes_written += compact->outputs[i].file_size;
  }

  // Re-acquire the mutex before touching shared state and installing the
  // results (caller expects the mutex held on return).
  mutex_.Lock();
  stats_[compact->compaction->level() + 1].Add(stats);

  if (status.ok()) {
    status = InstallCompactionResults(compact);
  }
  if (!status.ok()) {
    RecordBackgroundError(status);
  }
  VersionSet::LevelSummaryStorage tmp;
  Log(options_.info_log, "compacted to: %s", versions_->LevelSummary(&tmp));
  return status;
}
|
|
|
|
|
2011-05-28 08:53:58 +08:00
|
|
|
namespace {

// State handed to CleanupIteratorState so that an internal iterator keeps
// the memtables and version it reads from alive for its whole lifetime.
struct IterState {
  port::Mutex* const mu;
  Version* const version GUARDED_BY(mu);
  MemTable* const mem GUARDED_BY(mu);
  MemTable* const imm GUARDED_BY(mu);  // may be nullptr

  IterState(port::Mutex* mutex, MemTable* mem, MemTable* imm, Version* version)
      : mu(mutex), version(version), mem(mem), imm(imm) {}
};

// Cleanup callback registered on internal iterators: drops the references
// taken in DBImpl::NewInternalIterator.  arg2 is unused.
static void CleanupIteratorState(void* arg1, void* arg2) {
  IterState* state = reinterpret_cast<IterState*>(arg1);
  state->mu->Lock();
  state->mem->Unref();
  if (state->imm != nullptr) state->imm->Unref();
  state->version->Unref();
  state->mu->Unlock();
  delete state;
}

}  // anonymous namespace
|
2011-05-28 08:53:58 +08:00
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
// Build a merging iterator over the memtable, the immutable memtable (if
// any), and all current sorted tables.  References to those structures are
// held until the iterator is destroyed (see CleanupIteratorState).  Also
// reports the latest sequence number and a per-iterator sampling seed.
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
                                      SequenceNumber* latest_snapshot,
                                      uint32_t* seed) {
  mutex_.Lock();
  *latest_snapshot = versions_->LastSequence();

  // Collect together all needed child iterators
  std::vector<Iterator*> list;
  list.push_back(mem_->NewIterator());
  mem_->Ref();
  if (imm_ != nullptr) {
    list.push_back(imm_->NewIterator());
    imm_->Ref();
  }
  versions_->current()->AddIterators(options, &list);
  Iterator* internal_iter =
      NewMergingIterator(&internal_comparator_, &list[0], list.size());
  versions_->current()->Ref();

  // The cleanup callback releases the references taken above when the
  // iterator dies.
  IterState* cleanup = new IterState(&mutex_, mem_, imm_, versions_->current());
  internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);

  *seed = ++seed_;
  mutex_.Unlock();
  return internal_iter;
}
|
|
|
|
|
|
|
|
// Test hook: internal iterator without caring about the snapshot or seed.
Iterator* DBImpl::TEST_NewInternalIterator() {
  SequenceNumber unused_snapshot;
  uint32_t unused_seed;
  return NewInternalIterator(ReadOptions(), &unused_snapshot, &unused_seed);
}
|
|
|
|
|
2011-03-23 07:24:02 +08:00
|
|
|
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
2011-03-23 02:32:49 +08:00
|
|
|
MutexLock l(&mutex_);
|
|
|
|
return versions_->MaxNextLevelOverlappingBytes();
|
|
|
|
}
|
|
|
|
|
2019-05-03 02:01:00 +08:00
|
|
|
// Look up "key" and store the result in *value.  Reads from the active
// memtable, then the immutable memtable (if any), then the current version's
// tables, all as of the requested (or latest) snapshot.
Status DBImpl::Get(const ReadOptions& options, const Slice& key,
                   std::string* value) {
  Status s;
  MutexLock l(&mutex_);
  SequenceNumber snapshot;
  if (options.snapshot != nullptr) {
    snapshot =
        static_cast<const SnapshotImpl*>(options.snapshot)->sequence_number();
  } else {
    snapshot = versions_->LastSequence();
  }

  // Pin everything we will read from so the lookup can run unlocked.
  MemTable* mem = mem_;
  MemTable* imm = imm_;
  Version* current = versions_->current();
  mem->Ref();
  if (imm != nullptr) imm->Ref();
  current->Ref();

  bool have_stat_update = false;
  Version::GetStats stats;

  // Unlock while reading from files and memtables
  {
    mutex_.Unlock();
    // First look in the memtable, then in the immutable memtable (if any).
    LookupKey lkey(key, snapshot);
    if (mem->Get(lkey, value, &s)) {
      // Done
    } else if (imm != nullptr && imm->Get(lkey, value, &s)) {
      // Done
    } else {
      s = current->Get(options, lkey, value, &stats);
      have_stat_update = true;
    }
    mutex_.Lock();
  }

  // A table lookup may flag a file as worth compacting (seek-triggered
  // compaction); schedule it while we still hold the mutex.
  if (have_stat_update && current->UpdateStats(stats)) {
    MaybeScheduleCompaction();
  }
  mem->Unref();
  if (imm != nullptr) imm->Unref();
  current->Unref();
  return s;
}
|
|
|
|
|
|
|
|
// Return a user-facing iterator over the DB contents. The internal iterator
// merges memtables and sstables; NewDBIterator wraps it to hide deletions,
// duplicate sequence numbers, and entries newer than the chosen snapshot.
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
  SequenceNumber latest_snapshot;
  uint32_t seed;
  Iterator* internal_iter =
      NewInternalIterator(options, &latest_snapshot, &seed);
  // Iterate at the caller's snapshot if one was supplied; otherwise at the
  // latest sequence number observed when the internal iterator was built.
  SequenceNumber sequence = latest_snapshot;
  if (options.snapshot != nullptr) {
    sequence =
        static_cast<const SnapshotImpl*>(options.snapshot)->sequence_number();
  }
  return NewDBIterator(this, user_comparator(), internal_iter, sequence, seed);
}
|
|
|
|
|
|
|
|
// Feed a sampled read of "key" into the current version's statistics; if the
// sample pushes some file over its seek allowance, kick off a compaction.
void DBImpl::RecordReadSample(Slice key) {
  MutexLock lock(&mutex_);
  Version* const current = versions_->current();
  if (current->RecordReadSample(key)) {
    MaybeScheduleCompaction();
  }
}
|
|
|
|
|
|
|
|
// Create a snapshot pinned at the current last sequence number. The snapshot
// stays registered in snapshots_ until ReleaseSnapshot() is called.
const Snapshot* DBImpl::GetSnapshot() {
  MutexLock lock(&mutex_);
  const SequenceNumber last_sequence = versions_->LastSequence();
  return snapshots_.New(last_sequence);
}
|
|
|
|
|
2018-05-01 06:11:03 +08:00
|
|
|
void DBImpl::ReleaseSnapshot(const Snapshot* snapshot) {
|
2011-03-19 06:37:00 +08:00
|
|
|
MutexLock l(&mutex_);
|
2018-05-01 06:11:03 +08:00
|
|
|
snapshots_.Delete(static_cast<const SnapshotImpl*>(snapshot));
|
2011-03-19 06:37:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Convenience methods
|
|
|
|
// Convenience methods
// Forwards to the default DB::Put, which wraps the key/value in a WriteBatch
// and routes it through DBImpl::Write.
Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
  return DB::Put(o, key, val);
}
|
|
|
|
|
|
|
|
// Forwards to the default DB::Delete, which wraps the deletion in a
// WriteBatch and routes it through DBImpl::Write.
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
  return DB::Delete(options, key);
}
|
|
|
|
|
2019-05-03 02:01:00 +08:00
|
|
|
// Apply "updates" to the DB. Implements group commit: concurrent writers
// queue up, and the writer at the head of the queue logs and applies a merged
// batch on behalf of its followers. A nullptr "updates" is a special request
// (used by compaction tests) to just force a memtable switch.
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
  Writer w(&mutex_);
  w.batch = updates;
  w.sync = options.sync;
  w.done = false;

  MutexLock l(&mutex_);
  writers_.push_back(&w);
  // Wait until either (a) a leader writer committed our batch for us
  // (w.done), or (b) we reached the front of the queue and become the leader.
  while (!w.done && &w != writers_.front()) {
    w.cv.Wait();
  }
  if (w.done) {
    // Another writer grouped and committed our batch; its status applies.
    return w.status;
  }

  // May temporarily unlock and wait.
  Status status = MakeRoomForWrite(updates == nullptr);
  uint64_t last_sequence = versions_->LastSequence();
  Writer* last_writer = &w;
  if (status.ok() && updates != nullptr) {  // nullptr batch is for compactions
    // Merge our batch with as many queued followers as allowed; last_writer
    // is advanced to the last writer included in the group.
    WriteBatch* write_batch = BuildBatchGroup(&last_writer);
    WriteBatchInternal::SetSequence(write_batch, last_sequence + 1);
    last_sequence += WriteBatchInternal::Count(write_batch);

    // Add to log and apply to memtable.  We can release the lock
    // during this phase since &w is currently responsible for logging
    // and protects against concurrent loggers and concurrent writes
    // into mem_.
    {
      mutex_.Unlock();
      status = log_->AddRecord(WriteBatchInternal::Contents(write_batch));
      bool sync_error = false;
      if (status.ok() && options.sync) {
        status = logfile_->Sync();
        if (!status.ok()) {
          sync_error = true;
        }
      }
      if (status.ok()) {
        status = WriteBatchInternal::InsertInto(write_batch, mem_);
      }
      mutex_.Lock();
      if (sync_error) {
        // The state of the log file is indeterminate: the log record we
        // just added may or may not show up when the DB is re-opened.
        // So we force the DB into a mode where all future writes fail.
        RecordBackgroundError(status);
      }
    }
    // BuildBatchGroup may have merged into the shared scratch batch; reset it
    // for the next group.
    if (write_batch == tmp_batch_) tmp_batch_->Clear();

    versions_->SetLastSequence(last_sequence);
  }

  // Pop every writer whose batch we committed (ourselves included), marking
  // the others done and waking them so they can return.
  while (true) {
    Writer* ready = writers_.front();
    writers_.pop_front();
    if (ready != &w) {
      ready->status = status;
      ready->done = true;
      ready->cv.Signal();
    }
    if (ready == last_writer) break;
  }

  // Notify new head of write queue
  if (!writers_.empty()) {
    writers_.front()->cv.Signal();
  }

  return status;
}
|
|
|
|
|
2012-03-09 08:23:21 +08:00
|
|
|
// REQUIRES: Writer list must be non-empty
|
2018-04-11 07:18:06 +08:00
|
|
|
// REQUIRES: First writer must have a non-null batch
|
2012-03-09 08:23:21 +08:00
|
|
|
// Collapse the front of the writer queue into one batch for group commit.
// On return, *last_writer points at the last queued writer whose batch was
// folded into the result. Returns either the front writer's own batch (when
// nothing was merged) or the shared tmp_batch_ scratch batch.
// REQUIRES: mutex_ is held, writers_ is non-empty, and the front writer has
// a non-null batch.
WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
  mutex_.AssertHeld();
  assert(!writers_.empty());
  Writer* const front = writers_.front();
  WriteBatch* group = front->batch;
  assert(group != nullptr);

  size_t total_bytes = WriteBatchInternal::ByteSize(front->batch);

  // Allow the group to grow up to a maximum size, but if the
  // original write is small, limit the growth so we do not slow
  // down the small write too much.
  const size_t byte_limit =
      (total_bytes <= (128 << 10)) ? total_bytes + (128 << 10) : (1 << 20);

  *last_writer = front;
  std::deque<Writer*>::iterator it = writers_.begin();
  for (++it; it != writers_.end(); ++it) {  // start just past "front"
    Writer* const candidate = *it;
    if (candidate->sync && !front->sync) {
      // Do not include a sync write into a batch handled by a non-sync write.
      break;
    }

    if (candidate->batch != nullptr) {
      total_bytes += WriteBatchInternal::ByteSize(candidate->batch);
      if (total_bytes > byte_limit) {
        // Do not make batch too big
        break;
      }

      // Append candidate's updates to the group.
      if (group == front->batch) {
        // Switch to temporary batch instead of disturbing caller's batch
        group = tmp_batch_;
        assert(WriteBatchInternal::Count(group) == 0);
        WriteBatchInternal::Append(group, front->batch);
      }
      WriteBatchInternal::Append(group, candidate->batch);
    }
    *last_writer = candidate;
  }
  return group;
}
|
|
|
|
|
2011-09-02 03:08:02 +08:00
|
|
|
// REQUIRES: mutex_ is held
|
2012-03-09 08:23:21 +08:00
|
|
|
// REQUIRES: this thread is currently at the front of the writer queue
|
2011-04-13 03:38:58 +08:00
|
|
|
// Ensure there is room in the active memtable for an incoming write, possibly
// throttling the writer, waiting for compactions, or rotating to a fresh
// log file + memtable. If "force" is true, always switch memtables even if
// the current one has room.
// REQUIRES: mutex_ is held and this thread is at the front of writers_.
Status DBImpl::MakeRoomForWrite(bool force) {
  mutex_.AssertHeld();
  assert(!writers_.empty());
  bool allow_delay = !force;
  Status s;
  while (true) {
    if (!bg_error_.ok()) {
      // Yield previous error
      s = bg_error_;
      break;
    } else if (allow_delay && versions_->NumLevelFiles(0) >=
                                  config::kL0_SlowdownWritesTrigger) {
      // We are getting close to hitting a hard limit on the number of
      // L0 files.  Rather than delaying a single write by several
      // seconds when we hit the hard limit, start delaying each
      // individual write by 1ms to reduce latency variance.  Also,
      // this delay hands over some CPU to the compaction thread in
      // case it is sharing the same core as the writer.
      mutex_.Unlock();
      env_->SleepForMicroseconds(1000);
      allow_delay = false;  // Do not delay a single write more than once
      mutex_.Lock();
    } else if (!force &&
               (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) {
      // There is room in current memtable
      break;
    } else if (imm_ != nullptr) {
      // We have filled up the current memtable, but the previous
      // one is still being compacted, so we wait.
      Log(options_.info_log, "Current memtable full; waiting...\n");
      background_work_finished_signal_.Wait();
    } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
      // There are too many level-0 files.
      Log(options_.info_log, "Too many L0 files; waiting...\n");
      background_work_finished_signal_.Wait();
    } else {
      // Attempt to switch to a new memtable and trigger compaction of old
      assert(versions_->PrevLogNumber() == 0);
      uint64_t new_log_number = versions_->NewFileNumber();
      WritableFile* lfile = nullptr;
      s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile);
      if (!s.ok()) {
        // Avoid chewing through file number space in a tight loop.
        versions_->ReuseFileNumber(new_log_number);
        break;
      }

      delete log_;

      s = logfile_->Close();
      if (!s.ok()) {
        // We may have lost some data written to the previous log file.
        // Switch to the new log file anyway, but record as a background
        // error so we do not attempt any more writes.
        //
        // We could perhaps attempt to save the memtable corresponding
        // to log file and suppress the error if that works, but that
        // would add more complexity in a critical code path.
        RecordBackgroundError(s);
      }
      delete logfile_;

      // Install the new log and demote the full memtable to imm_; the
      // release store publishes imm_ to the background compaction thread.
      logfile_ = lfile;
      logfile_number_ = new_log_number;
      log_ = new log::Writer(lfile);
      imm_ = mem_;
      has_imm_.store(true, std::memory_order_release);
      mem_ = new MemTable(internal_comparator_);
      mem_->Ref();
      force = false;  // Do not force another compaction if have room
      MaybeScheduleCompaction();
    }
  }
  return s;
}
|
|
|
|
|
|
|
|
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
|
|
|
value->clear();
|
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
MutexLock l(&mutex_);
|
|
|
|
Slice in = property;
|
|
|
|
Slice prefix("leveldb.");
|
|
|
|
if (!in.starts_with(prefix)) return false;
|
|
|
|
in.remove_prefix(prefix.size());
|
|
|
|
|
|
|
|
if (in.starts_with("num-files-at-level")) {
|
|
|
|
in.remove_prefix(strlen("num-files-at-level"));
|
|
|
|
uint64_t level;
|
|
|
|
bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
|
2011-08-06 08:19:37 +08:00
|
|
|
if (!ok || level >= config::kNumLevels) {
|
2011-03-19 06:37:00 +08:00
|
|
|
return false;
|
|
|
|
} else {
|
2011-04-13 03:38:58 +08:00
|
|
|
char buf[100];
|
2020-04-30 06:31:41 +08:00
|
|
|
std::snprintf(buf, sizeof(buf), "%d",
|
|
|
|
versions_->NumLevelFiles(static_cast<int>(level)));
|
2011-04-13 03:38:58 +08:00
|
|
|
*value = buf;
|
2011-03-19 06:37:00 +08:00
|
|
|
return true;
|
|
|
|
}
|
2011-04-13 03:38:58 +08:00
|
|
|
} else if (in == "stats") {
|
|
|
|
char buf[200];
|
2020-04-30 06:31:41 +08:00
|
|
|
std::snprintf(buf, sizeof(buf),
|
|
|
|
" Compactions\n"
|
|
|
|
"Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n"
|
|
|
|
"--------------------------------------------------\n");
|
2011-04-13 03:38:58 +08:00
|
|
|
value->append(buf);
|
|
|
|
for (int level = 0; level < config::kNumLevels; level++) {
|
|
|
|
int files = versions_->NumLevelFiles(level);
|
|
|
|
if (stats_[level].micros > 0 || files > 0) {
|
2020-04-30 06:31:41 +08:00
|
|
|
std::snprintf(buf, sizeof(buf), "%3d %8d %8.0f %9.0f %8.0f %9.0f\n",
|
|
|
|
level, files, versions_->NumLevelBytes(level) / 1048576.0,
|
|
|
|
stats_[level].micros / 1e6,
|
|
|
|
stats_[level].bytes_read / 1048576.0,
|
|
|
|
stats_[level].bytes_written / 1048576.0);
|
2011-04-13 03:38:58 +08:00
|
|
|
value->append(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2011-10-06 07:30:28 +08:00
|
|
|
} else if (in == "sstables") {
|
|
|
|
*value = versions_->current()->DebugString();
|
|
|
|
return true;
|
2015-09-30 02:52:21 +08:00
|
|
|
} else if (in == "approximate-memory-usage") {
|
|
|
|
size_t total_usage = options_.block_cache->TotalCharge();
|
|
|
|
if (mem_) {
|
|
|
|
total_usage += mem_->ApproximateMemoryUsage();
|
|
|
|
}
|
|
|
|
if (imm_) {
|
|
|
|
total_usage += imm_->ApproximateMemoryUsage();
|
|
|
|
}
|
|
|
|
char buf[50];
|
2020-04-30 06:31:41 +08:00
|
|
|
std::snprintf(buf, sizeof(buf), "%llu",
|
|
|
|
static_cast<unsigned long long>(total_usage));
|
2015-09-30 02:52:21 +08:00
|
|
|
value->append(buf);
|
|
|
|
return true;
|
2011-03-19 06:37:00 +08:00
|
|
|
}
|
2011-04-13 03:38:58 +08:00
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-05-03 02:01:00 +08:00
|
|
|
void DBImpl::GetApproximateSizes(const Range* range, int n, uint64_t* sizes) {
|
2011-03-19 06:37:00 +08:00
|
|
|
// TODO(opt): better implementation
|
2019-05-16 04:13:13 +08:00
|
|
|
MutexLock l(&mutex_);
|
|
|
|
Version* v = versions_->current();
|
|
|
|
v->Ref();
|
2011-03-19 06:37:00 +08:00
|
|
|
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
// Convert user_key into a corresponding internal key.
|
|
|
|
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
|
|
|
|
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
|
|
|
|
uint64_t start = versions_->ApproximateOffsetOf(v, k1);
|
|
|
|
uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
|
|
|
|
sizes[i] = (limit >= start ? limit - start : 0);
|
|
|
|
}
|
|
|
|
|
2019-05-16 04:13:13 +08:00
|
|
|
v->Unref();
|
2011-03-19 06:37:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Default implementations of convenience methods that subclasses of DB
|
|
|
|
// can call if they wish
|
|
|
|
// Default implementations of convenience methods that subclasses of DB
// can call if they wish
// Wraps a single Put in a one-entry WriteBatch and routes it through Write().
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
  WriteBatch updates;
  updates.Put(key, value);
  return Write(opt, &updates);
}
|
|
|
|
|
|
|
|
// Wraps a single Delete in a one-entry WriteBatch and routes it through
// Write().
Status DB::Delete(const WriteOptions& opt, const Slice& key) {
  WriteBatch updates;
  updates.Delete(key);
  return Write(opt, &updates);
}
|
|
|
|
|
2019-05-05 08:40:21 +08:00
|
|
|
// Out-of-line destructor definition anchors DB's vtable in this file.
DB::~DB() = default;
|
2011-03-19 06:37:00 +08:00
|
|
|
|
2019-05-03 02:01:00 +08:00
|
|
|
// Open (and if needed create/recover) the database at "dbname". On success,
// *dbptr owns the new DBImpl; on failure *dbptr is nullptr and the partially
// constructed object is destroyed.
Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
  *dbptr = nullptr;

  DBImpl* impl = new DBImpl(options, dbname);
  impl->mutex_.Lock();
  VersionEdit edit;
  // Recover handles create_if_missing, error_if_exists
  bool save_manifest = false;
  Status s = impl->Recover(&edit, &save_manifest);
  // Recovery leaves mem_ null when the most recent log was fully converted;
  // in that case start a fresh log file and memtable.
  if (s.ok() && impl->mem_ == nullptr) {
    // Create new log and a corresponding memtable.
    uint64_t new_log_number = impl->versions_->NewFileNumber();
    WritableFile* lfile;
    s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
                                     &lfile);
    if (s.ok()) {
      edit.SetLogNumber(new_log_number);
      impl->logfile_ = lfile;
      impl->logfile_number_ = new_log_number;
      impl->log_ = new log::Writer(lfile);
      impl->mem_ = new MemTable(impl->internal_comparator_);
      impl->mem_->Ref();
    }
  }
  // Persist the recovery edits (and the new log number) to the manifest.
  if (s.ok() && save_manifest) {
    edit.SetPrevLogNumber(0);  // No older logs needed after recovery.
    edit.SetLogNumber(impl->logfile_number_);
    s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
  }
  if (s.ok()) {
    // Clean up files left behind by the previous incarnation and kick off
    // any compaction work recovery made necessary.
    impl->RemoveObsoleteFiles();
    impl->MaybeScheduleCompaction();
  }
  impl->mutex_.Unlock();
  if (s.ok()) {
    assert(impl->mem_ != nullptr);
    *dbptr = impl;
  } else {
    delete impl;
  }
  return s;
}
|
|
|
|
|
2019-05-05 08:40:21 +08:00
|
|
|
// Out-of-line destructor definition anchors Snapshot's vtable in this file.
Snapshot::~Snapshot() = default;
|
2011-05-21 10:17:43 +08:00
|
|
|
|
2011-03-19 06:37:00 +08:00
|
|
|
// Destroy the database at "dbname": remove every file whose name parses as a
// leveldb file, then the lock file, then the (hopefully empty) directory.
// A missing directory is treated as success.
Status DestroyDB(const std::string& dbname, const Options& options) {
  Env* const env = options.env;
  std::vector<std::string> filenames;
  Status result = env->GetChildren(dbname, &filenames);
  if (!result.ok()) {
    // Ignore error in case directory does not exist
    return Status::OK();
  }

  FileLock* lock;
  const std::string lockname = LockFileName(dbname);
  result = env->LockFile(lockname, &lock);
  if (result.ok()) {
    uint64_t number;
    FileType type;
    for (const std::string& filename : filenames) {
      if (ParseFileName(filename, &number, &type) &&
          type != kDBLockFile) {  // Lock file will be deleted at end
        const Status remove_status = env->RemoveFile(dbname + "/" + filename);
        // Report the first removal failure, but keep deleting the rest.
        if (result.ok() && !remove_status.ok()) {
          result = remove_status;
        }
      }
    }
    env->UnlockFile(lock);  // Ignore error since state is already gone
    env->RemoveFile(lockname);
    env->RemoveDir(dbname);  // Ignore error in case dir contains other files
  }
  return result;
}
|
|
|
|
|
2011-11-01 01:22:06 +08:00
|
|
|
} // namespace leveldb
|