leveldb: Report missing CURRENT manifest file as database corruption.

BTRFS reorders rename and write operations, so it is possible that a filesystem crash and recovery results in a situation where the file pointed to by CURRENT does not exist. DB::Open currently reports an I/O error in this case. Reporting database corruption is a better hint to the caller, which can attempt to recover the database or erase it and start over.

This issue is not merely theoretical. It was reported as having showed up in the wild at https://github.com/google/leveldb/issues/195 and at https://crbug.com/738961. Also, asides from the BTRFS case described above, incorrect data in CURRENT seems like a possible corruption case that should be handled gracefully.

The Env API changes here can be considered backwards compatible, because an implementation that returns Status::IOError instead of Status::NotFound will still get the same functionality as before.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=161432630
This commit is contained in:
costan 2017-07-10 13:32:58 -07:00 committed by Victor Costan
parent 69e2bd224b
commit 8415f00eee
5 changed files with 76 additions and 31 deletions

View File

@ -47,7 +47,7 @@ class RecoveryTest {
db_ = NULL;
}
void Open(Options* options = NULL) {
Status OpenWithStatus(Options* options = NULL) {
Close();
Options opts;
if (options != NULL) {
@ -59,7 +59,11 @@ class RecoveryTest {
if (opts.env == NULL) {
opts.env = env_;
}
ASSERT_OK(DB::Open(opts, dbname_, &db_));
return DB::Open(opts, dbname_, &db_);
}
void Open(Options* options = NULL) {
ASSERT_OK(OpenWithStatus(options));
ASSERT_EQ(1, NumLogs());
}
@ -100,6 +104,10 @@ class RecoveryTest {
return logs.size();
}
void DeleteManifestFile() {
ASSERT_OK(env_->DeleteFile(ManifestFileName()));
}
uint64_t FirstLogFile() {
return GetFiles(kLogFile)[0];
}
@ -317,6 +325,15 @@ TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_EQ("there", Get("hi"));
}
TEST(RecoveryTest, ManifestMissing) {
ASSERT_OK(Put("foo", "bar"));
Close();
DeleteManifestFile();
Status status = OpenWithStatus();
ASSERT_TRUE(status.IsCorruption());
}
} // namespace leveldb
int main(int argc, char** argv) {

View File

@ -925,6 +925,10 @@ Status VersionSet::Recover(bool *save_manifest) {
SequentialFile* file;
s = env_->NewSequentialFile(dscname, &file);
if (!s.ok()) {
if (s.IsNotFound()) {
return Status::Corruption(
"CURRENT points to a non-existent file", s.ToString());
}
return s;
}

View File

@ -43,7 +43,8 @@ class Env {
// Create a brand new sequentially-readable file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK.
// On failure stores NULL in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
// not exist, returns a non-OK status. Implementations should return a
// NotFound status when the file does not exist.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname,
@ -53,7 +54,8 @@ class Env {
// specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores NULL in *result and
// returns non-OK. If the file does not exist, returns a non-OK
// status.
// status. Implementations should return a NotFound status when the file does
// not exist.
//
// The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname,

View File

@ -34,8 +34,12 @@ namespace {
static int open_read_only_file_limit = -1;
static int mmap_limit = -1;
static Status IOError(const std::string& context, int err_number) {
return Status::IOError(context, strerror(err_number));
static Status PosixError(const std::string& context, int err_number) {
if (err_number == ENOENT) {
return Status::NotFound(context, strerror(err_number));
} else {
return Status::IOError(context, strerror(err_number));
}
}
// Helper class to limit resource usage to avoid exhaustion.
@ -108,7 +112,7 @@ class PosixSequentialFile: public SequentialFile {
// We leave status as ok if we hit the end of the file
} else {
// A partial read with an error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
}
}
return s;
@ -116,7 +120,7 @@ class PosixSequentialFile: public SequentialFile {
virtual Status Skip(uint64_t n) {
if (fseek(file_, n, SEEK_CUR)) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}
@ -154,7 +158,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
if (temporary_fd_) {
fd = open(filename_.c_str(), O_RDONLY);
if (fd < 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
}
@ -163,7 +167,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
*result = Slice(scratch, (r < 0) ? 0 : r);
if (r < 0) {
// An error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
}
if (temporary_fd_) {
// Close the temporary file descriptor opened earlier.
@ -199,7 +203,7 @@ class PosixMmapReadableFile: public RandomAccessFile {
Status s;
if (offset + n > length_) {
*result = Slice();
s = IOError(filename_, EINVAL);
s = PosixError(filename_, EINVAL);
} else {
*result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
}
@ -226,7 +230,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Append(const Slice& data) {
size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
if (r != data.size()) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}
@ -234,7 +238,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Close() {
Status result;
if (fclose(file_) != 0) {
result = IOError(filename_, errno);
result = PosixError(filename_, errno);
}
file_ = NULL;
return result;
@ -242,7 +246,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Flush() {
if (fflush_unlocked(file_) != 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}
@ -263,10 +267,10 @@ class PosixWritableFile : public WritableFile {
if (basename.starts_with("MANIFEST")) {
int fd = open(dir.c_str(), O_RDONLY);
if (fd < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
} else {
if (fsync(fd) < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
}
close(fd);
}
@ -337,7 +341,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "r");
if (f == NULL) {
*result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else {
*result = new PosixSequentialFile(fname, f);
return Status::OK();
@ -350,7 +354,7 @@ class PosixEnv : public Env {
Status s;
int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) {
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else if (mmap_limit_.Acquire()) {
uint64_t size;
s = GetFileSize(fname, &size);
@ -359,7 +363,7 @@ class PosixEnv : public Env {
if (base != MAP_FAILED) {
*result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
} else {
s = IOError(fname, errno);
s = PosixError(fname, errno);
}
}
close(fd);
@ -378,7 +382,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*result = new PosixWritableFile(fname, f);
}
@ -391,7 +395,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "a");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*result = new PosixWritableFile(fname, f);
}
@ -407,7 +411,7 @@ class PosixEnv : public Env {
result->clear();
DIR* d = opendir(dir.c_str());
if (d == NULL) {
return IOError(dir, errno);
return PosixError(dir, errno);
}
struct dirent* entry;
while ((entry = readdir(d)) != NULL) {
@ -420,7 +424,7 @@ class PosixEnv : public Env {
virtual Status DeleteFile(const std::string& fname) {
Status result;
if (unlink(fname.c_str()) != 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
}
return result;
}
@ -428,7 +432,7 @@ class PosixEnv : public Env {
virtual Status CreateDir(const std::string& name) {
Status result;
if (mkdir(name.c_str(), 0755) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
}
return result;
}
@ -436,7 +440,7 @@ class PosixEnv : public Env {
virtual Status DeleteDir(const std::string& name) {
Status result;
if (rmdir(name.c_str()) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
}
return result;
}
@ -446,7 +450,7 @@ class PosixEnv : public Env {
struct stat sbuf;
if (stat(fname.c_str(), &sbuf) != 0) {
*size = 0;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*size = sbuf.st_size;
}
@ -456,7 +460,7 @@ class PosixEnv : public Env {
virtual Status RenameFile(const std::string& src, const std::string& target) {
Status result;
if (rename(src.c_str(), target.c_str()) != 0) {
result = IOError(src, errno);
result = PosixError(src, errno);
}
return result;
}
@ -466,12 +470,12 @@ class PosixEnv : public Env {
Status result;
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
if (fd < 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
} else if (!locks_.Insert(fname)) {
close(fd);
result = Status::IOError("lock " + fname, "already held by process");
} else if (LockOrUnlock(fd, true) == -1) {
result = IOError("lock " + fname, errno);
result = PosixError("lock " + fname, errno);
close(fd);
locks_.Remove(fname);
} else {
@ -487,7 +491,7 @@ class PosixEnv : public Env {
PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
Status result;
if (LockOrUnlock(my_lock->fd_, false) == -1) {
result = IOError("unlock", errno);
result = PosixError("unlock", errno);
}
locks_.Remove(my_lock->name_);
close(my_lock->fd_);
@ -524,7 +528,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) {
*result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else {
*result = new PosixLogger(f, &PosixEnv::gettid);
return Status::OK();

View File

@ -99,6 +99,24 @@ TEST(EnvTest, StartThread) {
ASSERT_EQ(state.val, 3);
}
TEST(EnvTest, TestOpenNonExistentFile) {
// Write some test data to a single file that will be opened |n| times.
std::string test_dir;
ASSERT_OK(env_->GetTestDirectory(&test_dir));
std::string non_existent_file = test_dir + "/non_existent_file";
ASSERT_TRUE(!env_->FileExists(non_existent_file));
RandomAccessFile* random_access_file;
Status status = env_->NewRandomAccessFile(
non_existent_file, &random_access_file);
ASSERT_TRUE(status.IsNotFound());
SequentialFile* sequential_file;
status = env_->NewSequentialFile(non_existent_file, &sequential_file);
ASSERT_TRUE(status.IsNotFound());
}
} // namespace leveldb
int main(int argc, char** argv) {