80e5b0d944
Fixed race condition reported by Dave Smit (dizzyd@dizzyd.com) on the leveldb mailing list. We were not signalling waiters after a trivial move from level-0. The result was that in some cases (hard to reproduce), a write would get stuck forever waiting for the number of level-0 files to drop below its hard limit. The new code is simpler: there is just one condition variable instead of two, and the condition variable is signalled after every piece of background work finishes. Also, all compaction work (including for manual compactions) is done in the background thread, and therefore we can remove the "compacting_" variable. git-svn-id: https://leveldb.googlecode.com/svn/trunk@31 62dab493-f737-651d-591e-8d6aee1b9529
185 lines
5.8 KiB
C++
185 lines
5.8 KiB
C++
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
|
|
#define STORAGE_LEVELDB_DB_DB_IMPL_H_
|
|
|
|
#include <set>
|
|
#include "db/dbformat.h"
|
|
#include "db/log_writer.h"
|
|
#include "db/snapshot.h"
|
|
#include "leveldb/db.h"
|
|
#include "leveldb/env.h"
|
|
#include "port/port.h"
|
|
|
|
namespace leveldb {
|
|
|
|
// Forward declarations.  Where this header uses these types it does so
// only through pointers (e.g. MemTable*, VersionEdit*), so their full
// definitions are not required here.
class MemTable;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;
|
|
|
|
class DBImpl : public DB {
|
|
public:
|
|
DBImpl(const Options& options, const std::string& dbname);
|
|
virtual ~DBImpl();
|
|
|
|
// Implementations of the DB interface
|
|
virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
|
|
virtual Status Delete(const WriteOptions&, const Slice& key);
|
|
virtual Status Write(const WriteOptions& options, WriteBatch* updates);
|
|
virtual Status Get(const ReadOptions& options,
|
|
const Slice& key,
|
|
std::string* value);
|
|
virtual Iterator* NewIterator(const ReadOptions&);
|
|
virtual const Snapshot* GetSnapshot();
|
|
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
|
virtual bool GetProperty(const Slice& property, std::string* value);
|
|
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
|
|
|
|
// Extra methods (for testing) that are not in the public DB interface
|
|
|
|
// Compact any files in the named level that overlap [begin,end]
|
|
void TEST_CompactRange(
|
|
int level,
|
|
const std::string& begin,
|
|
const std::string& end);
|
|
|
|
// Force current memtable contents to be compacted.
|
|
Status TEST_CompactMemTable();
|
|
|
|
// Return an internal iterator over the current state of the database.
|
|
// The keys of this iterator are internal keys (see format.h).
|
|
// The returned iterator should be deleted when no longer needed.
|
|
Iterator* TEST_NewInternalIterator();
|
|
|
|
// Return the maximum overlapping data (in bytes) at next level for any
|
|
// file at a level >= 1.
|
|
int64_t TEST_MaxNextLevelOverlappingBytes();
|
|
|
|
private:
|
|
friend class DB;
|
|
|
|
Iterator* NewInternalIterator(const ReadOptions&,
|
|
SequenceNumber* latest_snapshot);
|
|
|
|
Status NewDB();
|
|
|
|
// Recover the descriptor from persistent storage. May do a significant
|
|
// amount of work to recover recently logged updates. Any changes to
|
|
// be made to the descriptor are added to *edit.
|
|
Status Recover(VersionEdit* edit);
|
|
|
|
void MaybeIgnoreError(Status* s) const;
|
|
|
|
// Delete any unneeded files and stale in-memory entries.
|
|
void DeleteObsoleteFiles();
|
|
|
|
// Compact the in-memory write buffer to disk. Switches to a new
|
|
// log-file/memtable and writes a new descriptor iff successful.
|
|
Status CompactMemTable();
|
|
|
|
Status RecoverLogFile(uint64_t log_number,
|
|
VersionEdit* edit,
|
|
SequenceNumber* max_sequence);
|
|
|
|
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
|
|
|
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
|
|
|
struct CompactionState;
|
|
|
|
void MaybeScheduleCompaction();
|
|
static void BGWork(void* db);
|
|
void BackgroundCall();
|
|
void BackgroundCompaction();
|
|
void CleanupCompaction(CompactionState* compact);
|
|
Status DoCompactionWork(CompactionState* compact);
|
|
|
|
Status OpenCompactionOutputFile(CompactionState* compact);
|
|
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
|
|
Status InstallCompactionResults(CompactionState* compact);
|
|
|
|
// Constant after construction
|
|
Env* const env_;
|
|
const InternalKeyComparator internal_comparator_;
|
|
const Options options_; // options_.comparator == &internal_comparator_
|
|
bool owns_info_log_;
|
|
bool owns_cache_;
|
|
const std::string dbname_;
|
|
|
|
// table_cache_ provides its own synchronization
|
|
TableCache* table_cache_;
|
|
|
|
// Lock over the persistent DB state. Non-NULL iff successfully acquired.
|
|
FileLock* db_lock_;
|
|
|
|
// State below is protected by mutex_
|
|
port::Mutex mutex_;
|
|
port::AtomicPointer shutting_down_;
|
|
port::CondVar bg_cv_; // Signalled when background work finishes
|
|
MemTable* mem_;
|
|
MemTable* imm_; // Memtable being compacted
|
|
port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
|
|
WritableFile* logfile_;
|
|
log::Writer* log_;
|
|
SnapshotList snapshots_;
|
|
|
|
// Set of table files to protect from deletion because they are
|
|
// part of ongoing compactions.
|
|
std::set<uint64_t> pending_outputs_;
|
|
|
|
// Has a background compaction been scheduled or is running?
|
|
bool bg_compaction_scheduled_;
|
|
|
|
// Information for a manual compaction
|
|
struct ManualCompaction {
|
|
int level;
|
|
std::string begin;
|
|
std::string end;
|
|
};
|
|
ManualCompaction* manual_compaction_;
|
|
|
|
VersionSet* versions_;
|
|
|
|
// Have we encountered a background error in paranoid mode?
|
|
Status bg_error_;
|
|
|
|
// Per level compaction stats. stats_[level] stores the stats for
|
|
// compactions that produced data for the specified "level".
|
|
struct CompactionStats {
|
|
int64_t micros;
|
|
int64_t bytes_read;
|
|
int64_t bytes_written;
|
|
|
|
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }
|
|
|
|
void Add(const CompactionStats& c) {
|
|
this->micros += c.micros;
|
|
this->bytes_read += c.bytes_read;
|
|
this->bytes_written += c.bytes_written;
|
|
}
|
|
};
|
|
CompactionStats stats_[config::kNumLevels];
|
|
|
|
// No copying allowed
|
|
DBImpl(const DBImpl&);
|
|
void operator=(const DBImpl&);
|
|
|
|
const Comparator* user_comparator() const {
|
|
return internal_comparator_.user_comparator();
|
|
}
|
|
};
|
|
|
|
// Sanitize db options. The caller should delete result.info_log if
|
|
// it is not equal to src.info_log.
|
|
extern Options SanitizeOptions(const std::string& db,
|
|
const InternalKeyComparator* icmp,
|
|
const Options& src);
|
|
|
|
}
|
|
|
|
#endif // STORAGE_LEVELDB_DB_DB_IMPL_H_
|