leveldb/include/leveldb/options.h
leveldb Team 1d6e8d64ee Add support for Zstd-based compression in LevelDB.
This change implements support for Zstd-based compression in LevelDB. Building
up from the Snappy compression (which has been supported since inception), this
change adds Zstd as an alternate compression algorithm.

We are implementing this to provide alternative options for users who might
have different performance and efficiency requirements. For instance, the
Zstandard website (https://facebook.github.io/zstd/) claims that the Zstd
algorithm can achieve around 30% higher compression ratios than Snappy, with
relatively smaller (~10%) slowdowns in de/compression speeds.

Benchmarking results:

$ blaze-bin/third_party/leveldb/db_bench
LevelDB:    version 1.23
Date:       Thu Feb  2 18:50:06 2023
CPU:        56 * Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz
CPUCache:   35840 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       2.613 micros/op;   42.3 MB/s
fillsync     :    3924.432 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       3.609 micros/op;   30.7 MB/s
overwrite    :       4.508 micros/op;   24.5 MB/s
readrandom   :       6.136 micros/op; (864322 of 1000000 found)
readrandom   :       5.446 micros/op; (864083 of 1000000 found)
readseq      :       0.180 micros/op;  613.3 MB/s
readreverse  :       0.321 micros/op;  344.7 MB/s
compact      :  827043.000 micros/op;
readrandom   :       4.603 micros/op; (864105 of 1000000 found)
readseq      :       0.169 micros/op;  656.3 MB/s
readreverse  :       0.315 micros/op;  350.8 MB/s
fill100K     :     854.009 micros/op;  111.7 MB/s (1000 ops)
crc32c       :       1.227 micros/op; 3184.0 MB/s (4K per op)
snappycomp   :       3.610 micros/op; 1081.9 MB/s (output: 55.2%)
snappyuncomp :       0.691 micros/op; 5656.3 MB/s
zstdcomp     :      15.731 micros/op;  248.3 MB/s (output: 44.1%)
zstduncomp   :       4.218 micros/op;  926.2 MB/s
PiperOrigin-RevId: 509957778
2023-03-28 16:49:13 -07:00

187 lines
7.2 KiB
C++

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
#include <cstddef>
#include "leveldb/export.h"
namespace leveldb {
class Cache;
class Comparator;
class Env;
class FilterPolicy;
class Logger;
class Snapshot;
// DB contents are stored in a set of blocks, each of which holds a
// sequence of key,value pairs. Each block may be compressed before
// being stored in a file. The following enum describes which
// compression method (if any) is used to compress a block.
enum CompressionType {
// NOTE: do not change the values of existing entries, as these are
// part of the persistent format on disk.
kNoCompression = 0x0,
kSnappyCompression = 0x1,
kZstdCompression = 0x2,
};
// Options to control the behavior of a database (passed to DB::Open)
struct LEVELDB_EXPORT Options {
// Create an Options object with default values for all fields.
Options();
// -------------------
// Parameters that affect behavior
// Comparator used to define the order of keys in the table.
// Default: a comparator that uses lexicographic byte-wise ordering
//
// REQUIRES: The client must ensure that the comparator supplied
// here has the same name and orders keys *exactly* the same as the
// comparator provided to previous open calls on the same DB.
const Comparator* comparator;
// If true, the database will be created if it is missing.
bool create_if_missing = false;
// If true, an error is raised if the database already exists.
bool error_if_exists = false;
// If true, the implementation will do aggressive checking of the
// data it is processing and will stop early if it detects any
// errors. This may have unforeseen ramifications: for example, a
// corruption of one DB entry may cause a large number of entries to
// become unreadable or for the entire DB to become unopenable.
bool paranoid_checks = false;
// Use the specified object to interact with the environment,
// e.g. to read/write files, schedule background work, etc.
// Default: Env::Default()
Env* env;
// Any internal progress/error information generated by the db will
// be written to info_log if it is non-null, or to a file stored
// in the same directory as the DB contents if info_log is null.
Logger* info_log = nullptr;
// -------------------
// Parameters that affect performance
// Amount of data to build up in memory (backed by an unsorted log
// on disk) before converting to a sorted on-disk file.
//
// Larger values increase performance, especially during bulk loads.
// Up to two write buffers may be held in memory at the same time,
// so you may wish to adjust this parameter to control memory usage.
// Also, a larger write buffer will result in a longer recovery time
// the next time the database is opened.
size_t write_buffer_size = 4 * 1024 * 1024;
// Number of open files that can be used by the DB. You may need to
// increase this if your database has a large working set (budget
// one open file per 2MB of working set).
int max_open_files = 1000;
// Control over blocks (user data is stored in a set of blocks, and
// a block is the unit of reading from disk).
// If non-null, use the specified cache for blocks.
// If null, leveldb will automatically create and use an 8MB internal cache.
Cache* block_cache = nullptr;
// Approximate size of user data packed per block. Note that the
// block size specified here corresponds to uncompressed data. The
// actual size of the unit read from disk may be smaller if
// compression is enabled. This parameter can be changed dynamically.
size_t block_size = 4 * 1024;
// Number of keys between restart points for delta encoding of keys.
// This parameter can be changed dynamically. Most clients should
// leave this parameter alone.
int block_restart_interval = 16;
// Leveldb will write up to this amount of bytes to a file before
// switching to a new one.
// Most clients should leave this parameter alone. However if your
// filesystem is more efficient with larger files, you could
// consider increasing the value. The downside will be longer
// compactions and hence longer latency/performance hiccups.
// Another reason to increase this parameter might be when you are
// initially populating a large database.
size_t max_file_size = 2 * 1024 * 1024;
// Compress blocks using the specified compression algorithm. This
// parameter can be changed dynamically.
//
// Default: kSnappyCompression, which gives lightweight but fast
// compression.
//
// Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
// ~200-500MB/s compression
// ~400-800MB/s decompression
// Note that these speeds are significantly faster than most
// persistent storage speeds, and therefore it is typically never
// worth switching to kNoCompression. Even if the input data is
// incompressible, the kSnappyCompression implementation will
// efficiently detect that and will switch to uncompressed mode.
CompressionType compression = kSnappyCompression;
// EXPERIMENTAL: If true, append to existing MANIFEST and log files
// when a database is opened. This can significantly speed up open.
//
// Default: currently false, but may become true later.
bool reuse_logs = false;
// If non-null, use the specified filter policy to reduce disk reads.
// Many applications will benefit from passing the result of
// NewBloomFilterPolicy() here.
const FilterPolicy* filter_policy = nullptr;
};
// Options that control read operations
struct LEVELDB_EXPORT ReadOptions {
// If true, all data read from underlying storage will be
// verified against corresponding checksums.
bool verify_checksums = false;
// Should the data read for this iteration be cached in memory?
// Callers may wish to set this field to false for bulk scans.
bool fill_cache = true;
// If "snapshot" is non-null, read as of the supplied snapshot
// (which must belong to the DB that is being read and which must
// not have been released). If "snapshot" is null, use an implicit
// snapshot of the state at the beginning of this read operation.
const Snapshot* snapshot = nullptr;
};
// Options that control write operations
struct LEVELDB_EXPORT WriteOptions {
WriteOptions() = default;
// If true, the write will be flushed from the operating system
// buffer cache (by calling WritableFile::Sync()) before the write
// is considered complete. If this flag is true, writes will be
// slower.
//
// If this flag is false, and the machine crashes, some recent
// writes may be lost. Note that if it is just the process that
// crashes (i.e., the machine does not reboot), no writes will be
// lost even if sync==false.
//
// In other words, a DB write with sync==false has similar
// crash semantics as the "write()" system call. A DB write
// with sync==true has similar crash semantics to a "write()"
// system call followed by "fsync()".
bool sync = false;
};
} // namespace leveldb
#endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_