Compare commits

..

345 Commits

Author SHA1 Message Date
leveldb Team
068d5ee1a3 leveldb: Check slice length in Footer::DecodeFrom()
Without this check decoding the footer in Table::Open() can read
uninitialized bytes from a buffer allocated on the stack if the file
was unexpectedly short.

In practice this is probably fine since this function validates a magic
number but MSan complains about branching on uninitialized data.

PiperOrigin-RevId: 525271012
2023-04-20 18:09:06 +00:00
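
A minimal sketch of the kind of guard described above, with simplified stand-in types; the Slice struct, the length constant, and the magic value below are illustrative, not the actual table/format.cc code.

```
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

// Illustrative stand-ins; the real Slice, encoded length, and magic number
// live in include/leveldb/slice.h and table/format.{h,cc}.
struct Slice {
  const char* data;
  size_t size;
};
constexpr size_t kFooterEncodedLength = 48;
constexpr uint64_t kPlaceholderMagic = 0x1234567890abcdefull;

bool DecodeFooter(const Slice& input, std::string* error) {
  // Check the length first: without this, a short file leaves part of the
  // stack-allocated footer buffer uninitialized before it is read, which
  // is what MSan flags.
  if (input.size < kFooterEncodedLength) {
    *error = "footer too short";
    return false;
  }
  uint64_t magic;
  std::memcpy(&magic, input.data + kFooterEncodedLength - sizeof(magic),
              sizeof(magic));
  // Real code compares against the table magic number and then decodes the
  // metaindex/index block handles; both are simplified away here.
  return magic == kPlaceholderMagic;
}
```
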
leveldb Team
c61238dcf3 Support Zstd compression level in Leveldb
PiperOrigin-RevId: 520556840
2023-04-20 18:08:55 +00:00
Victor Costan
77d66aaf3e Fix GitHub CI on Linux.
This PR temporarily removes a package that is currently broken on
GitHub's Ubuntu 22.04 installation. This is the most expedient way to
make the CI green again, so we can test any other changes we may want to
land.

PiperOrigin-RevId: 520206940
2023-03-28 20:17:54 -07:00
Victor Costan
9cbbc5fb75 Merge pull request #1104 from reillyeon:chromium_env
PiperOrigin-RevId: 520172744
2023-03-28 20:17:45 -07:00
Victor Costan
80d858fb2a Merge pull request #1106 from reillyeon:run_many
PiperOrigin-RevId: 520171344
2023-03-28 16:49:26 -07:00
leveldb Team
1d6e8d64ee Add support for Zstd-based compression in LevelDB.
This change implements support for Zstd-based compression in LevelDB. Building
up from the Snappy compression (which has been supported since inception), this
change adds Zstd as an alternate compression algorithm.

We are implementing this to provide alternative options for users who might
have different performance and efficiency requirements. For instance, the
Zstandard website (https://facebook.github.io/zstd/) claims that the Zstd
algorithm can achieve around 30% higher compression ratios than Snappy, with
relatively smaller (~10%) slowdowns in de/compression speeds.

Benchmarking results:

$ blaze-bin/third_party/leveldb/db_bench
LevelDB:    version 1.23
Date:       Thu Feb  2 18:50:06 2023
CPU:        56 * Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz
CPUCache:   35840 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       2.613 micros/op;   42.3 MB/s
fillsync     :    3924.432 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       3.609 micros/op;   30.7 MB/s
overwrite    :       4.508 micros/op;   24.5 MB/s
readrandom   :       6.136 micros/op; (864322 of 1000000 found)
readrandom   :       5.446 micros/op; (864083 of 1000000 found)
readseq      :       0.180 micros/op;  613.3 MB/s
readreverse  :       0.321 micros/op;  344.7 MB/s
compact      :  827043.000 micros/op;
readrandom   :       4.603 micros/op; (864105 of 1000000 found)
readseq      :       0.169 micros/op;  656.3 MB/s
readreverse  :       0.315 micros/op;  350.8 MB/s
fill100K     :     854.009 micros/op;  111.7 MB/s (1000 ops)
crc32c       :       1.227 micros/op; 3184.0 MB/s (4K per op)
snappycomp   :       3.610 micros/op; 1081.9 MB/s (output: 55.2%)
snappyuncomp :       0.691 micros/op; 5656.3 MB/s
zstdcomp     :      15.731 micros/op;  248.3 MB/s (output: 44.1%)
zstduncomp   :       4.218 micros/op;  926.2 MB/s
PiperOrigin-RevId: 509957778
2023-03-28 16:49:13 -07:00
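
For context, a hedged usage sketch of selecting the new algorithm through Options; kZstdCompression is the enumerator this change introduces alongside kSnappyCompression, while the database path and the fallback comment are illustrative.

```
#include <cassert>

#include "leveldb/db.h"
#include "leveldb/options.h"

int main() {
  leveldb::Options options;
  options.create_if_missing = true;
  // Pick Zstd instead of the default Snappy. As with Snappy, a build
  // without Zstd support is expected to store blocks uncompressed.
  options.compression = leveldb::kZstdCompression;

  leveldb::DB* db = nullptr;
  leveldb::Status status =
      leveldb::DB::Open(options, "/tmp/zstd_example_db", &db);
  assert(status.ok());
  delete db;
  return 0;
}
```
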
Reilly Grant
13ebad24dc Address comments. 2023-03-28 16:28:35 -07:00
Reilly Grant
df68d9578c Fix EnvTest.RunMany to allow parallel execution
As allowed by the documentation for Env::Schedule(), ChromiumEnv may
execute functions on multiple threads and guarantees no sequencing.
EnvTest.RunMany assumed that functions ran in order, which is the case for
the stock PosixEnv and WindowsEnv implementations. This change updates the
test to not assume sequential execution.
2023-03-28 16:17:21 -07:00
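
A small order-independent sketch of that kind of check, assuming only <atomic> and an Env that may run callbacks on any thread; the names are illustrative, not the actual util/env_test.cc code.

```
#include <atomic>
#include <cassert>

std::atomic<int> id_sum{0};

// Each scheduled callback contributes its id. Summing commutes, so the
// final check holds no matter what order (or on which threads) the Env
// chose to run the callbacks.
void Task(void* arg) { id_sum.fetch_add(*static_cast<int*>(arg)); }

void CheckAllRan(int n) {
  // Expect 1 + 2 + ... + n once every callback has completed.
  assert(id_sum.load() == n * (n + 1) / 2);
}
```
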
Reilly Grant
bfae97ff7d Roll third_party/benchmark to f7547e29ccaed7b64ef4f7495ecfff1c9f6f3d03
Fixes an unused variable warning.
2023-03-28 15:13:57 -07:00
Reilly Grant
89ea7f2643 Fix tests when run against ChromiumEnv
There are a couple of differences between ChromiumEnv and
PosixEnv/WindowsEnv which cause test failures that are fixed (or at
least patched over) in this change:

* NewSequentialFile() and NewRandomAccessFile() return Status::IOError
  rather than Status::NotFound when a file is not found, due to
  https://crbug.com/760362. This means a few tests need to expect a
  different error result.
* GetChildren() never returns the '.' or '..' entries.
* As allowed by the documentation for Env::Schedule(), ChromiumEnv may
  execute functions on multiple threads and guarantees no sequencing.
  EnvTest.RunMany assumed that functions ran in order. The test has been
  updated.
2023-03-28 14:37:48 -07:00
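
For the NotFound/IOError difference listed above, a sketch of the looser expectation the tests can use; it assumes leveldb::Status's IsNotFound()/IsIOError() accessors and is not the literal test change.

```
#include "leveldb/status.h"

// PosixEnv/WindowsEnv report NotFound for a missing file, while
// ChromiumEnv reports IOError (https://crbug.com/760362), so tests that
// open a nonexistent file accept either status.
inline bool IsMissingFileError(const leveldb::Status& s) {
  return s.IsNotFound() || s.IsIOError();
}
```
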
Sanjay Ghemawat
fb644cb445 Stop future writes if a log file Close() fails.
See https://github.com/google/leveldb/issues/1081

PiperOrigin-RevId: 499519182
2023-01-04 20:41:21 +00:00
leveldb Team
aa5479bbf4 Fix maintenance text
PiperOrigin-RevId: 461725664
2022-07-18 22:11:43 +00:00
Victor Costan
ca684d00b5 Fix Markdown formatting in README.
PiperOrigin-RevId: 461722304
2022-07-18 21:59:46 +00:00
leveldb Team
7b650f85de Add note on current development state
PiperOrigin-RevId: 461695246
2022-07-18 21:20:02 +00:00
Victor Costan
0a9b7b8e95 Merge pull request #1036 from chjj:benchmark-compression
PiperOrigin-RevId: 461612590
2022-07-18 21:19:56 +00:00
leveldb Team
fff74f20ff Use GTEST_SKIP in leveldb:table_test
This replaces a usage of fprintf to stderr

PiperOrigin-RevId: 454620969
2022-07-18 21:19:46 +00:00
Christopher Jeffrey
9e1c274074
Add compression flag to benchmarks. 2022-06-19 20:45:14 -04:00
Victor Costan
d019e3605f Merge pull request #1008 from pkasting:main
PiperOrigin-RevId: 447466466
2022-05-09 16:20:11 +00:00
Peter Kasting
8b5b1ca96c Fixes for C++20 support.
Structs with user-declared constructors are no longer considered
aggregates.  Just remove the constructor declaration where applicable.

Bug: chromium:1284275
2022-05-05 07:44:11 -07:00
Victor Costan
4fb146810c The master branch was renamed to main.
PiperOrigin-RevId: 422409116
2022-01-17 21:37:53 +00:00
Victor Costan
479a1f4e9b Update contributing guidelines.
* Align CONTRIBUTING.md with the google/new-project template.
* Explain the support story for the CMake config.

PiperOrigin-RevId: 421120645
2022-01-11 22:37:16 +00:00
Victor Costan
1b51a3a968 Merge pull request #506 from lingbin:fix_issue_505
PiperOrigin-RevId: 420787858
2022-01-10 18:01:30 +00:00
Victor Costan
a797000713 Merge pull request #893 from myccccccc:master
PiperOrigin-RevId: 420783873
2022-01-10 17:56:16 +00:00
Victor Costan
bf4fcd85b5 Merge pull request #888 from JayiceZ:fix_typo
PiperOrigin-RevId: 420783487
2022-01-10 17:56:09 +00:00
Victor Costan
8a68093c84 Merge pull request #652 from caodhuan:master
PiperOrigin-RevId: 420782536
2022-01-10 17:56:02 +00:00
Victor Costan
bfea90d883 Merge pull request #744 from HenryRLee:patch-1
PiperOrigin-RevId: 420781374
2022-01-10 17:55:55 +00:00
Victor Costan
bda46dd00d Merge pull request #602 from andyli029:feature_fix_lack_tag_comment
PiperOrigin-RevId: 420781095
2022-01-10 17:55:49 +00:00
Victor Costan
ec4e3a5cb3 Merge pull request #747 from zltl:patch-1
PiperOrigin-RevId: 420778907
2022-01-10 17:55:39 +00:00
Victor Costan
3ab94e7da8
Merge branch 'master' into fix_issue_505 2022-01-10 09:48:52 -08:00
Victor Costan
3180f9cb40
Merge branch 'master' into patch-1 2022-01-09 23:08:24 -08:00
Victor Costan
8ccb79b57e Merge pull request #901 from mapleFU:opt-using-move
PiperOrigin-RevId: 420662938
2022-01-10 06:35:14 +00:00
Victor Costan
74f0be238f Merge pull request #897 from raynolmenezes:patch-1
PiperOrigin-RevId: 420662891
2022-01-10 06:35:06 +00:00
Victor Costan
56f2394250 Merge pull request #945 from xiong-ang:master
PiperOrigin-RevId: 420645727
2022-01-10 02:29:23 +00:00
Victor Costan
068a0f1214 Merge pull request #934 from BilyZ98:master
PiperOrigin-RevId: 420645080
2022-01-10 02:29:16 +00:00
Victor Costan
8f5aa6375e Merge pull request #919 from wineway:fix_posix_test
PiperOrigin-RevId: 420644954
2022-01-10 02:29:08 +00:00
Victor Costan
7a2f90460a Merge pull request #928 from ehds:fix-comment
PiperOrigin-RevId: 420541137
2022-01-09 07:31:01 +00:00
Victor Costan
4db0eaccf1 Merge pull request #960 from ericuni:dedup
PiperOrigin-RevId: 420534594
2022-01-09 03:18:18 +00:00
Victor Costan
7ee3889a61 VersionSet::Builder::Apply() does not mutate its argument.
PiperOrigin-RevId: 420533763
2022-01-09 03:15:31 +00:00
Victor Costan
42cf899927 Merge pull request #903 from LazyWolfLin:dev_random
PiperOrigin-RevId: 420532625
2022-01-09 03:15:22 +00:00
Victor Costan
8796c44772 Merge pull request #902 from ehds:update-table-cache
PiperOrigin-RevId: 420517390
2022-01-09 03:15:10 +00:00
Victor Costan
e4ccaa0c9c Merge pull request #965 from ShawnZhong:cpp20
PiperOrigin-RevId: 420504266
2022-01-08 20:55:42 +00:00
Victor Costan
c8b708d496 Merge pull request #967 from rex4539:typos
PiperOrigin-RevId: 420403341
2022-01-08 02:55:46 +00:00
Victor Costan
639195221c Merge pull request #968 from xindubawukong:dxy_remove_code
PiperOrigin-RevId: 420399272
2022-01-08 01:29:06 +00:00
xindubawukong
87b3a371b1 remove useless code in cache.h 2022-01-06 03:11:11 +08:00
Dimitris Apostolou
0e8aa26c4e
Fix typos 2022-01-05 11:04:16 +02:00
Victor Costan
8f464e7f68 Remove main() from most tests.
This gives some flexibility to embedders.

Currently, embedders have to build a binary for each test file.

After this CL, embedders can still choose to have a binary for each test
file, by linking each test file with a googletest target that includes
main() (usually "gtest_main"). Embedders can also choose to build a
single binary for almost all test files, and link with a googletest
target that includes main(). The latter is more convenient for projects
that have very few test binaries, like Chromium.

PiperOrigin-RevId: 419470798
2022-01-03 21:05:04 +00:00
Shawn Zhong
7a2f64ed50 Update env_posix.cc 2021-12-30 18:33:55 -06:00
Victor Costan
b2801ee1a0 Extract benchmark from db_test.cc.
The benchmark in db/db_test.cc is extracted to its own file,
benchmarks/db_bench_log.cc.

PiperOrigin-RevId: 418713499
2021-12-29 03:49:16 +00:00
Victor Costan
335876a133 Add invariant checks to Limiter in Env implementations.
PiperOrigin-RevId: 417853172
2021-12-22 19:26:31 +00:00
Eric Wang
42d00a80cc rm redundant code: SetNextFile has already been called before in this function 2021-12-05 11:44:55 +08:00
Victor Costan
e426c83e88 Merge pull request #941 from pmmp:no-handle-inheritance
PiperOrigin-RevId: 412997201
2021-11-30 00:09:27 +00:00
Victor Costan
6124f47490 Merge pull request #951 from philix:no_pthread
PiperOrigin-RevId: 412965575
2021-11-29 21:45:09 +00:00
Felipe Oliveira Carvalho
dd6658754f Remove <pthread.h> include and find_package() from build files 2021-11-28 20:59:14 +01:00
xiong-ang
d7da5d9d35 fix some trifling points 2021-10-22 18:00:57 +08:00
Dylan K. Taylor
68d14a723a
Prevent handle used for LOG from being inherited by subprocesses
I recently encountered a problem with this because Windows doesn't allow
files to be deleted when there's open handles to them.

Other files opened by leveldb are not affected because by and large they
are using CreateFileA, which does not allow inheritance when
lpSecurityAttributes is null (ref:
https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea)

However, fopen() _does_ allow inheritance, and it needs to be expressly
disabled.
https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fopen-wfopen?view=msvc-160
2021-10-09 16:22:08 +01:00
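
A hedged sketch of one way to get the behavior described above; the "N" mode flag is documented on the Microsoft CRT fopen() page linked in the message, but whether the actual commit uses this flag or a Win32 handle call is not shown here.

```
#include <cstdio>

// Keep the LOG FILE* from being inherited by child processes on Windows;
// elsewhere the plain mode is unchanged.
std::FILE* OpenLogFile(const char* path) {
#if defined(_WIN32)
  // The Microsoft CRT accepts an "N" fopen() mode flag meaning the
  // underlying descriptor is not inherited by child processes.
  return std::fopen(path, "wN");
#else
  return std::fopen(path, "w");
#endif
}
```
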
leveldb Team
c5d5174a66 Get env_posix.cc building under Fuchsia.
PiperOrigin-RevId: 395824737
2021-09-12 14:12:56 +00:00
zzt
11aafab31f
Fix version_set.cc comments typo
Fix typo of comment of FindLargestKey function
2021-09-03 11:18:31 +08:00
Victor Costan
5783a79309 Switch CI to GitHub Actions.
PiperOrigin-RevId: 394542401
2021-09-02 21:33:50 +00:00
ehds
54340b4a10 Fix comments position 2021-08-08 22:24:37 +08:00
leveldb Team
8e62cc5124 Remove the / prefix from the recovery_test test file to prevent a double /.
PiperOrigin-RevId: 388341429
2021-08-03 01:12:08 +00:00
wineway
8949158f5d fixed random-access file test using the wrong limit count when mmap files are exhausted 2021-07-01 20:52:01 +08:00
Victor Costan
5d94ad4d95 Update Travis CI config.
Xcode (drives macOS image) : 12.2 => 12.5
Clang                      : 10 => 12
GCC                        : 10 => 11
PiperOrigin-RevId: 375582717
2021-05-25 01:40:12 +00:00
Victor Costan
c7a0fa28a4
Merge pull request #906 from pwnall/third-party-bump
Roll third-party dependencies.
2021-05-24 16:13:58 -07:00
Sanjay Ghemawat
13e3c4efc6 Fix compactions that could end up breaking a run of the same user
key across multiple files.

As reported in Github issue #339, it is incorrect to split the
same user key across multiple compacted files since it causes
tombstones/newer-versions to be dropped, thereby exposing obsolete
data. There was a fix for #339, but it ended up not fully fixing
the problem. (It checked for boundary problems in the first level
being compacted, but not the second). This problem was revealed
by GitHub issue #887.

We now adjust boundaries to avoid splitting user keys in both the
first level and the second level.

PiperOrigin-RevId: 374921082
2021-05-20 19:13:04 +00:00
Victor Costan
f6fe2ec561 Roll third-party dependencies. 2021-05-17 19:32:29 -07:00
LazyWolfLin
3806fbc23c Small fix.
Use function instead of original expression.
2021-05-11 22:56:37 +08:00
ehds
dbf24d9a0c Make table cache non-copyable 2021-05-08 13:48:39 +08:00
mwish
1ca4f5b466 [Init] initial commit 2021-05-02 14:16:51 +08:00
Raynol Menezes
f6d094e994
Update log_reader.h 2021-04-16 13:00:59 +05:30
mayingchun
e2ad7dad54 delete an unnecessary forward declaration 2021-04-05 13:58:20 +08:00
Jayice
9e8500518f fix typo in port_example.h 2021-03-28 16:38:37 +08:00
Victor Costan
f57513a1d6 Merge pull request #881 from firebase:apple-toolchain-fixes
PiperOrigin-RevId: 360972284
2021-03-04 20:35:18 +00:00
Paul Beusterien
24bcf7f7ce Don't include C++ headers in extern C 2021-03-03 17:53:26 -08:00
Chris Mumford
99b3c03b32 Change version to 1.23.
PiperOrigin-RevId: 359111035
2021-02-23 12:54:37 -08:00
Chris Mumford
37aaf2fccd Fix fprintf format string.
Using %zu for size_t instead of %ld.

PiperOrigin-RevId: 357976882
2021-02-17 10:25:15 -08:00
Chris Mumford
2a47801868 Use partial path to benchmark/benchmark.h.
Using the partial path offers more flexibility to projects which
may checkout google/benchmark to a different location.

PiperOrigin-RevId: 357819911
2021-02-16 16:51:09 -08:00
Victor Costan
4a919ea4f7 IWYU fixes in db/c.cc.
Fixes https://github.com/google/leveldb/issues/872

PiperOrigin-RevId: 353657701
2021-01-25 17:18:22 +00:00
Victor Costan
1998c0ef15 Fix build errors.
PiperOrigin-RevId: 351442409
2021-01-12 21:58:19 +00:00
leveldb Team
8f1861462b Sync MANIFEST before closing in db_impl when creating a new DB.
Add logging with debugging information when failing to load a version set.

PiperOrigin-RevId: 351432332
2021-01-12 21:58:08 +00:00
leveldb Team
8cce47e450 Optimize leveldb block seeks to utilize the current iterator location.
This is beneficial when iterators are reused and seeks are not random
but increasing. It is additionally beneficial with larger block sizes and keys with common prefixes.

Add a benchmark "seekordered" to db_bench that reuses iterators across
increasing seeks.  Add support to the benchmark to count comparisons made and to support common key prefix length. Change benchmark random seeds to be reproducible for entire benchmark suite executions but unique for threads in different benchmarks runs.  This changes a benchmark suite of readrandom,seekrandom from having a 100% found ratio as previously it had the same seed used for fillrandom.

./db_bench --benchmarks=fillrandom,compact,seekordered --block_size=262144 --comparisons=1 --key_prefix=100

without this change (though with benchmark changes):
seekrandom   :      55.309 micros/op; (631820 of 1000000 found)
Comparisons: 27001049
seekordered  :       1.732 micros/op; (631882 of 1000000 found)
Comparisons: 26998402

with this change:
seekrandom   :      55.866 micros/op; (631820 of 1000000 found)
Comparisons: 26952143
seekordered  :       1.686 micros/op; (631882 of 1000000 found)
Comparisons: 25549369

For ordered seeking, this is a reduction of 5% comparisons and a 3% speedup. For random seeking (with single use iterators) the comparisons and speed are less than 1% and likely noise.

PiperOrigin-RevId: 351149832
2021-01-11 15:41:38 +00:00
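
A conceptual sketch of the reuse-the-current-position idea, using a sorted vector in place of leveldb's Block::Iter; names and structure are illustrative, not the actual block.cc change.

```
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Plain sorted-vector stand-in for a block iterator (illustrative only).
struct SortedIter {
  explicit SortedIter(const std::vector<std::string>& k) : keys(k), pos(0) {}

  const std::vector<std::string>& keys;
  std::size_t pos;

  // A "cold" seek: binary search over the whole block.
  void Seek(const std::string& target) {
    pos = std::lower_bound(keys.begin(), keys.end(), target) - keys.begin();
  }

  // Reuse the current position for increasing seeks: if the iterator is
  // already at or before the target, restrict the search to [pos, end)
  // instead of restarting from the beginning of the block.
  void SeekFromCurrent(const std::string& target) {
    if (pos < keys.size() && keys[pos] <= target) {
      pos = std::lower_bound(keys.begin() + pos, keys.end(), target) -
            keys.begin();
    } else {
      Seek(target);
    }
  }
};

int main() {
  std::vector<std::string> keys = {"apple", "cherry", "grape", "melon"};
  SortedIter it(keys);
  it.SeekFromCurrent("cherry");  // ordered seeks reuse the position
  assert(it.pos == 1);
  it.SeekFromCurrent("melon");
  assert(it.pos == 3);
  return 0;
}
```
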
Victor Costan
fdc8f72895 Merge pull request #862 from rex4539:https
PiperOrigin-RevId: 349711809
2021-01-01 09:32:07 +00:00
Dimitris Apostolou
532be85306
Fix insecure links 2020-12-19 20:07:38 +02:00
Victor Costan
6721eda0b4 Update Travis CI config.
PiperOrigin-RevId: 347391876
2020-12-14 08:37:35 -08:00
Victor Costan
295ce1336f
Merge pull request #855 from cmumford/submodule-fix
Fixup for adding the third_party/benchmark submodule.
2020-11-30 16:37:05 -08:00
Chris Mumford
c3b52f7db6 Fixup for adding the third_party/benchmark submodule. 2020-11-30 16:26:16 -08:00
Chris Mumford
28df52115d Merge pull request #853 from cmumford:benchmark
PiperOrigin-RevId: 344909677
2020-11-30 16:16:39 -08:00
Chris Mumford
2dcbd4a2c5 Merge pull request #854 from cmumford:printf-fix
PiperOrigin-RevId: 344871226
2020-11-30 16:16:25 -08:00
Chris Mumford
b754fdca72 Fixed fprintf of 64-bit value. 2020-11-30 10:48:17 -08:00
Chris Mumford
37d36c92f8 Added google/benchmark submodule. 2020-11-30 10:47:15 -08:00
Sanjay Ghemawat
2802398c94 Fix bug in filter policy documentation example.
PiperOrigin-RevId: 344817715
2020-11-30 09:11:08 -08:00
leveldb Team
99ab4730d6 Use external benchmark API header
PiperOrigin-RevId: 339310928
2020-11-30 09:10:59 -08:00
leveldb Team
ed781070b4 Internal test cleanup
PiperOrigin-RevId: 339287832
2020-11-30 09:10:46 -08:00
leveldb Team
b7d3023269 Internal cleanup migrating StatusOr.
PiperOrigin-RevId: 329720018
2020-10-07 21:15:26 +00:00
Chris Mumford
1454924aac Merge pull request #822 from jl0x61:bugFix
PiperOrigin-RevId: 321372819
2020-07-15 09:20:04 -07:00
jl0x61
1754c12c54 update index.md
remove return value of GetApproximateSizes in index.md
2020-07-14 19:32:03 +08:00
Victor Costan
c46e79c760 Merge pull request #819 from wzk784533:master
PiperOrigin-RevId: 321000544
2020-07-13 19:14:04 +00:00
wzk784533
28602d3625 avoid unnecessary memory copy 2020-07-11 13:44:11 +08:00
Victor Costan
5bd5f0f67a Merge pull request #798 from lntotk:master
PiperOrigin-RevId: 309738404
2020-05-04 22:47:40 +00:00
Victor Costan
23b6337f69 Fix Travis CI build.
PiperOrigin-RevId: 309138195
2020-04-30 01:21:01 +00:00
Victor Costan
5c6dd75897 Fix accidental double std:: qualifiers.
PiperOrigin-RevId: 309136120
2020-04-30 01:20:50 +00:00
Victor Costan
a6b3a2012e Add some std:: qualifiers to types and functions.
PiperOrigin-RevId: 309110431
2020-04-29 22:33:14 +00:00
Victor Costan
3f934e3705 Switch from C headers to C++ headers.
This CL makes the following substitutions.

* assert.h -> cassert
* math.h -> cmath
* stdarg.h -> cstdarg
* stddef.h -> cstddef
* stdint.h -> cstdint
* stdio.h -> cstdio
* stdlib.h -> cstdlib
* string.h -> cstring

PiperOrigin-RevId: 309080151
2020-04-29 20:51:13 +00:00
Victor Costan
23d67e7c1f Fix C++11 build.
PiperOrigin-RevId: 308839805
2020-04-28 18:05:22 +00:00
leveldb Team
98a3b8cf65 change const to constexpr
PiperOrigin-RevId: 307113877
2020-04-28 00:17:51 +00:00
lntotk
10bc0f2595 remove unnecessary status check 2020-04-24 02:00:12 +00:00
Victor Costan
201f52201f Remove leveldb::port::kLittleEndian.
Clang 10 includes the optimizations described in
https://bugs.llvm.org/show_bug.cgi?id=41761. This means that the
platform-independent implementations of {Decode,Encode}Fixed{32,64}()
compile to one instruction on the most recent Clang and GCC.

PiperOrigin-RevId: 306330166
2020-04-14 01:10:05 +00:00
Victor Costan
ba369ddbaf Use LLVM 10 on Travis CI.
PiperOrigin-RevId: 306236199
2020-04-14 01:09:54 +00:00
Victor Costan
5903e7a112 Remove Windows workarounds in some tests.
leveldb::Env::DeleteFile was replaced with leveldb::Env::RemoveFile in
all tests. This allows us to remove workarounds for windows.h #defining
DeleteFile.
PiperOrigin-RevId: 289121105
2020-01-14 18:31:37 -08:00
Victor Costan
a0191e5563 Add Env::Remove{File,Dir} which obsolete Env::Delete{File,Dir}.
The "DeleteFile" method name causes pain for Windows developers, because
<windows.h> #defines a DeleteFile macro to DeleteFileW or DeleteFileA.
Current code uses workarounds, like #undefining DeleteFile everywhere an
Env is declared, implemented, or used.

This CL removes the need for workarounds by renaming Env::DeleteFile to
Env::RemoveFile. For consistency, Env::DeleteDir is also renamed to
Env::RemoveDir. A few internal methods are also renamed for consistency.
Software that supports Windows is expected to migrate any Env
implementations and usage to Remove{File,Dir}, and never use the name
Env::Delete{File,Dir} in its code.

The renaming is done in a backwards-compatible way, at the risk of
making it slightly more difficult to build a new correct Env
implementation. The backwards compatibility is achieved using the
following hacks:

1) Env::Remove{File,Dir} methods are added, with a default
    implementation that calls into Env::Delete{File,Dir}. This makes old
    Env implementations compatible with code that calls into the updated
    API.
2) The Env::Delete{File,Dir} methods are no longer pure virtuals.
    Instead, they gain a default implementation that calls into
    Env::Remove{File,Dir}. This makes updated Env implementations
    compatible with code that calls into the old API.

The cost of this approach is that it's possible to write an Env without
overriding either Remove{File,Dir} or Delete{File,Dir}, without getting
a compiler warning. However, attempting to run the test suite will
immediately fail with an infinite call stack ending in
{Remove,Delete}{File,Dir}, making developers aware of the problem.

PiperOrigin-RevId: 288710907
2020-01-09 09:18:14 -08:00
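
A minimal sketch of the two compatibility hacks described above, with Status reduced to bool; this is not leveldb's actual Env declaration.

```
#include <string>

class Env {
 public:
  virtual ~Env() = default;

  // New name. The default implementation forwards to the old name, so a
  // pre-rename Env subclass keeps working with callers using the new API.
  virtual bool RemoveFile(const std::string& fname) {
    return DeleteFile(fname);
  }

  // Old name, no longer pure virtual. The default implementation forwards
  // to the new name, so a post-rename Env subclass keeps working with
  // callers using the old API.
  virtual bool DeleteFile(const std::string& fname) {
    return RemoveFile(fname);
  }

  // An Env that overrides neither method still compiles, but calling
  // either one recurses until the stack overflows -- the failure mode the
  // message above says the test suite surfaces immediately.
};
```
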
leveldb Team
d152b23f3b Defend against inclusion of windows.h in tests that invoke
Env::DeleteFile.

PiperOrigin-RevId: 283607548
2020-01-09 09:17:59 -08:00
Victor Costan
58a89bbcb2 Add WITHOUT ROWID to SQLite benchmark.
The SQLite-specific schema feature is documented at
https://www.sqlite.org/withoutrowid.html and
https://www.sqlite.org/rowidtable.html.

By default, SQLite stores each table in a B-tree keyed by an integer,
called the ROWID. Any index, including the PRIMARY KEY index, is a
separate B-tree mapping index keys to ROWIDs. Tables without ROWIDs are
stored in a B-tree keyed by the primary key. Additional indexes (the
PRIMARY KEY index is implicitly built into the table) are stored as
B-trees mapping index keys to row primary keys.

This CL introduces a boolean --use-rowids flag to db_bench_sqlite. When
the flag is false (default), the schema of the test table includes
WITHOUT ROWID. The test table uses a primary key, so adding WITHOUT
ROWID to the schema reduces the number of B-trees used by the benchmark
from 2 to 1. This brings SQLite's disk usage closer to LevelDB.

When WITHOUT ROWID is used, SQLite fares better (than today) on
benchmarks with small (16-byte) keys, and worse on benchmarks with large
(100kb) keys.

Baseline results:
fillseq      :      21.310 micros/op;    5.2 MB/s
fillseqsync  :     146.377 micros/op;    0.8 MB/s (10000 ops)
fillseqbatch :       2.065 micros/op;   53.6 MB/s
fillrandom   :      34.767 micros/op;    3.2 MB/s
fillrandsync :     159.943 micros/op;    0.7 MB/s (10000 ops)
fillrandbatch :      15.055 micros/op;    7.3 MB/s
overwrite    :      43.660 micros/op;    2.5 MB/s
overwritebatch :      27.691 micros/op;    4.0 MB/s
readrandom   :      12.725 micros/op;
readseq      :       2.602 micros/op;   36.7 MB/s
fillrand100K :     606.333 micros/op;  157.3 MB/s (1000 ops)
fillseq100K  :     657.457 micros/op;  145.1 MB/s (1000 ops)
readseq      :      46.523 micros/op; 2049.9 MB/s
readrand100K :      54.943 micros/op;

Results after this CL:
fillseq      :      16.231 micros/op;    6.8 MB/s
fillseqsync  :     147.460 micros/op;    0.8 MB/s (10000 ops)
fillseqbatch :       2.294 micros/op;   48.2 MB/s
fillrandom   :      27.871 micros/op;    4.0 MB/s
fillrandsync :     141.979 micros/op;    0.8 MB/s (10000 ops)
fillrandbatch :      16.087 micros/op;    6.9 MB/s
overwrite    :      26.829 micros/op;    4.1 MB/s
overwritebatch :      19.014 micros/op;    5.8 MB/s
readrandom   :      11.657 micros/op;
readseq      :       0.155 micros/op;  615.0 MB/s
fillrand100K :     816.812 micros/op;  116.8 MB/s (1000 ops)
fillseq100K  :     754.689 micros/op;  126.4 MB/s (1000 ops)
readseq      :      47.112 micros/op; 2024.3 MB/s
readrand100K :     287.679 micros/op;

Results after this CL, with --use-rowids=1
fillseq      :      20.655 micros/op;    5.4 MB/s
fillseqsync  :     146.408 micros/op;    0.8 MB/s (10000 ops)
fillseqbatch :       2.045 micros/op;   54.1 MB/s
fillrandom   :      34.080 micros/op;    3.2 MB/s
fillrandsync :     154.582 micros/op;    0.7 MB/s (10000 ops)
fillrandbatch :      14.404 micros/op;    7.7 MB/s
overwrite    :      42.928 micros/op;    2.6 MB/s
overwritebatch :      27.829 micros/op;    4.0 MB/s
readrandom   :      12.835 micros/op;
readseq      :       2.483 micros/op;   38.4 MB/s
fillrand100K :     603.265 micros/op;  158.1 MB/s (1000 ops)
fillseq100K  :     662.473 micros/op;  144.0 MB/s (1000 ops)
readseq      :      45.478 micros/op; 2097.0 MB/s
readrand100K :      54.439 micros/op;
PiperOrigin-RevId: 283407101
2019-12-02 13:51:20 -08:00
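
A sketch of how a --use-rowids style flag could shape the schema string; the table and column names are illustrative, not the actual db_bench_sqlite3 code.

```
#include <string>

// Build the test table's schema, appending WITHOUT ROWID unless the
// benchmark is asked to keep the default rowid layout. A WITHOUT ROWID
// table is a single B-tree keyed by the primary key, instead of a rowid
// B-tree plus a separate PRIMARY KEY index B-tree.
std::string TestTableSchema(bool use_rowids) {
  std::string sql = "CREATE TABLE test (key BLOB PRIMARY KEY, value BLOB)";
  if (!use_rowids) {
    sql += " WITHOUT ROWID";
  }
  return sql + ";";
}
```
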
Victor Costan
c0d43142ff
Merge pull request #756 from pwnall/third_party_2
Fixup for adding the third_party/googletest submodule.
2019-12-02 13:50:43 -08:00
Victor Costan
e36b831851 Fixup for adding the third_party/googletest submodule. 2019-12-02 12:18:34 -08:00
leveldb Team
583a42b596 Internal change.
PiperOrigin-RevId: 282373286
2019-12-02 11:44:39 -08:00
Victor Costan
db8352187b Fixup for adding the third_party/googletest submodule. (#754) 2019-11-25 07:22:35 -08:00
Victor Costan
1c58902bdc Switch testing harness to googletest.
PiperOrigin-RevId: 281815695
2019-11-21 13:11:40 -08:00
Victor Costan
2c9c80bd53 Move CI to Visual Studio 2019.
PiperOrigin-RevId: 279785825
2019-11-11 12:07:40 -08:00
Victor Costan
ed72a3496e Allow different C/C++ standards when this is used as a subproject.
Inspired by https://github.com/google/snappy/pull/85

PiperOrigin-RevId: 279649967
2019-11-10 18:22:41 -08:00
Victor Costan
41c8d83914 Align CMake configuration with related projects.
PiperOrigin-RevId: 279238007
2019-11-07 22:44:08 -08:00
Victor Costan
0c40829872 Remove redundant PROJECT_SOURCE_DIR usage from CMake config.
Inspired by https://github.com/google/crc32c/pull/32

PiperOrigin-RevId: 278718726
2019-11-05 16:32:26 -08:00
Victor Costan
5abdf4c019 Fix installed target definition.
Using CMAKE_INSTALL_INCLUDEDIR before including GNUInstallDirs results
in a broken installation when CMAKE_INSTALL_PREFIX is a non-standard
directory.

Inspired by https://github.com/google/crc32c/pull/39

PiperOrigin-RevId: 278427974
2019-11-05 00:32:51 -08:00
Victor Costan
cf4d9ab23d Test CMake installation on Travis.
PiperOrigin-RevId: 278300591
2019-11-03 21:42:23 -08:00
Chris Mumford
95d0ba1cb0 Renamed local variable in DBImpl::Write.
The local variable `updates` in DBImpl::Write was hiding the
`updates` parameter. Renamed to avoid this conflict.

PiperOrigin-RevId: 277089971
2019-10-28 13:24:08 -07:00
Chris Mumford
657ba51429 Added return in Version::Get::State::Match to quiet warning.
Added unreached return at the end of Version::Get::State::Match
to stop this _incorrect_ warning:

    version_set.cc:376:5: warning: control reaches end of
    non-void function [-Wreturn-type]

This warning was being emitted when building with clang 6.0.1-10
and also emitted by lgtm.com when statically analyzing leveldb even
though all SaverState enumeration values were handled.

PiperOrigin-RevId: 272455474
2019-10-28 13:23:53 -07:00
Liao Tonglang
f2dae4e74a
fix typo in comment of LRUHandle
LRUHandle has no member "next_", fix it to "next" instead.
2019-10-22 18:46:43 +08:00
Henry Lee
f933ad1693
Remove unused variable kDelayMicros in env_test.cc 2019-10-15 10:48:33 +11:00
Chris Mumford
370d532a00 Using CMake's check_cxx_compiler_flag to check support for -Wthread-safety.
Previously used check_cxx_source_compiles to attempt a
build to determine support for clang thread safety checks.

This change is to support static analysis of the leveldb source by
lgtm.com (using Semmle). It failed to build with the following error:

```
[2019-07-04 22:29:58] [build] c++: error: unrecognized command line option ‘-Wthread-safety’; did you mean ‘-fthread-jumps’?
[2019-07-04 22:30:02] [build] make[2]: *** [CMakeFiles/leveldb.dir/build.make:66: CMakeFiles/leveldb.dir/db/builder.cc.o] Error 1
```

PiperOrigin-RevId: 272275528
2019-10-01 13:03:19 -07:00
Victor Costan
45ee61579c Update Travis CI configuration.
* Use Ubuntu 18.04 and LLVM 9 on Travis.
* Fix bash conditionals: [ a == b ] should be [ a = b ].

PiperOrigin-RevId: 271898719
2019-09-29 20:40:40 -07:00
Sanjay Ghemawat
60db170a43 Fix tsan problem in env_test.
PiperOrigin-RevId: 268265314
2019-09-29 20:40:29 -07:00
Victor Costan
21304d41f7 Merge pull request #698 from neal-zhu:master
PiperOrigin-RevId: 266001777
2019-08-28 15:05:07 -07:00
neal-zhu
5e921896ee drop fields in State that are duplicates of fields in Saver and fix typo 2019-08-28 23:43:34 +08:00
Chris Mumford
53e280b568 Simplify unlocking in DeleteObsoleteFiles.
A recent change (4cb80b7ddce6f) to DBImpl::DeleteObsoleteFiles
unlocked DBImpl::mutex_ while deleting files to allow for
greater concurrency. This change improves on the prior in
a few areas:

1. The table is evicted from the table cache before unlocking
   the mutex. This should only improve performance.
2. This implementation is slightly simpler, but at the cost of
   a bit more memory usage.
3. A comment adding more detail as to why the mutex is being
   unlocked and why it is safe to do so.

PiperOrigin-RevId: 253111645
2019-06-13 15:22:52 -07:00
Chris Mumford
046216a7ca Add "leveldb" subdirectory to public include paths.
The documentation (README.md and index.md) referred to the
public headers using an incorrect path - fixing.

PiperOrigin-RevId: 252922925
2019-06-13 15:20:12 -07:00
Chris Mumford
9ee91ac747 Ending sentences with periods in README.md.
This change was submitted in https://github.com/google/leveldb/pull/575
by @prajwalchalla.

This fixes issue #523.

PiperOrigin-RevId: 252912613
2019-06-13 13:48:54 -07:00
Victor Costan
e0d5f83a4f Align EnvPosix and EnvWindows.
Fixes #695.

PiperOrigin-RevId: 252895299
2019-06-13 13:43:09 -07:00
Victor Costan
69061b464a Disable exceptions and RTTI in CMake configuration.
PiperOrigin-RevId: 252842234
2019-06-13 13:41:24 -07:00
neal-zhu
107a75b62c cache Saver in State object 2019-06-12 07:05:00 +08:00
neal-zhu
76ca116276 fix bug(uninitialized options pointer in State) 2019-06-12 05:58:00 +08:00
neal-zhu
f668239bb2 remove TODO in Version::ForEachOverlapping 2019-06-11 20:33:18 +08:00
neal-zhu
177cd08629 format 2019-06-11 20:30:54 +08:00
neal-zhu
8fa7a937ee fix bug 2019-06-11 20:20:58 +08:00
neal-zhu
6a90bb91ee use ForEachOverlapping to impl Get 2019-06-11 19:16:49 +08:00
Chris Mumford
4cb80b7ddc Merge pull request #386 from ivanabc:master
PiperOrigin-RevId: 250702492
2019-05-30 09:56:34 -07:00
Victor Costan
72a38ff7f2 Replace "> >" with ">>"
PiperOrigin-RevId: 250383036
2019-05-30 09:55:43 -07:00
Victor Costan
863f185970 unsigned char -> uint8_t
PiperOrigin-RevId: 250309603
2019-05-28 15:44:32 -07:00
Victor Costan
a3b71c1ff6 Use GCC 9 on Travis CI
PiperOrigin-RevId: 249899128
2019-05-24 14:49:54 -07:00
Chris Mumford
ae49533210 Add explicit typecasts to avoid compiler warning.
Fixes issue #684.

PiperOrigin-RevId: 249531001
2019-05-24 14:49:37 -07:00
ivan
63d5315e1c
Merge branch 'master' into master 2019-05-23 14:02:04 +08:00
Chris Mumford
c00e177f36 Guard DBImpl::versions_ by mutex_.
mutex_ was already acquired before accessing DBImpl::versions_ in all
but one place: DBImpl::GetApproximateSizes. This change requires mutex_
to be held before accessing versions_.

PiperOrigin-RevId: 248390814
2019-05-16 12:07:21 -07:00
Chris Mumford
1d0b101165 Converted two for-loops to while-loops.
Converted `for (;<condition>;)` to `while (<condition>)`.

PiperOrigin-RevId: 247950510
2019-05-13 13:51:11 -07:00
Chris Mumford
28e6d238be Switch to using C++ 11 override specifier.
PiperOrigin-RevId: 247491163
2019-05-09 14:11:06 -07:00
Chris Mumford
85cd40d108 Added unit test for InternalKey::DecodeFrom with empty string.
PiperOrigin-RevId: 247483339
2019-05-09 14:10:55 -07:00
Chris Mumford
1aae5c9f29 Merge pull request #411 from proller:assert1
PiperOrigin-RevId: 247424040
2019-05-09 08:37:49 -07:00
Chris Mumford
b7b86baec9 Using std::ostringstream in key DebugString.
Switching from snprintf to std::ostringstream eliminates
cast warning for (unsigned long long).

PiperOrigin-RevId: 247326681
2019-05-08 17:36:35 -07:00
Chris Mumford
3e6c000e18 Merge pull request #457 from jellor:patch-2
PiperOrigin-RevId: 247261470
2019-05-08 17:36:22 -07:00
果冻
1d94fe2f4d
Merge branch 'master' into patch-2 2019-05-09 00:08:49 +08:00
Victor Costan
27dc99fb26 Fix EnvPosix tests on Travis CI.
The previous attempt to have EnvPosix use O_CLOEXEC (close-on-exec) when opening file descriptors added tests that relied on procfs, which is Linux-specific. These tests failed on macOS. Unfortunately, the test failures were not caught due to a (since fixed) error in our Travis CI configuration.

This CL re-structures the tests to only rely on POSIX features. Since there is no POSIX-compliant way to get a file name/path out of a file descriptor, this CL breaks up the O_CLOEXEC test into multiple tests, where each Env method that creates an FD gets its own test. This is intended to make it easier to find and fix errors in Env implementations.

This CL also fixes the implementation of NewLogger() to use O_CLOEXEC on macOS. The current implementation passes "we" to fopen(), but the macOS standard C library does not implement the "e" flag yet.

PiperOrigin-RevId: 247088953
2019-05-07 14:20:31 -07:00
Chris Mumford
9521545b06 Formatting changes for prior O_CLOEXEC fix.
Two minor corrections to correct the 900f7d37eb322 commit
to conform to the Google C++ style guide.

PiperOrigin-RevId: 246907647
2019-05-06 15:34:20 -07:00
Chris Mumford
900f7d37eb Merge pull request #624 from adam-azarchs:master
PiperOrigin-RevId: 246903086
2019-05-06 15:00:48 -07:00
Victor Costan
a7528a5d2b Clean up util/coding.{h,cc}.
1) Inline EncodeFixed{32,64}(). They emit single machine instructions on 64-bit processors.
2) Remove size narrowing compiler warnings from DecodeFixed{32,64}().
3) Add comments explaining the current state of optimizations in compilers we care about.
4) Change C-style includes, like <stdint.h>, to C++ style, like <cstdint>.
5) memcpy -> std::memcpy.

The optimization comments are based on https://godbolt.org/z/RdIqS1. The missed optimization opportunities in clang have been reported as https://bugs.llvm.org/show_bug.cgi?id=41761

The change does not have significant impact on benchmarks. Results below.

LevelDB:    version 1.22
Date:       Mon May  6 10:42:18 2019
CPU:        72 * Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
CPUCache:   25344 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)

With change
------------------------------------------------
fillseq      :       2.327 micros/op;   47.5 MB/s
fillsync     :    4185.526 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       3.662 micros/op;   30.2 MB/s
overwrite    :       4.261 micros/op;   26.0 MB/s
readrandom   :       4.239 micros/op; (1000000 of 1000000 found)
readrandom   :       3.649 micros/op; (1000000 of 1000000 found)
readseq      :       0.174 micros/op;  636.7 MB/s
readreverse  :       0.271 micros/op;  408.7 MB/s
compact      :  570495.000 micros/op;
readrandom   :       2.735 micros/op; (1000000 of 1000000 found)
readseq      :       0.118 micros/op;  937.3 MB/s
readreverse  :       0.190 micros/op;  583.7 MB/s
fill100K     :     860.164 micros/op;  110.9 MB/s (1000 ops)
crc32c       :       1.131 micros/op; 3455.2 MB/s (4K per op)
snappycomp   :       3.034 micros/op; 1287.5 MB/s (output: 55.1%)
snappyuncomp :       0.544 micros/op; 7176.0 MB/s

Baseline
------------------------------------------------
fillseq      :       2.365 micros/op;   46.8 MB/s
fillsync     :    4240.165 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       3.244 micros/op;   34.1 MB/s
overwrite    :       4.153 micros/op;   26.6 MB/s
readrandom   :       4.698 micros/op; (1000000 of 1000000 found)
readrandom   :       4.065 micros/op; (1000000 of 1000000 found)
readseq      :       0.192 micros/op;  576.3 MB/s
readreverse  :       0.286 micros/op;  386.7 MB/s
compact      :  635979.000 micros/op;
readrandom   :       3.264 micros/op; (1000000 of 1000000 found)
readseq      :       0.169 micros/op;  652.8 MB/s
readreverse  :       0.213 micros/op;  519.5 MB/s
fill100K     :    1055.367 micros/op;   90.4 MB/s (1000 ops)
crc32c       :       1.353 micros/op; 2887.3 MB/s (4K per op)
snappycomp   :       3.036 micros/op; 1286.7 MB/s (output: 55.1%)
snappyuncomp :       0.540 micros/op; 7238.6 MB/s
PiperOrigin-RevId: 246856811
2019-05-06 11:23:02 -07:00
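
A simplified rendering of the fixed-width helpers discussed in item 1 above (little-endian byte order), close in spirit to util/coding.h but trimmed for illustration.

```
#include <cstdint>
#include <cstring>

inline void EncodeFixed32(char* dst, uint32_t value) {
  uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
  buffer[0] = static_cast<uint8_t>(value);
  buffer[1] = static_cast<uint8_t>(value >> 8);
  buffer[2] = static_cast<uint8_t>(value >> 16);
  buffer[3] = static_cast<uint8_t>(value >> 24);
}

inline uint32_t DecodeFixed32(const char* ptr) {
  const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
  // Recent Clang and GCC recognize this pattern and compile it to a single
  // load on little-endian targets, which is the point of the optimization
  // comments mentioned in the commit.
  return (static_cast<uint32_t>(buffer[0])) |
         (static_cast<uint32_t>(buffer[1]) << 8) |
         (static_cast<uint32_t>(buffer[2]) << 16) |
         (static_cast<uint32_t>(buffer[3]) << 24);
}
```
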
Chris Mumford
142035edd4 Initialize Stats::start_ before first use in Stats::Start().
Avoids a use before initialization error. This fixes issue #676.

PiperOrigin-RevId: 246855204
2019-05-06 10:52:16 -07:00
Victor Costan
e22b1cec6e Merge pull request #365 from allangj:c-strict-prototypes
PiperOrigin-RevId: 246848402
2019-05-06 10:29:40 -07:00
allangj
cd1ec032cd Add argument definition for void C functions.
Allows the use of c.h in projects with -Wstrict-prototypes.
Modify CMakeLists.txt to include -Wstrict-prototypes.
2019-05-05 18:13:39 -06:00
Victor Costan
4bd052d7e8 Consolidate benchmark code to benchmarks/.
Currently, the benchmark used to assess leveldb changes lives in db/. The codebase also contains two benchmarks against other database engines in doc/bench/. Moving all the benchmarks in one place opens up the way for extracting common code.

PiperOrigin-RevId: 246737541
2019-05-05 12:59:23 -07:00
Victor Costan
506b1722ef Convert missed virtual -> override in db_test.cc.
PiperOrigin-RevId: 246680419
2019-05-04 18:05:53 -07:00
Victor Costan
24424a1ef2 Style cleanup.
1) Convert iterator-based for loops to C++11 foreach loops.
2) Convert "void operator=" to "T& operator=".
3) Switch copy operators from private to publicly deleted (see the sketch after this entry).
4) Switch from empty ctors / dtors to "= default" where appropriate.

PiperOrigin-RevId: 246679195
2019-05-04 17:42:20 -07:00
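
A tiny sketch of item 3 above, with an illustrative class name.

```
// Before: copy operations declared private and left unimplemented.
// After: publicly deleted, which gives clearer diagnostics at call sites.
class NonCopyable {
 public:
  NonCopyable() = default;

  NonCopyable(const NonCopyable&) = delete;
  NonCopyable& operator=(const NonCopyable&) = delete;
};
```
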
Victor Costan
9a56c49ed4 Merge pull request #679 from smartxworks:optimize-readseq
PiperOrigin-RevId: 246668103
2019-05-04 16:53:17 -07:00
Victor Costan
abf441b657 Merge pull request #278 from wankai:master
PiperOrigin-RevId: 246591372
2019-05-04 02:12:27 -07:00
Chris Mumford
78b39d68c1 Bump the version number from 1.21 to 1.22.
PiperOrigin-RevId: 246558281
2019-05-03 13:24:26 -07:00
Chris Mumford
9bd23c7676 Correct class/structure declaration order.
1. Correct the class/struct declaration order to be IAW
   the Google C++ style guide[1].
2. For non-copyable classes, switched from non-implemented
   private methods to explicitly deleted[2] methods.
3. Minor const and member initialization fixes.

[1] https://google.github.io/styleguide/cppguide.html#Declaration_Order
[2] http://eel.is/c++draft/dcl.fct.def.delete

PiperOrigin-RevId: 246521844
2019-05-03 09:48:57 -07:00
Chris Mumford
c784d63b93 Moved port/README to port/README.md.
Easier to read on sites supporting Markdown (e.g., GitHub).

PiperOrigin-RevId: 246385089
2019-05-02 19:05:04 -07:00
Chris Mumford
297e66afc1 Format all files IAW the Google C++ Style Guide.
Use clang-format to correct formatting to be in agreement with the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). Doing this simplifies the process of accepting changes. Also fixed a few warnings flagged by clang-tidy.

PiperOrigin-RevId: 246350737
2019-05-02 19:04:50 -07:00
Victor Costan
3724030179 Update Travis CI configuration.
The Travis configuration:
1) Installs recent versions of clang and GCC.
2) Sets up the environment so that CMake picks up the installed
   compilers. Previously, the pre-installed clang compiler was used
   instead.
3) Requests a modern macOS image that has all the headers needed by GCC.

The CL also removes now-unnecessary old workarounds from the
Travis configuration.

PiperOrigin-RevId: 245831188
2019-04-29 15:38:15 -07:00
Kyle Zhang
d3d1c8a0f4 don't check current key in DBIter::Next()
When iter_ is pointing to the current key, we can safely move to the next
key without checking the current key again, which is unnecessary.

Benchmark shows that 'readseq' has about 8% performance improvement.

Without patch:

>./db_bench --benchmarks=readseq --num=$((4<<20)) --db=/tmp/db --use_existing_db=1
LevelDB:    version 1.21
Date:       Thu Apr 25 09:37:21 2019
CPU:        32 * Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
CPUCache:   20480 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    4194304
RawSize:    464.0 MB (estimated)
FileSize:   264.0 MB (estimated)
------------------------------------------------
readseq      :       0.196 micros/op;  565.7 MB/s

With patch:

>./db_bench --benchmarks=readseq --num=$((4<<20)) --db=/tmp/db --use_existing_db=1
LevelDB:    version 1.21
Date:       Thu Apr 25 09:38:20 2019
CPU:        32 * Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
CPUCache:   20480 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    4194304
RawSize:    464.0 MB (estimated)
FileSize:   264.0 MB (estimated)
------------------------------------------------
readseq      :       0.181 micros/op;  612.3 MB/s

Signed-off-by: Kyle Zhang <kyle@smartx.com>
2019-04-25 09:54:05 +08:00
leveldb Team
3dc9202f78 [leveldb] Specifically export the WriteBatch::Handler inner class for Windows link
Windows linking visibility in shared libraries requires that inner classes are
specifically exported as visible, even if the containing class is exported.

PiperOrigin-RevId: 244886019
2019-04-23 11:24:04 -07:00
Chris Mumford
2ccb45c33a Check for possibly invalid offset in test.
Fix a possible array bounds offset issue flagged in
issue #668. Not the source of any known bug, but will
silence any static analyzers.

PiperOrigin-RevId: 243697659
2019-04-23 11:23:51 -07:00
Chris Mumford
7b11745190 Changed Windows specific highlighting from bash to cmd.
This makes the syntax highlighting a little nicer on GitHub.

PiperOrigin-RevId: 243426806
2019-04-13 09:20:48 -07:00
Chris Mumford
2f008ac19e Initialize class members to default values in constructors.
There were a few members which were identified to have been left
uninitialized in some constructors. These were very likely to
have been set before being used, otherwise the ASan tests would
have caught them, but it is still good practice to have them
initialized. This addresses some items reported in issue #668.

PiperOrigin-RevId: 243370145
2019-04-12 18:50:15 -07:00
Chris Mumford
ffabb1ae86 Merge pull request #665 from cheng-chang:coding
PiperOrigin-RevId: 243316002
2019-04-12 13:22:46 -07:00
Chris Mumford
7da571cf2b Merge pull request #669 from pavel-pimenov:fix-readme-windows-mkdir
PiperOrigin-RevId: 243314673
2019-04-12 13:22:36 -07:00
Chris Mumford
df4a323aaf Merge pull request #472 from zhoudayang:patch-1
PiperOrigin-RevId: 243314507
2019-04-12 13:22:24 -07:00
Chris Mumford
5a2a472741 Fixed missing std namespaces and make_unique.
cout/endl were missing the std namespace. Also, std::make_unique,
which is part of C++14, was used inadvertently; only C++11
is currently supported.

PiperOrigin-RevId: 243221310
2019-04-12 01:11:25 -07:00
Chris Mumford
08e771901f Simplify issue320_test.
Use std::unique_ptr to simplify issue320_test.

PiperOrigin-RevId: 243190799
2019-04-12 00:17:13 -07:00
Chris Mumford
65e86f75ea Fix formatting of recent snapshot compaction fix.
Fix variable names, line lengths, namespace use, and a few other
minor issues to conform to Google C++ style guide.

PiperOrigin-RevId: 243187729
2019-04-12 00:17:03 -07:00
Chris Mumford
7711e76766 Merge pull request #339 from richcole-at-amazon:master
PiperOrigin-RevId: 243156105
2019-04-12 00:16:52 -07:00
Chris Mumford
71ed7c401e Fixed typo in comment in version_set.h.
Flagged by presubmit check.

PiperOrigin-RevId: 243118632
2019-04-12 00:16:41 -07:00
Victor Costan
09fa8868db Align version/soversion CMake setup closer with other repositories.
PiperOrigin-RevId: 241432456
2019-04-01 17:37:45 -07:00
Richard Cole
20fb601aa9 Fix snapshot compaction bug
Closes google/leveldb#320

During compaction it was possible that records from a block b1=(l1,u1)
would be pushed down from level i to level i+1. If there is a block
b2=(l2,u2) at level i with k1 = user_key(u1) = user_key(l2) then
a subsequent search for k1 will yield the record l2 which has a smaller
sequence number than u1 because the sort order for records sorts
increasing by user key but decreasing by sequence number.

This change adds a call to a new function AddBoundaryInputs to
SetupOtherInputs. AddBoundaryInputs searches for a block b2 matching the
criteria above and adds it to the set of files to be compacted. Whenever
AddBoundaryInputs is called it is important that the compaction fileset
in level i+1 (known as c->inputs_[1] in the code) be recomputed. Each
call to AddBoundaryInputs is followed by a call to GetOverlappingInputs.

SetupOtherInputs is called on both manual and automated compaction
passes. It is called for both level zero and for levels greater than 0.

The original change posted in https://github.com/google/leveldb/pull/339
has been modified to also include changes made by Chris Mumford <cmumford@google.com>
in 4b72cb14f8

  1. Releasing snapshots during test cleanup to avoid
     memory leak warnings.
  2. Refactored test to use testutil.h to be in line
     with other issue tests and to create the test
     database in the correct temporary location.
  3. Added copyright banner.

  Otherwise, just minor formatting and limiting character
  width to 80 characters.

Additionally the change was rebased on top of current master and
changes previously made to the Makefile were ported to the
CMakeLists.txt.

Testing Done:

  A test program (issue320_test) was constructed that performs mutations
  while snapshots are active. issue320_test fails without this bug fix
  after 64k writes. It passes with this bug fix. It was run with 200M
  writes and passed.

  Unit tests were written for the new function that was added to the
  code. Make test was run and seen to pass.

Signed-off-by: Richard Cole <richcole@amazon.com>
2019-04-01 13:10:09 -07:00
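
A conceptual sketch of the boundary-extension idea; the real AddBoundaryInputs in db/version_set.cc works on FileMetaData and internal keys (and compares sequence numbers), so treat this as a model, not the patch.

```
#include <algorithm>
#include <cstddef>
#include <set>
#include <string>
#include <vector>

// Illustrative stand-in for a table file's user-key range.
struct FileRange {
  std::string smallest_user_key;
  std::string largest_user_key;
};

// Keep pulling in files whose smallest user key equals the largest user
// key of the current compaction set (given as indices into level_files),
// so a run of one user key is never split between output levels.
void AddBoundaryInputs(const std::vector<FileRange>& level_files,
                       std::set<std::size_t>* inputs) {
  if (inputs->empty()) return;
  bool grew = true;
  while (grew) {
    grew = false;
    std::string boundary;
    for (std::size_t i : *inputs) {
      boundary = std::max(boundary, level_files[i].largest_user_key);
    }
    for (std::size_t i = 0; i < level_files.size(); ++i) {
      if (inputs->count(i) == 0 &&
          level_files[i].smallest_user_key == boundary) {
        inputs->insert(i);
        grew = true;
      }
    }
  }
}
```
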
leveldb Team
37300aa54b Restore soname versioning with CMake build
Before:

$ readelf -d build/libleveldb.so | grep soname
 0x000000000000000e (SONAME)             Library soname: [libleveldb.so]

After:
$ readelf -d build/libleveldb.so | grep soname
 0x000000000000000e (SONAME)             Library soname: [libleveldb.so.1]

This matches the soname from v1.20.

PiperOrigin-RevId: 241334113
2019-04-01 09:04:40 -07:00
Pavel Pimenov
952be04df6 Fix mkdir (windows) 2019-03-31 10:46:31 +03:00
Chris Mumford
56178ddaf4 Update the version to 1.21 in preparation for a new release.
PiperOrigin-RevId: 241053616
2019-03-29 14:37:39 -07:00
leveldb Team
35619d248d Project import generated by Copybara.
PiperOrigin-RevId: 241045448
2019-03-29 13:59:02 -07:00
costan
416344de2f leveldb: Register in copybara whitelist.
The documentation recommends modifying the whitelist after evaluating Copybara. However, evaluating requires significant workarounds without the whitelist entry. So, this CL adds leveldb to the whitelist early.

leveldb is currently open sourced to https://github.com/google/leveldb using MOE.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=240786286
2019-03-29 11:22:22 -07:00
costan
da94ac67e9 leveldb: Minor cleanup in ports.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=240619768
2019-03-29 11:22:22 -07:00
costan
bd24b96306 leveldb: Silence unused argument warnings in MSVC.
This CL uses a well-known workaround for silencing warnings about arguments that may be unused, depending on the build configuration. The silenced warnings were responsible for a large amount of noise in the MSVC build on Windows.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=240357359
2019-03-29 11:22:22 -07:00
costan
6188a54ce9 leveldb: Add tests for empty keys and values.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=239695281
2019-03-29 11:22:22 -07:00
Cheng Chang
cf1b5f4732 Remove unnecessary bit operation. 2019-03-22 17:32:20 +08:00
Felipe Oliveira Carvalho
7035af5fc3 Two small fixes for the Windows implementation (#661)
* Check if NOMINMAX is defined before defining it

* Pass char* for a %s format in a snprintf call
2019-03-21 08:45:04 -07:00
usurai
6571279d6d fix a typo in the comment of skiplist_test.cc (#664) 2019-03-21 07:58:29 -07:00
costan
15e2278966 Use override consistently in leveldb::test::ErrorEnv.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=239453565
2019-03-20 13:57:32 -07:00
cmumford
ea49b27d06 Switch corruption_test to use InMemEnv.
This change switches corruption_test, which previously used direct file
I/O to corrupt table files for open databases, to use InMemEnv. Using an
Env eliminates some platform dependencies thus simplifying the tests.

Also removed EnvWindowsTestHelper::RelaxFilePermissions().  This was
only added because the Windows Env opens files for exclusive access.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=239305329
2019-03-20 13:57:03 -07:00
cmumford
ce399ac28a Always copy bytes to scratch buffer when reading w/MemEnv.
FileState::Read (used by InMemoryEnv) creates a new Slice when reading.
If all the bytes for the read are in the first block then the Slice
points to the private block data in FileState and is not copied to the
|scratch| buffer.

A recent change allows files in InMemEnv to be overwritten which deletes
these blocks and in this case can result in a Slice having a dangling
pointer. This change fixes this bug by always copying to the |scratch|
buffer.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=239301930
2019-03-20 13:56:39 -07:00
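
A sketch of the always-copy behavior described above, with an illustrative Slice and Read signature rather than the actual helpers/memenv code.

```
#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>

// Illustrative result type; the fix is to always copy into the caller's
// |scratch| buffer instead of handing back a pointer into the file's own
// block storage, which dangles if the file is later overwritten.
struct Slice {
  const char* data;
  size_t size;
};

Slice ReadIntoScratch(const std::string& block, size_t offset, size_t n,
                      char* scratch) {
  assert(offset + n <= block.size());
  std::memcpy(scratch, block.data() + offset, n);
  Slice result = {scratch, n};
  return result;
}
```
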
costan
201f77d137 Inline defaults in options.
This CL moves default values for
leveldb::{Options,ReadOptions,WriteOptions} from constructors to member
declarations, and removes now-redundant comments stating the defaults.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=239271242
2019-03-20 13:56:22 -07:00
cmumford
9ce30510d4 Deleted dangling reference to deleted atomic_pointer.h.
Forgot one reference to atomic_pointer.h in CMakeLists.txt
from prior CL.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237870915
2019-03-11 13:41:25 -07:00
costan
7d8e41e49b leveldb: Replace AtomicPointer with std::atomic.
This CL removes AtomicPointer from leveldb's port interface. Its usage is replaced with std::atomic<> from the C++11 standard library.

AtomicPointer was used to wrap flags, numbers, and pointers, so its instances are replaced with std::atomic<bool>, std::atomic<int>, std::atomic<size_t> and std::atomic<Node*>.

This CL does not revise the memory ordering. AtomicPointer's methods are replaced mechanically with their std::atomic equivalents, even when the underlying usage is incorrect. (Example: DBImpl::has_imm_ is written using release stores, even though it is always read using relaxed ordering.) Revising the memory ordering is left for future CLs.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237865146
2019-03-11 13:41:25 -07:00
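
A tiny sketch of the mechanical substitution, using the has_imm_ example the message itself mentions; the surrounding struct is illustrative, not DBImpl.

```
#include <atomic>

struct DBImplSketch {
  // Before: port::AtomicPointer has_imm_;  // reused for flags/pointers
  // After: a typed std::atomic per use.
  std::atomic<bool> has_imm_{false};

  void MarkImmutableMemtable() {
    // The substitution is mechanical: the existing release store is kept
    // even though the flag is read elsewhere with relaxed ordering.
    has_imm_.store(true, std::memory_order_release);
  }

  bool HasImmutableMemtable() const {
    return has_imm_.load(std::memory_order_relaxed);
  }
};
```
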
cmumford
dd906262fd Make InMemoryEnv more consistent with filesystem based Env's.
Env's (like the POSIX Env) which use an actual filesystem behave
differently than InMemoryEnv with regards to writing data to a currently
open file.

InMemoryEnv::NewWritableFile would previously delete that file,
if it was open, before creating a new file so any previously
open file would be unlinked. This change truncates an open file
so that subsequent reads will read that new data.

This should have no impact on leveldb as it never has the same
file open for both read and write access. This change is only
being made for tests (specifically a future change to corruption_test)
to allow them to be decoupled from the underlying platform and
allow them to use an Env.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237858231
2019-03-11 13:41:25 -07:00
costan
cf1d1ab255 leveldb: Remove unused file port/win/stdint.h.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237832823
2019-03-11 13:41:25 -07:00
Dimitris Apostolou
a20508dc6a Fix typo (#565) 2019-03-11 10:36:11 -07:00
costan
04470825ac Add AppVeyor (Windows CI) badge to README.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237295321
2019-03-07 15:59:36 -08:00
costan
ed76289b25 Align windows_logger with posix_logger.
Fixes GitHub issue #657.

This CL also makes the Windows CI green.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=237255887
2019-03-07 10:04:01 -08:00
costan
808e59ec6a Improve CI configuration.
This CL fixes the following issues:
* The Travis CI had the ctest invocation followed by a ";", so non-zero
  exit codes (indicating test failures) did not cause the build to fail.
* The AppVeyor CI had the ctest invocation followed by a ";", causing an
  error on Windows, where "&" plays the role of ";" [1].

The Windows CI (AppVeyor) will still be red after this CL, as some of
the tests are failing. However, this CL is a step forward, as it gets us
from failing to start tests to running tests and recording success/error
states.

[1] https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-xp/bb490954(v=technet.10)#using-multiple-commands-and-conditional-processing-symbols

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=236765633
2019-03-04 18:30:09 -08:00
cmumford
c69d33b0ec Added native support for Windows.
This change adds a native Windows port (port_windows.h) and a
Windows Env (WindowsEnv).

Note1: "small" is defined when including <Windows.h> so some
parameters were renamed to avoid conflict.

Note2: leveldb::Env defines the method: "DeleteFile" which is
also a macro defined when including <Windows.h>. The solution
was to ensure this macro is defined in env.h which forces
the function, when compiled, to be either DeleteFileA or
DeleteFileW when building for MBCS or UNICODE respectively.

This resolves #519 on GitHub.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=236364778
2019-03-01 18:00:35 -08:00
Adam Azarchs
75fceae700 Add O_CLOEXEC to open calls.
This prevents file descriptors from leaking to child processes.

When compiled for older (pre-2.6.23) kernels which lack support for
O_CLOEXEC there is no change in behavior.  With newer kernels, child
processes will no longer inherit leveldb's file handles, which
reduces the chances of accidentally corrupting the database.

Fixes https://github.com/google/leveldb/issues/623
2019-02-22 13:00:56 -08:00
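
A hedged sketch of the flag usage on POSIX; the fallback #define is the usual portability idiom, and the actual patch's build-time detection is not shown in the message above.

```
#include <fcntl.h>

#if !defined(O_CLOEXEC)
// Pre-2.6.23 kernels: the flag is unavailable, so behavior is unchanged.
#define O_CLOEXEC 0
#endif

// Child processes spawned after this call will not inherit the descriptor,
// so they cannot accidentally write to (and corrupt) the database files.
int OpenForRead(const char* fname) {
  return ::open(fname, O_RDONLY | O_CLOEXEC);
}
```
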
caodhuan
77e9dfad9f add:compact_pointers_ should be clear when Clear() called 2019-01-22 10:10:40 +08:00
costan
fe4494804f leveldb: Make WriteBatch::ApproximateSize() const.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=229395810
2019-01-15 18:43:13 +00:00
costan
296de8d5b8 leveldb: Fix PosixWritableFile::Sync() on Apple systems.
Apple doesn't follow POSIX specifications for fsync(). Instead, fsync() guarantees to flush the buffer cache to the device, which means the data will survive kernel panics, but may not survive power outages. Applications that need stronger guarantees (like databases) need to use fcntl(F_FULLFSYNC).

This CL switches PosixWritableFile::Sync() to get the stronger guarantees on Apple systems. The improved implementation follows the same principles as SQLite [1] and node.js [2].

Research for the fcntl() to fsync() fallback strategy:

Apple's released source code at https://opensource.apple.com/ shows at least three different error codes being returned when a filesystem does not support F_FULLFSYNC.

fcntl() is implemented in xnu-4903.221.2 in bsd/kern/kern_descrip.c, where it delegates to fcntl_nocancel(). The documentation for fcntl_nocancel() mentions error codes for some operations, but does not include F_FULLFSYNC. The F_FULLFSYNC branch in fcntl_nocancel() calls VNOP_IOCTL(_, F_FULLFSYNC, NULL, 0, _), whose return value sets the error
code.

VNOP_IOCTL() is implemented in bsd/vfs/kpi_vfs.c and calls the ioctl function in the vnode's operation vector. The per-filesystem function names follow the pattern <filesystem>_vnop_ioctl() for all the instances in opensource code: {hfs,msdosfs,nfs,ntfs,smbfs,webdav,zfs}_vnop_ioctl().

hfs-407.30.1, msdosfs-229.200.3, and nfs in xnu-4903.221.2 handle F_FULLFSYNC. ntfs-94.200.1 and smb-759.40.1 do not handle F_FULLFSYNC, and the default branch returns ENOTSUP. webdav-380.200.1 also does not handle F_FULLFSYNC, but the default branch returns EINVAL. zfs-59 also does not handle F_FULLFSYNC, and its default branch returns ENOTTY.

From a different angle, Apple's ntfs-94.200.1 includes utility code that uses fcntl(F_FULLFSYNC) and falls back to fsync() just like we do, supporting the hypothesis that there is no good way to detect lack of F_FULLFSYNC support. Also, Apple's fcntl() man page [3] does not mention a way to detect lack of F_FULLFSYNC support.
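
A hedged sketch of that fallback strategy (simplified, following the same idea as [1] and [2] below rather than the verbatim leveldb code):

    #include <fcntl.h>
    #include <unistd.h>

    // Ask for a full flush to the storage device; if the filesystem does not
    // support F_FULLFSYNC (the error code varies by filesystem, as noted
    // above), fall back to plain fsync().
    bool SyncFd(int fd) {
    #if defined(F_FULLFSYNC)
      if (::fcntl(fd, F_FULLFSYNC) == 0) {
        return true;
      }
    #endif
      return ::fsync(fd) == 0;
    }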

[1] https://www.sqlite.org/src/doc/trunk/src/os_unix.c
[2] https://github.com/libuv/libuv/blob/master/src/unix/fs.c
[3] https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fcntl.2.html
Tested:
    https://travis-ci.org/pwnall/leveldb/builds/477318498
    TAP global presubmit

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=228593729
2019-01-09 14:58:22 -08:00
costan
b70493ca85 Fix fdatasync() feature detection in opensource build.
The CMake feature-detection code used check_symbol_exists(), which
invokes the C compiler. However, some glibc versions don't expose the
fdatasync() declaration when compiled with -std=c11, but do expose it
when compiled with -std=c++11. This most likely comes down to how
_POSIX_SOURCE is defined -- it needs to be >= 201112L for <unistd.h> to
expose fdatasync().

This CL switches to check_cxx_symbol_exists(), which uses the C++
compiler. Asides from fixing the problem above, this is the right thing
to do, because we use <unistd.h> in env_posix.cc, which is compiled with
the C++ compiler.

This CL also fixes a previously introduced inconsistency, where the
macro indicating the fdatasync() feature detection result was referred
to as HAVE_FDATASYNC and HAVE_FUNC_FDATASYNC. The former appears to be
used in other libraries, so this CL switches all our references to
HAVE_FDATASYNC.

Fixes https://github.com/google/leveldb/issues/629
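
For illustration, how the detection macro is typically consumed on the C++ side (a sketch, assuming the CMake check defines HAVE_FDATASYNC to 0 or 1):

    #include <unistd.h>

    bool SyncContents(int fd) {
    #if HAVE_FDATASYNC
      // fdatasync() skips flushing unneeded metadata when the build-time
      // check found it in <unistd.h>.
      return ::fdatasync(fd) == 0;
    #else
      return ::fsync(fd) == 0;
    #endif
    }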

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=228392612
2019-01-08 13:58:52 -08:00
cmumford
af7abf06ea Add back space to POSIX Logger.
The space in between the header and log message was mistakenly omitted
in a prior commit. Re-adding.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=228202737
2019-01-07 22:03:34 -08:00
costan
58d70545af Update Travis CI configuration.
The Travis CI configuration updates reflect the following changes:
* Container-based builds (sudo: false) have been removed.
  https://changelog.travis-ci.com/the-container-based-build-environment-is-fully-deprecated-84517
* Ubuntu Xenial (16.04) is available as a base image.
  https://blog.travis-ci.com/2018-11-08-xenial-release
* Homebrew now has a dedicated DSL.
  https://docs.travis-ci.com/user/installing-dependencies/#installing-packages-on-os-x

To take full advantage of VM resources, CI builds now use Ninja
https://ninja-build.org/ instead of Make.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=227611641
2019-01-02 20:40:08 -08:00
costan
1cb3840881 Clean up env_posix.cc.
General cleanup principles:
* Use override when applicable.
* Remove static when redundant (methods and  globals in anonymous
  namespaces).
* Use const on class members where possible.
* Standardize on "status" for Status local variables.
* Renames where clarity can be improved.
* Qualify standard library names with std:: when possible, to
  distinguish from POSIX names.
* Qualify POSIX names with the global namespace (::) when possible, to
  distinguish from standard library names.

This also refactors the background thread synchronization logic so that
it's statically analyzable.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=219212089
2018-10-29 16:40:15 -07:00
costan
a7dc502e9f Rework once initialization in env_posix.cc.
C++11 guarantees thread-safe initialization of static variables inside
functions. This is a more restricted form of std::call_once or
pthread_once_t (e.g., single call site), so the compiler might be able
to generate better code [1]. Equally important, having less
platform-dependent code in env_posix.cc makes it easier to port to other
platforms.

Due to the change above, this CL introduced a new approach for storing
the singleton PosixEnv instance returned by Env::Default(). The new
approach avoids a dynamic memory allocation, which eliminates the false
positive from LeakSanitizer reported in
https://github.com/google/leveldb/issues/539 and
https://github.com/google/leveldb/issues/113

[1] https://stackoverflow.com/a/27206650/
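
A minimal sketch of the function-local static pattern described above (the names are illustrative; the real PosixEnv has many more members):

    class Env {
     public:
      virtual ~Env() = default;
    };

    class PosixEnv : public Env {
      // ... file, locking, and scheduling members elided ...
    };

    Env* DefaultEnvSketch() {
      // C++11 runs this initialization exactly once, even with concurrent
      // callers, so no pthread_once-style machinery is needed. Because the
      // instance lives in static storage rather than on the heap, there is no
      // allocation for LeakSanitizer to report. (The committed code also keeps
      // the instance from ever being destroyed; that detail is omitted here.)
      static PosixEnv default_env;
      return &default_env;
    }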

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=214293129
2018-09-24 13:37:31 -07:00
costan
c43565dd39 C++11 cleanup for util/mutexlock.h.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=213398583
2018-09-24 13:37:01 -07:00
costan
0145a94ab6 Update .gitignore.
The version in the repository covers the Makefile build. The new version
is simpler and contains entries relevant to the CMake build.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=212661504
2018-09-24 13:36:30 -07:00
costan
73d5834ece Rework threading in env_posix.cc.
This commit replaces the use of pthreads in the POSIX port with std::thread
and port::Mutex + port::CondVar. This is intended to simplify porting
the env to a different platform.

The indirect use of pthreads in PosixLogger is replaced with
std::thread::id(), based on an approach prototyped by @cmumfordx@.

The pthreads dependency in CMakeFiles is not removed, because some C++
standard library implementations must be linked against pthreads for
std::thread use. Figuring out this dependency is left for future work.

Switching away from pthreads also fixes
https://github.com/google/leveldb/issues/381
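
A hedged sketch of the std::thread-based background work queue this describes (simplified; std::mutex and std::condition_variable stand in for port::Mutex and port::CondVar):

    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <thread>

    class BackgroundRunner {
     public:
      void Schedule(void (*fn)(void*), void* arg) {
        std::lock_guard<std::mutex> lock(mu_);
        if (!started_) {
          started_ = true;
          std::thread(&BackgroundRunner::Loop, this).detach();
        }
        queue_.push(Item{fn, arg});
        cv_.notify_one();
      }

     private:
      struct Item {
        void (*fn)(void*);
        void* arg;
      };

      void Loop() {
        while (true) {
          std::unique_lock<std::mutex> lock(mu_);
          cv_.wait(lock, [this] { return !queue_.empty(); });
          Item item = queue_.front();
          queue_.pop();
          lock.unlock();
          item.fn(item.arg);  // Run the work item outside the lock.
        }
      }

      std::mutex mu_;
      std::condition_variable cv_;
      std::queue<Item> queue_;
      bool started_ = false;
    };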

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=212478311
2018-09-11 11:02:14 -07:00
costan
05709fb43e Remove InitOnce from the port API.
This is not an API-breaking change, because it reduces the API that the
leveldb embedder must implement. The project will build just fine
against ports that still implement InitOnce.

C++11 guarantees thread-safe initialization of static variables inside
functions. This is a more restricted form of std::call_once or
pthread_once_t (e.g., single call site), so the compiler might be able
to generate better code [1]. Equally important, having less code in
port_example.h makes it easier to port to other platforms.

Due to the change above, this CL introduces a new approach for storing
the singleton BytewiseComparatorImpl instance returned by
BytewiseComparator(). The new approach avoids a dynamic memory
allocation, which eliminates the false positive from LeakSanitizer
reported in https://github.com/google/leveldb/issues/200

[1] https://stackoverflow.com/a/27206650/
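
A generic illustration of the two approaches (not leveldb code): what a call_once-style API requires versus what a C++11 function-local static provides.

    #include <mutex>

    // Roughly the shape InitOnce / pthread_once usage takes: a separate flag
    // object plus a callback.
    std::once_flag init_flag;
    int* shared_state = nullptr;

    int* LazyStateWithCallOnce() {
      std::call_once(init_flag, [] { shared_state = new int(42); });
      return shared_state;
    }

    // C++11 "magic statics": one call site, no separate flag, no heap
    // allocation, still thread-safe.
    int* LazyStateWithStaticLocal() {
      static int state = 42;
      return &state;
    }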

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=212348004
2018-09-10 19:04:59 -07:00
costan
bb88f25115 Clean up PosixWritableFile in env_posix.cc.
This is separated from the general cleanup because of the logic changes
in SyncDirIfManifest().

General cleanup principles:
* Use override when applicable.
* Remove static when redundant (methods and  globals in anonymous
  namespaces).
* Use const on class members where possible.
* Standardize on "status" for Status local variables.
* Renames where clarity can be improved.
* Qualify standard library names with std:: when possible, to
  distinguish from POSIX names.
* Qualify POSIX names with the global namespace (::) when possible, to
  distinguish from standard library names.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211709673
2018-09-08 02:17:01 -07:00
costan
7b945f2003 Clean up posix_logger.h.
General cleanup principles:
* Use override when applicable.
* Use const on class members where possible.
* Renames where clarity can be improved.
* Qualify standard library names with std:: when possible, to
  distinguish from POSIX names.
* Qualify POSIX names with the global namespace (::) when possible, to
  distinguish from standard library names.

This also revamps the logic for putting together a message into the
in-memory buffer before that is passed to fwrite(). While correct in
practice, the current implementation advances a char pointer past the
size of its buffer, which is technically undefined behavior.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211472570
2018-09-04 10:38:12 -07:00
costan
89af27bde5 Remove ssize_t from code that is not POSIX-specific.
ssize_t is not standard C++. It is a POSIX extension. Therefore, it does
not belong in generic code.

This change tweaks the logic in DBIter to remove the need for signed
integers, so ssize_t can be replaced with size_t. The impacted method
and private member are renamed to better express their purpose.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211471606
2018-09-04 10:37:22 -07:00
costan
03064cbbb2 Simplify Limiter in env_posix.cc.
Now that we require C++11, we can use std::atomic<int>, which has
primitives for most of the logic we need. As a bonus, the happy path for
Limiter::Acquire() and Limiter::Release() only performs one atomic
operation.
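
A sketch of the idea (close to, but not necessarily identical to, the committed Limiter):

    #include <atomic>

    class Limiter {
     public:
      explicit Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}

      // Happy path: a single fetch_sub. Only the failure path needs a second
      // atomic operation to give the slot back.
      bool Acquire() {
        int old = acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
        if (old > 0) return true;
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
        return false;
      }

      void Release() {
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
      }

     private:
      std::atomic<int> acquires_allowed_;
    };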

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211469518
2018-09-04 10:36:40 -07:00
costan
9b44da73d9 Clarify comments for leveldb::Env file reading methods.
"Create a brand new [adjective] file" seems like the description for a
method that will create a new file, but is used for methods that open
existing files for read access.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211468002
2018-09-04 10:36:18 -07:00
costan
0ef2310f67 Remove GCC on OSX from the Travis CI matrix.
Equivalent of
db082d2cd6

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211467181
2018-09-04 10:35:55 -07:00
costan
16a2b8bb3a Expose WriteBatch::Append in the C API.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=209345072
2018-08-19 19:54:34 -07:00
costan
f7b0e1d901 Expose WriteBatch::Append().
WriteBatchInternal has a method for efficiently concatenating two
WriteBatches. This commit exposes the method to the public API.
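
A usage sketch (assuming a previously opened leveldb::DB* named db):

    #include "leveldb/db.h"
    #include "leveldb/write_batch.h"

    leveldb::Status MergeAndWrite(leveldb::DB* db) {
      leveldb::WriteBatch updates;
      leveldb::WriteBatch more_updates;
      updates.Put("key1", "value1");
      more_updates.Put("key2", "value2");
      updates.Append(more_updates);  // "updates" now carries both Put() records.
      return db->Write(leveldb::WriteOptions(), &updates);
    }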

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=208724311
2018-08-14 15:30:29 -07:00
andy
f314b63e5e Add the missing sequence and type fields to the comment describing the entry format 2018-06-28 22:05:56 +08:00
costan
6caf73ad9d Clean up Iterator.
This CL renames the private struct Iterator::Cleanup ->
Iterator::CleanupNode, to better reflect that it's a linked list node,
and extracts duplicated code from its user in IsEmpty() and Run()
methods.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=199175058
2018-06-04 17:24:44 -07:00
cmumford
6a6bdafcf1 Corrected typo in docs: "cache" to "block_cache".
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=197452015
2018-06-04 16:14:34 -07:00
costan
1868398150 Clean up SnapshotImpl.
* Omit SnapshotImpl::list_ when assert() isn't on
* Make SnapshotImpl::number_ const and set it in the constructor
* Make SnapshotImpl::number_ private and access it via a getter
* Rename SnapshotImpl::number_ to SnapshotImpl::sequence_number_
* Rename SnapshotList::list_ to SnapshotList::head_
* Wrap casting from Snapshot* to SnapshotImpl* in ToSnapshotImpl()

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=194852828
2018-04-30 16:01:39 -07:00
cmumford
e7840de9f3 Fix documentation for log file growth.
This fixes #546 reported on GitHub.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=194549692
2018-04-30 15:50:26 -07:00
cmumford
bc23e00f95 Update default log file size in doc.
The default size was changed in #f779e7a5 but the documentation was
never updated.

This fixes #566 reported on GitHub.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=194547959
2018-04-30 15:49:58 -07:00
costan
4de9594f6f Add move constructor to Status.
This will result in smaller code generation when Status instances are
passed around.
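
A hedged sketch of what the move constructor buys, using a simplified representation (the real Status stores a length-prefixed message in state_):

    #include <cstddef>
    #include <cstring>

    class StatusSketch {
     public:
      StatusSketch() noexcept : state_(nullptr) {}  // nullptr means "OK".
      StatusSketch(const StatusSketch& rhs) : state_(CopyState(rhs.state_)) {}
      // Moving steals the pointer: no allocation and no copy of the message,
      // which is what shrinks the code generated at pass-by-value call sites.
      StatusSketch(StatusSketch&& rhs) noexcept : state_(rhs.state_) {
        rhs.state_ = nullptr;
      }
      ~StatusSketch() { delete[] state_; }

     private:
      static const char* CopyState(const char* s) {
        if (s == nullptr) return nullptr;
        const std::size_t n = std::strlen(s) + 1;
        char* copy = new char[n];
        std::memcpy(copy, s, n);
        return copy;
      }

      const char* state_;  // Error message, or nullptr for an OK status.
    };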

Benchmarks don't indicate a significant change either way.
CPU:        48 * Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz
CPUCache:   30720 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)

Baseline:
fillseq      :       3.589 micros/op;   30.8 MB/s
fillsync     :    4165.299 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       5.864 micros/op;   18.9 MB/s
overwrite    :       7.830 micros/op;   14.1 MB/s
readrandom   :       5.534 micros/op; (1000000 of 1000000 found)
readrandom   :       4.292 micros/op; (1000000 of 1000000 found)
readseq      :       0.312 micros/op;  354.1 MB/s
readreverse  :       0.501 micros/op;  220.8 MB/s
compact      :  886211.000 micros/op;
readrandom   :       3.518 micros/op; (1000000 of 1000000 found)
readseq      :       0.251 micros/op;  441.2 MB/s
readreverse  :       0.456 micros/op;  242.4 MB/s
fill100K     :    1329.723 micros/op;   71.7 MB/s (1000 ops)
crc32c       :       1.976 micros/op; 1976.7 MB/s (4K per op)
snappycomp   :       4.705 micros/op;  830.2 MB/s (output: 55.1%)
snappyuncomp :       0.958 micros/op; 4079.1 MB/s
acquireload  :       0.727 micros/op; (each op is 1000 loads)

New:
fillseq      :       3.129 micros/op;   35.4 MB/s
fillsync     :    2748.099 micros/op;    0.0 MB/s (1000 ops)
fillrandom   :       5.394 micros/op;   20.5 MB/s
overwrite    :       7.253 micros/op;   15.3 MB/s
readrandom   :       5.655 micros/op; (1000000 of 1000000 found)
readrandom   :       4.425 micros/op; (1000000 of 1000000 found)
readseq      :       0.298 micros/op;  371.3 MB/s
readreverse  :       0.508 micros/op;  217.9 MB/s
compact      :  885842.000 micros/op;
readrandom   :       3.545 micros/op; (1000000 of 1000000 found)
readseq      :       0.252 micros/op;  438.2 MB/s
readreverse  :       0.425 micros/op;  260.2 MB/s
fill100K     :    1418.347 micros/op;   67.2 MB/s (1000 ops)
crc32c       :       1.987 micros/op; 1966.0 MB/s (4K per op)
snappycomp   :       4.767 micros/op;  819.4 MB/s (output: 55.1%)
snappyuncomp :       0.916 micros/op; 4264.9 MB/s
acquireload  :       0.665 micros/op; (each op is 1000 loads)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=194002392
2018-04-23 16:22:30 -07:00
costan
d177a0263c Replace port_posix with port_stdcxx.
The porting layer implements threading primitives: atomic pointers,
condition variables, mutexes, thread-safe initialization. These are all
specified in C++11, so the reference open source port implementation can
become platform-independent.

The porting layer will remain in place to allow the use of other
implementations with more features, such as the built-in deadlock
detection in abseil's Mutex.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=193245934
2018-04-17 13:26:47 -07:00
MarcoFalke
14cce848e7 Fix sign mismatch warnings in GCC.
This was contributed in https://github.com/google/leveldb/pull/492

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=193080913
2018-04-16 18:13:09 -07:00
costan
8046a51b21 Add forgotten <limits> header to util/logging.cc.
Commit a0008deb679480fd30e845d7e52421af72160c2c introduced
std::numeric_limits usage in logging.cc, but didn't #include <limits>

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192840190
2018-04-13 16:21:07 -07:00
costan
a0008deb67 Reimplement ConsumeDecimalNumber.
The old implementation caused odd crashes on ARM, which were fixed by
changing a local variable type. The main suspect is the use of a static
local variable. This CL replaces the static local variable with
constexpr, which still ensures the compiler sees the expressions as
constants.

The CL also replaces Slice operations in the functions' inner loop with
iterator-style pointer operations, which can help the compiler generate
less code.
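
A sketch of the constexpr + pointer-iteration shape described above (names and details may differ from the committed function):

    #include <cstddef>
    #include <cstdint>
    #include <limits>

    #include "leveldb/slice.h"

    bool ConsumeDecimalNumberSketch(leveldb::Slice* in, uint64_t* val) {
      // constexpr bounds instead of function-local static constants.
      constexpr uint64_t kMaxUint64 = std::numeric_limits<uint64_t>::max();
      constexpr char kLastDigitOfMaxUint64 =
          '0' + static_cast<char>(kMaxUint64 % 10);

      uint64_t value = 0;
      const unsigned char* start =
          reinterpret_cast<const unsigned char*>(in->data());
      const unsigned char* end = start + in->size();
      const unsigned char* current = start;
      // Iterate with raw pointers instead of repeatedly re-slicing `in`.
      for (; current != end; ++current) {
        const unsigned char ch = *current;
        if (ch < '0' || ch > '9') break;
        // Reject the digit if appending it would overflow uint64_t.
        if (value > kMaxUint64 / 10 ||
            (value == kMaxUint64 / 10 && ch > kLastDigitOfMaxUint64)) {
          return false;
        }
        value = value * 10 + (ch - '0');
      }
      *val = value;
      const std::size_t digits_consumed = current - start;
      in->remove_prefix(digits_consumed);
      return digits_consumed != 0;
    }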

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192832175
2018-04-13 15:37:20 -07:00
costan
1f7dd5d5f6 Add tests for ConsumeDecimalNumber.
ConsumeDecimalNumber has fairly non-trivial logic, and a previous
version has crashed inexplicably on Android. Having some test coverage
will make it easier to tweak / simplify the function later on.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192821751
2018-04-13 15:36:55 -07:00
costan
1cc8b10b82 Document the building process.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192490601
2018-04-11 12:57:42 -07:00
costan
09217fd067 Replace NULL with nullptr in C++ files.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192365747
2018-04-10 16:26:43 -07:00
costan
6a3b915166 Remove PLATFORM_IS_LITTLE_ENDIAN from port/posix.h.
This is an accidental leftover from the CMake migration. The macro has
been replaced with LEVELDB_IS_BIG_ENDIAN.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192364918
2018-04-10 16:26:27 -07:00
costan
260655b4c2 Define LEVELDB_HAS_PORT_CONFIG_H for old compilers.
The CMake-based build relies on __has_include, which is standardized in
C++17. Unfortunately, __has_include is available without requiring
--std=c++17 on all the compilers on CI, so this problem was not caught.

Fixes https://github.com/google/leveldb/issues/572

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=192208842
2018-04-09 16:56:50 -07:00
costan
6fa4566670 Rename CMake project / targets from Leveldb to leveldb.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=190262375
2018-03-23 13:53:50 -07:00
costan
0db30413a4 leveldb: Add more thread safety annotations.
After this CL, all classes with Mutex members should be covered by annotations. Exceptions are atomic members, which shouldn't need locking, and DBImpl members that cause errors when annotated, which will be tackled separately.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=190260865
2018-03-23 12:56:14 -07:00
costan
04f39105c5 Take <atomic> for granted in port/atomic_pointer.h.
C++11 requires <atomic>. This lets us remove the header detection
(LEVELDB_ATOMIC_PRESENT) and simplify port/atomic_pointer.h.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189919098
2018-03-21 09:40:40 -07:00
costan
74f032ff6f leveldb: Require C++11.
This CL switches the public headers to C++11 default and deleted constructors, and adds override to the relevant leveldb::EnvWrapper methods. This should be a good test for C++11 compiler support.

Once this CL settles, the rest of the codebase can be safely modernized to C++11.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189873212
2018-03-21 01:17:59 -07:00
costan
8e75db8623 Remove build configuration for make.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189824264
2018-03-20 16:50:13 -07:00
Hyzeta
df9a841a4f Add export.h to CMakeLists.txt
The header is missing from the file lists, resulting in incomplete installs.

This was originally contributed in https://github.com/google/leveldb/pull/568.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189822907
2018-03-20 16:48:58 -07:00
costan
50fbc87e8c Replace SIZE_MAX with std::numeric_limits.
helpers/memenv/memenv.cc used SIZE_MAX without including <stdint.h>.
Since we're fixing this problem, replace SIZE_MAX with
std::numeric_limits<size_t>::max(), which is clearer.

Fixes https://github.com/google/leveldb/issues/562

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189821707
2018-03-20 16:48:42 -07:00
costan
739c25100e Add CMake build support.
Fixes https://github.com/google/leveldb/issues/466

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189425354
2018-03-16 19:17:27 -07:00
costan
0fa5a4f7b1 Extend thread safety annotations.
This CL makes it easier to reason about thread safety by:

1) Adding Clang thread safety annotations according to comments.
2) Expanding a couple of variable names, without adding extra lines of code.
3) Adding const in a couple of places.
4) Replacing an always-non-null const pointer with a reference.
5) Fixing style warnings in the modified files.

This CL does not annotate the DBImpl members that claim to be protected
by the instance mutex, but are accessed without the mutex being held.
Those members (and their unprotected accesses) will be addressed in
future CLs.
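
For illustration, the shape such annotations take with Clang's -Wthread-safety analysis (a self-contained sketch; leveldb's macros live in port/thread_annotations.h and apply to port::Mutex):

    #include <mutex>

    // Minimal stand-ins for the annotation macros (Clang-only attributes).
    #define CAPABILITY(x) __attribute__((capability(x)))
    #define GUARDED_BY(x) __attribute__((guarded_by(x)))
    #define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))

    class CAPABILITY("mutex") Mutex {
     public:
      void Lock() ACQUIRE() { mu_.lock(); }
      void Unlock() RELEASE() { mu_.unlock(); }

     private:
      std::mutex mu_;
    };

    class Counter {
     public:
      void Increment() {
        mu_.Lock();
        ++value_;  // OK: the analyzer knows mu_ is held here.
        mu_.Unlock();
      }

     private:
      Mutex mu_;
      int value_ GUARDED_BY(mu_) = 0;  // Unlocked access is flagged at compile time.
    };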

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189354657
2018-03-16 10:32:40 -07:00
costan
8143c12f3f Fix includes in util/testharness.h.
This CL removes unused headers included by util/testharness.h, adds
precise includes where the build breaks, and fixes style errors in the
edited files.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=189331061
2018-03-16 10:31:48 -07:00
costan
aece2068d7 Remove extern from function declarations.
External linkage is the default for function declarations in C++.

This also fixes ClangTidy errors generated by removing the "extern"
keyword as described above.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188730416
2018-03-12 09:24:48 -07:00
costan
ddab751002 Add tests for {Old}InfoLogFileName().
This change was contributed by GitHub user @LopatkinEvgeniy in
https://github.com/google/leveldb/pull/559.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188728845
2018-03-12 09:24:25 -07:00
costan
7fd7c00721 Remove unused function ExtractValueType.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188728505
2018-03-12 09:24:07 -07:00
costan
594cc987af Bypass OSMemoryBarrier() warning on Mac.
This is a stopgap for removing warnings on Mac builds, so -Werror can be
turned on. C++11 will be required in the nearby future, which guarantees
<atomic> support. Once that happens, the simplified version of this will
match https://github.com/google/leveldb/pull/503

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188553251
2018-03-09 16:37:44 -08:00
costan
49f35d3fc9 leveldb: Update Travis CI configuration for open source build.
This aligns the Travis CI configuration with google/crc32c and
google/snappy, to simplify maintenance.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188547648
2018-03-09 15:49:36 -08:00
costan
623d014a54 Expose Env::GetTempDirectory() for use in C test.
This removes the use of the non-portable headers <sys/types.h> and <unistd.h> in c_test.c.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188503102
2018-03-09 10:38:04 -08:00
costan
8c8024ea33 Switch HAVE_ library detection macros to 0/1.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188488298
2018-03-09 09:34:42 -08:00
costan
41172a2401 Enable thread safety annotations in open source version.
The thread safety annotations used by leveldb got opensourced in Abseil
[1]. This CL replaces leveldb's stubs with the relevant definitions from
[1], and adds annotations to the Mutex classes in the POSIX port.

[1] https://github.com/abseil/abseil-cpp/blob/master/absl/base/thread_annotations.h
2018-02-13 22:40:41 -08:00
cmumford
47cb9e2a21 Add leveldb_options_set_max_file_size to the C API.
When the max file size option was added in CL 134391640 the C API
was not modified to support this.

This change was contributed by GitHub user @olt and fixes issue #439.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=173466388
2017-11-03 15:04:26 -07:00
cmumford
b5d4a22e64 Fixed style guide link in CONTRIBUTING.md
The C++ style guide URL was wrong.

This fixes issue #394. Reported by GitHub user @Loki-Astari.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=173188573
2017-11-03 15:04:08 -07:00
cmumford
3da4d8b989 Deleted unused assignments in Reader.
Deleted two unused assignments:

1. offset_in_block in Reader::SkipToInitialBlock().
2. in_fragmented_record in Reader::ReadRecord().

Reasons for the change:
1. offset_in_block is not read again after the if condition.
2. The kFullRecordType switch branch returns, so
   in_fragmented_record isn't read again.
3. The kFirstType switch branch sets in_fragmented_record to
   true after the if, so the write in the if is ignored.

Change contributed by @C0deAi on GitHub.

This fixes https://github.com/google/leveldb/issues/517

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=172763897
2017-11-03 15:03:44 -07:00
cmumford
0509414f85 leveldb::DestroyDB will now delete empty directories.
Envs that filtered out dot files ("." and "..") would return an
empty vector of children, causing DestroyDB to do nothing. This fixes
https://github.com/google/leveldb/issues/215

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=172501335
2017-11-03 15:03:20 -07:00
costan
23162ca1c6 Fix typo (forgotten reference operator) in test.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171708408
2017-10-10 11:47:13 -07:00
costan
5c39524f36 Replace SSE-optimized CRC32C in POSIX port with external library.
Maintaining a hardware-accelerated CRC32C implementation tailored for
all modern platforms deserves a repository of its own. We extracted the
implementation here into https://github.com/google/crc32c and improved
it in that repository. This CL removes the SSE-optimized implementation
from this codebase, and adds the ability to use the google/crc32c
library, if it is present on the system.

The benchmarks below show the performance impact of the change. In
summary, open source builds that use the google/crc32c library can
expect a 3x improvement in CRC32C throughput, whereas builds that do not
use the library will see a 50% drop in CRC32C throughput. This
translates in much smaller changes in overall leveldb performance.

Baseline, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       3.064 micros/op;   36.1 MB/s
fillsync     :      57.861 micros/op;    1.9 MB/s (1000 ops)
fillrandom   :       3.887 micros/op;   28.5 MB/s
overwrite    :       4.140 micros/op;   26.7 MB/s
readrandom   :       7.433 micros/op; (1000000 of 1000000 found)
readrandom   :       6.825 micros/op; (1000000 of 1000000 found)
readseq      :       0.244 micros/op;  453.4 MB/s
readreverse  :       0.387 micros/op;  285.8 MB/s
compact      :  449707.000 micros/op;
readrandom   :       4.196 micros/op; (1000000 of 1000000 found)
readseq      :       0.228 micros/op;  485.8 MB/s
readreverse  :       0.320 micros/op;  345.2 MB/s
fill100K     :     562.556 micros/op;  169.6 MB/s (1000 ops)
crc32c       :       0.768 micros/op; 5085.0 MB/s (4K per op)
snappycomp   :       4.220 micros/op;  925.7 MB/s (output: 55.1%)
snappyuncomp :       0.635 micros/op; 6155.7 MB/s
acquireload  :      13.054 micros/op; (each op is 1000 loads)

New with crc32c, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       2.820 micros/op;   39.2 MB/s
fillsync     :      51.988 micros/op;    2.1 MB/s (1000 ops)
fillrandom   :       3.747 micros/op;   29.5 MB/s
overwrite    :       4.047 micros/op;   27.3 MB/s
readrandom   :       7.287 micros/op; (1000000 of 1000000 found)
readrandom   :       6.927 micros/op; (1000000 of 1000000 found)
readseq      :       0.253 micros/op;  437.5 MB/s
readreverse  :       0.411 micros/op;  269.2 MB/s
compact      :  440405.000 micros/op;
readrandom   :       4.159 micros/op; (1000000 of 1000000 found)
readseq      :       0.230 micros/op;  481.1 MB/s
readreverse  :       0.320 micros/op;  345.9 MB/s
fill100K     :     558.222 micros/op;  170.9 MB/s (1000 ops)
crc32c       :       0.214 micros/op; 18263.5 MB/s (4K per op)
snappycomp   :       4.471 micros/op;  873.7 MB/s (output: 55.1%)
snappyuncomp :       0.833 micros/op; 4688.5 MB/s
acquireload  :      13.289 micros/op; (each op is 1000 loads)

New without crc32c, MacBookPro13,3 with Core i7 6920HQ
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       3.094 micros/op;   35.8 MB/s
fillsync     :      52.160 micros/op;    2.1 MB/s (1000 ops)
fillrandom   :       4.090 micros/op;   27.0 MB/s
overwrite    :       4.006 micros/op;   27.6 MB/s
readrandom   :       6.584 micros/op; (1000000 of 1000000 found)
readrandom   :       6.676 micros/op; (1000000 of 1000000 found)
readseq      :       0.280 micros/op;  395.2 MB/s
readreverse  :       0.391 micros/op;  283.2 MB/s
compact      :  433911.000 micros/op;
readrandom   :       4.261 micros/op; (1000000 of 1000000 found)
readseq      :       0.251 micros/op;  440.5 MB/s
readreverse  :       0.356 micros/op;  310.9 MB/s
fill100K     :     584.023 micros/op;  163.3 MB/s (1000 ops)
crc32c       :       1.384 micros/op; 2822.3 MB/s (4K per op)
snappycomp   :       4.763 micros/op;  820.1 MB/s (output: 55.1%)
snappyuncomp :       0.766 micros/op; 5098.6 MB/s
acquireload  :      12.931 micros/op; (each op is 1000 loads)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171667771
2017-10-10 11:46:40 -07:00
costan
ca216e493f leveldb: Rename SNAPPY to HAVE_SNAPPY.
This follows the general naming convention for preprocessor macros used
to detect feature (library / header file / symbol) presence.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171184641
2017-10-05 12:19:09 -07:00
costan
25767d066c leveldb: Remove *_unlocked feature detection from POSIX port.
CL 170738066 removed all instances of fread_unlocked, fwrite_unlocked
and fflush_unlocked calls from the codebase, so the feature detection
can be removed as well.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171154269
2017-10-05 12:18:49 -07:00
cmumford
4a7e7f50dc Add LEVELDB_EXPORT macro to export public symbols.
gcc defaults to exporting all symbols, but other linkers do not. Adding
the LEVELDB_EXPORT macro allows a project to set LEVELDB_SHARED_LIBRARY
when building/linking with leveldb as a shared library.

This is to allow leveldb to be created as a shared library on all
platforms supported by Chrome and enables a fix for
https://bugs.chromium.org/p/chromium/issues/detail?id=764810.

This also has the benefit of reducing the shared library size from
418863 to 380367 bytes (64-bit Linux).
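
For illustration, the typical shape of such an export macro (a sketch of the general pattern; the actual definitions live in include/leveldb/export.h):

    // LEVELDB_SHARED_LIBRARY selects shared-library visibility;
    // LEVELDB_COMPILE_LIBRARY distinguishes building the library from using it.
    #if !defined(LEVELDB_EXPORT)
    #if defined(LEVELDB_SHARED_LIBRARY)
    #if defined(_WIN32)
    #if defined(LEVELDB_COMPILE_LIBRARY)
    #define LEVELDB_EXPORT __declspec(dllexport)
    #else
    #define LEVELDB_EXPORT __declspec(dllimport)
    #endif
    #else  // non-Windows: rely on ELF/Mach-O symbol visibility
    #define LEVELDB_EXPORT __attribute__((visibility("default")))
    #endif
    #else  // static library: the macro expands to nothing
    #define LEVELDB_EXPORT
    #endif
    #endif  // !defined(LEVELDB_EXPORT)

    class LEVELDB_EXPORT DB { /* public symbol when built as a shared library */ };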

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171037148
2017-10-04 11:53:12 -07:00
costan
542590d2a8 leveldb: Include <algorithm> in util/env_test.cc.
CL 170738066 introduced std::min and std::max to env_test.cc. These
require the <algorithm> header.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171024062
2017-10-04 10:38:33 -07:00
costan
8ae7998aab Fix FD leak in POSIX Env.
Deleting a PosixWritableFile without calling Close() leaks the file
descriptor. While the API description in include/leveldb/env.h does not
specify whether the caller is responsible for Close()ing the file before
deleting it, all other Env file implementations do release underlying
resources when destroyed, even if Close() is not called.

The leak shows up when running db_tests on Mac Travis, or on a vanilla
MacOS install.
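
A minimal sketch of the fix's shape, releasing the descriptor in the destructor when Close() was never called (illustrative, not the exact committed code):

    #include <unistd.h>

    class WritableFileSketch {
     public:
      explicit WritableFileSketch(int fd) : fd_(fd) {}

      ~WritableFileSketch() {
        if (fd_ >= 0) {
          ::close(fd_);  // Last-resort cleanup; errors are ignored here.
        }
      }

      void Close() {
        if (fd_ >= 0) {
          ::close(fd_);
          fd_ = -1;  // Prevents a double close from the destructor.
        }
      }

     private:
      int fd_;
    };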

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170906843
2017-10-03 13:45:04 -07:00
costan
d9a9e02edf leveldb: Add tests for CL 170769101.
This also removes std::unique_ptr introduced in CL 170738066, because
it's C++11-only, and the open source version still supports older
versions at the moment.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170876919
2017-10-03 11:32:02 -07:00
costan
4447f9cace Remove handling for unused LRUHandle representation special case.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170876103
2017-10-03 11:32:02 -07:00
sanjay
2372ac574f Fix file writing bug in CL 170738066.
If the file already existed, we should have truncated it. This was not
detected by leveldb tests since leveldb code avoids reusing the same files,
but there was code elsewhere that was directly using leveldb files and
relying on this behavior.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170769101
2017-10-03 11:32:02 -07:00
cmumford
1c75e88055 Fix use of uninitialized value in LRUHandle.
If leveldb::Options::block_cache is set to a cache of zero capacity
then it is possible for LRUHandle::next to be used without having been
set.

Conditional jump or move depends on uninitialised value(s):
  leveldb::(anonymous namespace)::LRUHandle::key() const (cache.cc:58)
  leveldb::(anonymous namespace)::LRUCache::Unref(leveldb::(anonymous namespace)::LRUHandle*) (cache.cc:234)
  leveldb::(anonymous namespace)::LRUCache::Release(leveldb::Cache::Handle*) (cache.cc:266)
  leveldb::(anonymous namespace)::ShardedLRUCache::Release(leveldb::Cache::Handle*) (cache.cc:375)
  leveldb::CacheTest::Insert(int, int, int) (cache_test.cc:59)

This bug forced a commit reversion in Chromium. For more information see
https://bugs.chromium.org/p/chromium/issues/detail?id=761398#c4

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170749054
2017-10-03 11:30:48 -07:00
sanjay
7e12c00ecf Fix issue 474: a race between the f*_unlocked() STDIO calls in
env_posix.cc and concurrent application calls to fflush(NULL).

The fix is to avoid using stdio in env_posix.cc but add our own
buffering where we need it.

Added a test to reproduce the bug.

Added a test for Env reads/writes.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170738066
2017-10-03 11:27:09 -07:00
costan
bcd9a8ea4a Use portable CRC32C from google/crc32c.
Benchmark results below. More results at
354d61ef97.

New, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
WARNING: Snappy compression is not enabled
------------------------------------------------
fillseq      :       2.952 micros/op;   37.5 MB/s
fillsync     :      43.932 micros/op;    2.5 MB/s (1000 ops)
fillrandom   :       3.856 micros/op;   28.7 MB/s
overwrite    :       4.053 micros/op;   27.3 MB/s
readrandom   :       4.234 micros/op; (1000000 of 1000000 found)
readrandom   :       3.923 micros/op; (1000000 of 1000000 found)
readseq      :       0.201 micros/op;  550.8 MB/s
readreverse  :       0.356 micros/op;  310.6 MB/s
compact      :  436800.000 micros/op;
readrandom   :       2.375 micros/op; (1000000 of 1000000 found)
readseq      :       0.151 micros/op;  734.3 MB/s
readreverse  :       0.298 micros/op;  370.7 MB/s
fill100K     :     554.075 micros/op;  172.1 MB/s (1000 ops)
crc32c       :       1.393 micros/op; 2805.0 MB/s (4K per op)
snappycomp   :    3902.000 micros/op; (snappy failure)
snappyuncomp :    3821.000 micros/op; (snappy failure)
acquireload  :      13.088 micros/op; (each op is 1000 loads)

Baseline, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
WARNING: Snappy compression is not enabled
------------------------------------------------
fillseq      :       3.000 micros/op;   36.9 MB/s
fillsync     :      46.721 micros/op;    2.4 MB/s (1000 ops)
fillrandom   :       3.922 micros/op;   28.2 MB/s
overwrite    :       4.080 micros/op;   27.1 MB/s
readrandom   :       4.409 micros/op; (1000000 of 1000000 found)
readrandom   :       3.895 micros/op; (1000000 of 1000000 found)
readseq      :       0.190 micros/op;  582.4 MB/s
readreverse  :       0.413 micros/op;  267.6 MB/s
compact      :  441076.000 micros/op;
readrandom   :       2.308 micros/op; (1000000 of 1000000 found)
readseq      :       0.170 micros/op;  651.2 MB/s
readreverse  :       0.302 micros/op;  366.2 MB/s
fill100K     :     614.289 micros/op;  155.3 MB/s (1000 ops)
crc32c       :       3.547 micros/op; 1101.2 MB/s (4K per op)
snappycomp   :    3393.000 micros/op; (snappy failure)
snappyuncomp :    3171.000 micros/op; (snappy failure)
acquireload  :      12.761 micros/op; (each op is 1000 loads)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=170100372
2017-09-26 18:50:41 -07:00
lingbin
3ee04c5cea fix style and remove unused code 2017-09-04 16:10:38 +08:00
costan
ea0a7586b8 Remove confusing and unnecessary if.
12 lines above, there is an "if (!s.ok()) { return s; }" block of code.
"s" is never modified between that block and the "if" removed by this
CL, so "s.ok()" must be true.

The code most likely intended to say "if (!builder->ok())", because the
builder->Add() call above can modify the TableBuilder's status, as a
side-effect. However, this approach would have required setting "s =
builder.status()" in the "else" branch, near the "builder.Abandon()"
call. So, removing the "if" outright is simpler than following that line
of thought.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=167326229
2017-09-01 14:41:28 -07:00
costan
141e767135 Simplify Table::Open() flow and remove a delete call.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=167303843
2017-09-01 14:41:01 -07:00
cmumford
09a3c8e741 Switched variable type from int to uint64_t in ConsumeDecimalNumber.
An Android test was occasionally crashing with a SEGV in ConsumeDecimalNumber.
Switching a local variable from an int to uint64_t eliminated these crashes.
Speculating this is either a compiler, runtime library, or emulator issue.

Switching this type to uint64_t also eliminates a compiler warning
about comparing an int with a uint64_t.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=166399695
2017-08-24 15:40:54 -07:00
costan
2964b803b8 leveldb: Fix alignment code in SSE4.2-optimized CRC32C.
When faced with a pointer that is misaligned by K bytes (pointer % 8 ==
K), the code previously moved forward by K bytes. In order to end up
with an aligned pointer, the code must move by 8 - K bytes.
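
A small worked example of the corrected arithmetic (hypothetical helper; 8-byte alignment as in the CRC code):

    #include <cstdint>

    // For a pointer misaligned by K bytes (address % 8 == K, K != 0), the next
    // 8-byte boundary is (8 - K) bytes ahead, not K bytes ahead.
    const uint8_t* AlignUpTo8(const uint8_t* p) {
      const uintptr_t k = reinterpret_cast<uintptr_t>(p) % 8;
      return (k == 0) ? p : p + (8 - k);
    }
    // Example: address % 8 == 3  ->  advance 8 - 3 = 5 bytes to reach alignment.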

This lands https://github.com/google/leveldb/pull/488

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=166295921
2017-08-24 15:00:52 -07:00
davidair
02f43c0fcd Remove dead code.
The dead code has been in the codebase since the initial commit and is
generating a compiler warning when used in Xcode.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=164174594
2017-08-24 15:00:52 -07:00
scrubbed
0b402e96a7 Use __APPLE__ instead of OS_MACOS. The former is compiler-provided.
Use __APPLE__ instead of OS_MACOS when testing for the Apple platform and
remove the latter symbol from the BUILD file. This fixes incompatibility issues
when using the library on an Apple device.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=162958094
2017-08-24 15:00:45 -07:00
costan
8415f00eee leveldb: Report missing CURRENT manifest file as database corruption.
BTRFS reorders rename and write operations, so it is possible that a filesystem crash and recovery results in a situation where the file pointed to by CURRENT does not exist. DB::Open currently reports an I/O error in this case. Reporting database corruption is a better hint to the caller, which can attempt to recover the database or erase it and start over.

This issue is not merely theoretical. It was reported as having showed up in the wild at https://github.com/google/leveldb/issues/195 and at https://crbug.com/738961. Also, asides from the BTRFS case described above, incorrect data in CURRENT seems like a possible corruption case that should be handled gracefully.

The Env API changes here can be considered backwards compatible, because an implementation that returns Status::IOError instead of Status::NotFound will still get the same functionality as before.
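
A hedged sketch of the recovery-path distinction this enables (illustrative, not the actual VersionSet recovery code):

    #include <string>

    #include "leveldb/env.h"
    #include "leveldb/status.h"

    leveldb::Status ReadCurrentFile(leveldb::Env* env, const std::string& dbname,
                                    std::string* contents) {
      leveldb::Status s =
          leveldb::ReadFileToString(env, dbname + "/CURRENT", contents);
      if (s.IsNotFound()) {
        // A missing CURRENT means the database is corrupt, not that I/O failed,
        // so callers can choose to repair or recreate the database.
        return leveldb::Status::Corruption("CURRENT file does not exist");
      }
      return s;
    }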

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=161432630
2017-07-10 14:14:00 -07:00
costan
69e2bd224b LevelDB: Add WriteBatch::ApproximateSize().
This can be used to report metrics on LevelDB usage.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=156934930
2017-07-10 14:13:30 -07:00
周炀
471f0b84ec fix comment 2017-05-22 14:01:38 +08:00
果冻
5b817400a0 fix comment 2017-03-10 14:23:19 +08:00
costan
a53934a3ae Increase leveldb version to 1.20.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148937577
2017-03-01 16:08:02 -08:00
costan
f3f139737c Separate Env tests from PosixEnv tests.
env_test.cc defines EnvPosixTest which tests the Env implementation returned by Env::Default(). The naming is a bit unfortunate, as the tests in env_test.cc are written against the Env contract, and therefore are applicable to any Env implementation. An instance of the confusion caused by the naming is [] which added a dependency from env_test.cc to EnvPosixTestHelper, which is closely coupled to EnvPosix.

This change disentangles EnvPosix-specific test code into a env_posix_test.cc file. The code there uses EnvPosixTestHelper and specifically targets the EnvPosix implementation. env_test.cc now implements EnvTest, and contains tests that are also applicable to other ports, which may define their own Env implementation.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148914642
2017-03-01 13:53:23 -08:00
costan
eb4f0972fd leveldb: Fix compilation warnings in port_posix_sse.cc on x86 (32-bit).
LE_LOAD64 is only used when _mm_crc32_u64 is available, on 64-bit x86 processors.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148906169
2017-03-01 11:37:43 -08:00
cmumford
d0883b6006 Fixed path to doc file: index.md.
Prior index.html was using rawgit.com which doesn't process
Markdown and therefore only serves the markdown source.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148902180
2017-03-01 10:28:56 -08:00
cmumford
7fa20948d5 Convert documentation to markdown.
Markdown is more readable in a text editor and when hosted
on GitHub is more readable than HTML.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148830423
2017-03-01 09:42:25 -08:00
costan
ea175e28f8 Implement support for Intel crc32 instruction (SSE 4.2)
This change authored by vadimskipin and submitted via:

    https://github.com/google/leveldb/pull/309

Changes made to support iOS builds and other architectures
without support for SSE 4.2.

db_bench reports original crc32 speed at:

    crc32c : 3.610 micros/op; 1082.0 MB/s (4K per op)

with this change performance has increased to:

    crc32c : 0.843 micros/op; 4633.6 MB/s (4K per op)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=148694935
2017-02-28 14:08:46 -08:00
cmumford
95cd743e5e Including <limits> for std::numeric_limits.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=146841327
2017-02-09 14:09:51 -08:00
cmumford
646c3588de Limit the number of read-only files the POSIX Env will have open.
Background compaction can create an unbounded number of
leveldb::RandomAccessFile instances. On 64-bit systems mmap is used and
file descriptors are only used beyond a certain number of mmap's.
32-bit systems do not use mmap at all. leveldb::RandomAccessFile does not
observe Options.max_open_files so compaction could exhaust the file
descriptor limit.

This change uses getrlimit to determine the maximum number of open
files and limits RandomAccessFile to approximately 20% of that value.
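
A sketch of that calculation (simplified; the fallback constants are illustrative):

    #include <sys/resource.h>

    int MaxReadOnlyFiles() {
      struct ::rlimit rlim;
      if (::getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
        return 50;  // Could not query the limit; fall back to a small constant.
      }
      if (rlim.rlim_cur == RLIM_INFINITY) {
        return 1 << 30;  // Effectively unlimited.
      }
      // Reserve roughly 20% of the allowed descriptors for read-only files.
      return static_cast<int>(rlim.rlim_cur / 5);
    }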

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=143505556
2017-01-04 09:13:20 -08:00
proller
7d060117fa broken db: fix assertion in leveldb::InternalKey::Encode, mark base as corrupt 2016-10-03 19:40:07 +03:00
corrado
a2fb086d07 Add option for max file size. The current hard-coded value of 2M is inefficient in colossus.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=134391640
2016-09-28 10:52:24 -07:00
cmumford
3080a45b62 Increase leveldb version to 1.19.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=129930720
2016-08-11 07:33:30 -07:00
sanjay
fa6dc010a2 A zippy change broke test assumptions about the size of compressed output.
Fix the tests by allowing more slop in zippy's behavior.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=123432472
2016-07-06 09:16:11 -07:00
m3b
06a191b8de fix problems in LevelDB's caching code
Background:

LevelDB uses a cache (util/cache.h, util/cache.cc) of (key,value)
pairs for two purposes:
- a cache of (table, file handle) pairs
- a cache of blocks

The cache places the (key,value) pairs in a reference-counted
wrapper.  When it returns a value, it returns a reference to this
wrapper.  When the client has finished using the reference and
its enclosed (key,value), it calls Release() to decrement the
reference count.

Each (key,value) pair has an associated resource usage.  The
cache maintains the sum of the usages of the elements it holds,
and removes values as needed to keep the sum below a capacity
threshold.  It maintains an LRU list so that it will remove the
least-recently used elements first.

The max_open_files option to LevelDB sets the size of the cache
of (table, file handle) pairs.  The option is not used in any
other way.

The observed behaviour:

If LevelDB at any time used more file handles concurrently than
the cache size set via max_open_files, it attempted to reduce the
number by evicting entries from the table cache.  This could
happen most easily during compaction, and if max_open_files was
low.  Because the handles were in use, their reference count did
not drop to zero, and so the usage sum in the cache was not
modified by the evictions.  Subsequent Insert() calls returned
valid handles, but their entries were immediately evicted from
the cache, which though empty still acted as though full.  As a
result, there was effectively no caching, and the number of open
file handles rose []ly until it hit system-imposed limits and
the process died.

If one set max_open_files lower, the cache was more likely to
exhibit this behaviour, and cause the process to run out of file
descriptors.  That is, max_open_files acted in almost exactly the
opposite manner from what was intended.

The problems:

1. The cache kept all elements on its LRU list eligible for capacity
   eviction---even those with outstanding references from clients.  This was
   ineffective in reducing resource consumption because there was an
   outstanding reference, guaranteeing that the items remained.  A secondary
   issue was that there is no guarantee that these in-use items will be the
   last things reached in the LRU chain, which actually recorded
   "least-recently requested" rather than "least-recently used".

2. The sum of usages was decremented not when a (key,value) was evicted from
   the cache, but when its reference count went to zero.  Thus, when things
   were removed from the cache, either by garbage collection or via Erase(),
   the usage sum was not necessarily decreased.  This allowed the cache to act
   as though full when it was in fact not, reducing caching effectiveness, and
   leading to more resources being consumed---the opposite of what the
   evictions were intended to achieve.

3. (minor) The cache's clients insert items into it by first looking up the
   key, and inserting only if no value is found.  Although the cache has an
   internal lock, the clients use no locking to ensure atomicity of the
   Lookup/Insert pair.  (see table/table.cc:  block_cache->Insert() and
   db/table_cache.cc:  cache_->Insert()).  Thus, if two threads Insert() at
   about the same time, they can both Lookup(), find nothing, and both
   Insert().  The second Insert() would evict the first value, leaving each
   thread with a handle on its own version of the data, and with the second
   version in the cache.  It would be better if both threads ended up with a
   handle on the same (key,value) pair, which implies it must be the first item
   inserted.  This suggests that Insert() should not replace an existing value.

   This can be made safe with current usage inside LevelDB itself, but this is
   not easy to change first because Cache is a public interface, so to change
   the semantics of an existing call might break things, second because Cache
   is an abstract virtual class, so adding a new abstract virtual method may
   break other implementations, and third, the new method "insert without
   replacing" cannot be implemented in terms of the existing methods, so cannot
   be implemented with a non-abstract default.   But fortunately, the effects
   of this issue are minor, so this issue is not fixed by this change.

The changes:

The assumption in the fixes is that it is always better to cache
entries unless removal from the cache would lead to deallocation.

Cache entries now have an "in_cache" boolean indicating whether
the cache has a reference on the entry.  The only ways that this can
become false without the entry being passed to its "deleter" are via
Erase(), via Insert() when an element with a duplicate key is inserted,
or on destruction of the cache.

The cache now keeps two linked lists instead of one.  All items
in the cache are in one list or the other, and never both.  Items
still referenced by clients but erased from the cache are in
neither list.  The lists are:
- in-use:  contains the items currently referenced by clients, in no particular
  order.  (This list is used for invariant checking.  If we removed the check,
  elements that would otherwise be on this list could be left as disconnected
  singleton lists.)
- LRU:  contains the items not currently referenced by clients, in LRU order

A new internal Ref() method increments the reference count.  If
incrementing from 1 to 2 for an item in the cache, it is moved
from the LRU list to the in-use list.

The Unref() call now moves things from the in-use list to the LRU
list if the reference count falls to 1, and the item is in the
cache.  It no longer adjusts the usage sum.  The usage sum now
reflects only what is in the cache, rather than including
still-referenced items that have been evicted.
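
A hedged sketch of the Ref()/Unref() transitions between the two lists (field and helper names follow the description above, not necessarily the committed cache.cc):

    #include <cassert>
    #include <cstdlib>

    // Intrusive doubly-linked list node. Entries with refs == 1 and
    // in_cache == true sit on lru_; entries clients also hold sit on in_use_.
    struct CacheHandle {
      void* value;
      void (*deleter)(void* value);
      CacheHandle* next;
      CacheHandle* prev;
      int refs;       // Includes the cache's own reference while in_cache.
      bool in_cache;  // Whether the cache still holds a reference.
    };

    struct CacheListsSketch {
      CacheHandle lru_;     // Dummy head: entries referenced only by the cache.
      CacheHandle in_use_;  // Dummy head: entries clients currently reference.

      CacheListsSketch() {
        lru_.next = lru_.prev = &lru_;
        in_use_.next = in_use_.prev = &in_use_;
      }

      static void ListRemove(CacheHandle* e) {
        e->next->prev = e->prev;
        e->prev->next = e->next;
      }

      static void ListAppend(CacheHandle* list, CacheHandle* e) {
        e->next = list;
        e->prev = list->prev;
        e->prev->next = e;
        e->next->prev = e;
      }

      void Ref(CacheHandle* e) {
        if (e->refs == 1 && e->in_cache) {  // On lru_; a client now wants it.
          ListRemove(e);
          ListAppend(&in_use_, e);
        }
        e->refs++;
      }

      void Unref(CacheHandle* e) {
        assert(e->refs > 0);
        e->refs--;
        if (e->refs == 0) {  // Fully released and already erased from the cache.
          assert(!e->in_cache);
          (*e->deleter)(e->value);
          std::free(e);
        } else if (e->in_cache && e->refs == 1) {
          ListRemove(e);          // No client references remain, so the entry
          ListAppend(&lru_, e);   // becomes eligible for LRU eviction again.
        }
      }
    };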

The LRU_Append() now takes a "list" parameter so that it can be
used to append either to the LRU list or the in-use list.

Lookup() is modified to use the new Ref() call, rather than
adjusting the reference count and LRU chain directly.

Insert() eviction code is also modified to adjust the usage sum and the
in_cache boolean of the evicted elements.  Some LevelDB tests assume that there
will be no caching whatsoever if the cache size is set to zero, so this is
handled as a special case.

A new private method FinishErase() is factored out
with the common code from where items are removed from the cache.

Erase() is modified to adjust the usage sum and the in_cache
boolean of the erased elements, and to use FinishErase().

Prune() is modified to use FinishErase() also, and to make use of the fact that
the lru_ list now contains only items with reference count 1.

- EvictionPolicy is modified to test that an entry with an
outstanding handle is not evicted.  This test fails with the old cache.cc.

- A new test case UseExceedsCacheSize verifies that even when the
cache is overfull of entries with outstanding handles, none are
evicted.  This test fails with the old cache.cc, and is the key
issue that causes file descriptors to run out when the cache
size is set too small.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=123247237
2016-07-06 09:15:53 -07:00
ivanabc
2883fcd849 set const property 2016-06-21 16:57:57 +08:00
ivanabc
e5f0a51fa4 reduce lock's range in DeleteObsoleteFiles 2016-06-20 13:18:07 +08:00
John Abd-El-Malek
a7bff697ba Fix LevelDB build when asserts are enabled in release builds. (#367)
* Fix LevelDB build when asserts are enabled in release builds.

BUG=https://bugs.chromium.org/p/chromium/issues/detail?id=603166

* fix

* Add comment
2016-04-15 10:58:27 -07:00
Nicholas Westlake
ea992b467b Change std::uint64_t to uint64_t (#354)
-This fixes compile errors with default setup on RHEL 6 systems.
2016-04-12 15:38:09 -07:00
mjwiacek
e84b5bdb5a This CL fixes a bug encountered when reading records from leveldb files that have been split, as in a [] input task split.
Detailed description:

Suppose an input split is generated between two leveldb record blocks and the preceding block ends with null padding.

A reader that previously read at least 1 record within the first block (before encountering the padding) upon trying to read the next record, will successfully and correctly read the next logical record from the subsequent block, but will return a last record offset pointing to the padding in the first block.

When this happened in a [], it resulted in duplicate records being handled at what appeared to be different offsets that were separated by only a few bytes.

This behavior is only observed when at least 1 record was read from the first block before encountering the padding. If the initial offset for a reader was within the padding, the correct record offset would be reported, namely the offset within the second block.

The tests failed to catch this scenario/bug, because each read test only read a single record with an initial offset. This CL adds an explicit test case for this scenario, and modifies the test structure to read all remaining records in the test case after an initial offset is specified.  Thus an initial offset that jumps to record #3, with 5 total records in the test file, will result in reading 2 records, and validating the offset of each of them in order to pass successfully.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=115338487
2016-03-31 15:53:34 -07:00
cmumford
3211343909 Deleted redundant null ptr check prior to delete.
Fixes issue #338.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=113439460
2016-03-31 15:53:30 -07:00
Chris Mumford
7306ef856a Merge pull request #348 from randomascii/master
Fix signed/unsigned mismatch on VC++ builds
2016-02-24 14:39:03 -08:00
Bruce Dawson
6b18316d01 Fix signed/unsigned mismatch on VC++ builds 2016-02-19 13:59:19 -08:00
cmumford
adbe3eb073 Putting build artifacts in subdirectory.
1. Object files, libraries, and compiled executables are put
   into subdirectories.
2. The shared library is linked from individual object files.
   This provides for greater parallelism on large desktops
   while at the same time making for easier builds on small
   (i.e. embedded) systems. Fixes issue #279.
3. One program, db_bench, is compiled using the shared library.
4. The source file for "leveldbutil" was renamed from
   leveldb_main.cc to leveldbutil.cc. This provides for simpler
   makefile rules.
5. Because all targets placed the library (libleveldb.a) at the top
   level, the last platform built (desktop/device) always overwrote
   any prior artifact.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=113407013
2016-01-29 16:10:00 -08:00
Chris Mumford
2d0320a458 Merge pull request #329 from ralphtheninja/travis-badge
Add travis build badge to README
2016-01-15 11:17:41 -08:00
Lars-Magnus Skog
dd1c3c3572 add travis build badge 2016-01-15 18:29:01 +01:00
Chris Mumford
43fcf23af0 Merge pull request #328 from cmumford/master
Added a Travis CI build file.
2016-01-14 21:17:21 -08:00
Chris Mumford
9fcae61641 Added a Travis CI build file.
This allows for continuous integration builds by travis-ci.org.
More information at https://docs.travis-ci.com/user/languages/cpp
2016-01-14 17:41:48 -08:00
Chris Mumford
dac40d25f6 Merge pull request #284 from ideawu/master
log compaction output file's level along with number
2016-01-12 11:30:32 -08:00
Chris Mumford
8ec241a3b0 Merge pull request #317 from falvojr/patch-1
Update README.md
2016-01-12 10:52:33 -08:00
Chris Mumford
5d36bedd1c Merge pull request #272 from vapier/master
Fix Android/MIPS build.
2016-01-12 10:47:33 -08:00
cmumford
4753c9b617 Added a contributors section to README.md
In preparation for accepting GitHub pull requests this new README
section outlines the general criteria that the leveldb project owners
will use when accepting external (and internal) project contributions.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=111349899
2016-01-04 13:29:41 -08:00
Chris Mumford
e2446d0848 Merge pull request #275 from paulirish/patch-1
readme: improved documentation link
2015-12-09 15:04:30 -08:00
ssid
706b7f8d43 Resolve race when getting approximate-memory-usage property
The write operations on the table happen without holding the mutex
lock, but concurrent writes are serialized through the "writers_" queue.
Arena::MemoryUsage() could therefore read the arena's blocks while a
write is in progress. To avoid this race, the memory usage is cached in
an atomic word and can be loaded safely from any thread.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=107573379
2015-12-09 11:27:50 -08:00
cmumford
3c9ff3c691 Only compiling TrimSpace on linux.
Incorporated change by zmodem at https://github.com/google/leveldb/pull/310
to fix issue #310.

This change only builds TrimSpace on Linux to avoid an unused-function
warning/error.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=105323419
2015-12-09 10:35:07 -08:00
cmumford
f8d205cf89 Including atomic_pointer.h in port_posix
A recent CL (104348226) created the port_posix library, but omitted port/atomic_pointer.h.

And when:

    [] test third_party/leveldb:all

was run this error was reported:

    //third_party/leveldb:port_posix does not depend on a
    module exporting 'third_party/leveldb/port/atomic_pointer.h'
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=105243399
2015-12-09 10:35:07 -08:00
ndmatthews
889de31a5a Let LevelDB use xcrun to determine Xcode.app path instead of using a hardcoded path.
This allows build agents to select from multiple Xcode installations.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=104859097
2015-12-09 10:34:58 -08:00
ssid
528c2bc6ad Add "approximate-memory-usage" property to leveldb::DB::GetProperty
The approximate RAM usage of the database is calculated from the memory
allocated for write buffers and the block cache. This is to give an
estimate of memory usage to leveldb clients.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=104222307
2015-12-09 10:34:58 -08:00
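
A short usage sketch of the new property (the database path and error handling are illustrative only):

```cpp
#include <cstdio>
#include <string>

#include "leveldb/db.h"

int main() {
  leveldb::DB* db = nullptr;
  leveldb::Options options;
  options.create_if_missing = true;
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/approx_mem_demo", &db);
  if (!s.ok()) return 1;

  std::string usage;
  if (db->GetProperty("leveldb.approximate-memory-usage", &usage)) {
    // The value is the estimated bytes held by write buffers and the block cache.
    std::printf("approximate memory usage: %s bytes\n", usage.c_str());
  }
  delete db;
  return 0;
}
```
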
tzik
359b6bcec2 Add leveldb::Cache::Prune
Prune() drops the database's in-memory read cache so that the client can
relieve memory pressure.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=101335710
2015-12-09 10:34:58 -08:00
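
A minimal sketch of how a client might use the new method, assuming it wires Prune() to its own low-memory signal; the cache size and the OnMemoryPressure name below are illustrative:

```cpp
#include "leveldb/cache.h"
#include "leveldb/options.h"

// Hypothetical hook a client might call when the OS reports memory pressure.
void OnMemoryPressure(leveldb::Cache* block_cache) {
  // Drops cache entries that no handle is pinning; pinned entries survive.
  block_cache->Prune();
}

int main() {
  leveldb::Options options;
  options.block_cache = leveldb::NewLRUCache(8 << 20);  // 8 MB block cache
  // ... open a DB with these options and serve reads ...
  OnMemoryPressure(options.block_cache);
  delete options.block_cache;
  return 0;
}
```
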
pkasting
50e77a8263 Fix size_t/int comparison/conversion issues in leveldb.
The create function took |num_keys| as an int, but callers and implementers wanted it to function as a size_t (e.g. passing std::vector::size() in, passing it to vector constructors as a size arg, indexing containers by it, etc.).  This resulted in implicit conversions between the two types as well as warnings (found with Chromium's external copy of these sources, built with MSVC) about signed vs. unsigned comparisons.

The leveldb sources were already widely using size_t elsewhere, e.g. for key and filter lengths, so using size_t here is not inconsistent with the existing code.  However, it does change the public C API.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=101074871
2015-12-09 10:34:58 -08:00
cmumford
5208e7952d Added leveldb::Status::IsInvalidArgument() method.
Every other Status::Code enum value already had a corresponding Is*() method;
InvalidArgument was the one exception.
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=97166441
2015-12-09 10:34:57 -08:00
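
A small sketch of the new accessor in use. Opening a nonexistent database with create_if_missing left false is one operation that yields an InvalidArgument status; the path is illustrative.

```cpp
#include <cstdio>

#include "leveldb/db.h"
#include "leveldb/status.h"

int main() {
  leveldb::DB* db = nullptr;
  leveldb::Options options;
  options.create_if_missing = false;  // opening a missing DB yields an error
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/missing_db", &db);
  if (s.IsInvalidArgument()) {
    std::printf("invalid argument: %s\n", s.ToString().c_str());
  } else if (!s.ok()) {
    std::printf("other error: %s\n", s.ToString().c_str());
  } else {
    delete db;
  }
  return 0;
}
```
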
Mike Wiacek
ce45404bba Suppress error reporting after seeking but before a valid First or Full record is encountered.
Fix a spelling mistake.
2015-12-09 10:34:57 -08:00
Chris Mumford
b9afa1f2e7 include <assert> -> <cassert>
Fixes reported public issue #280.
2015-12-09 10:34:57 -08:00
Venilton FalvoJr
edf2939c0d Update README.md 2015-11-23 16:24:16 -02:00
Chris Mumford
65190ac48b Will not reuse the manifest if the reuse_logs option is false.
The prior implementation would always try to reuse the manifest, even if reuse_logs
was false (the default). This was missed because the stock
Env::NewAppendableFile implementation returns a not-supported error, forcing the
creation of a new log.
2015-08-11 14:59:48 -07:00
Sanjay Ghemawat
ac1d69da31 LevelDB now attempts to reuse the preceding MANIFEST and log file when re-opened.
(Based on a suggestion by cmumford.)

"open" benchmark on my workstation speeds up significantly since we
can now avoid three fdatasync calls and a compaction per open:

  Before: ~80000 microseconds
  After:    ~130 microseconds

Details:

(1) Added Options::reuse_logs (currently defaults to false) to control
new behavior.  The intention is to change the default to true after some
baking.

(2) Added Env::NewAppendableFile() whose default implementation returns
a not-supported error.

(3) VersionSet::Recovery attempts to reuse the MANIFEST from which
it is recovering.

(4) DBImpl recovery attempts to reuse the last log file and memtable.

(5) db_test.cc now tests a new configuration that sets reuse_logs to true.

(6) fault_injection_test also tests a reuse_logs==true config.

(7) Added a new recovery_test.
2015-08-11 14:56:39 -07:00
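
A minimal sketch of opting into the new behavior while the default remains false; the database path is illustrative:

```cpp
#include "leveldb/db.h"
#include "leveldb/options.h"

int main() {
  leveldb::Options options;
  options.create_if_missing = true;
  options.reuse_logs = true;  // reuse the previous MANIFEST and log file on open
  leveldb::DB* db = nullptr;
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/reuse_logs_demo", &db);
  if (s.ok()) delete db;
  return 0;
}
```
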
ideawu
76bba139c0 fix indent 2015-04-20 12:41:01 +08:00
ideawu
8fcceb2a6f log compaction output file's level along with number 2015-04-20 12:39:14 +08:00
Paul Irish
0e0f07417c documentation. improved link 2015-02-17 09:55:40 -08:00
Wankai Zhang
dd598676cd block_builder header file dependency fixed 2015-01-29 14:15:31 +08:00
Paul Irish
c85addcdf3 readme: improved documentation link
This replaces htmlpreview with [rawgit](http://rawgit.com/).
rawgit is faster and doesn't use JS to render the page, making it SEO friendly.
2015-01-10 17:36:40 -08:00
David Turner
ceff6f1215 Fix Android/MIPS build.
port/atomic_pointer.h was missing an implementation for
MemoryBarrier() for this platform.
2014-12-17 14:18:54 -05:00
163 changed files with 11369 additions and 8385 deletions

.clang-format (new file, 18 lines)

@ -0,0 +1,18 @@
# Run manually to reformat a file:
# clang-format -i --style=file <file>
# find . -iname '*.cc' -o -iname '*.h' -o -iname '*.h.in' | xargs clang-format -i --style=file
BasedOnStyle: Google
DerivePointerAlignment: false
# Public headers are in a different location in the internal Google repository.
# Order them so that when imported to the authoritative repository they will be
# in correct alphabetical order.
IncludeCategories:
- Regex: '^(<|"(benchmarks|db|helpers)/)'
Priority: 1
- Regex: '^"(leveldb)/'
Priority: 2
- Regex: '^(<|"(issues|port|table|third_party|util)/)'
Priority: 3
- Regex: '.*'
Priority: 4

.github/workflows/build.yml (new file, 102 lines)

@ -0,0 +1,102 @@
# Copyright 2021 The LevelDB Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. See the AUTHORS file for names of contributors.
name: ci
on: [push, pull_request]
permissions:
contents: read
jobs:
build-and-test:
name: >-
CI
${{ matrix.os }}
${{ matrix.compiler }}
${{ matrix.optimized && 'release' || 'debug' }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
compiler: [clang, gcc, msvc]
os: [ubuntu-latest, macos-latest, windows-latest]
optimized: [true, false]
exclude:
# MSVC only works on Windows.
- os: ubuntu-latest
compiler: msvc
- os: macos-latest
compiler: msvc
# Not testing with GCC on macOS.
- os: macos-latest
compiler: gcc
# Only testing with MSVC on Windows.
- os: windows-latest
compiler: clang
- os: windows-latest
compiler: gcc
include:
- compiler: clang
CC: clang
CXX: clang++
- compiler: gcc
CC: gcc
CXX: g++
- compiler: msvc
CC:
CXX:
env:
CMAKE_BUILD_DIR: ${{ github.workspace }}/build
CMAKE_BUILD_TYPE: ${{ matrix.optimized && 'RelWithDebInfo' || 'Debug' }}
CC: ${{ matrix.CC }}
CXX: ${{ matrix.CXX }}
BINARY_SUFFIX: ${{ startsWith(matrix.os, 'windows') && '.exe' || '' }}
BINARY_PATH: >-
${{ format(
startsWith(matrix.os, 'windows') && '{0}\build\{1}\' || '{0}/build/',
github.workspace,
matrix.optimized && 'RelWithDebInfo' || 'Debug') }}
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Install dependencies on Linux
if: ${{ runner.os == 'Linux' }}
# libgoogle-perftools-dev is temporarily removed from the package list
# because it is currently broken on GitHub's Ubuntu 22.04.
run: |
sudo apt-get update
sudo apt-get install libkyotocabinet-dev libsnappy-dev libsqlite3-dev
- name: Generate build config
run: >-
cmake -S "${{ github.workspace }}" -B "${{ env.CMAKE_BUILD_DIR }}"
-DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }}
-DCMAKE_INSTALL_PREFIX=${{ runner.temp }}/install_test/
- name: Build
run: >-
cmake --build "${{ env.CMAKE_BUILD_DIR }}"
--config "${{ env.CMAKE_BUILD_TYPE }}"
- name: Run Tests
working-directory: ${{ github.workspace }}/build
run: ctest -C "${{ env.CMAKE_BUILD_TYPE }}" --verbose
- name: Run LevelDB Benchmarks
run: ${{ env.BINARY_PATH }}db_bench${{ env.BINARY_SUFFIX }}
- name: Run SQLite Benchmarks
if: ${{ runner.os != 'Windows' }}
run: ${{ env.BINARY_PATH }}db_bench_sqlite3${{ env.BINARY_SUFFIX }}
- name: Run Kyoto Cabinet Benchmarks
if: ${{ runner.os == 'Linux' && matrix.compiler == 'clang' }}
run: ${{ env.BINARY_PATH }}db_bench_tree_db${{ env.BINARY_SUFFIX }}
- name: Test CMake installation
run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --target install

.gitignore (17 lines changed)

@ -1,9 +1,8 @@
build_config.mk
*.a
*.o
*.dylib*
*.so
*.so.*
*_test
db_bench
leveldbutil
# Editors.
*.sw*
.vscode
.DS_Store
# Build directory.
build/
out/

.gitmodules (new file, 6 lines)

@ -0,0 +1,6 @@
[submodule "third_party/googletest"]
path = third_party/googletest
url = https://github.com/google/googletest.git
[submodule "third_party/benchmark"]
path = third_party/benchmark
url = https://github.com/google/benchmark

CMakeLists.txt (new file, 519 lines)

@ -0,0 +1,519 @@
# Copyright 2017 The LevelDB Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. See the AUTHORS file for names of contributors.
cmake_minimum_required(VERSION 3.9)
# Keep the version below in sync with the one in db.h
project(leveldb VERSION 1.23.0 LANGUAGES C CXX)
# C standard can be overridden when this is used as a sub-project.
if(NOT CMAKE_C_STANDARD)
# This project can use C11, but will gracefully decay down to C89.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED OFF)
set(CMAKE_C_EXTENSIONS OFF)
endif(NOT CMAKE_C_STANDARD)
# C++ standard can be overridden when this is used as a sub-project.
if(NOT CMAKE_CXX_STANDARD)
# This project requires C++11.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
endif(NOT CMAKE_CXX_STANDARD)
if (WIN32)
set(LEVELDB_PLATFORM_NAME LEVELDB_PLATFORM_WINDOWS)
# TODO(cmumford): Make UNICODE configurable for Windows.
add_definitions(-D_UNICODE -DUNICODE)
else (WIN32)
set(LEVELDB_PLATFORM_NAME LEVELDB_PLATFORM_POSIX)
endif (WIN32)
option(LEVELDB_BUILD_TESTS "Build LevelDB's unit tests" ON)
option(LEVELDB_BUILD_BENCHMARKS "Build LevelDB's benchmarks" ON)
option(LEVELDB_INSTALL "Install LevelDB's header and library" ON)
include(CheckIncludeFile)
check_include_file("unistd.h" HAVE_UNISTD_H)
include(CheckLibraryExists)
check_library_exists(crc32c crc32c_value "" HAVE_CRC32C)
check_library_exists(snappy snappy_compress "" HAVE_SNAPPY)
check_library_exists(zstd zstd_compress "" HAVE_ZSTD)
check_library_exists(tcmalloc malloc "" HAVE_TCMALLOC)
include(CheckCXXSymbolExists)
# Using check_cxx_symbol_exists() instead of check_c_symbol_exists() because
# we're including the header from C++, and feature detection should use the same
# compiler language that the project will use later. Principles aside, some
# versions of the C library do not expose fdatasync() in <unistd.h> in standard C mode
# (-std=c11), but do expose the function in standard C++ mode (-std=c++11).
check_cxx_symbol_exists(fdatasync "unistd.h" HAVE_FDATASYNC)
check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
check_cxx_symbol_exists(O_CLOEXEC "fcntl.h" HAVE_O_CLOEXEC)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Disable C++ exceptions.
string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-")
add_definitions(-D_HAS_EXCEPTIONS=0)
# Disable RTTI.
string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-")
else(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Enable strict prototype warnings for C code in clang and gcc.
if(NOT CMAKE_C_FLAGS MATCHES "-Wstrict-prototypes")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes")
endif(NOT CMAKE_C_FLAGS MATCHES "-Wstrict-prototypes")
# Disable C++ exceptions.
string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
# Disable RTTI.
string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Test whether -Wthread-safety is available. See
# https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-Wthread-safety HAVE_CLANG_THREAD_SAFETY)
# Used by googletest.
check_cxx_compiler_flag(-Wno-missing-field-initializers
LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
include(CheckCXXSourceCompiles)
# Test whether C++17 __has_include is available.
check_cxx_source_compiles("
#if defined(__has_include) && __has_include(<string>)
#include <string>
#endif
int main() { std::string str; return 0; }
" HAVE_CXX17_HAS_INCLUDE)
set(LEVELDB_PUBLIC_INCLUDE_DIR "include/leveldb")
set(LEVELDB_PORT_CONFIG_DIR "include/port")
configure_file(
"port/port_config.h.in"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
)
include_directories(
"${PROJECT_BINARY_DIR}/include"
"."
)
if(BUILD_SHARED_LIBS)
# Only export LEVELDB_EXPORT symbols from the shared library.
add_compile_options(-fvisibility=hidden)
endif(BUILD_SHARED_LIBS)
# Must be included before CMAKE_INSTALL_INCLUDEDIR is used.
include(GNUInstallDirs)
add_library(leveldb "")
target_sources(leveldb
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"db/builder.cc"
"db/builder.h"
"db/c.cc"
"db/db_impl.cc"
"db/db_impl.h"
"db/db_iter.cc"
"db/db_iter.h"
"db/dbformat.cc"
"db/dbformat.h"
"db/dumpfile.cc"
"db/filename.cc"
"db/filename.h"
"db/log_format.h"
"db/log_reader.cc"
"db/log_reader.h"
"db/log_writer.cc"
"db/log_writer.h"
"db/memtable.cc"
"db/memtable.h"
"db/repair.cc"
"db/skiplist.h"
"db/snapshot.h"
"db/table_cache.cc"
"db/table_cache.h"
"db/version_edit.cc"
"db/version_edit.h"
"db/version_set.cc"
"db/version_set.h"
"db/write_batch_internal.h"
"db/write_batch.cc"
"port/port_stdcxx.h"
"port/port.h"
"port/thread_annotations.h"
"table/block_builder.cc"
"table/block_builder.h"
"table/block.cc"
"table/block.h"
"table/filter_block.cc"
"table/filter_block.h"
"table/format.cc"
"table/format.h"
"table/iterator_wrapper.h"
"table/iterator.cc"
"table/merger.cc"
"table/merger.h"
"table/table_builder.cc"
"table/table.cc"
"table/two_level_iterator.cc"
"table/two_level_iterator.h"
"util/arena.cc"
"util/arena.h"
"util/bloom.cc"
"util/cache.cc"
"util/coding.cc"
"util/coding.h"
"util/comparator.cc"
"util/crc32c.cc"
"util/crc32c.h"
"util/env.cc"
"util/filter_policy.cc"
"util/hash.cc"
"util/hash.h"
"util/logging.cc"
"util/logging.h"
"util/mutexlock.h"
"util/no_destructor.h"
"util/options.cc"
"util/random.h"
"util/status.cc"
# Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
$<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
)
if (WIN32)
target_sources(leveldb
PRIVATE
"util/env_windows.cc"
"util/windows_logger.h"
)
else (WIN32)
target_sources(leveldb
PRIVATE
"util/env_posix.cc"
"util/posix_logger.h"
)
endif (WIN32)
# MemEnv is not part of the interface and could be pulled to a separate library.
target_sources(leveldb
PRIVATE
"helpers/memenv/memenv.cc"
"helpers/memenv/memenv.h"
)
target_include_directories(leveldb
PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
set_target_properties(leveldb
PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
target_compile_definitions(leveldb
PRIVATE
# Used by include/export.h when building shared libraries.
LEVELDB_COMPILE_LIBRARY
# Used by port/port.h.
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions(leveldb
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
if(BUILD_SHARED_LIBS)
target_compile_definitions(leveldb
PUBLIC
# Used by include/export.h.
LEVELDB_SHARED_LIBRARY
)
endif(BUILD_SHARED_LIBS)
if(HAVE_CLANG_THREAD_SAFETY)
target_compile_options(leveldb
PUBLIC
-Werror -Wthread-safety)
endif(HAVE_CLANG_THREAD_SAFETY)
if(HAVE_CRC32C)
target_link_libraries(leveldb crc32c)
endif(HAVE_CRC32C)
if(HAVE_SNAPPY)
target_link_libraries(leveldb snappy)
endif(HAVE_SNAPPY)
if(HAVE_ZSTD)
target_link_libraries(leveldb zstd)
endif(HAVE_ZSTD)
if(HAVE_TCMALLOC)
target_link_libraries(leveldb tcmalloc)
endif(HAVE_TCMALLOC)
# Needed by port_stdcxx.h
find_package(Threads REQUIRED)
target_link_libraries(leveldb Threads::Threads)
add_executable(leveldbutil
"db/leveldbutil.cc"
)
target_link_libraries(leveldbutil leveldb)
if(LEVELDB_BUILD_TESTS)
enable_testing()
# Prevent overriding the parent project's compiler/linker settings on Windows.
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
set(install_gtest OFF)
set(install_gmock OFF)
set(build_gmock ON)
# This project is tested using GoogleTest.
add_subdirectory("third_party/googletest")
# GoogleTest triggers a missing field initializers warning.
if(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
set_property(TARGET gtest
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
set_property(TARGET gmock
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
endif(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
add_executable(leveldb_tests "")
target_sources(leveldb_tests
PRIVATE
# "db/fault_injection_test.cc"
# "issues/issue178_test.cc"
# "issues/issue200_test.cc"
# "issues/issue320_test.cc"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
# "util/env_test.cc"
"util/status_test.cc"
"util/no_destructor_test.cc"
"util/testutil.cc"
"util/testutil.h"
)
if(NOT BUILD_SHARED_LIBS)
target_sources(leveldb_tests
PRIVATE
"db/autocompact_test.cc"
"db/corruption_test.cc"
"db/db_test.cc"
"db/dbformat_test.cc"
"db/filename_test.cc"
"db/log_test.cc"
"db/recovery_test.cc"
"db/skiplist_test.cc"
"db/version_edit_test.cc"
"db/version_set_test.cc"
"db/write_batch_test.cc"
"helpers/memenv/memenv_test.cc"
"table/filter_block_test.cc"
"table/table_test.cc"
"util/arena_test.cc"
"util/bloom_test.cc"
"util/cache_test.cc"
"util/coding_test.cc"
"util/crc32c_test.cc"
"util/hash_test.cc"
"util/logging_test.cc"
)
endif(NOT BUILD_SHARED_LIBS)
target_link_libraries(leveldb_tests leveldb gmock gtest gtest_main)
target_compile_definitions(leveldb_tests
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions(leveldb_tests
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
add_test(NAME "leveldb_tests" COMMAND "leveldb_tests")
function(leveldb_test test_file)
get_filename_component(test_target_name "${test_file}" NAME_WE)
add_executable("${test_target_name}" "")
target_sources("${test_target_name}"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/testutil.cc"
"util/testutil.h"
"${test_file}"
)
target_link_libraries("${test_target_name}" leveldb gmock gtest)
target_compile_definitions("${test_target_name}"
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions("${test_target_name}"
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
add_test(NAME "${test_target_name}" COMMAND "${test_target_name}")
endfunction(leveldb_test)
leveldb_test("db/c_test.c")
if(NOT BUILD_SHARED_LIBS)
# TODO(costan): This test also uses
# "util/env_{posix|windows}_test_helper.h"
if (WIN32)
leveldb_test("util/env_windows_test.cc")
else (WIN32)
leveldb_test("util/env_posix_test.cc")
endif (WIN32)
endif(NOT BUILD_SHARED_LIBS)
endif(LEVELDB_BUILD_TESTS)
if(LEVELDB_BUILD_BENCHMARKS)
# This project uses Google benchmark for benchmarking.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
add_subdirectory("third_party/benchmark")
function(leveldb_benchmark bench_file)
get_filename_component(bench_target_name "${bench_file}" NAME_WE)
add_executable("${bench_target_name}" "")
target_sources("${bench_target_name}"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/histogram.cc"
"util/histogram.h"
"util/testutil.cc"
"util/testutil.h"
"${bench_file}"
)
target_link_libraries("${bench_target_name}" leveldb gmock gtest benchmark)
target_compile_definitions("${bench_target_name}"
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions("${bench_target_name}"
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
endfunction(leveldb_benchmark)
if(NOT BUILD_SHARED_LIBS)
leveldb_benchmark("benchmarks/db_bench.cc")
endif(NOT BUILD_SHARED_LIBS)
check_library_exists(sqlite3 sqlite3_open "" HAVE_SQLITE3)
if(HAVE_SQLITE3)
leveldb_benchmark("benchmarks/db_bench_sqlite3.cc")
target_link_libraries(db_bench_sqlite3 sqlite3)
endif(HAVE_SQLITE3)
# check_library_exists is insufficient here because the library names have
# different manglings when compiled with clang or gcc, at least when installed
# with Homebrew on Mac.
set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
list(APPEND CMAKE_REQUIRED_LIBRARIES kyotocabinet)
check_cxx_source_compiles("
#include <kcpolydb.h>
int main() {
kyotocabinet::TreeDB* db = new kyotocabinet::TreeDB();
delete db;
return 0;
}
" HAVE_KYOTOCABINET)
set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES})
if(HAVE_KYOTOCABINET)
leveldb_benchmark("benchmarks/db_bench_tree_db.cc")
target_link_libraries(db_bench_tree_db kyotocabinet)
endif(HAVE_KYOTOCABINET)
endif(LEVELDB_BUILD_BENCHMARKS)
if(LEVELDB_INSTALL)
install(TARGETS leveldb
EXPORT leveldbTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(
FILES
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/leveldb"
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
"cmake/${PROJECT_NAME}Config.cmake.in"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
COMPATIBILITY SameMajorVersion
)
install(
EXPORT leveldbTargets
NAMESPACE leveldb::
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
install(
FILES
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
endif(LEVELDB_INSTALL)


@ -1,36 +1,31 @@
# Contributing
# How to Contribute
We'd love to accept your code patches! However, before we can take them, we
have to jump a couple of legal hurdles.
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreements
## Contributor License Agreement
Please fill out either the individual or corporate Contributor License
Agreement as appropriate.
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
* If you are an individual writing original source code and you're sure you
own the intellectual property, then sign an [individual CLA](https://developers.google.com/open-source/cla/individual).
* If you work for a company that wants to allow you to contribute your work,
then sign a [corporate CLA](https://developers.google.com/open-source/cla/corporate).
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
Follow either of the two links above to access the appropriate CLA and
instructions for how to sign and return it.
## Code Reviews
## Submitting a Patch
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
1. Sign the contributors license agreement above.
2. Decide which code you want to submit. A submission should be a set of changes
that addresses one issue in the [issue tracker](https://github.com/google/leveldb/issues).
Please don't mix more than one logical change per submission, because it makes
the history hard to follow. If you want to make a change
(e.g. add a sample or feature) that doesn't have a corresponding issue in the
issue tracker, please create one.
3. **Submitting**: When you are ready to submit, send us a Pull Request. Be
sure to include the issue number you fixed and the name you used to sign
the CLA.
See [the README](README.md#contributing-to-the-leveldb-project) for areas
where we are likely to accept external contributions.
## Writing Code ##
## Community Guidelines
If your contribution contains code, please make sure that it follows
[the style guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
Otherwise we will have to ask you to make changes, and that's no fun for anyone.
This project follows [Google's Open Source Community
Guidelines](https://opensource.google/conduct/).

Makefile (deleted, 233 lines)

@ -1,233 +0,0 @@
# Copyright (c) 2011 The LevelDB Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. See the AUTHORS file for names of contributors.
#-----------------------------------------------
# Uncomment exactly one of the lines labelled (A), (B), and (C) below
# to switch between compilation modes.
# (A) Production use (optimized mode)
OPT ?= -O2 -DNDEBUG
# (B) Debug mode, w/ full line-level debugging symbols
# OPT ?= -g2
# (C) Profiling mode: opt, but w/debugging symbols
# OPT ?= -O2 -g2 -DNDEBUG
#-----------------------------------------------
# detect what platform we're building on
$(shell CC="$(CC)" CXX="$(CXX)" TARGET_OS="$(TARGET_OS)" \
./build_detect_platform build_config.mk ./)
# this file is generated by the previous line to set build flags and sources
include build_config.mk
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT)
LDFLAGS += $(PLATFORM_LDFLAGS)
LIBS += $(PLATFORM_LIBS)
LIBOBJECTS = $(SOURCES:.cc=.o)
MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o)
TESTUTIL = ./util/testutil.o
TESTHARNESS = ./util/testharness.o $(TESTUTIL)
# Note: iOS should probably be using libtool, not ar.
ifeq ($(PLATFORM), IOS)
AR=xcrun ar
endif
TESTS = \
arena_test \
autocompact_test \
bloom_test \
c_test \
cache_test \
coding_test \
corruption_test \
crc32c_test \
db_test \
dbformat_test \
env_test \
fault_injection_test \
filename_test \
filter_block_test \
hash_test \
issue178_test \
issue200_test \
log_test \
memenv_test \
recovery_test \
skiplist_test \
table_test \
version_edit_test \
version_set_test \
write_batch_test
PROGRAMS = db_bench leveldbutil $(TESTS)
BENCHMARKS = db_bench_sqlite3 db_bench_tree_db
LIBRARY = libleveldb.a
MEMENVLIBRARY = libmemenv.a
default: all
# Should we build shared libraries?
ifneq ($(PLATFORM_SHARED_EXT),)
ifneq ($(PLATFORM_SHARED_VERSIONED),true)
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1)
SHARED3 = $(SHARED1)
SHARED = $(SHARED1)
else
# Update db.h if you change these.
SHARED_MAJOR = 1
SHARED_MINOR = 18
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
SHARED = $(SHARED1) $(SHARED2) $(SHARED3)
$(SHARED1): $(SHARED3)
ln -fs $(SHARED3) $(SHARED1)
$(SHARED2): $(SHARED3)
ln -fs $(SHARED3) $(SHARED2)
endif
$(SHARED3):
$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) $(LIBS)
endif # PLATFORM_SHARED_EXT
all: $(SHARED) $(LIBRARY)
check: all $(PROGRAMS) $(TESTS)
for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done
clean:
-rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk
-rm -rf ios-x86/* ios-arm/*
$(LIBRARY): $(LIBOBJECTS)
rm -f $@
$(AR) -rs $@ $(LIBOBJECTS)
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
$(CXX) $(LDFLAGS) db/leveldb_main.o $(LIBOBJECTS) -o $@ $(LIBS)
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
fault_injection_test: db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
issue200_test: issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
recovery_test: db/recovery_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/recovery_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(MEMENVOBJECTS)
memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
$(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LIBS)
ifeq ($(PLATFORM), IOS)
# For iOS, create universal object files to be used on both the simulator and
# a device.
SIMULATORSDK=$(shell xcrun -sdk iphonesimulator --show-sdk-path)
DEVICESDK=$(shell xcrun -sdk iphoneos --show-sdk-path)
IOSARCH=-arch armv6 -arch armv7 -arch armv7s -arch arm64
.cc.o:
mkdir -p ios-x86/$(dir $@)
xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) -isysroot "$(SIMULATORSDK)" -arch i686 -arch x86_64 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot "$(DEVICESDK)" $(IOSARCH) -c $< -o ios-arm/$@
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
.c.o:
mkdir -p ios-x86/$(dir $@)
xcrun -sdk iphonesimulator $(CC) $(CFLAGS) -isysroot "$(SIMULATORSDK)" -arch i686 -arch x86_64 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot "$(DEVICESDK)" $(IOSARCH) -c $< -o ios-arm/$@
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
else
.cc.o:
$(CXX) $(CXXFLAGS) -c $< -o $@
.c.o:
$(CC) $(CFLAGS) -c $< -o $@
endif

README.md (158 lines changed)

@ -1,8 +1,16 @@
**LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.**
LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.
> **This repository is receiving very limited maintenance. We will only review the following types of changes.**
>
> * Fixes for critical bugs, such as data loss or memory corruption
> * Changes absolutely needed by internally supported leveldb clients. These typically fix breakage introduced by a language/standard library/OS update
[![ci](https://github.com/google/leveldb/actions/workflows/build.yml/badge.svg)](https://github.com/google/leveldb/actions/workflows/build.yml)
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
# Features
* Keys and values are arbitrary byte arrays.
* Data is stored sorted by key.
* Callers can provide a custom comparison function to override the sort order.
@ -10,16 +18,115 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
* Multiple changes can be made in one atomic batch.
* Users can create a transient snapshot to get a consistent view of data.
* Forward and backward iteration is supported over the data.
* Data is automatically compressed using the [Snappy compression library](http://code.google.com/p/snappy).
* Data is automatically compressed using the [Snappy compression library](https://google.github.io/snappy/), but [Zstd compression](https://facebook.github.io/zstd/) is also supported.
* External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions.
* [Detailed documentation](http://htmlpreview.github.io/?https://github.com/google/leveldb/blob/master/doc/index.html) about how to use the library is included with the source code.
# Documentation
[LevelDB library documentation](https://github.com/google/leveldb/blob/main/doc/index.md) is online and bundled with the source code.
# Limitations
* This is not a SQL database. It does not have a relational data model, it does not support SQL queries, and it has no support for indexes.
* Only a single process (possibly multi-threaded) can access a particular database at a time.
* There is no client-server support built in to the library. An application that needs such support will have to wrap its own server around the library.
# Getting the Source
```bash
git clone --recurse-submodules https://github.com/google/leveldb.git
```
# Building
This project supports [CMake](https://cmake.org/) out of the box.
### Build for POSIX
Quick start:
```bash
mkdir -p build && cd build
cmake -DCMAKE_BUILD_TYPE=Release .. && cmake --build .
```
### Building for Windows
First generate the Visual Studio 2017 project/solution files:
```cmd
mkdir build
cd build
cmake -G "Visual Studio 15" ..
```
The default build is for x86. For a 64-bit build run:
```cmd
cmake -G "Visual Studio 15 Win64" ..
```
To compile the Windows solution from the command-line:
```cmd
devenv /build Debug leveldb.sln
```
or open leveldb.sln in Visual Studio and build from within.
Please see the CMake documentation and `CMakeLists.txt` for more advanced usage.
# Contributing to the leveldb Project
> **This repository is receiving very limited maintenance. We will only review the following types of changes.**
>
> * Bug fixes
> * Changes absolutely needed by internally supported leveldb clients. These typically fix breakage introduced by a language/standard library/OS update
The leveldb project welcomes contributions. leveldb's primary goal is to be
a reliable and fast key/value store. Changes that are in line with the
features/limitations outlined above, and meet the requirements below,
will be considered.
Contribution requirements:
1. **Tested platforms only**. We _generally_ will only accept changes for
platforms that are compiled and tested. This means POSIX (for Linux and
macOS) or Windows. Very small changes will sometimes be accepted, but
consider that more of an exception than the rule.
2. **Stable API**. We strive very hard to maintain a stable API. Changes that
require changes for projects using leveldb _might_ be rejected without
sufficient benefit to the project.
3. **Tests**: All changes must be accompanied by a new (or changed) test, or
a sufficient explanation as to why a new (or changed) test is not required.
4. **Consistent Style**: This project conforms to the
[Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
To ensure your changes are properly formatted please run:
```
clang-format -i --style=file <file>
```
We are unlikely to accept contributions to the build configuration files, such
as `CMakeLists.txt`. We are focused on maintaining a build configuration that
allows us to test that the project works in a few supported configurations
inside Google. We are not currently interested in supporting other requirements,
such as different operating systems, compilers, or build systems.
## Submitting a Pull Request
Before any pull request will be accepted the author must first sign a
Contributor License Agreement (CLA) at https://cla.developers.google.com/.
In order to keep the commit timeline linear,
[squash](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits)
your changes down to a single commit and [rebase](https://git-scm.com/docs/git-rebase)
on google/leveldb/main. A linear history is more easily synced
with the internal repository at Google. More information is available on GitHub's
[About Git rebase](https://help.github.com/articles/about-git-rebase/) page.
# Performance
Here is a performance report (with explanations) from the run of the
@ -78,61 +185,62 @@ by the one or two disk seeks needed to fetch the data from disk.
Write performance will be mostly unaffected by whether or not the
working set fits in memory.
readrandom : 16.677 micros/op; (approximately 60,000 reads per second)
readseq : 0.476 micros/op; 232.3 MB/s
readreverse : 0.724 micros/op; 152.9 MB/s
readrandom : 16.677 micros/op; (approximately 60,000 reads per second)
readseq : 0.476 micros/op; 232.3 MB/s
readreverse : 0.724 micros/op; 152.9 MB/s
LevelDB compacts its underlying storage data in the background to
improve read performance. The results listed above were done
immediately after a lot of random writes. The results after
compactions (which are usually triggered automatically) are better.
readrandom : 11.602 micros/op; (approximately 85,000 reads per second)
readseq : 0.423 micros/op; 261.8 MB/s
readreverse : 0.663 micros/op; 166.9 MB/s
readrandom : 11.602 micros/op; (approximately 85,000 reads per second)
readseq : 0.423 micros/op; 261.8 MB/s
readreverse : 0.663 micros/op; 166.9 MB/s
Some of the high cost of reads comes from repeated decompression of blocks
read from disk. If we supply enough cache to the leveldb so it can hold the
uncompressed blocks in memory, the read performance improves again:
readrandom : 9.775 micros/op; (approximately 100,000 reads per second before compaction)
readrandom : 5.215 micros/op; (approximately 190,000 reads per second after compaction)
readrandom : 9.775 micros/op; (approximately 100,000 reads per second before compaction)
readrandom : 5.215 micros/op; (approximately 190,000 reads per second after compaction)
## Repository contents
See doc/index.html for more explanation. See doc/impl.html for a brief overview of the implementation.
See [doc/index.md](doc/index.md) for more explanation. See
[doc/impl.md](doc/impl.md) for a brief overview of the implementation.
The public interface is in include/*.h. Callers should not include or
The public interface is in include/leveldb/*.h. Callers should not include or
rely on the details of any other header files in this package. Those
internal APIs may be changed without warning.
Guide to header files:
* **include/db.h**: Main interface to the DB: Start here
* **include/leveldb/db.h**: Main interface to the DB: Start here.
* **include/options.h**: Control over the behavior of an entire database,
* **include/leveldb/options.h**: Control over the behavior of an entire database,
and also control over the behavior of individual reads and writes.
* **include/comparator.h**: Abstraction for user-specified comparison function.
* **include/leveldb/comparator.h**: Abstraction for user-specified comparison function.
If you want just bytewise comparison of keys, you can use the default
comparator, but clients can write their own comparator implementations if they
want custom ordering (e.g. to handle different character encodings, etc.)
want custom ordering (e.g. to handle different character encodings, etc.).
* **include/iterator.h**: Interface for iterating over data. You can get
* **include/leveldb/iterator.h**: Interface for iterating over data. You can get
an iterator from a DB object.
* **include/write_batch.h**: Interface for atomically applying multiple
* **include/leveldb/write_batch.h**: Interface for atomically applying multiple
updates to a database.
* **include/slice.h**: A simple module for maintaining a pointer and a
* **include/leveldb/slice.h**: A simple module for maintaining a pointer and a
length into some other byte array.
* **include/status.h**: Status is returned from many of the public interfaces
* **include/leveldb/status.h**: Status is returned from many of the public interfaces
and is used to report success and various kinds of errors.
* **include/env.h**:
* **include/leveldb/env.h**:
Abstraction of the OS environment. A posix implementation of this interface is
in util/env_posix.cc
in util/env_posix.cc.
* **include/table.h, include/table_builder.h**: Lower-level modules that most
clients probably won't use directly
* **include/leveldb/table.h, include/leveldb/table_builder.h**: Lower-level modules that most
clients probably won't use directly.

File diff suppressed because it is too large.


@ -0,0 +1,92 @@
// Copyright (c) 2019 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cinttypes>
#include <cstdio>
#include <string>
#include "gtest/gtest.h"
#include "benchmark/benchmark.h"
#include "db/version_set.h"
#include "leveldb/comparator.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/options.h"
#include "port/port.h"
#include "util/mutexlock.h"
#include "util/testutil.h"
namespace leveldb {
namespace {
std::string MakeKey(unsigned int num) {
char buf[30];
std::snprintf(buf, sizeof(buf), "%016u", num);
return std::string(buf);
}
void BM_LogAndApply(benchmark::State& state) {
const int num_base_files = state.range(0);
std::string dbname = testing::TempDir() + "leveldb_test_benchmark";
DestroyDB(dbname, Options());
DB* db = nullptr;
Options opts;
opts.create_if_missing = true;
Status s = DB::Open(opts, dbname, &db);
ASSERT_LEVELDB_OK(s);
ASSERT_TRUE(db != nullptr);
delete db;
db = nullptr;
Env* env = Env::Default();
port::Mutex mu;
MutexLock l(&mu);
InternalKeyComparator cmp(BytewiseComparator());
Options options;
VersionSet vset(dbname, &options, nullptr, &cmp);
bool save_manifest;
ASSERT_LEVELDB_OK(vset.Recover(&save_manifest));
VersionEdit vbase;
uint64_t fnum = 1;
for (int i = 0; i < num_base_files; i++) {
InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
}
ASSERT_LEVELDB_OK(vset.LogAndApply(&vbase, &mu));
uint64_t start_micros = env->NowMicros();
for (auto st : state) {
VersionEdit vedit;
vedit.RemoveFile(2, fnum);
InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
vset.LogAndApply(&vedit, &mu);
}
uint64_t stop_micros = env->NowMicros();
unsigned int us = stop_micros - start_micros;
char buf[16];
std::snprintf(buf, sizeof(buf), "%d", num_base_files);
std::fprintf(stderr,
"BM_LogAndApply/%-6s %8" PRIu64
" iters : %9u us (%7.0f us / iter)\n",
buf, state.iterations(), us, ((float)us) / state.iterations());
}
BENCHMARK(BM_LogAndApply)->Arg(1)->Arg(100)->Arg(10000)->Arg(100000);
} // namespace
} // namespace leveldb
BENCHMARK_MAIN();


@ -2,9 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdio.h>
#include <stdlib.h>
#include <sqlite3.h>
#include <cstdio>
#include <cstdlib>
#include "util/histogram.h"
#include "util/random.h"
#include "util/testutil.h"
@ -38,8 +40,7 @@ static const char* FLAGS_benchmarks =
"fillrand100K,"
"fillseq100K,"
"readseq,"
"readrand100K,"
;
"readrand100K,";
// Number of key/values to place in database
static int FLAGS_num = 1000000;
@ -69,6 +70,9 @@ static int FLAGS_num_pages = 4096;
// benchmark will fail.
static bool FLAGS_use_existing_db = false;
// If true, the SQLite table has ROWIDs.
static bool FLAGS_use_rowids = false;
// If true, we allow batch writes to occur
static bool FLAGS_transaction = true;
@ -76,38 +80,35 @@ static bool FLAGS_transaction = true;
static bool FLAGS_WAL_enabled = true;
// Use the db with the following name.
static const char* FLAGS_db = NULL;
static const char* FLAGS_db = nullptr;
inline
static void ExecErrorCheck(int status, char *err_msg) {
inline static void ExecErrorCheck(int status, char* err_msg) {
if (status != SQLITE_OK) {
fprintf(stderr, "SQL error: %s\n", err_msg);
std::fprintf(stderr, "SQL error: %s\n", err_msg);
sqlite3_free(err_msg);
exit(1);
std::exit(1);
}
}
inline
static void StepErrorCheck(int status) {
inline static void StepErrorCheck(int status) {
if (status != SQLITE_DONE) {
fprintf(stderr, "SQL step error: status = %d\n", status);
exit(1);
std::fprintf(stderr, "SQL step error: status = %d\n", status);
std::exit(1);
}
}
inline
static void ErrorCheck(int status) {
inline static void ErrorCheck(int status) {
if (status != SQLITE_OK) {
fprintf(stderr, "sqlite3 error: status = %d\n", status);
exit(1);
std::fprintf(stderr, "sqlite3 error: status = %d\n", status);
std::exit(1);
}
}
inline
static void WalCheckpoint(sqlite3* db_) {
inline static void WalCheckpoint(sqlite3* db_) {
// Flush all writes to disk
if (FLAGS_WAL_enabled) {
sqlite3_wal_checkpoint_v2(db_, NULL, SQLITE_CHECKPOINT_FULL, NULL, NULL);
sqlite3_wal_checkpoint_v2(db_, nullptr, SQLITE_CHECKPOINT_FULL, nullptr,
nullptr);
}
}
@ -152,7 +153,7 @@ static Slice TrimSpace(Slice s) {
start++;
}
int limit = s.size();
while (limit > start && isspace(s[limit-1])) {
while (limit > start && isspace(s[limit - 1])) {
limit--;
}
return Slice(s.data() + start, limit - start);
@ -176,49 +177,51 @@ class Benchmark {
// State kept for progress messages
int done_;
int next_report_; // When to report next
int next_report_; // When to report next
void PrintHeader() {
const int kKeySize = 16;
PrintEnvironment();
fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
fprintf(stdout, "Values: %d bytes each\n", FLAGS_value_size);
fprintf(stdout, "Entries: %d\n", num_);
fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
/ 1048576.0));
std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
std::fprintf(stdout, "Values: %d bytes each\n", FLAGS_value_size);
std::fprintf(stdout, "Entries: %d\n", num_);
std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
1048576.0));
PrintWarnings();
fprintf(stdout, "------------------------------------------------\n");
std::fprintf(stdout, "------------------------------------------------\n");
}
void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
fprintf(stdout,
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
);
std::fprintf(
stdout,
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
#endif
#ifndef NDEBUG
fprintf(stdout,
"WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
std::fprintf(
stdout,
"WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
}
void PrintEnvironment() {
fprintf(stderr, "SQLite: version %s\n", SQLITE_VERSION);
std::fprintf(stderr, "SQLite: version %s\n", SQLITE_VERSION);
#if defined(__linux)
time_t now = time(NULL);
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
time_t now = time(nullptr);
std::fprintf(stderr, "Date: %s",
ctime(&now)); // ctime() adds newline
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
if (cpuinfo != NULL) {
FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r");
if (cpuinfo != nullptr) {
char line[1000];
int num_cpus = 0;
std::string cpu_type;
std::string cache_size;
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
const char* sep = strchr(line, ':');
if (sep == NULL) {
if (sep == nullptr) {
continue;
}
Slice key = TrimSpace(Slice(line, sep - 1 - line));
@ -230,9 +233,9 @@ class Benchmark {
cache_size = val.ToString();
}
}
fclose(cpuinfo);
fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
std::fclose(cpuinfo);
std::fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
}
#endif
}
@ -253,23 +256,30 @@ class Benchmark {
double micros = (now - last_op_finish_) * 1e6;
hist_.Add(micros);
if (micros > 20000) {
fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
fflush(stderr);
std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
std::fflush(stderr);
}
last_op_finish_ = now;
}
done_++;
if (done_ >= next_report_) {
if (next_report_ < 1000) next_report_ += 100;
else if (next_report_ < 5000) next_report_ += 500;
else if (next_report_ < 10000) next_report_ += 1000;
else if (next_report_ < 50000) next_report_ += 5000;
else if (next_report_ < 100000) next_report_ += 10000;
else if (next_report_ < 500000) next_report_ += 50000;
else next_report_ += 100000;
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
fflush(stderr);
if (next_report_ < 1000)
next_report_ += 100;
else if (next_report_ < 5000)
next_report_ += 500;
else if (next_report_ < 10000)
next_report_ += 1000;
else if (next_report_ < 50000)
next_report_ += 5000;
else if (next_report_ < 100000)
next_report_ += 10000;
else if (next_report_ < 500000)
next_report_ += 50000;
else
next_report_ += 100000;
std::fprintf(stderr, "... finished %d ops%30s\r", done_, "");
std::fflush(stderr);
}
}
@ -282,43 +292,36 @@ class Benchmark {
if (bytes_ > 0) {
char rate[100];
snprintf(rate, sizeof(rate), "%6.1f MB/s",
(bytes_ / 1048576.0) / (finish - start_));
std::snprintf(rate, sizeof(rate), "%6.1f MB/s",
(bytes_ / 1048576.0) / (finish - start_));
if (!message_.empty()) {
message_ = std::string(rate) + " " + message_;
message_ = std::string(rate) + " " + message_;
} else {
message_ = rate;
}
}
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
name.ToString().c_str(),
(finish - start_) * 1e6 / done_,
(message_.empty() ? "" : " "),
message_.c_str());
std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
name.ToString().c_str(), (finish - start_) * 1e6 / done_,
(message_.empty() ? "" : " "), message_.c_str());
if (FLAGS_histogram) {
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
std::fprintf(stdout, "Microseconds per op:\n%s\n",
hist_.ToString().c_str());
}
fflush(stdout);
std::fflush(stdout);
}
public:
enum Order {
SEQUENTIAL,
RANDOM
};
enum DBState {
FRESH,
EXISTING
};
enum Order { SEQUENTIAL, RANDOM };
enum DBState { FRESH, EXISTING };
Benchmark()
: db_(NULL),
db_num_(0),
num_(FLAGS_num),
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
bytes_(0),
rand_(301) {
: db_(nullptr),
db_num_(0),
num_(FLAGS_num),
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
bytes_(0),
rand_(301) {
std::vector<std::string> files;
std::string test_dir;
Env::Default()->GetTestDirectory(&test_dir);
@ -329,7 +332,7 @@ class Benchmark {
std::string file_name(test_dir);
file_name += "/";
file_name += files[i];
Env::Default()->DeleteFile(file_name.c_str());
Env::Default()->RemoveFile(file_name.c_str());
}
}
}
@ -345,12 +348,12 @@ class Benchmark {
Open();
const char* benchmarks = FLAGS_benchmarks;
while (benchmarks != NULL) {
while (benchmarks != nullptr) {
const char* sep = strchr(benchmarks, ',');
Slice name;
if (sep == NULL) {
if (sep == nullptr) {
name = benchmarks;
benchmarks = NULL;
benchmarks = nullptr;
} else {
name = Slice(benchmarks, sep - benchmarks);
benchmarks = sep + 1;
@ -405,7 +408,8 @@ class Benchmark {
} else {
known = false;
if (name != Slice()) { // No error message for empty name
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
std::fprintf(stderr, "unknown benchmark '%s'\n",
name.ToString().c_str());
}
}
if (known) {
@ -415,39 +419,37 @@ class Benchmark {
}
void Open() {
assert(db_ == NULL);
assert(db_ == nullptr);
int status;
char file_name[100];
char* err_msg = NULL;
char* err_msg = nullptr;
db_num_++;
// Open database
std::string tmp_dir;
Env::Default()->GetTestDirectory(&tmp_dir);
snprintf(file_name, sizeof(file_name),
"%s/dbbench_sqlite3-%d.db",
tmp_dir.c_str(),
db_num_);
std::snprintf(file_name, sizeof(file_name), "%s/dbbench_sqlite3-%d.db",
tmp_dir.c_str(), db_num_);
status = sqlite3_open(file_name, &db_);
if (status) {
fprintf(stderr, "open error: %s\n", sqlite3_errmsg(db_));
exit(1);
std::fprintf(stderr, "open error: %s\n", sqlite3_errmsg(db_));
std::exit(1);
}
// Change SQLite cache size
char cache_size[100];
snprintf(cache_size, sizeof(cache_size), "PRAGMA cache_size = %d",
FLAGS_num_pages);
status = sqlite3_exec(db_, cache_size, NULL, NULL, &err_msg);
std::snprintf(cache_size, sizeof(cache_size), "PRAGMA cache_size = %d",
FLAGS_num_pages);
status = sqlite3_exec(db_, cache_size, nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
// FLAGS_page_size is defaulted to 1024
if (FLAGS_page_size != 1024) {
char page_size[100];
snprintf(page_size, sizeof(page_size), "PRAGMA page_size = %d",
FLAGS_page_size);
status = sqlite3_exec(db_, page_size, NULL, NULL, &err_msg);
std::snprintf(page_size, sizeof(page_size), "PRAGMA page_size = %d",
FLAGS_page_size);
status = sqlite3_exec(db_, page_size, nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
}
@ -457,26 +459,29 @@ class Benchmark {
// LevelDB's default cache size is a combined 4 MB
std::string WAL_checkpoint = "PRAGMA wal_autocheckpoint = 4096";
status = sqlite3_exec(db_, WAL_stmt.c_str(), NULL, NULL, &err_msg);
status = sqlite3_exec(db_, WAL_stmt.c_str(), nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
status = sqlite3_exec(db_, WAL_checkpoint.c_str(), NULL, NULL, &err_msg);
status =
sqlite3_exec(db_, WAL_checkpoint.c_str(), nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
}
// Change locking mode to exclusive and create tables/index for database
std::string locking_stmt = "PRAGMA locking_mode = EXCLUSIVE";
std::string create_stmt =
"CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))";
std::string stmt_array[] = { locking_stmt, create_stmt };
"CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))";
if (!FLAGS_use_rowids) create_stmt += " WITHOUT ROWID";
std::string stmt_array[] = {locking_stmt, create_stmt};
int stmt_array_length = sizeof(stmt_array) / sizeof(std::string);
for (int i = 0; i < stmt_array_length; i++) {
status = sqlite3_exec(db_, stmt_array[i].c_str(), NULL, NULL, &err_msg);
status =
sqlite3_exec(db_, stmt_array[i].c_str(), nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
}
}
void Write(bool write_sync, Order order, DBState state,
int num_entries, int value_size, int entries_per_batch) {
void Write(bool write_sync, Order order, DBState state, int num_entries,
int value_size, int entries_per_batch) {
// Create new database if state == FRESH
if (state == FRESH) {
if (FLAGS_use_existing_db) {
@ -484,18 +489,18 @@ class Benchmark {
return;
}
sqlite3_close(db_);
db_ = NULL;
db_ = nullptr;
Open();
Start();
}
if (num_entries != num_) {
char msg[100];
snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
std::snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
message_ = msg;
}
char* err_msg = NULL;
char* err_msg = nullptr;
int status;
sqlite3_stmt *replace_stmt, *begin_trans_stmt, *end_trans_stmt;
@ -504,20 +509,20 @@ class Benchmark {
std::string end_trans_str = "END TRANSACTION;";
// Check for synchronous flag in options
std::string sync_stmt = (write_sync) ? "PRAGMA synchronous = FULL" :
"PRAGMA synchronous = OFF";
status = sqlite3_exec(db_, sync_stmt.c_str(), NULL, NULL, &err_msg);
std::string sync_stmt =
(write_sync) ? "PRAGMA synchronous = FULL" : "PRAGMA synchronous = OFF";
status = sqlite3_exec(db_, sync_stmt.c_str(), nullptr, nullptr, &err_msg);
ExecErrorCheck(status, err_msg);
// Preparing sqlite3 statements
status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1,
&replace_stmt, NULL);
status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1, &replace_stmt,
nullptr);
ErrorCheck(status);
status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1,
&begin_trans_stmt, NULL);
&begin_trans_stmt, nullptr);
ErrorCheck(status);
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1,
&end_trans_stmt, NULL);
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt,
nullptr);
ErrorCheck(status);
bool transaction = (entries_per_batch > 1);
@ -535,16 +540,16 @@ class Benchmark {
const char* value = gen_.Generate(value_size).data();
// Create values for key-value pair
const int k = (order == SEQUENTIAL) ? i + j :
(rand_.Next() % num_entries);
const int k =
(order == SEQUENTIAL) ? i + j : (rand_.Next() % num_entries);
char key[100];
snprintf(key, sizeof(key), "%016d", k);
std::snprintf(key, sizeof(key), "%016d", k);
// Bind KV values into replace_stmt
status = sqlite3_bind_blob(replace_stmt, 1, key, 16, SQLITE_STATIC);
ErrorCheck(status);
status = sqlite3_bind_blob(replace_stmt, 2, value,
value_size, SQLITE_STATIC);
status = sqlite3_bind_blob(replace_stmt, 2, value, value_size,
SQLITE_STATIC);
ErrorCheck(status);
// Execute replace_stmt
@ -588,12 +593,12 @@ class Benchmark {
// Preparing sqlite3 statements
status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1,
&begin_trans_stmt, NULL);
&begin_trans_stmt, nullptr);
ErrorCheck(status);
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1,
&end_trans_stmt, NULL);
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt,
nullptr);
ErrorCheck(status);
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &read_stmt, NULL);
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &read_stmt, nullptr);
ErrorCheck(status);
bool transaction = (entries_per_batch > 1);
@ -611,14 +616,15 @@ class Benchmark {
// Create key value
char key[100];
int k = (order == SEQUENTIAL) ? i + j : (rand_.Next() % reads_);
snprintf(key, sizeof(key), "%016d", k);
std::snprintf(key, sizeof(key), "%016d", k);
// Bind key value into read_stmt
status = sqlite3_bind_blob(read_stmt, 1, key, 16, SQLITE_STATIC);
ErrorCheck(status);
// Execute read statement
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {}
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {
}
StepErrorCheck(status);
// Reset SQLite statement for another use
@ -648,10 +654,10 @@ class Benchmark {
void ReadSequential() {
int status;
sqlite3_stmt *pStmt;
sqlite3_stmt* pStmt;
std::string read_str = "SELECT * FROM test ORDER BY key";
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &pStmt, NULL);
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &pStmt, nullptr);
ErrorCheck(status);
for (int i = 0; i < reads_ && SQLITE_ROW == sqlite3_step(pStmt); i++) {
bytes_ += sqlite3_column_bytes(pStmt, 1) + sqlite3_column_bytes(pStmt, 2);
@ -661,7 +667,6 @@ class Benchmark {
status = sqlite3_finalize(pStmt);
ErrorCheck(status);
}
};
} // namespace leveldb
@ -682,6 +687,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) {
FLAGS_use_existing_db = n;
} else if (sscanf(argv[i], "--use_rowids=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) {
FLAGS_use_rowids = n;
} else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
FLAGS_num = n;
} else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
@ -700,16 +708,16 @@ int main(int argc, char** argv) {
} else if (strncmp(argv[i], "--db=", 5) == 0) {
FLAGS_db = argv[i] + 5;
} else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1);
std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
std::exit(1);
}
}
// Choose a location for the test database if none given with --db=<path>
if (FLAGS_db == NULL) {
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
default_db_path += "/dbbench";
FLAGS_db = default_db_path.c_str();
if (FLAGS_db == nullptr) {
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
default_db_path += "/dbbench";
FLAGS_db = default_db_path.c_str();
}
leveldb::Benchmark benchmark;

View File

@ -2,9 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdio.h>
#include <stdlib.h>
#include <kcpolydb.h>
#include <cstdio>
#include <cstdlib>
#include "util/histogram.h"
#include "util/random.h"
#include "util/testutil.h"
@ -34,8 +36,7 @@ static const char* FLAGS_benchmarks =
"fillrand100K,"
"fillseq100K,"
"readseq100K,"
"readrand100K,"
;
"readrand100K,";
// Number of key/values to place in database
static int FLAGS_num = 1000000;
@ -69,14 +70,12 @@ static bool FLAGS_use_existing_db = false;
static bool FLAGS_compression = true;
// Use the db with the following name.
static const char* FLAGS_db = NULL;
static const char* FLAGS_db = nullptr;
inline
static void DBSynchronize(kyotocabinet::TreeDB* db_)
{
inline static void DBSynchronize(kyotocabinet::TreeDB* db_) {
// Synchronize will flush writes to disk
if (!db_->synchronize()) {
fprintf(stderr, "synchronize error: %s\n", db_->error().name());
std::fprintf(stderr, "synchronize error: %s\n", db_->error().name());
}
}
@ -121,7 +120,7 @@ static Slice TrimSpace(Slice s) {
start++;
}
int limit = s.size();
while (limit > start && isspace(s[limit-1])) {
while (limit > start && isspace(s[limit - 1])) {
limit--;
}
return Slice(s.data() + start, limit - start);
@ -146,55 +145,60 @@ class Benchmark {
// State kept for progress messages
int done_;
int next_report_; // When to report next
int next_report_; // When to report next
void PrintHeader() {
const int kKeySize = 16;
PrintEnvironment();
fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
FLAGS_value_size,
static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
fprintf(stdout, "Entries: %d\n", num_);
fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
/ 1048576.0));
fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
/ 1048576.0));
std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
std::fprintf(
stdout, "Values: %d bytes each (%d bytes after compression)\n",
FLAGS_value_size,
static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
std::fprintf(stdout, "Entries: %d\n", num_);
std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
1048576.0));
std::fprintf(
stdout, "FileSize: %.1f MB (estimated)\n",
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) /
1048576.0));
PrintWarnings();
fprintf(stdout, "------------------------------------------------\n");
std::fprintf(stdout, "------------------------------------------------\n");
}
void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
fprintf(stdout,
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
);
std::fprintf(
stdout,
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
#endif
#ifndef NDEBUG
fprintf(stdout,
"WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
std::fprintf(
stdout,
"WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
}
void PrintEnvironment() {
fprintf(stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n",
kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
std::fprintf(
stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n",
kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
#if defined(__linux)
time_t now = time(NULL);
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
time_t now = time(nullptr);
std::fprintf(stderr, "Date: %s",
ctime(&now)); // ctime() adds newline
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
if (cpuinfo != NULL) {
FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r");
if (cpuinfo != nullptr) {
char line[1000];
int num_cpus = 0;
std::string cpu_type;
std::string cache_size;
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
const char* sep = strchr(line, ':');
if (sep == NULL) {
if (sep == nullptr) {
continue;
}
Slice key = TrimSpace(Slice(line, sep - 1 - line));
@ -206,9 +210,10 @@ class Benchmark {
cache_size = val.ToString();
}
}
fclose(cpuinfo);
fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
std::fclose(cpuinfo);
std::fprintf(stderr, "CPU: %d * %s\n", num_cpus,
cpu_type.c_str());
std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
}
#endif
}
@ -229,23 +234,30 @@ class Benchmark {
double micros = (now - last_op_finish_) * 1e6;
hist_.Add(micros);
if (micros > 20000) {
fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
fflush(stderr);
std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
std::fflush(stderr);
}
last_op_finish_ = now;
}
done_++;
if (done_ >= next_report_) {
if (next_report_ < 1000) next_report_ += 100;
else if (next_report_ < 5000) next_report_ += 500;
else if (next_report_ < 10000) next_report_ += 1000;
else if (next_report_ < 50000) next_report_ += 5000;
else if (next_report_ < 100000) next_report_ += 10000;
else if (next_report_ < 500000) next_report_ += 50000;
else next_report_ += 100000;
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
fflush(stderr);
if (next_report_ < 1000)
next_report_ += 100;
else if (next_report_ < 5000)
next_report_ += 500;
else if (next_report_ < 10000)
next_report_ += 1000;
else if (next_report_ < 50000)
next_report_ += 5000;
else if (next_report_ < 100000)
next_report_ += 10000;
else if (next_report_ < 500000)
next_report_ += 50000;
else
next_report_ += 100000;
std::fprintf(stderr, "... finished %d ops%30s\r", done_, "");
std::fflush(stderr);
}
}
@ -258,42 +270,35 @@ class Benchmark {
if (bytes_ > 0) {
char rate[100];
snprintf(rate, sizeof(rate), "%6.1f MB/s",
(bytes_ / 1048576.0) / (finish - start_));
std::snprintf(rate, sizeof(rate), "%6.1f MB/s",
(bytes_ / 1048576.0) / (finish - start_));
if (!message_.empty()) {
message_ = std::string(rate) + " " + message_;
message_ = std::string(rate) + " " + message_;
} else {
message_ = rate;
}
}
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
name.ToString().c_str(),
(finish - start_) * 1e6 / done_,
(message_.empty() ? "" : " "),
message_.c_str());
std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
name.ToString().c_str(), (finish - start_) * 1e6 / done_,
(message_.empty() ? "" : " "), message_.c_str());
if (FLAGS_histogram) {
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
std::fprintf(stdout, "Microseconds per op:\n%s\n",
hist_.ToString().c_str());
}
fflush(stdout);
std::fflush(stdout);
}
public:
enum Order {
SEQUENTIAL,
RANDOM
};
enum DBState {
FRESH,
EXISTING
};
enum Order { SEQUENTIAL, RANDOM };
enum DBState { FRESH, EXISTING };
Benchmark()
: db_(NULL),
num_(FLAGS_num),
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
bytes_(0),
rand_(301) {
: db_(nullptr),
num_(FLAGS_num),
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
bytes_(0),
rand_(301) {
std::vector<std::string> files;
std::string test_dir;
Env::Default()->GetTestDirectory(&test_dir);
@ -304,7 +309,7 @@ class Benchmark {
std::string file_name(test_dir);
file_name += "/";
file_name += files[i];
Env::Default()->DeleteFile(file_name.c_str());
Env::Default()->RemoveFile(file_name.c_str());
}
}
}
@ -312,7 +317,7 @@ class Benchmark {
~Benchmark() {
if (!db_->close()) {
fprintf(stderr, "close error: %s\n", db_->error().name());
std::fprintf(stderr, "close error: %s\n", db_->error().name());
}
}
@ -321,12 +326,12 @@ class Benchmark {
Open(false);
const char* benchmarks = FLAGS_benchmarks;
while (benchmarks != NULL) {
while (benchmarks != nullptr) {
const char* sep = strchr(benchmarks, ',');
Slice name;
if (sep == NULL) {
if (sep == nullptr) {
name = benchmarks;
benchmarks = NULL;
benchmarks = nullptr;
} else {
name = Slice(benchmarks, sep - benchmarks);
benchmarks = sep + 1;
@ -376,7 +381,8 @@ class Benchmark {
} else {
known = false;
if (name != Slice()) { // No error message for empty name
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
std::fprintf(stderr, "unknown benchmark '%s'\n",
name.ToString().c_str());
}
}
if (known) {
@ -386,8 +392,8 @@ class Benchmark {
}
private:
void Open(bool sync) {
assert(db_ == NULL);
void Open(bool sync) {
assert(db_ == nullptr);
// Initialize db_
db_ = new kyotocabinet::TreeDB();
@ -395,16 +401,14 @@ class Benchmark {
db_num_++;
std::string test_dir;
Env::Default()->GetTestDirectory(&test_dir);
snprintf(file_name, sizeof(file_name),
"%s/dbbench_polyDB-%d.kct",
test_dir.c_str(),
db_num_);
std::snprintf(file_name, sizeof(file_name), "%s/dbbench_polyDB-%d.kct",
test_dir.c_str(), db_num_);
// Create tuning options and open the database
int open_options = kyotocabinet::PolyDB::OWRITER |
kyotocabinet::PolyDB::OCREATE;
int tune_options = kyotocabinet::TreeDB::TSMALL |
kyotocabinet::TreeDB::TLINEAR;
int open_options =
kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE;
int tune_options =
kyotocabinet::TreeDB::TSMALL | kyotocabinet::TreeDB::TLINEAR;
if (FLAGS_compression) {
tune_options |= kyotocabinet::TreeDB::TCOMPRESS;
db_->tune_compressor(&comp_);
@ -412,17 +416,17 @@ class Benchmark {
db_->tune_options(tune_options);
db_->tune_page_cache(FLAGS_cache_size);
db_->tune_page(FLAGS_page_size);
db_->tune_map(256LL<<20);
db_->tune_map(256LL << 20);
if (sync) {
open_options |= kyotocabinet::PolyDB::OAUTOSYNC;
}
if (!db_->open(file_name, open_options)) {
fprintf(stderr, "open error: %s\n", db_->error().name());
std::fprintf(stderr, "open error: %s\n", db_->error().name());
}
}
void Write(bool sync, Order order, DBState state,
int num_entries, int value_size, int entries_per_batch) {
void Write(bool sync, Order order, DBState state, int num_entries,
int value_size, int entries_per_batch) {
// Create new database if state == FRESH
if (state == FRESH) {
if (FLAGS_use_existing_db) {
@ -430,27 +434,26 @@ class Benchmark {
return;
}
delete db_;
db_ = NULL;
db_ = nullptr;
Open(sync);
Start(); // Do not count time taken to destroy/open
}
if (num_entries != num_) {
char msg[100];
snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
std::snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
message_ = msg;
}
// Write to database
for (int i = 0; i < num_entries; i++)
{
for (int i = 0; i < num_entries; i++) {
const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries);
char key[100];
snprintf(key, sizeof(key), "%016d", k);
std::snprintf(key, sizeof(key), "%016d", k);
bytes_ += value_size + strlen(key);
std::string cpp_key = key;
if (!db_->set(cpp_key, gen_.Generate(value_size).ToString())) {
fprintf(stderr, "set error: %s\n", db_->error().name());
std::fprintf(stderr, "set error: %s\n", db_->error().name());
}
FinishedSingleOp();
}
@ -472,7 +475,7 @@ class Benchmark {
for (int i = 0; i < reads_; i++) {
char key[100];
const int k = rand_.Next() % reads_;
snprintf(key, sizeof(key), "%016d", k);
std::snprintf(key, sizeof(key), "%016d", k);
db_->get(key, &value);
FinishedSingleOp();
}
@ -510,16 +513,16 @@ int main(int argc, char** argv) {
} else if (strncmp(argv[i], "--db=", 5) == 0) {
FLAGS_db = argv[i] + 5;
} else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1);
std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
std::exit(1);
}
}
// Choose a location for the test database if none given with --db=<path>
if (FLAGS_db == NULL) {
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
default_db_path += "/dbbench";
FLAGS_db = default_db_path.c_str();
if (FLAGS_db == nullptr) {
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
default_db_path += "/dbbench";
FLAGS_db = default_db_path.c_str();
}
leveldb::Benchmark benchmark;

View File

@ -1,228 +0,0 @@
#!/bin/sh
#
# Detects OS we're compiling on and outputs a file specified by the first
# argument, which in turn gets read while processing Makefile.
#
# The output will set the following variables:
# CC C Compiler path
# CXX C++ Compiler path
# PLATFORM_LDFLAGS Linker flags
# PLATFORM_LIBS Libraries flags
# PLATFORM_SHARED_EXT Extension for shared libraries
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
# This flag is embedded just before the name
# of the shared library without intervening spaces
# PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library
# PLATFORM_CCFLAGS C compiler flags
# PLATFORM_CXXFLAGS C++ compiler flags. Will contain:
# PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned
# shared libraries, empty otherwise.
#
# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following:
#
# -DLEVELDB_ATOMIC_PRESENT if <atomic> is present
# -DLEVELDB_PLATFORM_POSIX for Posix-based platforms
# -DSNAPPY if the Snappy library is present
#
OUTPUT=$1
PREFIX=$2
if test -z "$OUTPUT" || test -z "$PREFIX"; then
echo "usage: $0 <output-filename> <directory_prefix>" >&2
exit 1
fi
# Delete existing output, if it exists
rm -f $OUTPUT
touch $OUTPUT
if test -z "$CC"; then
CC=cc
fi
if test -z "$CXX"; then
CXX=g++
fi
if test -z "$TMPDIR"; then
TMPDIR=/tmp
fi
# Detect OS
if test -z "$TARGET_OS"; then
TARGET_OS=`uname -s`
fi
COMMON_FLAGS=
CROSS_COMPILE=
PLATFORM_CCFLAGS=
PLATFORM_CXXFLAGS=
PLATFORM_LDFLAGS=
PLATFORM_LIBS=
PLATFORM_SHARED_EXT="so"
PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true
MEMCMP_FLAG=
if [ "$CXX" = "g++" ]; then
# Use libc's memcmp instead of GCC's memcmp. This results in ~40%
# performance improvement on readrandom under gcc 4.4.3 on Linux/x86.
MEMCMP_FLAG="-fno-builtin-memcmp"
fi
case "$TARGET_OS" in
CYGWIN_*)
PLATFORM=OS_LINUX
COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
PLATFORM_LDFLAGS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
Darwin)
PLATFORM=OS_MACOSX
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
PLATFORM_SHARED_EXT=dylib
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
PORT_FILE=port/port_posix.cc
;;
Linux)
PLATFORM=OS_LINUX
COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
SunOS)
PLATFORM=OS_SOLARIS
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
PLATFORM_LIBS="-lpthread -lrt"
PORT_FILE=port/port_posix.cc
;;
FreeBSD)
PLATFORM=OS_FREEBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
PLATFORM_LIBS="-lpthread -lgcc_s"
PORT_FILE=port/port_posix.cc
;;
OpenBSD)
PLATFORM=OS_OPENBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
DragonFly)
PLATFORM=OS_DRAGONFLYBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
OS_ANDROID_CROSSCOMPILE)
PLATFORM=OS_ANDROID
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
PLATFORM_LDFLAGS="" # All pthread features are in the Android C library
PORT_FILE=port/port_posix.cc
CROSS_COMPILE=true
;;
HP-UX)
PLATFORM=OS_HPUX
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
# man ld: +h internal_name
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
;;
IOS)
PLATFORM=IOS
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PORT_FILE=port/port_posix.cc
PLATFORM_SHARED_EXT=
PLATFORM_SHARED_LDFLAGS=
PLATFORM_SHARED_CFLAGS=
PLATFORM_SHARED_VERSIONED=
;;
*)
echo "Unknown platform!" >&2
exit 1
esac
# We want to make a list of all cc files within util, db, table, and helpers
# except for the test and benchmark files. By default, find will output a list
# of all files matching either rule, so we need to append -print to make the
# prune take effect.
DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"
set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune"
PRUNE_TOOL="-name leveldb_main.cc -prune"
PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "`
set +f # re-enable globbing
# The sources consist of the portable files, plus the platform-specific port
# file.
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT
if [ "$CROSS_COMPILE" = "true" ]; then
# Cross-compiling; do not try any compilation tests.
true
else
CXXOUTPUT="${TMPDIR}/leveldb_build_detect_platform-cxx.$$"
# If -std=c++0x works, use <atomic> as fallback for when memory barriers
# are not available.
$CXX $CXXFLAGS -std=c++0x -x c++ - -o $CXXOUTPUT 2>/dev/null <<EOF
#include <atomic>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX -DLEVELDB_ATOMIC_PRESENT"
PLATFORM_CXXFLAGS="-std=c++0x"
else
COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX"
fi
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
$CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY"
PLATFORM_LIBS="$PLATFORM_LIBS -lsnappy"
fi
# Test whether tcmalloc is available
$CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT -ltcmalloc 2>/dev/null <<EOF
int main() {}
EOF
if [ "$?" = 0 ]; then
PLATFORM_LIBS="$PLATFORM_LIBS -ltcmalloc"
fi
rm -f $CXXOUTPUT 2>/dev/null
fi
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
echo "CC=$CC" >> $OUTPUT
echo "CXX=$CXX" >> $OUTPUT
echo "PLATFORM=$PLATFORM" >> $OUTPUT
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
echo "PLATFORM_LIBS=$PLATFORM_LIBS" >> $OUTPUT
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> $OUTPUT

View File

@ -0,0 +1,9 @@
# Copyright 2019 The LevelDB Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. See the AUTHORS file for names of contributors.
@PACKAGE_INIT@
include("${CMAKE_CURRENT_LIST_DIR}/leveldbTargets.cmake")
check_required_components(leveldb)

View File

@ -2,29 +2,24 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "leveldb/db.h"
#include "gtest/gtest.h"
#include "db/db_impl.h"
#include "leveldb/cache.h"
#include "util/testharness.h"
#include "leveldb/db.h"
#include "util/testutil.h"
namespace leveldb {
class AutoCompactTest {
class AutoCompactTest : public testing::Test {
public:
std::string dbname_;
Cache* tiny_cache_;
Options options_;
DB* db_;
AutoCompactTest() {
dbname_ = test::TmpDir() + "/autocompact_test";
dbname_ = testing::TempDir() + "autocompact_test";
tiny_cache_ = NewLRUCache(100);
options_.block_cache = tiny_cache_;
DestroyDB(dbname_, options_);
options_.create_if_missing = true;
options_.compression = kNoCompression;
ASSERT_OK(DB::Open(options_, dbname_, &db_));
EXPECT_LEVELDB_OK(DB::Open(options_, dbname_, &db_));
}
~AutoCompactTest() {
@ -35,7 +30,7 @@ class AutoCompactTest {
std::string Key(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "key%06d", i);
std::snprintf(buf, sizeof(buf), "key%06d", i);
return std::string(buf);
}
@ -47,6 +42,12 @@ class AutoCompactTest {
}
void DoReads(int n);
private:
std::string dbname_;
Cache* tiny_cache_;
Options options_;
DB* db_;
};
static const int kValueSize = 200 * 1024;
@ -61,15 +62,15 @@ void AutoCompactTest::DoReads(int n) {
// Fill database
for (int i = 0; i < kCount; i++) {
ASSERT_OK(db_->Put(WriteOptions(), Key(i), value));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), Key(i), value));
}
ASSERT_OK(dbi->TEST_CompactMemTable());
ASSERT_LEVELDB_OK(dbi->TEST_CompactMemTable());
// Delete everything
for (int i = 0; i < kCount; i++) {
ASSERT_OK(db_->Delete(WriteOptions(), Key(i)));
ASSERT_LEVELDB_OK(db_->Delete(WriteOptions(), Key(i)));
}
ASSERT_OK(dbi->TEST_CompactMemTable());
ASSERT_LEVELDB_OK(dbi->TEST_CompactMemTable());
// Get initial measurement of the space we will be reading.
const int64_t initial_size = Size(Key(0), Key(n));
@ -81,17 +82,16 @@ void AutoCompactTest::DoReads(int n) {
ASSERT_LT(read, 100) << "Taking too long to compact";
Iterator* iter = db_->NewIterator(ReadOptions());
for (iter->SeekToFirst();
iter->Valid() && iter->key().ToString() < limit_key;
iter->Next()) {
iter->Valid() && iter->key().ToString() < limit_key; iter->Next()) {
// Drop data
}
delete iter;
// Wait a little bit to allow any triggered compactions to complete.
Env::Default()->SleepForMicroseconds(1000000);
uint64_t size = Size(Key(0), Key(n));
fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n",
read+1, size/1048576.0, Size(Key(n), Key(kCount))/1048576.0);
if (size <= initial_size/10) {
std::fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n", read + 1,
size / 1048576.0, Size(Key(n), Key(kCount)) / 1048576.0);
if (size <= initial_size / 10) {
break;
}
}
@ -100,19 +100,11 @@ void AutoCompactTest::DoReads(int n) {
// is pretty much unchanged.
const int64_t final_other_size = Size(Key(n), Key(kCount));
ASSERT_LE(final_other_size, initial_other_size + 1048576);
ASSERT_GE(final_other_size, initial_other_size/5 - 1048576);
ASSERT_GE(final_other_size, initial_other_size / 5 - 1048576);
}
TEST(AutoCompactTest, ReadAll) {
DoReads(kCount);
}
TEST_F(AutoCompactTest, ReadAll) { DoReads(kCount); }
TEST(AutoCompactTest, ReadHalf) {
DoReads(kCount/2);
}
TEST_F(AutoCompactTest, ReadHalf) { DoReads(kCount / 2); }
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}
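
The hunks above move this test from the old util/testharness runner to googletest: the fixture now derives from testing::Test, assertions go through the ASSERT_LEVELDB_OK / EXPECT_LEVELDB_OK macros seen in the diff, cases are declared with TEST_F, and the per-binary main() that called leveldb::test::RunAllTests() is removed. A minimal sketch of the resulting pattern, with illustrative names that are not taken from the real test:

// Minimal googletest fixture sketch (illustrative names, not from leveldb).
// Build and link against gtest plus a gtest main; no hand-written main() is
// needed because TEST_F cases are discovered automatically.
#include "gtest/gtest.h"

class ExampleFixture : public testing::Test {
 protected:
  void SetUp() override { counter_ = 0; }  // optional per-test setup hook
  int counter_ = 0;
};

TEST_F(ExampleFixture, StartsAtZero) { EXPECT_EQ(counter_, 0); }
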

View File

@ -4,8 +4,8 @@
#include "db/builder.h"
#include "db/filename.h"
#include "db/dbformat.h"
#include "db/filename.h"
#include "db/table_cache.h"
#include "db/version_edit.h"
#include "leveldb/db.h"
@ -14,12 +14,8 @@
namespace leveldb {
Status BuildTable(const std::string& dbname,
Env* env,
const Options& options,
TableCache* table_cache,
Iterator* iter,
FileMetaData* meta) {
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
TableCache* table_cache, Iterator* iter, FileMetaData* meta) {
Status s;
meta->file_size = 0;
iter->SeekToFirst();
@ -34,21 +30,20 @@ Status BuildTable(const std::string& dbname,
TableBuilder* builder = new TableBuilder(options, file);
meta->smallest.DecodeFrom(iter->key());
Slice key;
for (; iter->Valid(); iter->Next()) {
Slice key = iter->key();
meta->largest.DecodeFrom(key);
key = iter->key();
builder->Add(key, iter->value());
}
if (!key.empty()) {
meta->largest.DecodeFrom(key);
}
// Finish and check for builder errors
s = builder->Finish();
if (s.ok()) {
s = builder->Finish();
if (s.ok()) {
meta->file_size = builder->FileSize();
assert(meta->file_size > 0);
}
} else {
builder->Abandon();
meta->file_size = builder->FileSize();
assert(meta->file_size > 0);
}
delete builder;
@ -60,12 +55,11 @@ Status BuildTable(const std::string& dbname,
s = file->Close();
}
delete file;
file = NULL;
file = nullptr;
if (s.ok()) {
// Verify that the table is usable
Iterator* it = table_cache->NewIterator(ReadOptions(),
meta->number,
Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number,
meta->file_size);
s = it->status();
delete it;
@ -80,7 +74,7 @@ Status BuildTable(const std::string& dbname,
if (s.ok() && meta->file_size > 0) {
// Keep it
} else {
env->DeleteFile(fname);
env->RemoveFile(fname);
}
return s;
}
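
In the BuildTable() hunk above, meta->largest is no longer decoded on every loop iteration; the last key added is remembered and decoded once after the loop ends. A self-contained analogue of that "record the last element once" pattern, using illustrative standard-library types rather than leveldb's Slice and InternalKey:

// Analogue of the refactor: track the last element seen and process it once
// after the loop instead of on every iteration.
#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> keys = {"apple", "banana", "cherry"};
  std::string last;  // plays the role of `Slice key` in BuildTable()
  for (const std::string& k : keys) {
    last = k;  // builder->Add(key, value) would happen here
  }
  if (!last.empty()) {
    // Corresponds to meta->largest.DecodeFrom(key) running exactly once.
    std::cout << "largest = " << last << "\n";
  }
  return 0;
}
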

View File

@ -22,12 +22,8 @@ class VersionEdit;
// *meta will be filled with metadata about the generated table.
// If no data is present in *iter, meta->file_size will be set to
// zero, and no Table file will be produced.
extern Status BuildTable(const std::string& dbname,
Env* env,
const Options& options,
TableCache* table_cache,
Iterator* iter,
FileMetaData* meta);
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
TableCache* table_cache, Iterator* iter, FileMetaData* meta);
} // namespace leveldb

db/c.cc (402 lines changed)
View File

@ -4,8 +4,11 @@
#include "leveldb/c.h"
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <cstdint>
#include <cstdlib>
#include "leveldb/cache.h"
#include "leveldb/comparator.h"
#include "leveldb/db.h"
@ -43,69 +46,72 @@ using leveldb::WriteOptions;
extern "C" {
struct leveldb_t { DB* rep; };
struct leveldb_iterator_t { Iterator* rep; };
struct leveldb_writebatch_t { WriteBatch rep; };
struct leveldb_snapshot_t { const Snapshot* rep; };
struct leveldb_readoptions_t { ReadOptions rep; };
struct leveldb_writeoptions_t { WriteOptions rep; };
struct leveldb_options_t { Options rep; };
struct leveldb_cache_t { Cache* rep; };
struct leveldb_seqfile_t { SequentialFile* rep; };
struct leveldb_randomfile_t { RandomAccessFile* rep; };
struct leveldb_writablefile_t { WritableFile* rep; };
struct leveldb_logger_t { Logger* rep; };
struct leveldb_filelock_t { FileLock* rep; };
struct leveldb_t {
DB* rep;
};
struct leveldb_iterator_t {
Iterator* rep;
};
struct leveldb_writebatch_t {
WriteBatch rep;
};
struct leveldb_snapshot_t {
const Snapshot* rep;
};
struct leveldb_readoptions_t {
ReadOptions rep;
};
struct leveldb_writeoptions_t {
WriteOptions rep;
};
struct leveldb_options_t {
Options rep;
};
struct leveldb_cache_t {
Cache* rep;
};
struct leveldb_seqfile_t {
SequentialFile* rep;
};
struct leveldb_randomfile_t {
RandomAccessFile* rep;
};
struct leveldb_writablefile_t {
WritableFile* rep;
};
struct leveldb_logger_t {
Logger* rep;
};
struct leveldb_filelock_t {
FileLock* rep;
};
struct leveldb_comparator_t : public Comparator {
void* state_;
void (*destructor_)(void*);
int (*compare_)(
void*,
const char* a, size_t alen,
const char* b, size_t blen);
const char* (*name_)(void*);
~leveldb_comparator_t() override { (*destructor_)(state_); }
virtual ~leveldb_comparator_t() {
(*destructor_)(state_);
}
virtual int Compare(const Slice& a, const Slice& b) const {
int Compare(const Slice& a, const Slice& b) const override {
return (*compare_)(state_, a.data(), a.size(), b.data(), b.size());
}
virtual const char* Name() const {
return (*name_)(state_);
}
const char* Name() const override { return (*name_)(state_); }
// No-ops since the C binding does not support key shortening methods.
virtual void FindShortestSeparator(std::string*, const Slice&) const { }
virtual void FindShortSuccessor(std::string* key) const { }
void FindShortestSeparator(std::string*, const Slice&) const override {}
void FindShortSuccessor(std::string* key) const override {}
void* state_;
void (*destructor_)(void*);
int (*compare_)(void*, const char* a, size_t alen, const char* b,
size_t blen);
const char* (*name_)(void*);
};
struct leveldb_filterpolicy_t : public FilterPolicy {
void* state_;
void (*destructor_)(void*);
const char* (*name_)(void*);
char* (*create_)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length);
unsigned char (*key_match_)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length);
~leveldb_filterpolicy_t() override { (*destructor_)(state_); }
virtual ~leveldb_filterpolicy_t() {
(*destructor_)(state_);
}
const char* Name() const override { return (*name_)(state_); }
virtual const char* Name() const {
return (*name_)(state_);
}
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
std::vector<const char*> key_pointers(n);
std::vector<size_t> key_sizes(n);
for (int i = 0; i < n; i++) {
@ -115,13 +121,22 @@ struct leveldb_filterpolicy_t : public FilterPolicy {
size_t len;
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
dst->append(filter, len);
free(filter);
std::free(filter);
}
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
return (*key_match_)(state_, key.data(), key.size(),
filter.data(), filter.size());
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
return (*key_match_)(state_, key.data(), key.size(), filter.data(),
filter.size());
}
void* state_;
void (*destructor_)(void*);
const char* (*name_)(void*);
char* (*create_)(void*, const char* const* key_array,
const size_t* key_length_array, int num_keys,
size_t* filter_length);
uint8_t (*key_match_)(void*, const char* key, size_t length,
const char* filter, size_t filter_length);
};
struct leveldb_env_t {
@ -130,32 +145,31 @@ struct leveldb_env_t {
};
static bool SaveError(char** errptr, const Status& s) {
assert(errptr != NULL);
assert(errptr != nullptr);
if (s.ok()) {
return false;
} else if (*errptr == NULL) {
} else if (*errptr == nullptr) {
*errptr = strdup(s.ToString().c_str());
} else {
// TODO(sanjay): Merge with existing error?
free(*errptr);
std::free(*errptr);
*errptr = strdup(s.ToString().c_str());
}
return true;
}
static char* CopyString(const std::string& str) {
char* result = reinterpret_cast<char*>(malloc(sizeof(char) * str.size()));
memcpy(result, str.data(), sizeof(char) * str.size());
char* result =
reinterpret_cast<char*>(std::malloc(sizeof(char) * str.size()));
std::memcpy(result, str.data(), sizeof(char) * str.size());
return result;
}
leveldb_t* leveldb_open(
const leveldb_options_t* options,
const char* name,
char** errptr) {
leveldb_t* leveldb_open(const leveldb_options_t* options, const char* name,
char** errptr) {
DB* db;
if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) {
return NULL;
return nullptr;
}
leveldb_t* result = new leveldb_t;
result->rep = db;
@ -167,40 +181,27 @@ void leveldb_close(leveldb_t* db) {
delete db;
}
void leveldb_put(
leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr) {
void leveldb_put(leveldb_t* db, const leveldb_writeoptions_t* options,
const char* key, size_t keylen, const char* val, size_t vallen,
char** errptr) {
SaveError(errptr,
db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen)));
}
void leveldb_delete(
leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen,
char** errptr) {
void leveldb_delete(leveldb_t* db, const leveldb_writeoptions_t* options,
const char* key, size_t keylen, char** errptr) {
SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen)));
}
void leveldb_write(
leveldb_t* db,
const leveldb_writeoptions_t* options,
leveldb_writebatch_t* batch,
char** errptr) {
void leveldb_write(leveldb_t* db, const leveldb_writeoptions_t* options,
leveldb_writebatch_t* batch, char** errptr) {
SaveError(errptr, db->rep->Write(options->rep, &batch->rep));
}
char* leveldb_get(
leveldb_t* db,
const leveldb_readoptions_t* options,
const char* key, size_t keylen,
size_t* vallen,
char** errptr) {
char* result = NULL;
char* leveldb_get(leveldb_t* db, const leveldb_readoptions_t* options,
const char* key, size_t keylen, size_t* vallen,
char** errptr) {
char* result = nullptr;
std::string tmp;
Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp);
if (s.ok()) {
@ -216,45 +217,40 @@ char* leveldb_get(
}
leveldb_iterator_t* leveldb_create_iterator(
leveldb_t* db,
const leveldb_readoptions_t* options) {
leveldb_t* db, const leveldb_readoptions_t* options) {
leveldb_iterator_t* result = new leveldb_iterator_t;
result->rep = db->rep->NewIterator(options->rep);
return result;
}
const leveldb_snapshot_t* leveldb_create_snapshot(
leveldb_t* db) {
const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db) {
leveldb_snapshot_t* result = new leveldb_snapshot_t;
result->rep = db->rep->GetSnapshot();
return result;
}
void leveldb_release_snapshot(
leveldb_t* db,
const leveldb_snapshot_t* snapshot) {
void leveldb_release_snapshot(leveldb_t* db,
const leveldb_snapshot_t* snapshot) {
db->rep->ReleaseSnapshot(snapshot->rep);
delete snapshot;
}
char* leveldb_property_value(
leveldb_t* db,
const char* propname) {
char* leveldb_property_value(leveldb_t* db, const char* propname) {
std::string tmp;
if (db->rep->GetProperty(Slice(propname), &tmp)) {
// We use strdup() since we expect human readable output.
return strdup(tmp.c_str());
} else {
return NULL;
return nullptr;
}
}
void leveldb_approximate_sizes(
leveldb_t* db,
int num_ranges,
const char* const* range_start_key, const size_t* range_start_key_len,
const char* const* range_limit_key, const size_t* range_limit_key_len,
uint64_t* sizes) {
void leveldb_approximate_sizes(leveldb_t* db, int num_ranges,
const char* const* range_start_key,
const size_t* range_start_key_len,
const char* const* range_limit_key,
const size_t* range_limit_key_len,
uint64_t* sizes) {
Range* ranges = new Range[num_ranges];
for (int i = 0; i < num_ranges; i++) {
ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]);
@ -264,28 +260,23 @@ void leveldb_approximate_sizes(
delete[] ranges;
}
void leveldb_compact_range(
leveldb_t* db,
const char* start_key, size_t start_key_len,
const char* limit_key, size_t limit_key_len) {
void leveldb_compact_range(leveldb_t* db, const char* start_key,
size_t start_key_len, const char* limit_key,
size_t limit_key_len) {
Slice a, b;
db->rep->CompactRange(
// Pass NULL Slice if corresponding "const char*" is NULL
(start_key ? (a = Slice(start_key, start_key_len), &a) : NULL),
(limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
// Pass null Slice if corresponding "const char*" is null
(start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr),
(limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr));
}
void leveldb_destroy_db(
const leveldb_options_t* options,
const char* name,
char** errptr) {
void leveldb_destroy_db(const leveldb_options_t* options, const char* name,
char** errptr) {
SaveError(errptr, DestroyDB(name, options->rep));
}
void leveldb_repair_db(
const leveldb_options_t* options,
const char* name,
char** errptr) {
void leveldb_repair_db(const leveldb_options_t* options, const char* name,
char** errptr) {
SaveError(errptr, RepairDB(name, options->rep));
}
@ -294,7 +285,7 @@ void leveldb_iter_destroy(leveldb_iterator_t* iter) {
delete iter;
}
unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) {
uint8_t leveldb_iter_valid(const leveldb_iterator_t* iter) {
return iter->rep->Valid();
}
@ -310,13 +301,9 @@ void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) {
iter->rep->Seek(Slice(k, klen));
}
void leveldb_iter_next(leveldb_iterator_t* iter) {
iter->rep->Next();
}
void leveldb_iter_next(leveldb_iterator_t* iter) { iter->rep->Next(); }
void leveldb_iter_prev(leveldb_iterator_t* iter) {
iter->rep->Prev();
}
void leveldb_iter_prev(leveldb_iterator_t* iter) { iter->rep->Prev(); }
const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) {
Slice s = iter->rep->key();
@ -338,41 +325,34 @@ leveldb_writebatch_t* leveldb_writebatch_create() {
return new leveldb_writebatch_t;
}
void leveldb_writebatch_destroy(leveldb_writebatch_t* b) {
delete b;
}
void leveldb_writebatch_destroy(leveldb_writebatch_t* b) { delete b; }
void leveldb_writebatch_clear(leveldb_writebatch_t* b) {
b->rep.Clear();
}
void leveldb_writebatch_clear(leveldb_writebatch_t* b) { b->rep.Clear(); }
void leveldb_writebatch_put(
leveldb_writebatch_t* b,
const char* key, size_t klen,
const char* val, size_t vlen) {
void leveldb_writebatch_put(leveldb_writebatch_t* b, const char* key,
size_t klen, const char* val, size_t vlen) {
b->rep.Put(Slice(key, klen), Slice(val, vlen));
}
void leveldb_writebatch_delete(
leveldb_writebatch_t* b,
const char* key, size_t klen) {
void leveldb_writebatch_delete(leveldb_writebatch_t* b, const char* key,
size_t klen) {
b->rep.Delete(Slice(key, klen));
}
void leveldb_writebatch_iterate(
leveldb_writebatch_t* b,
void* state,
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
void (*deleted)(void*, const char* k, size_t klen)) {
void leveldb_writebatch_iterate(const leveldb_writebatch_t* b, void* state,
void (*put)(void*, const char* k, size_t klen,
const char* v, size_t vlen),
void (*deleted)(void*, const char* k,
size_t klen)) {
class H : public WriteBatch::Handler {
public:
void* state_;
void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
void (*deleted_)(void*, const char* k, size_t klen);
virtual void Put(const Slice& key, const Slice& value) {
void Put(const Slice& key, const Slice& value) override {
(*put_)(state_, key.data(), key.size(), value.data(), value.size());
}
virtual void Delete(const Slice& key) {
void Delete(const Slice& key) override {
(*deleted_)(state_, key.data(), key.size());
}
};
@ -383,47 +363,43 @@ void leveldb_writebatch_iterate(
b->rep.Iterate(&handler);
}
leveldb_options_t* leveldb_options_create() {
return new leveldb_options_t;
void leveldb_writebatch_append(leveldb_writebatch_t* destination,
const leveldb_writebatch_t* source) {
destination->rep.Append(source->rep);
}
void leveldb_options_destroy(leveldb_options_t* options) {
delete options;
}
leveldb_options_t* leveldb_options_create() { return new leveldb_options_t; }
void leveldb_options_set_comparator(
leveldb_options_t* opt,
leveldb_comparator_t* cmp) {
void leveldb_options_destroy(leveldb_options_t* options) { delete options; }
void leveldb_options_set_comparator(leveldb_options_t* opt,
leveldb_comparator_t* cmp) {
opt->rep.comparator = cmp;
}
void leveldb_options_set_filter_policy(
leveldb_options_t* opt,
leveldb_filterpolicy_t* policy) {
void leveldb_options_set_filter_policy(leveldb_options_t* opt,
leveldb_filterpolicy_t* policy) {
opt->rep.filter_policy = policy;
}
void leveldb_options_set_create_if_missing(
leveldb_options_t* opt, unsigned char v) {
void leveldb_options_set_create_if_missing(leveldb_options_t* opt, uint8_t v) {
opt->rep.create_if_missing = v;
}
void leveldb_options_set_error_if_exists(
leveldb_options_t* opt, unsigned char v) {
void leveldb_options_set_error_if_exists(leveldb_options_t* opt, uint8_t v) {
opt->rep.error_if_exists = v;
}
void leveldb_options_set_paranoid_checks(
leveldb_options_t* opt, unsigned char v) {
void leveldb_options_set_paranoid_checks(leveldb_options_t* opt, uint8_t v) {
opt->rep.paranoid_checks = v;
}
void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) {
opt->rep.env = (env ? env->rep : NULL);
opt->rep.env = (env ? env->rep : nullptr);
}
void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) {
opt->rep.info_log = (l ? l->rep : NULL);
opt->rep.info_log = (l ? l->rep : nullptr);
}
void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) {
@ -446,17 +422,18 @@ void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) {
opt->rep.block_restart_interval = n;
}
void leveldb_options_set_max_file_size(leveldb_options_t* opt, size_t s) {
opt->rep.max_file_size = s;
}
void leveldb_options_set_compression(leveldb_options_t* opt, int t) {
opt->rep.compression = static_cast<CompressionType>(t);
}
leveldb_comparator_t* leveldb_comparator_create(
void* state,
void (*destructor)(void*),
int (*compare)(
void*,
const char* a, size_t alen,
const char* b, size_t blen),
void* state, void (*destructor)(void*),
int (*compare)(void*, const char* a, size_t alen, const char* b,
size_t blen),
const char* (*name)(void*)) {
leveldb_comparator_t* result = new leveldb_comparator_t;
result->state_ = state;
@ -466,22 +443,15 @@ leveldb_comparator_t* leveldb_comparator_create(
return result;
}
void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
delete cmp;
}
void leveldb_comparator_destroy(leveldb_comparator_t* cmp) { delete cmp; }
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
void* state,
void (*destructor)(void*),
char* (*create_filter)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length),
unsigned char (*key_may_match)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length),
void* state, void (*destructor)(void*),
char* (*create_filter)(void*, const char* const* key_array,
const size_t* key_length_array, int num_keys,
size_t* filter_length),
uint8_t (*key_may_match)(void*, const char* key, size_t length,
const char* filter, size_t filter_length),
const char* (*name)(void*)) {
leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
result->state_ = state;
@ -501,7 +471,8 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
// they delegate to a NewBloomFilterPolicy() instead of user
// supplied C functions.
struct Wrapper : public leveldb_filterpolicy_t {
const FilterPolicy* rep_;
static void DoNothing(void*) {}
~Wrapper() { delete rep_; }
const char* Name() const { return rep_->Name(); }
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
@ -510,11 +481,12 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
return rep_->KeyMayMatch(key, filter);
}
static void DoNothing(void*) { }
const FilterPolicy* rep_;
};
Wrapper* wrapper = new Wrapper;
wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
wrapper->state_ = NULL;
wrapper->state_ = nullptr;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}
@ -523,37 +495,29 @@ leveldb_readoptions_t* leveldb_readoptions_create() {
return new leveldb_readoptions_t;
}
void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) {
delete opt;
}
void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) { delete opt; }
void leveldb_readoptions_set_verify_checksums(
leveldb_readoptions_t* opt,
unsigned char v) {
void leveldb_readoptions_set_verify_checksums(leveldb_readoptions_t* opt,
uint8_t v) {
opt->rep.verify_checksums = v;
}
void leveldb_readoptions_set_fill_cache(
leveldb_readoptions_t* opt, unsigned char v) {
void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t* opt, uint8_t v) {
opt->rep.fill_cache = v;
}
void leveldb_readoptions_set_snapshot(
leveldb_readoptions_t* opt,
const leveldb_snapshot_t* snap) {
opt->rep.snapshot = (snap ? snap->rep : NULL);
void leveldb_readoptions_set_snapshot(leveldb_readoptions_t* opt,
const leveldb_snapshot_t* snap) {
opt->rep.snapshot = (snap ? snap->rep : nullptr);
}
leveldb_writeoptions_t* leveldb_writeoptions_create() {
return new leveldb_writeoptions_t;
}
void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) {
delete opt;
}
void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) { delete opt; }
void leveldb_writeoptions_set_sync(
leveldb_writeoptions_t* opt, unsigned char v) {
void leveldb_writeoptions_set_sync(leveldb_writeoptions_t* opt, uint8_t v) {
opt->rep.sync = v;
}
@ -580,16 +544,22 @@ void leveldb_env_destroy(leveldb_env_t* env) {
delete env;
}
void leveldb_free(void* ptr) {
free(ptr);
char* leveldb_env_get_test_directory(leveldb_env_t* env) {
std::string result;
if (!env->rep->GetTestDirectory(&result).ok()) {
return nullptr;
}
char* buffer = static_cast<char*>(std::malloc(result.size() + 1));
std::memcpy(buffer, result.data(), result.size());
buffer[result.size()] = '\0';
return buffer;
}
int leveldb_major_version() {
return kMajorVersion;
}
void leveldb_free(void* ptr) { std::free(ptr); }
int leveldb_minor_version() {
return kMinorVersion;
}
int leveldb_major_version() { return kMajorVersion; }
int leveldb_minor_version() { return kMinorVersion; }
} // end extern "C"
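
Besides the clang-format, nullptr, and uint8_t cleanups, the db/c.cc hunks above add two entry points to the C binding: leveldb_writebatch_append(), which concatenates one batch onto another, and leveldb_env_get_test_directory(), which returns a malloc()-allocated path that the caller releases with leveldb_free(). A hedged sketch of how they fit together with the existing C API; only calls visible in this diff are used, the path suffix and sizes are illustrative, and error handling is abbreviated:

// Hedged usage sketch for the new C-binding calls (C API used from C++).
#include <cstdio>
#include "leveldb/c.h"

int main() {
  char* err = nullptr;
  leveldb_env_t* env = leveldb_create_default_env();

  // New: ask the env for a scratch directory; the returned buffer is
  // malloc()-allocated, so it must be released through leveldb_free().
  char* dir = leveldb_env_get_test_directory(env);
  char dbname[512];
  std::snprintf(dbname, sizeof(dbname), "%s/c_api_example",
                dir != nullptr ? dir : "/tmp");

  leveldb_options_t* options = leveldb_options_create();
  leveldb_options_set_create_if_missing(options, 1);
  leveldb_options_set_max_file_size(options, 2 << 20);  // also new in this diff

  leveldb_t* db = leveldb_open(options, dbname, &err);
  if (err != nullptr) {
    std::fprintf(stderr, "open failed: %s\n", err);
    leveldb_free(err);
    return 1;
  }

  // New: batches can be concatenated before being written.
  leveldb_writebatch_t* a = leveldb_writebatch_create();
  leveldb_writebatch_t* b = leveldb_writebatch_create();
  leveldb_writebatch_put(a, "foo", 3, "hello", 5);
  leveldb_writebatch_delete(b, "foo", 3);
  leveldb_writebatch_append(a, b);  // `a` now holds the put followed by the delete

  leveldb_writeoptions_t* woptions = leveldb_writeoptions_create();
  leveldb_write(db, woptions, a, &err);
  leveldb_free(err);  // no-op if the write succeeded

  leveldb_writebatch_destroy(a);
  leveldb_writebatch_destroy(b);
  leveldb_writeoptions_destroy(woptions);
  leveldb_close(db);
  leveldb_options_destroy(options);
  leveldb_free(dir);
  leveldb_env_destroy(env);
  return 0;
}

The c_test.c hunks further down exercise the same additions: the test now takes its database directory from leveldb_env_get_test_directory() instead of TEST_TMPDIR/geteuid(), and builds its delete via a second batch appended with leveldb_writebatch_append().
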

View File

@ -8,24 +8,14 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
const char* phase = "";
static char dbname[200];
static void StartPhase(const char* name) {
fprintf(stderr, "=== Test %s\n", name);
phase = name;
}
static const char* GetTempDir(void) {
const char* ret = getenv("TEST_TMPDIR");
if (ret == NULL || ret[0] == '\0')
ret = "/tmp";
return ret;
}
#define CheckNoError(err) \
if ((err) != NULL) { \
fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \
@ -130,7 +120,7 @@ static const char* CmpName(void* arg) {
}
// Custom filter policy
static unsigned char fake_filter_result = 1;
static uint8_t fake_filter_result = 1;
static void FilterDestroy(void* arg) { }
static const char* FilterName(void* arg) {
return "TestFilter";
@ -145,10 +135,8 @@ static char* FilterCreate(
memcpy(result, "fake", 4);
return result;
}
unsigned char FilterKeyMatch(
void* arg,
const char* key, size_t length,
const char* filter, size_t filter_length) {
uint8_t FilterKeyMatch(void* arg, const char* key, size_t length,
const char* filter, size_t filter_length) {
CheckCondition(filter_length == 4);
CheckCondition(memcmp(filter, "fake", 4) == 0);
return fake_filter_result;
@ -162,21 +150,19 @@ int main(int argc, char** argv) {
leveldb_options_t* options;
leveldb_readoptions_t* roptions;
leveldb_writeoptions_t* woptions;
char* dbname;
char* err = NULL;
int run = -1;
CheckCondition(leveldb_major_version() >= 1);
CheckCondition(leveldb_minor_version() >= 1);
snprintf(dbname, sizeof(dbname),
"%s/leveldb_c_test-%d",
GetTempDir(),
((int) geteuid()));
StartPhase("create_objects");
cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
env = leveldb_create_default_env();
cache = leveldb_cache_create_lru(100000);
dbname = leveldb_env_get_test_directory(env);
CheckCondition(dbname != NULL);
options = leveldb_options_create();
leveldb_options_set_comparator(options, cmp);
@ -189,6 +175,7 @@ int main(int argc, char** argv) {
leveldb_options_set_max_open_files(options, 10);
leveldb_options_set_block_size(options, 1024);
leveldb_options_set_block_restart_interval(options, 8);
leveldb_options_set_max_file_size(options, 3 << 20);
leveldb_options_set_compression(options, leveldb_no_compression);
roptions = leveldb_readoptions_create();
@ -239,12 +226,18 @@ int main(int argc, char** argv) {
leveldb_writebatch_clear(wb);
leveldb_writebatch_put(wb, "bar", 3, "b", 1);
leveldb_writebatch_put(wb, "box", 3, "c", 1);
leveldb_writebatch_delete(wb, "bar", 3);
leveldb_writebatch_t* wb2 = leveldb_writebatch_create();
leveldb_writebatch_delete(wb2, "bar", 3);
leveldb_writebatch_append(wb, wb2);
leveldb_writebatch_destroy(wb2);
leveldb_write(db, woptions, wb, &err);
CheckNoError(err);
CheckGet(db, roptions, "foo", "hello");
CheckGet(db, roptions, "bar", NULL);
CheckGet(db, roptions, "box", "c");
int pos = 0;
leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
CheckCondition(pos == 3);
@ -381,6 +374,7 @@ int main(int argc, char** argv) {
leveldb_options_destroy(options);
leveldb_readoptions_destroy(roptions);
leveldb_writeoptions_destroy(woptions);
leveldb_free(dbname);
leveldb_cache_destroy(cache);
leveldb_comparator_destroy(cmp);
leveldb_env_destroy(env);

View File

@ -2,76 +2,63 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "leveldb/db.h"
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "leveldb/cache.h"
#include "leveldb/env.h"
#include "leveldb/table.h"
#include "leveldb/write_batch.h"
#include "gtest/gtest.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/log_format.h"
#include "db/version_set.h"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/table.h"
#include "leveldb/write_batch.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
static const int kValueSize = 1000;
class CorruptionTest {
class CorruptionTest : public testing::Test {
public:
test::ErrorEnv env_;
std::string dbname_;
Cache* tiny_cache_;
Options options_;
DB* db_;
CorruptionTest() {
tiny_cache_ = NewLRUCache(100);
CorruptionTest()
: db_(nullptr),
dbname_("/memenv/corruption_test"),
tiny_cache_(NewLRUCache(100)) {
options_.env = &env_;
options_.block_cache = tiny_cache_;
dbname_ = test::TmpDir() + "/corruption_test";
DestroyDB(dbname_, options_);
db_ = NULL;
options_.create_if_missing = true;
Reopen();
options_.create_if_missing = false;
}
~CorruptionTest() {
delete db_;
DestroyDB(dbname_, Options());
delete tiny_cache_;
delete db_;
delete tiny_cache_;
}
Status TryReopen() {
delete db_;
db_ = NULL;
db_ = nullptr;
return DB::Open(options_, dbname_, &db_);
}
void Reopen() {
ASSERT_OK(TryReopen());
}
void Reopen() { ASSERT_LEVELDB_OK(TryReopen()); }
void RepairDB() {
delete db_;
db_ = NULL;
ASSERT_OK(::leveldb::RepairDB(dbname_, options_));
db_ = nullptr;
ASSERT_LEVELDB_OK(::leveldb::RepairDB(dbname_, options_));
}
void Build(int n) {
std::string key_space, value_space;
WriteBatch batch;
for (int i = 0; i < n; i++) {
//if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
// if ((i % 100) == 0) std::fprintf(stderr, "@ %d of %d\n", i, n);
Slice key = Key(i, &key_space);
batch.Clear();
batch.Put(key, Value(i, &value_space));
@ -81,7 +68,7 @@ class CorruptionTest {
if (i == n - 1) {
options.sync = true;
}
ASSERT_OK(db_->Write(options, &batch));
ASSERT_LEVELDB_OK(db_->Write(options, &batch));
}
}
@ -100,8 +87,7 @@ class CorruptionTest {
// Ignore boundary keys.
continue;
}
if (!ConsumeDecimalNumber(&in, &key) ||
!in.empty() ||
if (!ConsumeDecimalNumber(&in, &key) || !in.empty() ||
key < next_expected) {
bad_keys++;
continue;
@ -116,9 +102,10 @@ class CorruptionTest {
}
delete iter;
fprintf(stderr,
"expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
min_expected, max_expected, correct, bad_keys, bad_values, missed);
std::fprintf(
stderr,
"expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
min_expected, max_expected, correct, bad_keys, bad_values, missed);
ASSERT_LE(min_expected, correct);
ASSERT_GE(max_expected, correct);
}
@ -126,14 +113,13 @@ class CorruptionTest {
void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
// Pick file to corrupt
std::vector<std::string> filenames;
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
ASSERT_LEVELDB_OK(env_.target()->GetChildren(dbname_, &filenames));
uint64_t number;
FileType type;
std::string fname;
int picked_number = -1;
for (size_t i = 0; i < filenames.size(); i++) {
if (ParseFileName(filenames[i], &number, &type) &&
type == filetype &&
if (ParseFileName(filenames[i], &number, &type) && type == filetype &&
int(number) > picked_number) { // Pick latest file
fname = dbname_ + "/" + filenames[i];
picked_number = number;
@ -141,35 +127,32 @@ class CorruptionTest {
}
ASSERT_TRUE(!fname.empty()) << filetype;
struct stat sbuf;
if (stat(fname.c_str(), &sbuf) != 0) {
const char* msg = strerror(errno);
ASSERT_TRUE(false) << fname << ": " << msg;
}
uint64_t file_size;
ASSERT_LEVELDB_OK(env_.target()->GetFileSize(fname, &file_size));
if (offset < 0) {
// Relative to end of file; make it absolute
if (-offset > sbuf.st_size) {
if (-offset > file_size) {
offset = 0;
} else {
offset = sbuf.st_size + offset;
offset = file_size + offset;
}
}
if (offset > sbuf.st_size) {
offset = sbuf.st_size;
if (offset > file_size) {
offset = file_size;
}
if (offset + bytes_to_corrupt > sbuf.st_size) {
bytes_to_corrupt = sbuf.st_size - offset;
if (offset + bytes_to_corrupt > file_size) {
bytes_to_corrupt = file_size - offset;
}
// Do it
std::string contents;
Status s = ReadFileToString(Env::Default(), fname, &contents);
Status s = ReadFileToString(env_.target(), fname, &contents);
ASSERT_TRUE(s.ok()) << s.ToString();
for (int i = 0; i < bytes_to_corrupt; i++) {
contents[i + offset] ^= 0x80;
}
s = WriteStringToFile(Env::Default(), contents, fname);
s = WriteStringToFile(env_.target(), contents, fname);
ASSERT_TRUE(s.ok()) << s.ToString();
}
@ -187,7 +170,7 @@ class CorruptionTest {
// Return the ith key
Slice Key(int i, std::string* storage) {
char buf[100];
snprintf(buf, sizeof(buf), "%016d", i);
std::snprintf(buf, sizeof(buf), "%016d", i);
storage->assign(buf, strlen(buf));
return Slice(*storage);
}
@ -197,12 +180,20 @@ class CorruptionTest {
Random r(k);
return test::RandomString(&r, kValueSize, storage);
}
test::ErrorEnv env_;
Options options_;
DB* db_;
private:
std::string dbname_;
Cache* tiny_cache_;
};
TEST(CorruptionTest, Recovery) {
TEST_F(CorruptionTest, Recovery) {
Build(100);
Check(100, 100);
Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record
Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record
Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block
Reopen();
@ -210,13 +201,13 @@ TEST(CorruptionTest, Recovery) {
Check(36, 36);
}
TEST(CorruptionTest, RecoverWriteError) {
TEST_F(CorruptionTest, RecoverWriteError) {
env_.writable_file_error_ = true;
Status s = TryReopen();
ASSERT_TRUE(!s.ok());
}
TEST(CorruptionTest, NewFileErrorDuringWrite) {
TEST_F(CorruptionTest, NewFileErrorDuringWrite) {
// Do enough writing to force minor compaction
env_.writable_file_error_ = true;
const int num = 3 + (Options().write_buffer_size / kValueSize);
@ -233,26 +224,26 @@ TEST(CorruptionTest, NewFileErrorDuringWrite) {
Reopen();
}
TEST(CorruptionTest, TableFile) {
TEST_F(CorruptionTest, TableFile) {
Build(100);
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
dbi->TEST_CompactRange(0, NULL, NULL);
dbi->TEST_CompactRange(1, NULL, NULL);
dbi->TEST_CompactRange(0, nullptr, nullptr);
dbi->TEST_CompactRange(1, nullptr, nullptr);
Corrupt(kTableFile, 100, 1);
Check(90, 99);
}
TEST(CorruptionTest, TableFileRepair) {
TEST_F(CorruptionTest, TableFileRepair) {
options_.block_size = 2 * kValueSize; // Limit scope of corruption
options_.paranoid_checks = true;
Reopen();
Build(100);
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
dbi->TEST_CompactRange(0, NULL, NULL);
dbi->TEST_CompactRange(1, NULL, NULL);
dbi->TEST_CompactRange(0, nullptr, nullptr);
dbi->TEST_CompactRange(1, nullptr, nullptr);
Corrupt(kTableFile, 100, 1);
RepairDB();
@ -260,7 +251,7 @@ TEST(CorruptionTest, TableFileRepair) {
Check(95, 99);
}
TEST(CorruptionTest, TableFileIndexData) {
TEST_F(CorruptionTest, TableFileIndexData) {
Build(10000); // Enough to build multiple Tables
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
@ -270,39 +261,39 @@ TEST(CorruptionTest, TableFileIndexData) {
Check(5000, 9999);
}
TEST(CorruptionTest, MissingDescriptor) {
TEST_F(CorruptionTest, MissingDescriptor) {
Build(1000);
RepairDB();
Reopen();
Check(1000, 1000);
}
TEST(CorruptionTest, SequenceNumberRecovery) {
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3"));
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4"));
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5"));
TEST_F(CorruptionTest, SequenceNumberRecovery) {
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v1"));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v2"));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v3"));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v4"));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v5"));
RepairDB();
Reopen();
std::string v;
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_EQ("v5", v);
// Write something. If sequence number was not recovered properly,
// it will be hidden by an earlier write.
ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v6"));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_EQ("v6", v);
Reopen();
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_EQ("v6", v);
}
TEST(CorruptionTest, CorruptedDescriptor) {
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
TEST_F(CorruptionTest, CorruptedDescriptor) {
ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "hello"));
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
dbi->TEST_CompactRange(0, NULL, NULL);
dbi->TEST_CompactRange(0, nullptr, nullptr);
Corrupt(kDescriptorFile, 0, 1000);
Status s = TryReopen();
@ -311,11 +302,11 @@ TEST(CorruptionTest, CorruptedDescriptor) {
RepairDB();
Reopen();
std::string v;
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), "foo", &v));
ASSERT_EQ("hello", v);
}
TEST(CorruptionTest, CompactionInputError) {
TEST_F(CorruptionTest, CompactionInputError) {
Build(10);
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
@ -330,7 +321,7 @@ TEST(CorruptionTest, CompactionInputError) {
Check(10000, 10000);
}
TEST(CorruptionTest, CompactionInputErrorParanoid) {
TEST_F(CorruptionTest, CompactionInputErrorParanoid) {
options_.paranoid_checks = true;
options_.write_buffer_size = 512 << 10;
Reopen();
@ -343,7 +334,7 @@ TEST(CorruptionTest, CompactionInputErrorParanoid) {
Corrupt(kTableFile, 100, 1);
env_.SleepForMicroseconds(100000);
}
dbi->CompactRange(NULL, NULL);
dbi->CompactRange(nullptr, nullptr);
// Write must fail because of corrupted table
std::string tmp1, tmp2;
@ -351,24 +342,21 @@ TEST(CorruptionTest, CompactionInputErrorParanoid) {
ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}
TEST(CorruptionTest, UnrelatedKeys) {
TEST_F(CorruptionTest, UnrelatedKeys) {
Build(10);
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable();
Corrupt(kTableFile, 100, 1);
std::string tmp1, tmp2;
ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
ASSERT_LEVELDB_OK(
db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
std::string v;
ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
dbi->TEST_CompactMemTable();
ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
ASSERT_LEVELDB_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

File diff suppressed because it is too large

View File

@ -5,8 +5,11 @@
#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
#define STORAGE_LEVELDB_DB_DB_IMPL_H_
#include <atomic>
#include <deque>
#include <set>
#include <string>
#include "db/dbformat.h"
#include "db/log_writer.h"
#include "db/snapshot.h"
@ -26,21 +29,25 @@ class VersionSet;
class DBImpl : public DB {
public:
DBImpl(const Options& options, const std::string& dbname);
virtual ~DBImpl();
DBImpl(const DBImpl&) = delete;
DBImpl& operator=(const DBImpl&) = delete;
~DBImpl() override;
// Implementations of the DB interface
virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
virtual Status Delete(const WriteOptions&, const Slice& key);
virtual Status Write(const WriteOptions& options, WriteBatch* updates);
virtual Status Get(const ReadOptions& options,
const Slice& key,
std::string* value);
virtual Iterator* NewIterator(const ReadOptions&);
virtual const Snapshot* GetSnapshot();
virtual void ReleaseSnapshot(const Snapshot* snapshot);
virtual bool GetProperty(const Slice& property, std::string* value);
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
virtual void CompactRange(const Slice* begin, const Slice* end);
Status Put(const WriteOptions&, const Slice& key,
const Slice& value) override;
Status Delete(const WriteOptions&, const Slice& key) override;
Status Write(const WriteOptions& options, WriteBatch* updates) override;
Status Get(const ReadOptions& options, const Slice& key,
std::string* value) override;
Iterator* NewIterator(const ReadOptions&) override;
const Snapshot* GetSnapshot() override;
void ReleaseSnapshot(const Snapshot* snapshot) override;
bool GetProperty(const Slice& property, std::string* value) override;
void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) override;
void CompactRange(const Slice* begin, const Slice* end) override;
// Extra methods (for testing) that are not in the public DB interface
@ -69,6 +76,31 @@ class DBImpl : public DB {
struct CompactionState;
struct Writer;
// Information for a manual compaction
struct ManualCompaction {
int level;
bool done;
const InternalKey* begin; // null means beginning of key range
const InternalKey* end; // null means end of key range
InternalKey tmp_storage; // Used to keep track of compaction progress
};
// Per level compaction stats. stats_[level] stores the stats for
// compactions that produced data for the specified "level".
struct CompactionStats {
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {}
void Add(const CompactionStats& c) {
this->micros += c.micros;
this->bytes_read += c.bytes_read;
this->bytes_written += c.bytes_written;
}
int64_t micros;
int64_t bytes_read;
int64_t bytes_written;
};
Iterator* NewInternalIterator(const ReadOptions&,
SequenceNumber* latest_snapshot,
uint32_t* seed);
@ -84,7 +116,7 @@ class DBImpl : public DB {
void MaybeIgnoreError(Status* s) const;
// Delete any unneeded files and stale in-memory entries.
void DeleteObsoleteFiles();
void RemoveObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Compact the in-memory write buffer to disk. Switches to a new
// log-file/memtable and writes a new descriptor iff successful.
@ -100,14 +132,15 @@ class DBImpl : public DB {
Status MakeRoomForWrite(bool force /* compact even if there is room? */)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
WriteBatch* BuildBatchGroup(Writer** last_writer);
WriteBatch* BuildBatchGroup(Writer** last_writer)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void RecordBackgroundError(const Status& s);
void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
static void BGWork(void* db);
void BackgroundCall();
void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void CleanupCompaction(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status DoCompactionWork(CompactionState* compact)
@ -118,93 +151,66 @@ class DBImpl : public DB {
Status InstallCompactionResults(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
const Comparator* user_comparator() const {
return internal_comparator_.user_comparator();
}
// Constant after construction
Env* const env_;
const InternalKeyComparator internal_comparator_;
const InternalFilterPolicy internal_filter_policy_;
const Options options_; // options_.comparator == &internal_comparator_
bool owns_info_log_;
bool owns_cache_;
const bool owns_info_log_;
const bool owns_cache_;
const std::string dbname_;
// table_cache_ provides its own synchronization
TableCache* table_cache_;
TableCache* const table_cache_;
// Lock over the persistent DB state. Non-NULL iff successfully acquired.
// Lock over the persistent DB state. Non-null iff successfully acquired.
FileLock* db_lock_;
// State below is protected by mutex_
port::Mutex mutex_;
port::AtomicPointer shutting_down_;
port::CondVar bg_cv_; // Signalled when background work finishes
std::atomic<bool> shutting_down_;
port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_);
MemTable* mem_;
MemTable* imm_; // Memtable being compacted
port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
MemTable* imm_ GUARDED_BY(mutex_); // Memtable being compacted
std::atomic<bool> has_imm_; // So bg thread can detect non-null imm_
WritableFile* logfile_;
uint64_t logfile_number_;
uint64_t logfile_number_ GUARDED_BY(mutex_);
log::Writer* log_;
uint32_t seed_; // For sampling.
uint32_t seed_ GUARDED_BY(mutex_); // For sampling.
// Queue of writers.
std::deque<Writer*> writers_;
WriteBatch* tmp_batch_;
std::deque<Writer*> writers_ GUARDED_BY(mutex_);
WriteBatch* tmp_batch_ GUARDED_BY(mutex_);
SnapshotList snapshots_;
SnapshotList snapshots_ GUARDED_BY(mutex_);
// Set of table files to protect from deletion because they are
// part of ongoing compactions.
std::set<uint64_t> pending_outputs_;
std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_);
// Has a background compaction been scheduled or is running?
bool bg_compaction_scheduled_;
bool background_compaction_scheduled_ GUARDED_BY(mutex_);
// Information for a manual compaction
struct ManualCompaction {
int level;
bool done;
const InternalKey* begin; // NULL means beginning of key range
const InternalKey* end; // NULL means end of key range
InternalKey tmp_storage; // Used to keep track of compaction progress
};
ManualCompaction* manual_compaction_;
ManualCompaction* manual_compaction_ GUARDED_BY(mutex_);
VersionSet* versions_;
VersionSet* const versions_ GUARDED_BY(mutex_);
// Have we encountered a background error in paranoid mode?
Status bg_error_;
Status bg_error_ GUARDED_BY(mutex_);
// Per level compaction stats. stats_[level] stores the stats for
// compactions that produced data for the specified "level".
struct CompactionStats {
int64_t micros;
int64_t bytes_read;
int64_t bytes_written;
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }
void Add(const CompactionStats& c) {
this->micros += c.micros;
this->bytes_read += c.bytes_read;
this->bytes_written += c.bytes_written;
}
};
CompactionStats stats_[config::kNumLevels];
// No copying allowed
DBImpl(const DBImpl&);
void operator=(const DBImpl&);
const Comparator* user_comparator() const {
return internal_comparator_.user_comparator();
}
CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_);
};
// Sanitize db options. The caller should delete result.info_log if
// it is not equal to src.info_log.
extern Options SanitizeOptions(const std::string& db,
const InternalKeyComparator* icmp,
const InternalFilterPolicy* ipolicy,
const Options& src);
Options SanitizeOptions(const std::string& db,
const InternalKeyComparator* icmp,
const InternalFilterPolicy* ipolicy,
const Options& src);
} // namespace leveldb
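// Illustrative sketch, not part of this change: how the GUARDED_BY,
// EXCLUSIVE_LOCKS_REQUIRED and LOCKS_EXCLUDED annotations used above are
// consumed. The class below is hypothetical; when built with clang's
// -Wthread-safety, touching counter_ without holding mutex_ produces a
// compile-time warning.
#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/mutexlock.h"

class AnnotatedCounter {
 public:
  void Increment() LOCKS_EXCLUDED(mutex_) {
    leveldb::MutexLock lock(&mutex_);  // acquires mutex_ before touching counter_
    IncrementLocked();
  }

 private:
  void IncrementLocked() EXCLUSIVE_LOCKS_REQUIRED(mutex_) { ++counter_; }

  leveldb::port::Mutex mutex_;
  int counter_ GUARDED_BY(mutex_) = 0;  // analysis checks every access is under mutex_
};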

View File

@ -4,9 +4,9 @@
#include "db/db_iter.h"
#include "db/filename.h"
#include "db/db_impl.h"
#include "db/dbformat.h"
#include "db/filename.h"
#include "leveldb/env.h"
#include "leveldb/iterator.h"
#include "port/port.h"
@ -21,9 +21,9 @@ static void DumpInternalIter(Iterator* iter) {
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ParsedInternalKey k;
if (!ParseInternalKey(iter->key(), &k)) {
fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
std::fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
} else {
fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
std::fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
}
}
}
@ -36,17 +36,14 @@ namespace {
// combines multiple entries for the same userkey found in the DB
// representation into a single entry while accounting for sequence
// numbers, deletion markers, overwrites, etc.
class DBIter: public Iterator {
class DBIter : public Iterator {
public:
// Which direction is the iterator currently moving?
// (1) When moving forward, the internal iterator is positioned at
// the exact entry that yields this->key(), this->value()
// (2) When moving backwards, the internal iterator is positioned
// just before all entries whose user key == this->key().
enum Direction {
kForward,
kReverse
};
enum Direction { kForward, kReverse };
DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s,
uint32_t seed)
@ -57,21 +54,22 @@ class DBIter: public Iterator {
direction_(kForward),
valid_(false),
rnd_(seed),
bytes_counter_(RandomPeriod()) {
}
virtual ~DBIter() {
delete iter_;
}
virtual bool Valid() const { return valid_; }
virtual Slice key() const {
bytes_until_read_sampling_(RandomCompactionPeriod()) {}
DBIter(const DBIter&) = delete;
DBIter& operator=(const DBIter&) = delete;
~DBIter() override { delete iter_; }
bool Valid() const override { return valid_; }
Slice key() const override {
assert(valid_);
return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_;
}
virtual Slice value() const {
Slice value() const override {
assert(valid_);
return (direction_ == kForward) ? iter_->value() : saved_value_;
}
virtual Status status() const {
Status status() const override {
if (status_.ok()) {
return iter_->status();
} else {
@ -79,11 +77,11 @@ class DBIter: public Iterator {
}
}
virtual void Next();
virtual void Prev();
virtual void Seek(const Slice& target);
virtual void SeekToFirst();
virtual void SeekToLast();
void Next() override;
void Prev() override;
void Seek(const Slice& target) override;
void SeekToFirst() override;
void SeekToLast() override;
private:
void FindNextUserEntry(bool skipping, std::string* skip);
@ -103,38 +101,35 @@ class DBIter: public Iterator {
}
}
// Pick next gap with average value of config::kReadBytesPeriod.
ssize_t RandomPeriod() {
return rnd_.Uniform(2*config::kReadBytesPeriod);
// Picks the number of bytes that can be read until a compaction is scheduled.
size_t RandomCompactionPeriod() {
return rnd_.Uniform(2 * config::kReadBytesPeriod);
}
DBImpl* db_;
const Comparator* const user_comparator_;
Iterator* const iter_;
SequenceNumber const sequence_;
Status status_;
std::string saved_key_; // == current key when direction_==kReverse
std::string saved_value_; // == current raw value when direction_==kReverse
std::string saved_key_; // == current key when direction_==kReverse
std::string saved_value_; // == current raw value when direction_==kReverse
Direction direction_;
bool valid_;
Random rnd_;
ssize_t bytes_counter_;
// No copying allowed
DBIter(const DBIter&);
void operator=(const DBIter&);
size_t bytes_until_read_sampling_;
};
inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
Slice k = iter_->key();
ssize_t n = k.size() + iter_->value().size();
bytes_counter_ -= n;
while (bytes_counter_ < 0) {
bytes_counter_ += RandomPeriod();
size_t bytes_read = k.size() + iter_->value().size();
while (bytes_until_read_sampling_ < bytes_read) {
bytes_until_read_sampling_ += RandomCompactionPeriod();
db_->RecordReadSample(k);
}
assert(bytes_until_read_sampling_ >= bytes_read);
bytes_until_read_sampling_ -= bytes_read;
if (!ParseInternalKey(k, ikey)) {
status_ = Status::Corruption("corrupted internal key in DBIter");
return false;
@ -165,6 +160,15 @@ void DBIter::Next() {
} else {
// Store in saved_key_ the current key so we skip it below.
SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
// iter_ is pointing to current key. We can now safely move to the next to
// avoid checking current key.
iter_->Next();
if (!iter_->Valid()) {
valid_ = false;
saved_key_.clear();
return;
}
}
FindNextUserEntry(true, &saved_key_);
@ -218,8 +222,8 @@ void DBIter::Prev() {
ClearSavedValue();
return;
}
if (user_comparator_->Compare(ExtractUserKey(iter_->key()),
saved_key_) < 0) {
if (user_comparator_->Compare(ExtractUserKey(iter_->key()), saved_key_) <
0) {
break;
}
}
@ -275,8 +279,8 @@ void DBIter::Seek(const Slice& target) {
direction_ = kForward;
ClearSavedValue();
saved_key_.clear();
AppendInternalKey(
&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
AppendInternalKey(&saved_key_,
ParsedInternalKey(target, sequence_, kValueTypeForSeek));
iter_->Seek(saved_key_);
if (iter_->Valid()) {
FindNextUserEntry(false, &saved_key_ /* temporary storage */);
@ -305,12 +309,9 @@ void DBIter::SeekToLast() {
} // anonymous namespace
Iterator* NewDBIterator(
DBImpl* db,
const Comparator* user_key_comparator,
Iterator* internal_iter,
SequenceNumber sequence,
uint32_t seed) {
Iterator* NewDBIterator(DBImpl* db, const Comparator* user_key_comparator,
Iterator* internal_iter, SequenceNumber sequence,
uint32_t seed) {
return new DBIter(db, user_key_comparator, internal_iter, sequence, seed);
}

View File

@ -5,9 +5,10 @@
#ifndef STORAGE_LEVELDB_DB_DB_ITER_H_
#define STORAGE_LEVELDB_DB_DB_ITER_H_
#include <stdint.h>
#include "leveldb/db.h"
#include <cstdint>
#include "db/dbformat.h"
#include "leveldb/db.h"
namespace leveldb {
@ -16,12 +17,9 @@ class DBImpl;
// Return a new iterator that converts internal keys (yielded by
// "*internal_iter") that were live at the specified "sequence" number
// into appropriate user keys.
extern Iterator* NewDBIterator(
DBImpl* db,
const Comparator* user_key_comparator,
Iterator* internal_iter,
SequenceNumber sequence,
uint32_t seed);
Iterator* NewDBIterator(DBImpl* db, const Comparator* user_key_comparator,
Iterator* internal_iter, SequenceNumber sequence,
uint32_t seed);
} // namespace leveldb

File diff suppressed because it is too large

View File

@ -2,8 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdio.h>
#include "db/dbformat.h"
#include <cstdio>
#include <sstream>
#include "port/port.h"
#include "util/coding.h"
@ -21,26 +24,20 @@ void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
}
std::string ParsedInternalKey::DebugString() const {
char buf[50];
snprintf(buf, sizeof(buf), "' @ %llu : %d",
(unsigned long long) sequence,
int(type));
std::string result = "'";
result += EscapeString(user_key.ToString());
result += buf;
return result;
std::ostringstream ss;
ss << '\'' << EscapeString(user_key.ToString()) << "' @ " << sequence << " : "
<< static_cast<int>(type);
return ss.str();
}
std::string InternalKey::DebugString() const {
std::string result;
ParsedInternalKey parsed;
if (ParseInternalKey(rep_, &parsed)) {
result = parsed.DebugString();
} else {
result = "(bad)";
result.append(EscapeString(rep_));
return parsed.DebugString();
}
return result;
std::ostringstream ss;
ss << "(bad)" << EscapeString(rep_);
return ss.str();
}
const char* InternalKeyComparator::Name() const {
@ -65,9 +62,8 @@ int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
return r;
}
void InternalKeyComparator::FindShortestSeparator(
std::string* start,
const Slice& limit) const {
void InternalKeyComparator::FindShortestSeparator(std::string* start,
const Slice& limit) const {
// Attempt to shorten the user portion of the key
Slice user_start = ExtractUserKey(*start);
Slice user_limit = ExtractUserKey(limit);
@ -77,7 +73,8 @@ void InternalKeyComparator::FindShortestSeparator(
user_comparator_->Compare(user_start, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*start, tmp) < 0);
assert(this->Compare(tmp, limit) < 0);
start->swap(tmp);
@ -92,15 +89,14 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
user_comparator_->Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
const char* InternalFilterPolicy::Name() const {
return user_policy_->Name();
}
const char* InternalFilterPolicy::Name() const { return user_policy_->Name(); }
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
std::string* dst) const {
@ -130,7 +126,7 @@ LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
start_ = dst;
dst = EncodeVarint32(dst, usize + 8);
kstart_ = dst;
memcpy(dst, user_key.data(), usize);
std::memcpy(dst, user_key.data(), usize);
dst += usize;
EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
dst += 8;

View File

@ -5,7 +5,10 @@
#ifndef STORAGE_LEVELDB_DB_DBFORMAT_H_
#define STORAGE_LEVELDB_DB_DBFORMAT_H_
#include <stdio.h>
#include <cstddef>
#include <cstdint>
#include <string>
#include "leveldb/comparator.h"
#include "leveldb/db.h"
#include "leveldb/filter_policy.h"
@ -48,10 +51,7 @@ class InternalKey;
// Value types encoded as the last component of internal keys.
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
// data structures.
enum ValueType {
kTypeDeletion = 0x0,
kTypeValue = 0x1
};
enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1 };
// kValueTypeForSeek defines the ValueType that should be passed when
// constructing a ParsedInternalKey object for seeking to a particular
// sequence number (since we sort sequence numbers in decreasing order
@ -64,17 +64,16 @@ typedef uint64_t SequenceNumber;
// We leave eight bits empty at the bottom so a type and sequence#
// can be packed together into 64-bits.
static const SequenceNumber kMaxSequenceNumber =
((0x1ull << 56) - 1);
static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
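// Illustrative sketch, not part of this change: the 56-bit sequence /
// 8-bit type packing described above. dbformat.cc's PackSequenceAndType()
// is the real helper; this hypothetical stand-in only shows the layout.
inline uint64_t PackSequenceAndTypeSketch(SequenceNumber seq, ValueType t) {
  assert(seq <= kMaxSequenceNumber);
  // Low byte holds the ValueType, the upper 56 bits hold the sequence number.
  return (seq << 8) | static_cast<uint64_t>(t);
}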
struct ParsedInternalKey {
Slice user_key;
SequenceNumber sequence;
ValueType type;
ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
ParsedInternalKey() {} // Intentionally left uninitialized (for speed)
ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
: user_key(u), sequence(seq), type(t) { }
: user_key(u), sequence(seq), type(t) {}
std::string DebugString() const;
};
@ -84,15 +83,13 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
}
// Append the serialization of "key" to *result.
extern void AppendInternalKey(std::string* result,
const ParsedInternalKey& key);
void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
// Attempt to parse an internal key from "internal_key". On success,
// stores the parsed data in "*result", and returns true.
//
// On error, returns false, leaves "*result" in an undefined state.
extern bool ParseInternalKey(const Slice& internal_key,
ParsedInternalKey* result);
bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result);
// Returns the user key portion of an internal key.
inline Slice ExtractUserKey(const Slice& internal_key) {
@ -100,27 +97,19 @@ inline Slice ExtractUserKey(const Slice& internal_key) {
return Slice(internal_key.data(), internal_key.size() - 8);
}
inline ValueType ExtractValueType(const Slice& internal_key) {
assert(internal_key.size() >= 8);
const size_t n = internal_key.size();
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
unsigned char c = num & 0xff;
return static_cast<ValueType>(c);
}
// A comparator for internal keys that uses a specified comparator for
// the user key portion and breaks ties by decreasing sequence number.
class InternalKeyComparator : public Comparator {
private:
const Comparator* user_comparator_;
public:
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
virtual const char* Name() const;
virtual int Compare(const Slice& a, const Slice& b) const;
virtual void FindShortestSeparator(
std::string* start,
const Slice& limit) const;
virtual void FindShortSuccessor(std::string* key) const;
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
const char* Name() const override;
int Compare(const Slice& a, const Slice& b) const override;
void FindShortestSeparator(std::string* start,
const Slice& limit) const override;
void FindShortSuccessor(std::string* key) const override;
const Comparator* user_comparator() const { return user_comparator_; }
@ -131,11 +120,12 @@ class InternalKeyComparator : public Comparator {
class InternalFilterPolicy : public FilterPolicy {
private:
const FilterPolicy* const user_policy_;
public:
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
virtual const char* Name() const;
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) {}
const char* Name() const override;
void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
bool KeyMayMatch(const Slice& key, const Slice& filter) const override;
};
// Modules in this directory should keep internal keys wrapped inside
@ -144,13 +134,18 @@ class InternalFilterPolicy : public FilterPolicy {
class InternalKey {
private:
std::string rep_;
public:
InternalKey() { } // Leave rep_ as empty to indicate it is invalid
InternalKey() {} // Leave rep_ as empty to indicate it is invalid
InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
}
void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
bool DecodeFrom(const Slice& s) {
rep_.assign(s.data(), s.size());
return !rep_.empty();
}
Slice Encode() const {
assert(!rep_.empty());
return rep_;
@ -168,8 +163,8 @@ class InternalKey {
std::string DebugString() const;
};
inline int InternalKeyComparator::Compare(
const InternalKey& a, const InternalKey& b) const {
inline int InternalKeyComparator::Compare(const InternalKey& a,
const InternalKey& b) const {
return Compare(a.Encode(), b.Encode());
}
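// Illustrative sketch, not part of this change: with equal user keys, ties
// break by decreasing sequence number, so the newer entry sorts first.
// BytewiseComparator() is leveldb's default user-key comparator.
inline void InternalKeyOrderingSketch() {
  InternalKeyComparator cmp(BytewiseComparator());
  InternalKey newer("foo", /*s=*/100, kTypeValue);
  InternalKey older("foo", /*s=*/99, kTypeValue);
  assert(cmp.Compare(newer, older) < 0);  // sequence 100 orders before sequence 99
}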
@ -178,11 +173,11 @@ inline bool ParseInternalKey(const Slice& internal_key,
const size_t n = internal_key.size();
if (n < 8) return false;
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
unsigned char c = num & 0xff;
uint8_t c = num & 0xff;
result->sequence = num >> 8;
result->type = static_cast<ValueType>(c);
result->user_key = Slice(internal_key.data(), n - 8);
return (c <= static_cast<unsigned char>(kTypeValue));
return (c <= static_cast<uint8_t>(kTypeValue));
}
// A helper class useful for DBImpl::Get()
@ -192,6 +187,9 @@ class LookupKey {
// the specified sequence number.
LookupKey(const Slice& user_key, SequenceNumber sequence);
LookupKey(const LookupKey&) = delete;
LookupKey& operator=(const LookupKey&) = delete;
~LookupKey();
// Return a key suitable for lookup in a MemTable.
@ -214,11 +212,7 @@ class LookupKey {
const char* start_;
const char* kstart_;
const char* end_;
char space_[200]; // Avoid allocation for short keys
// No copying allowed
LookupKey(const LookupKey&);
void operator=(const LookupKey&);
char space_[200]; // Avoid allocation for short keys
};
inline LookupKey::~LookupKey() {

View File

@ -3,13 +3,13 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/dbformat.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "util/testharness.h"
namespace leveldb {
static std::string IKey(const std::string& user_key,
uint64_t seq,
static std::string IKey(const std::string& user_key, uint64_t seq,
ValueType vt) {
std::string encoded;
AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt));
@ -28,9 +28,7 @@ static std::string ShortSuccessor(const std::string& s) {
return result;
}
static void TestKey(const std::string& key,
uint64_t seq,
ValueType vt) {
static void TestKey(const std::string& key, uint64_t seq, ValueType vt) {
std::string encoded = IKey(key, seq, vt);
Slice in(encoded);
@ -44,16 +42,20 @@ static void TestKey(const std::string& key,
ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded));
}
class FormatTest { };
TEST(FormatTest, InternalKey_EncodeDecode) {
const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" };
const uint64_t seq[] = {
1, 2, 3,
(1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1,
(1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1,
(1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1
};
const char* keys[] = {"", "k", "hello", "longggggggggggggggggggggg"};
const uint64_t seq[] = {1,
2,
3,
(1ull << 8) - 1,
1ull << 8,
(1ull << 8) + 1,
(1ull << 16) - 1,
1ull << 16,
(1ull << 16) + 1,
(1ull << 32) - 1,
1ull << 32,
(1ull << 32) + 1};
for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) {
TestKey(keys[k], seq[s], kTypeValue);
@ -62,40 +64,44 @@ TEST(FormatTest, InternalKey_EncodeDecode) {
}
}
TEST(FormatTest, InternalKey_DecodeFromEmpty) {
InternalKey internal_key;
ASSERT_TRUE(!internal_key.DecodeFrom(""));
}
TEST(FormatTest, InternalKeyShortSeparator) {
// When user keys are same
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("foo", 99, kTypeValue)));
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("foo", 101, kTypeValue)));
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("foo", 100, kTypeValue)));
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("foo", 100, kTypeDeletion)));
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion)));
// When user keys are misordered
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("bar", 99, kTypeValue)));
Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue)));
// When user keys are different, but correctly ordered
ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("foo", 100, kTypeValue),
IKey("hello", 200, kTypeValue)));
ASSERT_EQ(
IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, kTypeValue)));
// When start user key is prefix of limit user key
ASSERT_EQ(IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue),
IKey("foobar", 200, kTypeValue)));
ASSERT_EQ(
IKey("foo", 100, kTypeValue),
Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue)));
// When limit user key is prefix of start user key
ASSERT_EQ(IKey("foobar", 100, kTypeValue),
Shorten(IKey("foobar", 100, kTypeValue),
IKey("foo", 200, kTypeValue)));
ASSERT_EQ(
IKey("foobar", 100, kTypeValue),
Shorten(IKey("foobar", 100, kTypeValue), IKey("foo", 200, kTypeValue)));
}
TEST(FormatTest, InternalKeyShortestSuccessor) {
@ -105,8 +111,18 @@ TEST(FormatTest, InternalKeyShortestSuccessor) {
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
}
} // namespace leveldb
TEST(FormatTest, ParsedInternalKeyDebugString) {
ParsedInternalKey key("The \"key\" in 'single quotes'", 42, kTypeValue);
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
ASSERT_EQ("'The \"key\" in 'single quotes'' @ 42 : 1", key.DebugString());
}
TEST(FormatTest, InternalKeyDebugString) {
InternalKey key("The \"key\" in 'single quotes'", 42, kTypeValue);
ASSERT_EQ("'The \"key\" in 'single quotes'' @ 42 : 1", key.DebugString());
InternalKey invalid_key;
ASSERT_EQ("(bad)", invalid_key.DebugString());
}
} // namespace leveldb

View File

@ -2,7 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdio.h>
#include "leveldb/dumpfile.h"
#include <cstdio>
#include "db/dbformat.h"
#include "db/filename.h"
#include "db/log_reader.h"
@ -35,8 +38,7 @@ bool GuessType(const std::string& fname, FileType* type) {
// Notified when log reader encounters corruption.
class CorruptionReporter : public log::Reader::Reporter {
public:
WritableFile* dst_;
virtual void Corruption(size_t bytes, const Status& status) {
void Corruption(size_t bytes, const Status& status) override {
std::string r = "corruption: ";
AppendNumberTo(&r, bytes);
r += " bytes; ";
@ -44,6 +46,8 @@ class CorruptionReporter : public log::Reader::Reporter {
r.push_back('\n');
dst_->Append(r);
}
WritableFile* dst_;
};
// Print contents of a log file. (*func)() is called on every record.
@ -70,8 +74,7 @@ Status PrintLogContents(Env* env, const std::string& fname,
// Called on every item found in a WriteBatch.
class WriteBatchItemPrinter : public WriteBatch::Handler {
public:
WritableFile* dst_;
virtual void Put(const Slice& key, const Slice& value) {
void Put(const Slice& key, const Slice& value) override {
std::string r = " put '";
AppendEscapedStringTo(&r, key);
r += "' '";
@ -79,14 +82,15 @@ class WriteBatchItemPrinter : public WriteBatch::Handler {
r += "'\n";
dst_->Append(r);
}
virtual void Delete(const Slice& key) {
void Delete(const Slice& key) override {
std::string r = " del '";
AppendEscapedStringTo(&r, key);
r += "'\n";
dst_->Append(r);
}
};
WritableFile* dst_;
};
// Called on every log record (each one of which is a WriteBatch)
// found in a kLogFile.
@ -142,8 +146,8 @@ Status DumpDescriptor(Env* env, const std::string& fname, WritableFile* dst) {
Status DumpTable(Env* env, const std::string& fname, WritableFile* dst) {
uint64_t file_size;
RandomAccessFile* file = NULL;
Table* table = NULL;
RandomAccessFile* file = nullptr;
Table* table = nullptr;
Status s = env->GetFileSize(fname, &file_size);
if (s.ok()) {
s = env->NewRandomAccessFile(fname, &file);
@ -213,9 +217,12 @@ Status DumpFile(Env* env, const std::string& fname, WritableFile* dst) {
return Status::InvalidArgument(fname + ": unknown file type");
}
switch (ftype) {
case kLogFile: return DumpLog(env, fname, dst);
case kDescriptorFile: return DumpDescriptor(env, fname, dst);
case kTableFile: return DumpTable(env, fname, dst);
case kLogFile:
return DumpLog(env, fname, dst);
case kDescriptorFile:
return DumpDescriptor(env, fname, dst);
case kTableFile:
return DumpTable(env, fname, dst);
default:
break;
}
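// Illustrative usage sketch, not part of this change: driving the dumper
// above from a small tool. The paths below are hypothetical and error
// handling is kept minimal.
#include "leveldb/dumpfile.h"
#include "leveldb/env.h"
#include "leveldb/status.h"

int DumpOneFile() {
  leveldb::Env* env = leveldb::Env::Default();
  leveldb::WritableFile* out = nullptr;
  leveldb::Status s = env->NewWritableFile("/tmp/000005.ldb.txt", &out);
  if (s.ok()) {
    // DumpFile picks DumpLog/DumpDescriptor/DumpTable from the file name.
    s = leveldb::DumpFile(env, "/tmp/db/000005.ldb", out);
    delete out;
  }
  return s.ok() ? 0 : 1;
}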

View File

@ -6,21 +6,23 @@
// the last "sync". It then checks for data loss errors by purposely dropping
// file data (or entire files) not protected by a "sync".
#include "leveldb/db.h"
#include <map>
#include <set>
#include "gtest/gtest.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/log_format.h"
#include "db/version_set.h"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/table.h"
#include "leveldb/write_batch.h"
#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
@ -34,7 +36,7 @@ class FaultInjectionTestEnv;
namespace {
// Assume a filename, and not a directory name like "/foo/bar/"
static std::string GetDirName(const std::string filename) {
static std::string GetDirName(const std::string& filename) {
size_t found = filename.find_last_of("/\\");
if (found == std::string::npos) {
return "";
@ -54,8 +56,7 @@ Status Truncate(const std::string& filename, uint64_t length) {
SequentialFile* orig_file;
Status s = env->NewSequentialFile(filename, &orig_file);
if (!s.ok())
return s;
if (!s.ok()) return s;
char* scratch = new char[length];
leveldb::Slice result;
@ -71,7 +72,7 @@ Status Truncate(const std::string& filename, uint64_t length) {
if (s.ok()) {
s = env->RenameFile(tmp_name, filename);
} else {
env->DeleteFile(tmp_name);
env->RemoveFile(tmp_name);
}
}
}
@ -83,15 +84,15 @@ Status Truncate(const std::string& filename, uint64_t length) {
struct FileState {
std::string filename_;
ssize_t pos_;
ssize_t pos_at_last_sync_;
ssize_t pos_at_last_flush_;
int64_t pos_;
int64_t pos_at_last_sync_;
int64_t pos_at_last_flush_;
FileState(const std::string& filename)
: filename_(filename),
pos_(-1),
pos_at_last_sync_(-1),
pos_at_last_flush_(-1) { }
pos_at_last_flush_(-1) {}
FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
@ -106,14 +107,13 @@ struct FileState {
// is written to or sync'ed.
class TestWritableFile : public WritableFile {
public:
TestWritableFile(const FileState& state,
WritableFile* f,
TestWritableFile(const FileState& state, WritableFile* f,
FaultInjectionTestEnv* env);
virtual ~TestWritableFile();
virtual Status Append(const Slice& data);
virtual Status Close();
virtual Status Flush();
virtual Status Sync();
~TestWritableFile() override;
Status Append(const Slice& data) override;
Status Close() override;
Status Flush() override;
Status Sync() override;
private:
FileState state_;
@ -126,18 +126,19 @@ class TestWritableFile : public WritableFile {
class FaultInjectionTestEnv : public EnvWrapper {
public:
FaultInjectionTestEnv() : EnvWrapper(Env::Default()), filesystem_active_(true) {}
virtual ~FaultInjectionTestEnv() { }
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result);
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result);
virtual Status DeleteFile(const std::string& f);
virtual Status RenameFile(const std::string& s, const std::string& t);
FaultInjectionTestEnv()
: EnvWrapper(Env::Default()), filesystem_active_(true) {}
~FaultInjectionTestEnv() override = default;
Status NewWritableFile(const std::string& fname,
WritableFile** result) override;
Status NewAppendableFile(const std::string& fname,
WritableFile** result) override;
Status RemoveFile(const std::string& f) override;
Status RenameFile(const std::string& s, const std::string& t) override;
void WritableFileClosed(const FileState& state);
Status DropUnsyncedFileData();
Status DeleteFilesCreatedAfterLastDirSync();
Status RemoveFilesCreatedAfterLastDirSync();
void DirWasSynced();
bool IsFileCreatedSinceLastDirSync(const std::string& filename);
void ResetState();
@ -146,24 +147,26 @@ class FaultInjectionTestEnv : public EnvWrapper {
// system reset. Setting to inactive will freeze our saved filesystem state so
// that it will stop being recorded. It can then be reset back to the state at
// the time of the reset.
bool IsFilesystemActive() const { return filesystem_active_; }
void SetFilesystemActive(bool active) { filesystem_active_ = active; }
bool IsFilesystemActive() LOCKS_EXCLUDED(mutex_) {
MutexLock l(&mutex_);
return filesystem_active_;
}
void SetFilesystemActive(bool active) LOCKS_EXCLUDED(mutex_) {
MutexLock l(&mutex_);
filesystem_active_ = active;
}
private:
port::Mutex mutex_;
std::map<std::string, FileState> db_file_state_;
std::set<std::string> new_files_since_last_dir_sync_;
bool filesystem_active_; // Record flushes, syncs, writes
std::map<std::string, FileState> db_file_state_ GUARDED_BY(mutex_);
std::set<std::string> new_files_since_last_dir_sync_ GUARDED_BY(mutex_);
bool filesystem_active_ GUARDED_BY(mutex_); // Record flushes, syncs, writes
};
TestWritableFile::TestWritableFile(const FileState& state,
WritableFile* f,
TestWritableFile::TestWritableFile(const FileState& state, WritableFile* f,
FaultInjectionTestEnv* env)
: state_(state),
target_(f),
writable_file_opened_(true),
env_(env) {
assert(f != NULL);
: state_(state), target_(f), writable_file_opened_(true), env_(env) {
assert(f != nullptr);
}
TestWritableFile::~TestWritableFile() {
@ -265,10 +268,11 @@ Status FaultInjectionTestEnv::NewAppendableFile(const std::string& fname,
Status FaultInjectionTestEnv::DropUnsyncedFileData() {
Status s;
MutexLock l(&mutex_);
for (std::map<std::string, FileState>::const_iterator it =
db_file_state_.begin();
s.ok() && it != db_file_state_.end(); ++it) {
const FileState& state = it->second;
for (const auto& kvp : db_file_state_) {
if (!s.ok()) {
break;
}
const FileState& state = kvp.second;
if (!state.IsFullySynced()) {
s = state.DropUnsyncedData();
}
@ -294,9 +298,9 @@ void FaultInjectionTestEnv::UntrackFile(const std::string& f) {
new_files_since_last_dir_sync_.erase(f);
}
Status FaultInjectionTestEnv::DeleteFile(const std::string& f) {
Status s = EnvWrapper::DeleteFile(f);
ASSERT_OK(s);
Status FaultInjectionTestEnv::RemoveFile(const std::string& f) {
Status s = EnvWrapper::RemoveFile(f);
EXPECT_LEVELDB_OK(s);
if (s.ok()) {
UntrackFile(f);
}
@ -328,22 +332,23 @@ void FaultInjectionTestEnv::ResetState() {
// Since we are not destroying the database, the existing files
// should keep their recorded synced/flushed state. Therefore
// we do not reset db_file_state_ and new_files_since_last_dir_sync_.
MutexLock l(&mutex_);
SetFilesystemActive(true);
}
Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() {
// Because DeleteFile access this container make a copy to avoid deadlock
Status FaultInjectionTestEnv::RemoveFilesCreatedAfterLastDirSync() {
// Because RemoveFile access this container make a copy to avoid deadlock
mutex_.Lock();
std::set<std::string> new_files(new_files_since_last_dir_sync_.begin(),
new_files_since_last_dir_sync_.end());
mutex_.Unlock();
Status s;
std::set<std::string>::const_iterator it;
for (it = new_files.begin(); s.ok() && it != new_files.end(); ++it) {
s = DeleteFile(*it);
Status status;
for (const auto& new_file : new_files) {
Status remove_status = RemoveFile(new_file);
if (!remove_status.ok() && status.ok()) {
status = std::move(remove_status);
}
}
return s;
return status;
}
void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
@ -352,11 +357,11 @@ void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
}
Status FileState::DropUnsyncedData() const {
ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
int64_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
return Truncate(filename_, sync_pos);
}
class FaultInjectionTest {
class FaultInjectionTest : public testing::Test {
public:
enum ExpectedVerifResult { VAL_EXPECT_NO_ERROR, VAL_EXPECT_ERROR };
enum ResetMethod { RESET_DROP_UNSYNCED_DATA, RESET_DELETE_UNSYNCED_FILES };
@ -370,8 +375,8 @@ class FaultInjectionTest {
FaultInjectionTest()
: env_(new FaultInjectionTestEnv),
tiny_cache_(NewLRUCache(100)),
db_(NULL) {
dbname_ = test::TmpDir() + "/fault_test";
db_(nullptr) {
dbname_ = testing::TempDir() + "fault_test";
DestroyDB(dbname_, Options()); // Destroy any db from earlier run
options_.reuse_logs = true;
options_.env = env_;
@ -387,9 +392,7 @@ class FaultInjectionTest {
delete env_;
}
void ReuseLogs(bool reuse) {
options_.reuse_logs = reuse;
}
void ReuseLogs(bool reuse) { options_.reuse_logs = reuse; }
void Build(int start_idx, int num_vals) {
std::string key_space, value_space;
@ -399,7 +402,7 @@ class FaultInjectionTest {
batch.Clear();
batch.Put(key, Value(i, &value_space));
WriteOptions options;
ASSERT_OK(db_->Write(options, &batch));
ASSERT_LEVELDB_OK(db_->Write(options, &batch));
}
}
@ -421,10 +424,10 @@ class FaultInjectionTest {
s = ReadValue(i, &val);
if (expected == VAL_EXPECT_NO_ERROR) {
if (s.ok()) {
ASSERT_EQ(value_space, val);
EXPECT_EQ(value_space, val);
}
} else if (s.ok()) {
fprintf(stderr, "Expected an error at %d, but was OK\n", i);
std::fprintf(stderr, "Expected an error at %d, but was OK\n", i);
s = Status::IOError(dbname_, "Expected value error:");
} else {
s = Status::OK(); // An expected error
@ -436,7 +439,7 @@ class FaultInjectionTest {
// Return the ith key
Slice Key(int i, std::string* storage) const {
char buf[100];
snprintf(buf, sizeof(buf), "%016d", i);
std::snprintf(buf, sizeof(buf), "%016d", i);
storage->assign(buf, strlen(buf));
return Slice(*storage);
}
@ -449,21 +452,20 @@ class FaultInjectionTest {
Status OpenDB() {
delete db_;
db_ = NULL;
db_ = nullptr;
env_->ResetState();
return DB::Open(options_, dbname_, &db_);
}
void CloseDB() {
delete db_;
db_ = NULL;
db_ = nullptr;
}
void DeleteAllData() {
Iterator* iter = db_->NewIterator(ReadOptions());
WriteOptions options;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
ASSERT_LEVELDB_OK(db_->Delete(WriteOptions(), iter->key()));
}
delete iter;
@ -472,10 +474,10 @@ class FaultInjectionTest {
void ResetDBState(ResetMethod reset_method) {
switch (reset_method) {
case RESET_DROP_UNSYNCED_DATA:
ASSERT_OK(env_->DropUnsyncedFileData());
ASSERT_LEVELDB_OK(env_->DropUnsyncedFileData());
break;
case RESET_DELETE_UNSYNCED_FILES:
ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
ASSERT_LEVELDB_OK(env_->RemoveFilesCreatedAfterLastDirSync());
break;
default:
assert(false);
@ -485,40 +487,39 @@ class FaultInjectionTest {
void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
DeleteAllData();
Build(0, num_pre_sync);
db_->CompactRange(NULL, NULL);
db_->CompactRange(nullptr, nullptr);
Build(num_pre_sync, num_post_sync);
}
void PartialCompactTestReopenWithFault(ResetMethod reset_method,
int num_pre_sync,
int num_post_sync) {
int num_pre_sync, int num_post_sync) {
env_->SetFilesystemActive(false);
CloseDB();
ResetDBState(reset_method);
ASSERT_OK(OpenDB());
ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::VAL_EXPECT_ERROR));
ASSERT_LEVELDB_OK(OpenDB());
ASSERT_LEVELDB_OK(
Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
ASSERT_LEVELDB_OK(Verify(num_pre_sync, num_post_sync,
FaultInjectionTest::VAL_EXPECT_ERROR));
}
void NoWriteTestPreFault() {
}
void NoWriteTestPreFault() {}
void NoWriteTestReopenWithFault(ResetMethod reset_method) {
CloseDB();
ResetDBState(reset_method);
ASSERT_OK(OpenDB());
ASSERT_LEVELDB_OK(OpenDB());
}
void DoTest() {
Random rnd(0);
ASSERT_OK(OpenDB());
ASSERT_LEVELDB_OK(OpenDB());
for (size_t idx = 0; idx < kNumIterations; idx++) {
int num_pre_sync = rnd.Uniform(kMaxNumValues);
int num_post_sync = rnd.Uniform(kMaxNumValues);
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA,
num_pre_sync,
PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA, num_pre_sync,
num_post_sync);
NoWriteTestPreFault();
@ -528,8 +529,7 @@ class FaultInjectionTest {
// No new files created so we expect all values since no files will be
// dropped.
PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
num_pre_sync + num_post_sync,
0);
num_pre_sync + num_post_sync, 0);
NoWriteTestPreFault();
NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
@ -537,18 +537,14 @@ class FaultInjectionTest {
}
};
TEST(FaultInjectionTest, FaultTestNoLogReuse) {
TEST_F(FaultInjectionTest, FaultTestNoLogReuse) {
ReuseLogs(false);
DoTest();
}
TEST(FaultInjectionTest, FaultTestWithLogReuse) {
TEST_F(FaultInjectionTest, FaultTestWithLogReuse) {
ReuseLogs(true);
DoTest();
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

View File

@ -2,9 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <ctype.h>
#include <stdio.h>
#include "db/filename.h"
#include <cassert>
#include <cstdio>
#include "db/dbformat.h"
#include "leveldb/env.h"
#include "util/logging.h"
@ -12,38 +14,37 @@
namespace leveldb {
// A utility routine: write "data" to the named file and Sync() it.
extern Status WriteStringToFileSync(Env* env, const Slice& data,
const std::string& fname);
Status WriteStringToFileSync(Env* env, const Slice& data,
const std::string& fname);
static std::string MakeFileName(const std::string& name, uint64_t number,
static std::string MakeFileName(const std::string& dbname, uint64_t number,
const char* suffix) {
char buf[100];
snprintf(buf, sizeof(buf), "/%06llu.%s",
static_cast<unsigned long long>(number),
suffix);
return name + buf;
std::snprintf(buf, sizeof(buf), "/%06llu.%s",
static_cast<unsigned long long>(number), suffix);
return dbname + buf;
}
std::string LogFileName(const std::string& name, uint64_t number) {
std::string LogFileName(const std::string& dbname, uint64_t number) {
assert(number > 0);
return MakeFileName(name, number, "log");
return MakeFileName(dbname, number, "log");
}
std::string TableFileName(const std::string& name, uint64_t number) {
std::string TableFileName(const std::string& dbname, uint64_t number) {
assert(number > 0);
return MakeFileName(name, number, "ldb");
return MakeFileName(dbname, number, "ldb");
}
std::string SSTTableFileName(const std::string& name, uint64_t number) {
std::string SSTTableFileName(const std::string& dbname, uint64_t number) {
assert(number > 0);
return MakeFileName(name, number, "sst");
return MakeFileName(dbname, number, "sst");
}
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
assert(number > 0);
char buf[100];
snprintf(buf, sizeof(buf), "/MANIFEST-%06llu",
static_cast<unsigned long long>(number));
std::snprintf(buf, sizeof(buf), "/MANIFEST-%06llu",
static_cast<unsigned long long>(number));
return dbname + buf;
}
@ -51,9 +52,7 @@ std::string CurrentFileName(const std::string& dbname) {
return dbname + "/CURRENT";
}
std::string LockFileName(const std::string& dbname) {
return dbname + "/LOCK";
}
std::string LockFileName(const std::string& dbname) { return dbname + "/LOCK"; }
std::string TempFileName(const std::string& dbname, uint64_t number) {
assert(number > 0);
@ -69,7 +68,6 @@ std::string OldInfoLogFileName(const std::string& dbname) {
return dbname + "/LOG.old";
}
// Owned filenames have the form:
// dbname/CURRENT
// dbname/LOCK
@ -77,10 +75,9 @@ std::string OldInfoLogFileName(const std::string& dbname) {
// dbname/LOG.old
// dbname/MANIFEST-[0-9]+
// dbname/[0-9]+.(log|sst|ldb)
bool ParseFileName(const std::string& fname,
uint64_t* number,
bool ParseFileName(const std::string& filename, uint64_t* number,
FileType* type) {
Slice rest(fname);
Slice rest(filename);
if (rest == "CURRENT") {
*number = 0;
*type = kCurrentFile;
@ -136,7 +133,7 @@ Status SetCurrentFile(Env* env, const std::string& dbname,
s = env->RenameFile(tmp, CurrentFileName(dbname));
}
if (!s.ok()) {
env->DeleteFile(tmp);
env->RemoveFile(tmp);
}
return s;
}
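Taken together, the helpers above fix a simple naming scheme: the builders prepend "dbname/" plus a zero-padded number and suffix, and ParseFileName accepts the bare basename (the Construction test below strips the "foo/" prefix with fname.c_str() + 4 before parsing). A minimal round-trip sketch, assuming only db/filename.h; the database name "demo" and the helper FileNameRoundTrip are illustrative, not part of the library:

#include <cassert>
#include <cstdint>
#include <string>

#include "db/filename.h"

void FileNameRoundTrip() {
  // LogFileName("demo", 5) yields "demo/000005.log" via MakeFileName.
  std::string name = leveldb::LogFileName("demo", 5);
  uint64_t number;
  leveldb::FileType type;
  // ParseFileName expects the name without the "demo/" prefix (5 characters).
  bool ok = leveldb::ParseFileName(name.substr(5), &number, &type);
  assert(ok && number == 5 && type == leveldb::kLogFile);
}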

db/filename.h (View File)

@ -7,8 +7,9 @@
#ifndef STORAGE_LEVELDB_DB_FILENAME_H_
#define STORAGE_LEVELDB_DB_FILENAME_H_
#include <stdint.h>
#include <cstdint>
#include <string>
#include "leveldb/slice.h"
#include "leveldb/status.h"
#include "port/port.h"
@ -30,55 +31,52 @@ enum FileType {
// Return the name of the log file with the specified number
// in the db named by "dbname". The result will be prefixed with
// "dbname".
extern std::string LogFileName(const std::string& dbname, uint64_t number);
std::string LogFileName(const std::string& dbname, uint64_t number);
// Return the name of the sstable with the specified number
// in the db named by "dbname". The result will be prefixed with
// "dbname".
extern std::string TableFileName(const std::string& dbname, uint64_t number);
std::string TableFileName(const std::string& dbname, uint64_t number);
// Return the legacy file name for an sstable with the specified number
// in the db named by "dbname". The result will be prefixed with
// "dbname".
extern std::string SSTTableFileName(const std::string& dbname, uint64_t number);
std::string SSTTableFileName(const std::string& dbname, uint64_t number);
// Return the name of the descriptor file for the db named by
// "dbname" and the specified incarnation number. The result will be
// prefixed with "dbname".
extern std::string DescriptorFileName(const std::string& dbname,
uint64_t number);
std::string DescriptorFileName(const std::string& dbname, uint64_t number);
// Return the name of the current file. This file contains the name
// of the current manifest file. The result will be prefixed with
// "dbname".
extern std::string CurrentFileName(const std::string& dbname);
std::string CurrentFileName(const std::string& dbname);
// Return the name of the lock file for the db named by
// "dbname". The result will be prefixed with "dbname".
extern std::string LockFileName(const std::string& dbname);
std::string LockFileName(const std::string& dbname);
// Return the name of a temporary file owned by the db named "dbname".
// The result will be prefixed with "dbname".
extern std::string TempFileName(const std::string& dbname, uint64_t number);
std::string TempFileName(const std::string& dbname, uint64_t number);
// Return the name of the info log file for "dbname".
extern std::string InfoLogFileName(const std::string& dbname);
std::string InfoLogFileName(const std::string& dbname);
// Return the name of the old info log file for "dbname".
extern std::string OldInfoLogFileName(const std::string& dbname);
std::string OldInfoLogFileName(const std::string& dbname);
// If filename is a leveldb file, store the type of the file in *type.
// The number encoded in the filename is stored in *number. If the
// filename was successfully parsed, returns true. Else return false.
extern bool ParseFileName(const std::string& filename,
uint64_t* number,
FileType* type);
bool ParseFileName(const std::string& filename, uint64_t* number,
FileType* type);
// Make the CURRENT file point to the descriptor file with the
// specified number.
extern Status SetCurrentFile(Env* env, const std::string& dbname,
uint64_t descriptor_number);
Status SetCurrentFile(Env* env, const std::string& dbname,
uint64_t descriptor_number);
} // namespace leveldb

db/filename_test.cc (View File)

@ -4,15 +4,13 @@
#include "db/filename.h"
#include "gtest/gtest.h"
#include "db/dbformat.h"
#include "port/port.h"
#include "util/logging.h"
#include "util/testharness.h"
namespace leveldb {
class FileNameTest { };
TEST(FileNameTest, Parse) {
Slice db;
FileType type;
@ -24,17 +22,17 @@ TEST(FileNameTest, Parse) {
uint64_t number;
FileType type;
} cases[] = {
{ "100.log", 100, kLogFile },
{ "0.log", 0, kLogFile },
{ "0.sst", 0, kTableFile },
{ "0.ldb", 0, kTableFile },
{ "CURRENT", 0, kCurrentFile },
{ "LOCK", 0, kDBLockFile },
{ "MANIFEST-2", 2, kDescriptorFile },
{ "MANIFEST-7", 7, kDescriptorFile },
{ "LOG", 0, kInfoLogFile },
{ "LOG.old", 0, kInfoLogFile },
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
{"100.log", 100, kLogFile},
{"0.log", 0, kLogFile},
{"0.sst", 0, kTableFile},
{"0.ldb", 0, kTableFile},
{"CURRENT", 0, kCurrentFile},
{"LOCK", 0, kDBLockFile},
{"MANIFEST-2", 2, kDescriptorFile},
{"MANIFEST-7", 7, kDescriptorFile},
{"LOG", 0, kInfoLogFile},
{"LOG.old", 0, kInfoLogFile},
{"18446744073709551615.log", 18446744073709551615ull, kLogFile},
};
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
std::string f = cases[i].fname;
@ -44,30 +42,28 @@ TEST(FileNameTest, Parse) {
}
// Errors
static const char* errors[] = {
"",
"foo",
"foo-dx-100.log",
".log",
"",
"manifest",
"CURREN",
"CURRENTX",
"MANIFES",
"MANIFEST",
"MANIFEST-",
"XMANIFEST-3",
"MANIFEST-3x",
"LOC",
"LOCKx",
"LO",
"LOGx",
"18446744073709551616.log",
"184467440737095516150.log",
"100",
"100.",
"100.lop"
};
static const char* errors[] = {"",
"foo",
"foo-dx-100.log",
".log",
"",
"manifest",
"CURREN",
"CURRENTX",
"MANIFES",
"MANIFEST",
"MANIFEST-",
"XMANIFEST-3",
"MANIFEST-3x",
"LOC",
"LOCKx",
"LO",
"LOGx",
"18446744073709551616.log",
"184467440737095516150.log",
"100",
"100.",
"100.lop"};
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
std::string f = errors[i];
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
@ -114,10 +110,18 @@ TEST(FileNameTest, Construction) {
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
ASSERT_EQ(999, number);
ASSERT_EQ(kTempFile, type);
fname = InfoLogFileName("foo");
ASSERT_EQ("foo/", std::string(fname.data(), 4));
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
ASSERT_EQ(0, number);
ASSERT_EQ(kInfoLogFile, type);
fname = OldInfoLogFileName("foo");
ASSERT_EQ("foo/", std::string(fname.data(), 4));
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
ASSERT_EQ(0, number);
ASSERT_EQ(kInfoLogFile, type);
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

db/leveldbutil.cc (View File)

@ -2,7 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdio.h>
#include <cstdio>
#include "leveldb/dumpfile.h"
#include "leveldb/env.h"
#include "leveldb/status.h"
@ -12,13 +13,13 @@ namespace {
class StdoutPrinter : public WritableFile {
public:
virtual Status Append(const Slice& data) {
Status Append(const Slice& data) override {
fwrite(data.data(), 1, data.size(), stdout);
return Status::OK();
}
virtual Status Close() { return Status::OK(); }
virtual Status Flush() { return Status::OK(); }
virtual Status Sync() { return Status::OK(); }
Status Close() override { return Status::OK(); }
Status Flush() override { return Status::OK(); }
Status Sync() override { return Status::OK(); }
};
bool HandleDumpCommand(Env* env, char** files, int num) {
@ -27,7 +28,7 @@ bool HandleDumpCommand(Env* env, char** files, int num) {
for (int i = 0; i < num; i++) {
Status s = DumpFile(env, files[i], &printer);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
std::fprintf(stderr, "%s\n", s.ToString().c_str());
ok = false;
}
}
@ -38,11 +39,10 @@ bool HandleDumpCommand(Env* env, char** files, int num) {
} // namespace leveldb
static void Usage() {
fprintf(
std::fprintf(
stderr,
"Usage: leveldbutil command...\n"
" dump files... -- dump contents of specified files\n"
);
" dump files... -- dump contents of specified files\n");
}
int main(int argc, char** argv) {
@ -54,7 +54,7 @@ int main(int argc, char** argv) {
} else {
std::string command = argv[1];
if (command == "dump") {
ok = leveldb::HandleDumpCommand(env, argv+2, argc-2);
ok = leveldb::HandleDumpCommand(env, argv + 2, argc - 2);
} else {
Usage();
ok = false;

db/log_format.h (View File)

@ -3,7 +3,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Log format information shared by reader and writer.
// See ../doc/log_format.txt for more detail.
// See ../doc/log_format.md for more detail.
#ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_
#define STORAGE_LEVELDB_DB_LOG_FORMAT_H_

db/log_reader.cc (View File)

@ -4,7 +4,8 @@
#include "db/log_reader.h"
#include <stdio.h>
#include <cstdio>
#include "leveldb/env.h"
#include "util/coding.h"
#include "util/crc32c.h"
@ -12,8 +13,7 @@
namespace leveldb {
namespace log {
Reader::Reporter::~Reporter() {
}
Reader::Reporter::~Reporter() = default;
Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
uint64_t initial_offset)
@ -25,20 +25,17 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
eof_(false),
last_record_offset_(0),
end_of_buffer_offset_(0),
initial_offset_(initial_offset) {
}
initial_offset_(initial_offset),
resyncing_(initial_offset > 0) {}
Reader::~Reader() {
delete[] backing_store_;
}
Reader::~Reader() { delete[] backing_store_; }
bool Reader::SkipToInitialBlock() {
size_t offset_in_block = initial_offset_ % kBlockSize;
const size_t offset_in_block = initial_offset_ % kBlockSize;
uint64_t block_start_location = initial_offset_ - offset_in_block;
// Don't search a block if we'd be in the trailer
if (offset_in_block > kBlockSize - 6) {
offset_in_block = 0;
block_start_location += kBlockSize;
}
@ -72,8 +69,25 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
Slice fragment;
while (true) {
uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
const unsigned int record_type = ReadPhysicalRecord(&fragment);
// ReadPhysicalRecord may have only had an empty trailer remaining in its
// internal buffer. Calculate the offset of the next physical record now
// that it has returned, properly accounting for its header size.
uint64_t physical_record_offset =
end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
if (resyncing_) {
if (record_type == kMiddleType) {
continue;
} else if (record_type == kLastType) {
resyncing_ = false;
continue;
} else {
resyncing_ = false;
}
}
switch (record_type) {
case kFullType:
if (in_fragmented_record) {
@ -81,9 +95,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
// it could emit an empty kFirstType record at the tail end
// of a block followed by a kFullType or kFirstType record
// at the beginning of the next block.
if (scratch->empty()) {
in_fragmented_record = false;
} else {
if (!scratch->empty()) {
ReportCorruption(scratch->size(), "partial record without end(1)");
}
}
@ -99,9 +111,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
// it could emit an empty kFirstType record at the tail end
// of a block followed by a kFullType or kFirstType record
// at the beginning of the next block.
if (scratch->empty()) {
in_fragmented_record = false;
} else {
if (!scratch->empty()) {
ReportCorruption(scratch->size(), "partial record without end(2)");
}
}
@ -150,7 +160,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
default: {
char buf[40];
snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
std::snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
ReportCorruption(
(fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
buf);
@ -163,16 +173,14 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
return false;
}
uint64_t Reader::LastRecordOffset() {
return last_record_offset_;
}
uint64_t Reader::LastRecordOffset() { return last_record_offset_; }
void Reader::ReportCorruption(uint64_t bytes, const char* reason) {
ReportDrop(bytes, Status::Corruption(reason));
}
void Reader::ReportDrop(uint64_t bytes, const Status& reason) {
if (reporter_ != NULL &&
if (reporter_ != nullptr &&
end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
reporter_->Corruption(static_cast<size_t>(bytes), reason);
}

db/log_reader.h (View File)

@ -5,7 +5,7 @@
#ifndef STORAGE_LEVELDB_DB_LOG_READER_H_
#define STORAGE_LEVELDB_DB_LOG_READER_H_
#include <stdint.h>
#include <cstdint>
#include "db/log_format.h"
#include "leveldb/slice.h"
@ -24,7 +24,7 @@ class Reader {
public:
virtual ~Reporter();
// Some corruption was detected. "size" is the approximate number
// Some corruption was detected. "bytes" is the approximate number
// of bytes dropped due to the corruption.
virtual void Corruption(size_t bytes, const Status& status) = 0;
};
@ -32,7 +32,7 @@ class Reader {
// Create a reader that will return log records from "*file".
// "*file" must remain live while this Reader is in use.
//
// If "reporter" is non-NULL, it is notified whenever some data is
// If "reporter" is non-null, it is notified whenever some data is
// dropped due to a detected corruption. "*reporter" must remain
// live while this Reader is in use.
//
@ -43,6 +43,9 @@ class Reader {
Reader(SequentialFile* file, Reporter* reporter, bool checksum,
uint64_t initial_offset);
Reader(const Reader&) = delete;
Reader& operator=(const Reader&) = delete;
~Reader();
// Read the next record into *record. Returns true if read
@ -58,21 +61,6 @@ class Reader {
uint64_t LastRecordOffset();
private:
SequentialFile* const file_;
Reporter* const reporter_;
bool const checksum_;
char* const backing_store_;
Slice buffer_;
bool eof_; // Last Read() indicated EOF by returning < kBlockSize
// Offset of the last record returned by ReadRecord.
uint64_t last_record_offset_;
// Offset of the first location past the end of buffer_.
uint64_t end_of_buffer_offset_;
// Offset at which to start looking for the first record to return
uint64_t const initial_offset_;
// Extend record types with the following special values
enum {
kEof = kMaxRecordType + 1,
@ -97,9 +85,25 @@ class Reader {
void ReportCorruption(uint64_t bytes, const char* reason);
void ReportDrop(uint64_t bytes, const Status& reason);
// No copying allowed
Reader(const Reader&);
void operator=(const Reader&);
SequentialFile* const file_;
Reporter* const reporter_;
bool const checksum_;
char* const backing_store_;
Slice buffer_;
bool eof_; // Last Read() indicated EOF by returning < kBlockSize
// Offset of the last record returned by ReadRecord.
uint64_t last_record_offset_;
// Offset of the first location past the end of buffer_.
uint64_t end_of_buffer_offset_;
// Offset at which to start looking for the first record to return
uint64_t const initial_offset_;
// True if we are resynchronizing after a seek (initial_offset_ > 0). In
// particular, a run of kMiddleType and kLastType records can be silently
// skipped in this mode
bool resyncing_;
};
} // namespace log
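For orientation, the typical way this class is consumed is a plain read loop: wrap a SequentialFile in a Reader and call ReadRecord() until it returns false. A minimal sketch, assuming a SequentialFile obtained elsewhere (for example via Env::NewSequentialFile); passing a null reporter simply disables corruption callbacks, and CountLogRecords is an illustrative name, not library API:

#include <cstddef>
#include <string>

#include "db/log_reader.h"
#include "leveldb/env.h"
#include "leveldb/slice.h"

std::size_t CountLogRecords(leveldb::SequentialFile* file) {
  // checksum=true verifies each record's crc32c; a nonzero initial_offset
  // would engage the resyncing_ logic described above.
  leveldb::log::Reader reader(file, /*reporter=*/nullptr, /*checksum=*/true,
                              /*initial_offset=*/0);
  leveldb::Slice record;
  std::string scratch;
  std::size_t count = 0;
  while (reader.ReadRecord(&record, &scratch)) {
    // record is valid only until the next ReadRecord() call or a mutation of
    // scratch; copy it out here if it must outlive this iteration.
    ++count;
  }
  return count;
}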

db/log_test.cc (View File)

@ -2,13 +2,13 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "gtest/gtest.h"
#include "db/log_reader.h"
#include "db/log_writer.h"
#include "leveldb/env.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/random.h"
#include "util/testharness.h"
namespace leveldb {
namespace log {
@ -27,7 +27,7 @@ static std::string BigString(const std::string& partial_string, size_t n) {
// Construct a string from a number
static std::string NumberString(int n) {
char buf[50];
snprintf(buf, sizeof(buf), "%d.", n);
std::snprintf(buf, sizeof(buf), "%d.", n);
return std::string(buf);
}
@ -36,90 +36,17 @@ static std::string RandomSkewedString(int i, Random* rnd) {
return BigString(NumberString(i), rnd->Skewed(17));
}
class LogTest {
private:
class StringDest : public WritableFile {
public:
std::string contents_;
virtual Status Close() { return Status::OK(); }
virtual Status Flush() { return Status::OK(); }
virtual Status Sync() { return Status::OK(); }
virtual Status Append(const Slice& slice) {
contents_.append(slice.data(), slice.size());
return Status::OK();
}
};
class StringSource : public SequentialFile {
public:
Slice contents_;
bool force_error_;
bool returned_partial_;
StringSource() : force_error_(false), returned_partial_(false) { }
virtual Status Read(size_t n, Slice* result, char* scratch) {
ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
if (force_error_) {
force_error_ = false;
returned_partial_ = true;
return Status::Corruption("read error");
}
if (contents_.size() < n) {
n = contents_.size();
returned_partial_ = true;
}
*result = Slice(contents_.data(), n);
contents_.remove_prefix(n);
return Status::OK();
}
virtual Status Skip(uint64_t n) {
if (n > contents_.size()) {
contents_.clear();
return Status::NotFound("in-memory file skipepd past end");
}
contents_.remove_prefix(n);
return Status::OK();
}
};
class ReportCollector : public Reader::Reporter {
public:
size_t dropped_bytes_;
std::string message_;
ReportCollector() : dropped_bytes_(0) { }
virtual void Corruption(size_t bytes, const Status& status) {
dropped_bytes_ += bytes;
message_.append(status.ToString());
}
};
StringDest dest_;
StringSource source_;
ReportCollector report_;
bool reading_;
Writer* writer_;
Reader reader_;
// Record metadata for testing initial offset functionality
static size_t initial_offset_record_sizes_[];
static uint64_t initial_offset_last_record_offsets_[];
class LogTest : public testing::Test {
public:
LogTest() : reading_(false),
writer_(new Writer(&dest_)),
reader_(&source_, &report_, true/*checksum*/,
0/*initial_offset*/) {
}
LogTest()
: reading_(false),
writer_(new Writer(&dest_)),
reader_(new Reader(&source_, &report_, true /*checksum*/,
0 /*initial_offset*/)) {}
~LogTest() {
delete writer_;
delete reader_;
}
void ReopenForAppend() {
@ -132,9 +59,7 @@ class LogTest {
writer_->AddRecord(Slice(msg));
}
size_t WrittenBytes() const {
return dest_.contents_.size();
}
size_t WrittenBytes() const { return dest_.contents_.size(); }
std::string Read() {
if (!reading_) {
@ -143,7 +68,7 @@ class LogTest {
}
std::string scratch;
Slice record;
if (reader_.ReadRecord(&record, &scratch)) {
if (reader_->ReadRecord(&record, &scratch)) {
return record.ToString();
} else {
return "EOF";
@ -164,22 +89,16 @@ class LogTest {
void FixChecksum(int header_offset, int len) {
// Compute crc of type/len/data
uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len);
uint32_t crc = crc32c::Value(&dest_.contents_[header_offset + 6], 1 + len);
crc = crc32c::Mask(crc);
EncodeFixed32(&dest_.contents_[header_offset], crc);
}
void ForceError() {
source_.force_error_ = true;
}
void ForceError() { source_.force_error_ = true; }
size_t DroppedBytes() const {
return report_.dropped_bytes_;
}
size_t DroppedBytes() const { return report_.dropped_bytes_; }
std::string ReportMessage() const {
return report_.message_;
}
std::string ReportMessage() const { return report_.message_; }
// Returns OK iff recorded error message contains "msg"
std::string MatchError(const std::string& msg) const {
@ -191,18 +110,23 @@ class LogTest {
}
void WriteInitialOffsetLog() {
for (int i = 0; i < 4; i++) {
for (int i = 0; i < num_initial_offset_records_; i++) {
std::string record(initial_offset_record_sizes_[i],
static_cast<char>('a' + i));
Write(record);
}
}
void StartReadingAt(uint64_t initial_offset) {
delete reader_;
reader_ = new Reader(&source_, &report_, true /*checksum*/, initial_offset);
}
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
WriteInitialOffsetLog();
reading_ = true;
source_.contents_ = Slice(dest_.contents_);
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
Reader* offset_reader = new Reader(&source_, &report_, true /*checksum*/,
WrittenBytes() + offset_past_end);
Slice record;
std::string scratch;
@ -215,40 +139,128 @@ class LogTest {
WriteInitialOffsetLog();
reading_ = true;
source_.contents_ = Slice(dest_.contents_);
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
initial_offset);
Slice record;
std::string scratch;
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
record.size());
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
offset_reader->LastRecordOffset());
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
Reader* offset_reader =
new Reader(&source_, &report_, true /*checksum*/, initial_offset);
// Read all records from expected_record_offset through the last one.
ASSERT_LT(expected_record_offset, num_initial_offset_records_);
for (; expected_record_offset < num_initial_offset_records_;
++expected_record_offset) {
Slice record;
std::string scratch;
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
record.size());
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
offset_reader->LastRecordOffset());
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
}
delete offset_reader;
}
private:
class StringDest : public WritableFile {
public:
Status Close() override { return Status::OK(); }
Status Flush() override { return Status::OK(); }
Status Sync() override { return Status::OK(); }
Status Append(const Slice& slice) override {
contents_.append(slice.data(), slice.size());
return Status::OK();
}
std::string contents_;
};
class StringSource : public SequentialFile {
public:
StringSource() : force_error_(false), returned_partial_(false) {}
Status Read(size_t n, Slice* result, char* scratch) override {
EXPECT_TRUE(!returned_partial_) << "must not Read() after eof/error";
if (force_error_) {
force_error_ = false;
returned_partial_ = true;
return Status::Corruption("read error");
}
if (contents_.size() < n) {
n = contents_.size();
returned_partial_ = true;
}
*result = Slice(contents_.data(), n);
contents_.remove_prefix(n);
return Status::OK();
}
Status Skip(uint64_t n) override {
if (n > contents_.size()) {
contents_.clear();
return Status::NotFound("in-memory file skipped past end");
}
contents_.remove_prefix(n);
return Status::OK();
}
Slice contents_;
bool force_error_;
bool returned_partial_;
};
class ReportCollector : public Reader::Reporter {
public:
ReportCollector() : dropped_bytes_(0) {}
void Corruption(size_t bytes, const Status& status) override {
dropped_bytes_ += bytes;
message_.append(status.ToString());
}
size_t dropped_bytes_;
std::string message_;
};
// Record metadata for testing initial offset functionality
static size_t initial_offset_record_sizes_[];
static uint64_t initial_offset_last_record_offsets_[];
static int num_initial_offset_records_;
StringDest dest_;
StringSource source_;
ReportCollector report_;
bool reading_;
Writer* writer_;
Reader* reader_;
};
size_t LogTest::initial_offset_record_sizes_[] =
{10000, // Two sizable records in first block
10000,
2 * log::kBlockSize - 1000, // Span three blocks
1};
size_t LogTest::initial_offset_record_sizes_[] = {
10000, // Two sizable records in first block
10000,
2 * log::kBlockSize - 1000, // Span three blocks
1,
13716, // Consume all but two bytes of block 3.
log::kBlockSize - kHeaderSize, // Consume the entirety of block 4.
};
uint64_t LogTest::initial_offset_last_record_offsets_[] =
{0,
kHeaderSize + 10000,
2 * (kHeaderSize + 10000),
2 * (kHeaderSize + 10000) +
(2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
uint64_t LogTest::initial_offset_last_record_offsets_[] = {
0,
kHeaderSize + 10000,
2 * (kHeaderSize + 10000),
2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize +
kHeaderSize + 1,
3 * log::kBlockSize,
};
// LogTest::initial_offset_last_record_offsets_ must be defined before this.
int LogTest::num_initial_offset_records_ =
sizeof(LogTest::initial_offset_last_record_offsets_) / sizeof(uint64_t);
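These expected offsets follow directly from the record sizes above, taking kHeaderSize = 7 and kBlockSize = 32768 from log_format.h and remembering that a record pays one 7-byte header for each block fragment it is split into:

  record 0 starts at 0
  record 1 starts at 0     + (7 + 10000)            = 10007
  record 2 starts at 10007 + (7 + 10000)            = 20014
  record 3 starts at 20014 + 3*7 + (2*32768 - 1000) = 84571  (record 2 spans three blocks, hence three headers)
  record 4 starts at 84571 + (7 + 1)                = 84579
  record 5 starts at 3 * 32768                      = 98304

Record 4 ends at 84579 + 7 + 13716 = 98302, two bytes short of a block boundary; two bytes cannot hold a header, so the writer zero-pads them and record 5 begins exactly on the next block boundary, which is why its expected offset is 3 * kBlockSize.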
TEST(LogTest, Empty) {
ASSERT_EQ("EOF", Read());
}
TEST_F(LogTest, Empty) { ASSERT_EQ("EOF", Read()); }
TEST(LogTest, ReadWrite) {
TEST_F(LogTest, ReadWrite) {
Write("foo");
Write("bar");
Write("");
@ -261,7 +273,7 @@ TEST(LogTest, ReadWrite) {
ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
}
TEST(LogTest, ManyBlocks) {
TEST_F(LogTest, ManyBlocks) {
for (int i = 0; i < 100000; i++) {
Write(NumberString(i));
}
@ -271,7 +283,7 @@ TEST(LogTest, ManyBlocks) {
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, Fragmentation) {
TEST_F(LogTest, Fragmentation) {
Write("small");
Write(BigString("medium", 50000));
Write(BigString("large", 100000));
@ -281,9 +293,9 @@ TEST(LogTest, Fragmentation) {
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, MarginalTrailer) {
TEST_F(LogTest, MarginalTrailer) {
// Make a trailer that is exactly the same length as an empty record.
const int n = kBlockSize - 2*kHeaderSize;
const int n = kBlockSize - 2 * kHeaderSize;
Write(BigString("foo", n));
ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
Write("");
@ -294,9 +306,9 @@ TEST(LogTest, MarginalTrailer) {
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, MarginalTrailer2) {
TEST_F(LogTest, MarginalTrailer2) {
// Make a trailer that is exactly the same length as an empty record.
const int n = kBlockSize - 2*kHeaderSize;
const int n = kBlockSize - 2 * kHeaderSize;
Write(BigString("foo", n));
ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
Write("bar");
@ -307,8 +319,8 @@ TEST(LogTest, MarginalTrailer2) {
ASSERT_EQ("", ReportMessage());
}
TEST(LogTest, ShortTrailer) {
const int n = kBlockSize - 2*kHeaderSize + 4;
TEST_F(LogTest, ShortTrailer) {
const int n = kBlockSize - 2 * kHeaderSize + 4;
Write(BigString("foo", n));
ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
Write("");
@ -319,15 +331,15 @@ TEST(LogTest, ShortTrailer) {
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, AlignedEof) {
const int n = kBlockSize - 2*kHeaderSize + 4;
TEST_F(LogTest, AlignedEof) {
const int n = kBlockSize - 2 * kHeaderSize + 4;
Write(BigString("foo", n));
ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
ASSERT_EQ(BigString("foo", n), Read());
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, OpenForAppend) {
TEST_F(LogTest, OpenForAppend) {
Write("hello");
ReopenForAppend();
Write("world");
@ -336,7 +348,7 @@ TEST(LogTest, OpenForAppend) {
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, RandomRead) {
TEST_F(LogTest, RandomRead) {
const int N = 500;
Random write_rnd(301);
for (int i = 0; i < N; i++) {
@ -351,7 +363,7 @@ TEST(LogTest, RandomRead) {
// Tests of all the error paths in log_reader.cc follow:
TEST(LogTest, ReadError) {
TEST_F(LogTest, ReadError) {
Write("foo");
ForceError();
ASSERT_EQ("EOF", Read());
@ -359,7 +371,7 @@ TEST(LogTest, ReadError) {
ASSERT_EQ("OK", MatchError("read error"));
}
TEST(LogTest, BadRecordType) {
TEST_F(LogTest, BadRecordType) {
Write("foo");
// Type is stored in header[6]
IncrementByte(6, 100);
@ -369,16 +381,16 @@ TEST(LogTest, BadRecordType) {
ASSERT_EQ("OK", MatchError("unknown record type"));
}
TEST(LogTest, TruncatedTrailingRecordIsIgnored) {
TEST_F(LogTest, TruncatedTrailingRecordIsIgnored) {
Write("foo");
ShrinkSize(4); // Drop all payload as well as a header byte
ShrinkSize(4); // Drop all payload as well as a header byte
ASSERT_EQ("EOF", Read());
// Truncated last record is ignored, not treated as an error.
ASSERT_EQ(0, DroppedBytes());
ASSERT_EQ("", ReportMessage());
}
TEST(LogTest, BadLength) {
TEST_F(LogTest, BadLength) {
const int kPayloadSize = kBlockSize - kHeaderSize;
Write(BigString("bar", kPayloadSize));
Write("foo");
@ -389,7 +401,7 @@ TEST(LogTest, BadLength) {
ASSERT_EQ("OK", MatchError("bad record length"));
}
TEST(LogTest, BadLengthAtEndIsIgnored) {
TEST_F(LogTest, BadLengthAtEndIsIgnored) {
Write("foo");
ShrinkSize(1);
ASSERT_EQ("EOF", Read());
@ -397,7 +409,7 @@ TEST(LogTest, BadLengthAtEndIsIgnored) {
ASSERT_EQ("", ReportMessage());
}
TEST(LogTest, ChecksumMismatch) {
TEST_F(LogTest, ChecksumMismatch) {
Write("foo");
IncrementByte(0, 10);
ASSERT_EQ("EOF", Read());
@ -405,7 +417,7 @@ TEST(LogTest, ChecksumMismatch) {
ASSERT_EQ("OK", MatchError("checksum mismatch"));
}
TEST(LogTest, UnexpectedMiddleType) {
TEST_F(LogTest, UnexpectedMiddleType) {
Write("foo");
SetByte(6, kMiddleType);
FixChecksum(0, 3);
@ -414,7 +426,7 @@ TEST(LogTest, UnexpectedMiddleType) {
ASSERT_EQ("OK", MatchError("missing start"));
}
TEST(LogTest, UnexpectedLastType) {
TEST_F(LogTest, UnexpectedLastType) {
Write("foo");
SetByte(6, kLastType);
FixChecksum(0, 3);
@ -423,7 +435,7 @@ TEST(LogTest, UnexpectedLastType) {
ASSERT_EQ("OK", MatchError("missing start"));
}
TEST(LogTest, UnexpectedFullType) {
TEST_F(LogTest, UnexpectedFullType) {
Write("foo");
Write("bar");
SetByte(6, kFirstType);
@ -434,7 +446,7 @@ TEST(LogTest, UnexpectedFullType) {
ASSERT_EQ("OK", MatchError("partial record without end"));
}
TEST(LogTest, UnexpectedFirstType) {
TEST_F(LogTest, UnexpectedFirstType) {
Write("foo");
Write(BigString("bar", 100000));
SetByte(6, kFirstType);
@ -445,7 +457,7 @@ TEST(LogTest, UnexpectedFirstType) {
ASSERT_EQ("OK", MatchError("partial record without end"));
}
TEST(LogTest, MissingLastIsIgnored) {
TEST_F(LogTest, MissingLastIsIgnored) {
Write(BigString("bar", kBlockSize));
// Remove the LAST block, including header.
ShrinkSize(14);
@ -454,7 +466,7 @@ TEST(LogTest, MissingLastIsIgnored) {
ASSERT_EQ(0, DroppedBytes());
}
TEST(LogTest, PartialLastIsIgnored) {
TEST_F(LogTest, PartialLastIsIgnored) {
Write(BigString("bar", kBlockSize));
// Cause a bad record length in the LAST block.
ShrinkSize(1);
@ -463,7 +475,23 @@ TEST(LogTest, PartialLastIsIgnored) {
ASSERT_EQ(0, DroppedBytes());
}
TEST(LogTest, ErrorJoinsRecords) {
TEST_F(LogTest, SkipIntoMultiRecord) {
// Consider a fragmented record:
// first(R1), middle(R1), last(R1), first(R2)
// If initial_offset points to a record after first(R1) but before first(R2)
// incomplete fragment errors are not actual errors, and must be suppressed
// until a new first or full record is encountered.
Write(BigString("foo", 3 * kBlockSize));
Write("correct");
StartReadingAt(kBlockSize);
ASSERT_EQ("correct", Read());
ASSERT_EQ("", ReportMessage());
ASSERT_EQ(0, DroppedBytes());
ASSERT_EQ("EOF", Read());
}
TEST_F(LogTest, ErrorJoinsRecords) {
// Consider two fragmented records:
// first(R1) last(R1) first(R2) last(R2)
// where the middle two fragments disappear. We do not want
@ -475,74 +503,56 @@ TEST(LogTest, ErrorJoinsRecords) {
Write("correct");
// Wipe the middle block
for (int offset = kBlockSize; offset < 2*kBlockSize; offset++) {
for (int offset = kBlockSize; offset < 2 * kBlockSize; offset++) {
SetByte(offset, 'x');
}
ASSERT_EQ("correct", Read());
ASSERT_EQ("EOF", Read());
const size_t dropped = DroppedBytes();
ASSERT_LE(dropped, 2*kBlockSize + 100);
ASSERT_GE(dropped, 2*kBlockSize);
ASSERT_LE(dropped, 2 * kBlockSize + 100);
ASSERT_GE(dropped, 2 * kBlockSize);
}
TEST(LogTest, ReadStart) {
CheckInitialOffsetRecord(0, 0);
}
TEST_F(LogTest, ReadStart) { CheckInitialOffsetRecord(0, 0); }
TEST(LogTest, ReadSecondOneOff) {
CheckInitialOffsetRecord(1, 1);
}
TEST_F(LogTest, ReadSecondOneOff) { CheckInitialOffsetRecord(1, 1); }
TEST(LogTest, ReadSecondTenThousand) {
CheckInitialOffsetRecord(10000, 1);
}
TEST_F(LogTest, ReadSecondTenThousand) { CheckInitialOffsetRecord(10000, 1); }
TEST(LogTest, ReadSecondStart) {
CheckInitialOffsetRecord(10007, 1);
}
TEST_F(LogTest, ReadSecondStart) { CheckInitialOffsetRecord(10007, 1); }
TEST(LogTest, ReadThirdOneOff) {
CheckInitialOffsetRecord(10008, 2);
}
TEST_F(LogTest, ReadThirdOneOff) { CheckInitialOffsetRecord(10008, 2); }
TEST(LogTest, ReadThirdStart) {
CheckInitialOffsetRecord(20014, 2);
}
TEST_F(LogTest, ReadThirdStart) { CheckInitialOffsetRecord(20014, 2); }
TEST(LogTest, ReadFourthOneOff) {
CheckInitialOffsetRecord(20015, 3);
}
TEST_F(LogTest, ReadFourthOneOff) { CheckInitialOffsetRecord(20015, 3); }
TEST(LogTest, ReadFourthFirstBlockTrailer) {
TEST_F(LogTest, ReadFourthFirstBlockTrailer) {
CheckInitialOffsetRecord(log::kBlockSize - 4, 3);
}
TEST(LogTest, ReadFourthMiddleBlock) {
TEST_F(LogTest, ReadFourthMiddleBlock) {
CheckInitialOffsetRecord(log::kBlockSize + 1, 3);
}
TEST(LogTest, ReadFourthLastBlock) {
TEST_F(LogTest, ReadFourthLastBlock) {
CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3);
}
TEST(LogTest, ReadFourthStart) {
TEST_F(LogTest, ReadFourthStart) {
CheckInitialOffsetRecord(
2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
3);
}
TEST(LogTest, ReadEnd) {
CheckOffsetPastEndReturnsNoRecords(0);
TEST_F(LogTest, ReadInitialOffsetIntoBlockPadding) {
CheckInitialOffsetRecord(3 * log::kBlockSize - 3, 5);
}
TEST(LogTest, ReadPastEnd) {
CheckOffsetPastEndReturnsNoRecords(5);
}
TEST_F(LogTest, ReadEnd) { CheckOffsetPastEndReturnsNoRecords(0); }
TEST_F(LogTest, ReadPastEnd) { CheckOffsetPastEndReturnsNoRecords(5); }
} // namespace log
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

db/log_writer.cc (View File)

@ -4,7 +4,8 @@
#include "db/log_writer.h"
#include <stdint.h>
#include <cstdint>
#include "leveldb/env.h"
#include "util/coding.h"
#include "util/crc32c.h"
@ -19,9 +20,7 @@ static void InitTypeCrc(uint32_t* type_crc) {
}
}
Writer::Writer(WritableFile* dest)
: dest_(dest),
block_offset_(0) {
Writer::Writer(WritableFile* dest) : dest_(dest), block_offset_(0) {
InitTypeCrc(type_crc_);
}
@ -30,8 +29,7 @@ Writer::Writer(WritableFile* dest, uint64_t dest_length)
InitTypeCrc(type_crc_);
}
Writer::~Writer() {
}
Writer::~Writer() = default;
Status Writer::AddRecord(const Slice& slice) {
const char* ptr = slice.data();
@ -49,7 +47,7 @@ Status Writer::AddRecord(const Slice& slice) {
// Switch to a new block
if (leftover > 0) {
// Fill the trailer (literal below relies on kHeaderSize being 7)
assert(kHeaderSize == 7);
static_assert(kHeaderSize == 7, "");
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
}
block_offset_ = 0;
@ -81,30 +79,31 @@ Status Writer::AddRecord(const Slice& slice) {
return s;
}
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
assert(n <= 0xffff); // Must fit in two bytes
assert(block_offset_ + kHeaderSize + n <= kBlockSize);
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr,
size_t length) {
assert(length <= 0xffff); // Must fit in two bytes
assert(block_offset_ + kHeaderSize + length <= kBlockSize);
// Format the header
char buf[kHeaderSize];
buf[4] = static_cast<char>(n & 0xff);
buf[5] = static_cast<char>(n >> 8);
buf[4] = static_cast<char>(length & 0xff);
buf[5] = static_cast<char>(length >> 8);
buf[6] = static_cast<char>(t);
// Compute the crc of the record type and the payload.
uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
crc = crc32c::Mask(crc); // Adjust for storage
uint32_t crc = crc32c::Extend(type_crc_[t], ptr, length);
crc = crc32c::Mask(crc); // Adjust for storage
EncodeFixed32(buf, crc);
// Write the header and the payload
Status s = dest_->Append(Slice(buf, kHeaderSize));
if (s.ok()) {
s = dest_->Append(Slice(ptr, n));
s = dest_->Append(Slice(ptr, length));
if (s.ok()) {
s = dest_->Flush();
}
}
block_offset_ += kHeaderSize + n;
block_offset_ += kHeaderSize + length;
return s;
}
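EmitPhysicalRecord above pins down the 7-byte header layout: bytes 0-3 carry the masked crc32c little-endian (EncodeFixed32), bytes 4-5 the payload length little-endian, and byte 6 the record type, followed by the payload itself. A minimal decoding sketch for a single header, assuming only that layout; PhysicalHeader and DecodePhysicalHeader are illustrative names, not library API:

#include <cstdint>

struct PhysicalHeader {
  uint32_t masked_crc;  // crc32c of type byte + payload, masked for storage
  uint16_t length;      // payload length; the 7 header bytes are not included
  uint8_t type;         // one of the RecordType values (kFullType, kFirstType, ...)
};

PhysicalHeader DecodePhysicalHeader(const unsigned char* buf /* >= kHeaderSize bytes */) {
  PhysicalHeader header;
  header.masked_crc = static_cast<uint32_t>(buf[0]) |
                      (static_cast<uint32_t>(buf[1]) << 8) |
                      (static_cast<uint32_t>(buf[2]) << 16) |
                      (static_cast<uint32_t>(buf[3]) << 24);
  header.length = static_cast<uint16_t>(buf[4] | (buf[5] << 8));
  header.type = buf[6];
  return header;
}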

db/log_writer.h (View File)

@ -5,7 +5,8 @@
#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
#define STORAGE_LEVELDB_DB_LOG_WRITER_H_
#include <stdint.h>
#include <cstdint>
#include "db/log_format.h"
#include "leveldb/slice.h"
#include "leveldb/status.h"
@ -28,24 +29,23 @@ class Writer {
// "*dest" must remain live while this Writer is in use.
Writer(WritableFile* dest, uint64_t dest_length);
Writer(const Writer&) = delete;
Writer& operator=(const Writer&) = delete;
~Writer();
Status AddRecord(const Slice& slice);
private:
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
WritableFile* dest_;
int block_offset_; // Current offset in block
int block_offset_; // Current offset in block
// crc32c values for all supported record types. These are
// pre-computed to reduce the overhead of computing the crc of the
// record type stored in the header.
uint32_t type_crc_[kMaxRecordType + 1];
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
// No copying allowed
Writer(const Writer&);
void operator=(const Writer&);
};
} // namespace log

db/memtable.cc (View File)

@ -18,20 +18,15 @@ static Slice GetLengthPrefixedSlice(const char* data) {
return Slice(p, len);
}
MemTable::MemTable(const InternalKeyComparator& cmp)
: comparator_(cmp),
refs_(0),
table_(comparator_, &arena_) {
}
MemTable::MemTable(const InternalKeyComparator& comparator)
: comparator_(comparator), refs_(0), table_(comparator_, &arena_) {}
MemTable::~MemTable() {
assert(refs_ == 0);
}
MemTable::~MemTable() { assert(refs_ == 0); }
size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); }
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
const {
int MemTable::KeyComparator::operator()(const char* aptr,
const char* bptr) const {
// Internal keys are encoded as length-prefixed strings.
Slice a = GetLengthPrefixedSlice(aptr);
Slice b = GetLengthPrefixedSlice(bptr);
@ -48,60 +43,59 @@ static const char* EncodeKey(std::string* scratch, const Slice& target) {
return scratch->data();
}
class MemTableIterator: public Iterator {
class MemTableIterator : public Iterator {
public:
explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
explicit MemTableIterator(MemTable::Table* table) : iter_(table) {}
virtual bool Valid() const { return iter_.Valid(); }
virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
virtual void SeekToFirst() { iter_.SeekToFirst(); }
virtual void SeekToLast() { iter_.SeekToLast(); }
virtual void Next() { iter_.Next(); }
virtual void Prev() { iter_.Prev(); }
virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
virtual Slice value() const {
MemTableIterator(const MemTableIterator&) = delete;
MemTableIterator& operator=(const MemTableIterator&) = delete;
~MemTableIterator() override = default;
bool Valid() const override { return iter_.Valid(); }
void Seek(const Slice& k) override { iter_.Seek(EncodeKey(&tmp_, k)); }
void SeekToFirst() override { iter_.SeekToFirst(); }
void SeekToLast() override { iter_.SeekToLast(); }
void Next() override { iter_.Next(); }
void Prev() override { iter_.Prev(); }
Slice key() const override { return GetLengthPrefixedSlice(iter_.key()); }
Slice value() const override {
Slice key_slice = GetLengthPrefixedSlice(iter_.key());
return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
}
virtual Status status() const { return Status::OK(); }
Status status() const override { return Status::OK(); }
private:
MemTable::Table::Iterator iter_;
std::string tmp_; // For passing to EncodeKey
// No copying allowed
MemTableIterator(const MemTableIterator&);
void operator=(const MemTableIterator&);
std::string tmp_; // For passing to EncodeKey
};
Iterator* MemTable::NewIterator() {
return new MemTableIterator(&table_);
}
Iterator* MemTable::NewIterator() { return new MemTableIterator(&table_); }
void MemTable::Add(SequenceNumber s, ValueType type,
const Slice& key,
void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
const Slice& value) {
// Format of an entry is concatenation of:
// key_size : varint32 of internal_key.size()
// key bytes : char[internal_key.size()]
// tag : uint64((sequence << 8) | type)
// value_size : varint32 of value.size()
// value bytes : char[value.size()]
size_t key_size = key.size();
size_t val_size = value.size();
size_t internal_key_size = key_size + 8;
const size_t encoded_len =
VarintLength(internal_key_size) + internal_key_size +
VarintLength(val_size) + val_size;
const size_t encoded_len = VarintLength(internal_key_size) +
internal_key_size + VarintLength(val_size) +
val_size;
char* buf = arena_.Allocate(encoded_len);
char* p = EncodeVarint32(buf, internal_key_size);
memcpy(p, key.data(), key_size);
std::memcpy(p, key.data(), key_size);
p += key_size;
EncodeFixed64(p, (s << 8) | type);
p += 8;
p = EncodeVarint32(p, val_size);
memcpy(p, value.data(), val_size);
assert((p + val_size) - buf == encoded_len);
std::memcpy(p, value.data(), val_size);
assert(p + val_size == buf + encoded_len);
table_.Insert(buf);
}
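The comment at the top of Add() fully specifies the entry layout (varint32 internal-key length, user-key bytes, 8-byte tag = (sequence << 8) | type, varint32 value length, value bytes), so an entry can be decoded in a few lines; Get() below does essentially this with the util/coding.h helpers. A standalone sketch, assuming a little-endian host for the fixed64 tag; DecodeVarint32, DecodedEntry and DecodeMemTableEntry are illustrative, not library API:

#include <cstdint>
#include <cstring>
#include <string>

// leveldb-style varint32: 7 payload bits per byte, high bit set means "more".
const char* DecodeVarint32(const char* p, uint32_t* value) {
  uint32_t result = 0;
  for (int shift = 0; shift <= 28; shift += 7) {
    uint32_t byte = static_cast<unsigned char>(*p++);
    result |= (byte & 0x7f) << shift;
    if ((byte & 0x80) == 0) {
      *value = result;
      return p;
    }
  }
  return nullptr;  // malformed varint
}

struct DecodedEntry {
  std::string user_key;
  uint64_t sequence;
  uint8_t type;  // kTypeValue or kTypeDeletion
  std::string value;
};

DecodedEntry DecodeMemTableEntry(const char* entry) {
  DecodedEntry out;
  uint32_t internal_key_size;  // user key length + 8 tag bytes
  const char* p = DecodeVarint32(entry, &internal_key_size);
  out.user_key.assign(p, internal_key_size - 8);
  uint64_t tag;
  std::memcpy(&tag, p + internal_key_size - 8, 8);  // fixed64 is little-endian on disk
  out.sequence = tag >> 8;
  out.type = static_cast<uint8_t>(tag & 0xff);
  p += internal_key_size;
  uint32_t value_size;
  p = DecodeVarint32(p, &value_size);
  out.value.assign(p, value_size);
  return out;
}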
@ -121,10 +115,9 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
// all entries with overly large sequence numbers.
const char* entry = iter.key();
uint32_t key_length;
const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);
if (comparator_.comparator.user_comparator()->Compare(
Slice(key_ptr, key_length - 8),
key.user_key()) == 0) {
Slice(key_ptr, key_length - 8), key.user_key()) == 0) {
// Correct user key
const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
switch (static_cast<ValueType>(tag & 0xff)) {

db/memtable.h (View File)

@ -6,15 +6,15 @@
#define STORAGE_LEVELDB_DB_MEMTABLE_H_
#include <string>
#include "leveldb/db.h"
#include "db/dbformat.h"
#include "db/skiplist.h"
#include "leveldb/db.h"
#include "util/arena.h"
namespace leveldb {
class InternalKeyComparator;
class Mutex;
class MemTableIterator;
class MemTable {
@ -23,6 +23,9 @@ class MemTable {
// is zero and the caller must call Ref() at least once.
explicit MemTable(const InternalKeyComparator& comparator);
MemTable(const MemTable&) = delete;
MemTable& operator=(const MemTable&) = delete;
// Increase reference count.
void Ref() { ++refs_; }
@ -36,10 +39,7 @@ class MemTable {
}
// Returns an estimate of the number of bytes of data in use by this
// data structure.
//
// REQUIRES: external synchronization to prevent simultaneous
// operations on the same MemTable.
// data structure. It is safe to call when MemTable is being modified.
size_t ApproximateMemoryUsage();
// Return an iterator that yields the contents of the memtable.
@ -53,8 +53,7 @@ class MemTable {
// Add an entry into memtable that maps key to value at the
// specified sequence number and with the specified type.
// Typically value will be empty if type==kTypeDeletion.
void Add(SequenceNumber seq, ValueType type,
const Slice& key,
void Add(SequenceNumber seq, ValueType type, const Slice& key,
const Slice& value);
// If memtable contains a value for key, store it in *value and return true.
@ -64,26 +63,23 @@ class MemTable {
bool Get(const LookupKey& key, std::string* value, Status* s);
private:
~MemTable(); // Private since only Unref() should be used to delete it
struct KeyComparator {
const InternalKeyComparator comparator;
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
int operator()(const char* a, const char* b) const;
};
friend class MemTableIterator;
friend class MemTableBackwardIterator;
struct KeyComparator {
const InternalKeyComparator comparator;
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) {}
int operator()(const char* a, const char* b) const;
};
typedef SkipList<const char*, KeyComparator> Table;
~MemTable(); // Private since only Unref() should be used to delete it
KeyComparator comparator_;
int refs_;
Arena arena_;
Table table_;
// No copying allowed
MemTable(const MemTable&);
void operator=(const MemTable&);
};
} // namespace leveldb

db/recovery_test.cc (View File)

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "gtest/gtest.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/version_set.h"
@ -10,15 +11,14 @@
#include "leveldb/env.h"
#include "leveldb/write_batch.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
class RecoveryTest {
class RecoveryTest : public testing::Test {
public:
RecoveryTest() : env_(Env::Default()), db_(NULL) {
dbname_ = test::TmpDir() + "/recovery_test";
RecoveryTest() : env_(Env::Default()), db_(nullptr) {
dbname_ = testing::TempDir() + "recovery_test";
DestroyDB(dbname_, Options());
Open();
}
@ -44,22 +44,26 @@ class RecoveryTest {
void Close() {
delete db_;
db_ = NULL;
db_ = nullptr;
}
void Open(Options* options = NULL) {
Status OpenWithStatus(Options* options = nullptr) {
Close();
Options opts;
if (options != NULL) {
if (options != nullptr) {
opts = *options;
} else {
opts.reuse_logs = true; // TODO(sanjay): test both ways
opts.create_if_missing = true;
}
if (opts.env == NULL) {
if (opts.env == nullptr) {
opts.env = env_;
}
ASSERT_OK(DB::Open(opts, dbname_, &db_));
return DB::Open(opts, dbname_, &db_);
}
void Open(Options* options = nullptr) {
ASSERT_LEVELDB_OK(OpenWithStatus(options));
ASSERT_EQ(1, NumLogs());
}
@ -67,7 +71,7 @@ class RecoveryTest {
return db_->Put(WriteOptions(), k, v);
}
std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
std::string result;
Status s = db_->Get(ReadOptions(), k, &result);
if (s.IsNotFound()) {
@ -80,34 +84,38 @@ class RecoveryTest {
std::string ManifestFileName() {
std::string current;
ASSERT_OK(ReadFileToString(env_, CurrentFileName(dbname_), &current));
EXPECT_LEVELDB_OK(
ReadFileToString(env_, CurrentFileName(dbname_), &current));
size_t len = current.size();
if (len > 0 && current[len-1] == '\n') {
if (len > 0 && current[len - 1] == '\n') {
current.resize(len - 1);
}
return dbname_ + "/" + current;
}
std::string LogName(uint64_t number) {
return LogFileName(dbname_, number);
}
std::string LogName(uint64_t number) { return LogFileName(dbname_, number); }
size_t DeleteLogFiles() {
size_t RemoveLogFiles() {
// Linux allows unlinking open files, but Windows does not.
// Closing the db allows for file deletion.
Close();
std::vector<uint64_t> logs = GetFiles(kLogFile);
for (size_t i = 0; i < logs.size(); i++) {
ASSERT_OK(env_->DeleteFile(LogName(logs[i]))) << LogName(logs[i]);
EXPECT_LEVELDB_OK(env_->RemoveFile(LogName(logs[i]))) << LogName(logs[i]);
}
return logs.size();
}
uint64_t FirstLogFile() {
return GetFiles(kLogFile)[0];
void RemoveManifestFile() {
ASSERT_LEVELDB_OK(env_->RemoveFile(ManifestFileName()));
}
std::vector<std::uint64_t> GetFiles(FileType t) {
uint64_t FirstLogFile() { return GetFiles(kLogFile)[0]; }
std::vector<uint64_t> GetFiles(FileType t) {
std::vector<std::string> filenames;
ASSERT_OK(env_->GetChildren(dbname_, &filenames));
std::vector<std::uint64_t> result;
EXPECT_LEVELDB_OK(env_->GetChildren(dbname_, &filenames));
std::vector<uint64_t> result;
for (size_t i = 0; i < filenames.size(); i++) {
uint64_t number;
FileType type;
@ -118,35 +126,29 @@ class RecoveryTest {
return result;
}
int NumLogs() {
return GetFiles(kLogFile).size();
}
int NumLogs() { return GetFiles(kLogFile).size(); }
int NumTables() {
return GetFiles(kTableFile).size();
}
int NumTables() { return GetFiles(kTableFile).size(); }
uint64_t FileSize(const std::string& fname) {
uint64_t result;
ASSERT_OK(env_->GetFileSize(fname, &result)) << fname;
EXPECT_LEVELDB_OK(env_->GetFileSize(fname, &result)) << fname;
return result;
}
void CompactMemTable() {
dbfull()->TEST_CompactMemTable();
}
void CompactMemTable() { dbfull()->TEST_CompactMemTable(); }
// Directly construct a log file that sets key to val.
void MakeLogFile(uint64_t lognum, SequenceNumber seq, Slice key, Slice val) {
std::string fname = LogFileName(dbname_, lognum);
WritableFile* file;
ASSERT_OK(env_->NewWritableFile(fname, &file));
ASSERT_LEVELDB_OK(env_->NewWritableFile(fname, &file));
log::Writer writer(file);
WriteBatch batch;
batch.Put(key, val);
WriteBatchInternal::SetSequence(&batch, seq);
ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
ASSERT_OK(file->Flush());
ASSERT_LEVELDB_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
ASSERT_LEVELDB_OK(file->Flush());
delete file;
}
@ -156,12 +158,13 @@ class RecoveryTest {
DB* db_;
};
TEST(RecoveryTest, ManifestReused) {
TEST_F(RecoveryTest, ManifestReused) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
std::fprintf(stderr,
"skipping test because env does not support appending\n");
return;
}
ASSERT_OK(Put("foo", "bar"));
ASSERT_LEVELDB_OK(Put("foo", "bar"));
Close();
std::string old_manifest = ManifestFileName();
Open();
@ -172,12 +175,13 @@ TEST(RecoveryTest, ManifestReused) {
ASSERT_EQ("bar", Get("foo"));
}
TEST(RecoveryTest, LargeManifestCompacted) {
TEST_F(RecoveryTest, LargeManifestCompacted) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
std::fprintf(stderr,
"skipping test because env does not support appending\n");
return;
}
ASSERT_OK(Put("foo", "bar"));
ASSERT_LEVELDB_OK(Put("foo", "bar"));
Close();
std::string old_manifest = ManifestFileName();
@ -185,10 +189,10 @@ TEST(RecoveryTest, LargeManifestCompacted) {
{
uint64_t len = FileSize(old_manifest);
WritableFile* file;
ASSERT_OK(env()->NewAppendableFile(old_manifest, &file));
std::string zeroes(3*1048576 - static_cast<size_t>(len), 0);
ASSERT_OK(file->Append(zeroes));
ASSERT_OK(file->Flush());
ASSERT_LEVELDB_OK(env()->NewAppendableFile(old_manifest, &file));
std::string zeroes(3 * 1048576 - static_cast<size_t>(len), 0);
ASSERT_LEVELDB_OK(file->Append(zeroes));
ASSERT_LEVELDB_OK(file->Flush());
delete file;
}
@ -203,22 +207,23 @@ TEST(RecoveryTest, LargeManifestCompacted) {
ASSERT_EQ("bar", Get("foo"));
}
TEST(RecoveryTest, NoLogFiles) {
ASSERT_OK(Put("foo", "bar"));
ASSERT_EQ(1, DeleteLogFiles());
TEST_F(RecoveryTest, NoLogFiles) {
ASSERT_LEVELDB_OK(Put("foo", "bar"));
ASSERT_EQ(1, RemoveLogFiles());
Open();
ASSERT_EQ("NOT_FOUND", Get("foo"));
Open();
ASSERT_EQ("NOT_FOUND", Get("foo"));
}
TEST(RecoveryTest, LogFileReuse) {
TEST_F(RecoveryTest, LogFileReuse) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
std::fprintf(stderr,
"skipping test because env does not support appending\n");
return;
}
for (int i = 0; i < 2; i++) {
ASSERT_OK(Put("foo", "bar"));
ASSERT_LEVELDB_OK(Put("foo", "bar"));
if (i == 0) {
// Compact to ensure current log is empty
CompactMemTable();
@ -242,13 +247,13 @@ TEST(RecoveryTest, LogFileReuse) {
}
}
TEST(RecoveryTest, MultipleMemTables) {
TEST_F(RecoveryTest, MultipleMemTables) {
// Make a large log.
const int kNum = 1000;
for (int i = 0; i < kNum; i++) {
char buf[100];
snprintf(buf, sizeof(buf), "%050d", i);
ASSERT_OK(Put(buf, buf));
std::snprintf(buf, sizeof(buf), "%050d", i);
ASSERT_LEVELDB_OK(Put(buf, buf));
}
ASSERT_EQ(0, NumTables());
Close();
@ -259,35 +264,35 @@ TEST(RecoveryTest, MultipleMemTables) {
// Force creation of multiple memtables by reducing the write buffer size.
Options opt;
opt.reuse_logs = true;
opt.write_buffer_size = (kNum*100) / 2;
opt.write_buffer_size = (kNum * 100) / 2;
Open(&opt);
ASSERT_LE(2, NumTables());
ASSERT_EQ(1, NumLogs());
ASSERT_NE(old_log_file, FirstLogFile()) << "must not reuse log";
for (int i = 0; i < kNum; i++) {
char buf[100];
snprintf(buf, sizeof(buf), "%050d", i);
std::snprintf(buf, sizeof(buf), "%050d", i);
ASSERT_EQ(buf, Get(buf));
}
}
TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_OK(Put("foo", "bar"));
TEST_F(RecoveryTest, MultipleLogFiles) {
ASSERT_LEVELDB_OK(Put("foo", "bar"));
Close();
ASSERT_EQ(1, NumLogs());
// Make a bunch of uncompacted log files.
uint64_t old_log = FirstLogFile();
MakeLogFile(old_log+1, 1000, "hello", "world");
MakeLogFile(old_log+2, 1001, "hi", "there");
MakeLogFile(old_log+3, 1002, "foo", "bar2");
MakeLogFile(old_log + 1, 1000, "hello", "world");
MakeLogFile(old_log + 2, 1001, "hi", "there");
MakeLogFile(old_log + 3, 1002, "foo", "bar2");
// Recover and check that all log files were processed.
Open();
ASSERT_LE(1, NumTables());
ASSERT_EQ(1, NumLogs());
uint64_t new_log = FirstLogFile();
ASSERT_LE(old_log+3, new_log);
ASSERT_LE(old_log + 3, new_log);
ASSERT_EQ("bar2", Get("foo"));
ASSERT_EQ("world", Get("hello"));
ASSERT_EQ("there", Get("hi"));
@ -305,7 +310,7 @@ TEST(RecoveryTest, MultipleLogFiles) {
// Check that introducing an older log file does not cause it to be re-read.
Close();
MakeLogFile(old_log+1, 2000, "hello", "stale write");
MakeLogFile(old_log + 1, 2000, "hello", "stale write");
Open();
ASSERT_LE(1, NumTables());
ASSERT_EQ(1, NumLogs());
@ -317,8 +322,18 @@ TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_EQ("there", Get("hi"));
}
} // namespace leveldb
TEST_F(RecoveryTest, ManifestMissing) {
ASSERT_LEVELDB_OK(Put("foo", "bar"));
Close();
RemoveManifestFile();
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
Status status = OpenWithStatus();
#if defined(LEVELDB_PLATFORM_CHROMIUM)
// TODO(crbug.com/760362): See comment in MakeIOError() from env_chromium.cc.
ASSERT_TRUE(status.IsIOError());
#else
ASSERT_TRUE(status.IsCorruption());
#endif // defined(LEVELDB_PLATFORM_CHROMIUM)
}
} // namespace leveldb

db/repair.cc (View File)

@ -54,7 +54,7 @@ class Repairer {
owns_cache_(options_.block_cache != options.block_cache),
next_file_number_(1) {
// TableCache can be small since we expect each table to be opened once.
table_cache_ = new TableCache(dbname_, &options_, 10);
table_cache_ = new TableCache(dbname_, options_, 10);
}
~Repairer() {
@ -84,9 +84,7 @@ class Repairer {
"recovered %d files; %llu bytes. "
"Some data may have been lost. "
"****",
dbname_.c_str(),
static_cast<int>(tables_.size()),
bytes);
dbname_.c_str(), static_cast<int>(tables_.size()), bytes);
}
return status;
}
@ -97,22 +95,6 @@ class Repairer {
SequenceNumber max_sequence;
};
std::string const dbname_;
Env* const env_;
InternalKeyComparator const icmp_;
InternalFilterPolicy const ipolicy_;
Options const options_;
bool owns_info_log_;
bool owns_cache_;
TableCache* table_cache_;
VersionEdit edit_;
std::vector<std::string> manifests_;
std::vector<uint64_t> table_numbers_;
std::vector<uint64_t> logs_;
std::vector<TableInfo> tables_;
uint64_t next_file_number_;
Status FindFiles() {
std::vector<std::string> filenames;
Status status = env_->GetChildren(dbname_, &filenames);
@ -152,8 +134,7 @@ class Repairer {
Status status = ConvertLogToTable(logs_[i]);
if (!status.ok()) {
Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
(unsigned long long) logs_[i],
status.ToString().c_str());
(unsigned long long)logs_[i], status.ToString().c_str());
}
ArchiveFile(logname);
}
@ -164,11 +145,10 @@ class Repairer {
Env* env;
Logger* info_log;
uint64_t lognum;
virtual void Corruption(size_t bytes, const Status& s) {
void Corruption(size_t bytes, const Status& s) override {
// We print error messages for corruption, but continue repairing.
Log(info_log, "Log #%llu: dropping %d bytes; %s",
(unsigned long long) lognum,
static_cast<int>(bytes),
(unsigned long long)lognum, static_cast<int>(bytes),
s.ToString().c_str());
}
};
@ -190,8 +170,8 @@ class Repairer {
// corruptions cause entire commits to be skipped instead of
// propagating bad information (like overly large sequence
// numbers).
log::Reader reader(lfile, &reporter, false/*do not checksum*/,
0/*initial_offset*/);
log::Reader reader(lfile, &reporter, false /*do not checksum*/,
0 /*initial_offset*/);
// Read all the records and add to a memtable
std::string scratch;
@ -202,8 +182,8 @@ class Repairer {
int counter = 0;
while (reader.ReadRecord(&record, &scratch)) {
if (record.size() < 12) {
reporter.Corruption(
record.size(), Status::Corruption("log record too small"));
reporter.Corruption(record.size(),
Status::Corruption("log record too small"));
continue;
}
WriteBatchInternal::SetContents(&batch, record);
@ -212,8 +192,7 @@ class Repairer {
counter += WriteBatchInternal::Count(&batch);
} else {
Log(options_.info_log, "Log #%llu: ignoring %s",
(unsigned long long) log,
status.ToString().c_str());
(unsigned long long)log, status.ToString().c_str());
status = Status::OK(); // Keep going with rest of file
}
}
@ -227,16 +206,14 @@ class Repairer {
status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
delete iter;
mem->Unref();
mem = NULL;
mem = nullptr;
if (status.ok()) {
if (meta.file_size > 0) {
table_numbers_.push_back(meta.number);
}
}
Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
(unsigned long long) log,
counter,
(unsigned long long) meta.number,
(unsigned long long)log, counter, (unsigned long long)meta.number,
status.ToString().c_str());
return status;
}
@ -272,8 +249,7 @@ class Repairer {
ArchiveFile(TableFileName(dbname_, number));
ArchiveFile(SSTTableFileName(dbname_, number));
Log(options_.info_log, "Table #%llu: dropped: %s",
(unsigned long long) t.meta.number,
status.ToString().c_str());
(unsigned long long)t.meta.number, status.ToString().c_str());
return;
}
@ -287,8 +263,7 @@ class Repairer {
Slice key = iter->key();
if (!ParseInternalKey(key, &parsed)) {
Log(options_.info_log, "Table #%llu: unparsable key %s",
(unsigned long long) t.meta.number,
EscapeString(key).c_str());
(unsigned long long)t.meta.number, EscapeString(key).c_str());
continue;
}
@ -307,9 +282,7 @@ class Repairer {
}
delete iter;
Log(options_.info_log, "Table #%llu: %d entries %s",
(unsigned long long) t.meta.number,
counter,
status.ToString().c_str());
(unsigned long long)t.meta.number, counter, status.ToString().c_str());
if (status.ok()) {
tables_.push_back(t);
@ -350,25 +323,25 @@ class Repairer {
}
}
delete builder;
builder = NULL;
builder = nullptr;
if (s.ok()) {
s = file->Close();
}
delete file;
file = NULL;
file = nullptr;
if (counter > 0 && s.ok()) {
std::string orig = TableFileName(dbname_, t.meta.number);
s = env_->RenameFile(copy, orig);
if (s.ok()) {
Log(options_.info_log, "Table #%llu: %d entries repaired",
(unsigned long long) t.meta.number, counter);
(unsigned long long)t.meta.number, counter);
tables_.push_back(t);
}
}
if (!s.ok()) {
env_->DeleteFile(copy);
env_->RemoveFile(copy);
}
}
@ -395,11 +368,12 @@ class Repairer {
for (size_t i = 0; i < tables_.size(); i++) {
// TODO(opt): separate out into multiple levels
const TableInfo& t = tables_[i];
edit_.AddFile(0, t.meta.number, t.meta.file_size,
t.meta.smallest, t.meta.largest);
edit_.AddFile(0, t.meta.number, t.meta.file_size, t.meta.smallest,
t.meta.largest);
}
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
// std::fprintf(stderr,
// "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
{
log::Writer log(file);
std::string record;
@ -410,10 +384,10 @@ class Repairer {
status = file->Close();
}
delete file;
file = NULL;
file = nullptr;
if (!status.ok()) {
env_->DeleteFile(tmp);
env_->RemoveFile(tmp);
} else {
// Discard older manifests
for (size_t i = 0; i < manifests_.size(); i++) {
@ -425,7 +399,7 @@ class Repairer {
if (status.ok()) {
status = SetCurrentFile(env_, dbname_, 1);
} else {
env_->DeleteFile(tmp);
env_->RemoveFile(tmp);
}
}
return status;
@ -438,18 +412,34 @@ class Repairer {
// dir/lost/foo
const char* slash = strrchr(fname.c_str(), '/');
std::string new_dir;
if (slash != NULL) {
if (slash != nullptr) {
new_dir.assign(fname.data(), slash - fname.data());
}
new_dir.append("/lost");
env_->CreateDir(new_dir); // Ignore error
std::string new_file = new_dir;
new_file.append("/");
new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
new_file.append((slash == nullptr) ? fname.c_str() : slash + 1);
Status s = env_->RenameFile(fname, new_file);
Log(options_.info_log, "Archiving %s: %s\n",
fname.c_str(), s.ToString().c_str());
Log(options_.info_log, "Archiving %s: %s\n", fname.c_str(),
s.ToString().c_str());
}
const std::string dbname_;
Env* const env_;
InternalKeyComparator const icmp_;
InternalFilterPolicy const ipolicy_;
const Options options_;
bool owns_info_log_;
bool owns_cache_;
TableCache* table_cache_;
VersionEdit edit_;
std::vector<std::string> manifests_;
std::vector<uint64_t> table_numbers_;
std::vector<uint64_t> logs_;
std::vector<TableInfo> tables_;
uint64_t next_file_number_;
};
} // namespace
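The Repairer above is driven by the public leveldb::RepairDB() entry point declared in include/leveldb/db.h. A minimal, hedged usage sketch (the database path below is only a placeholder):
#include <cstdio>
#include "leveldb/db.h"
#include "leveldb/options.h"
int main() {
  leveldb::Options options;
  // RepairDB() rebuilds a descriptor from whatever tables and log files it can
  // still read; files it cannot use are moved aside under <dbname>/lost, as
  // ArchiveFile() above shows.
  leveldb::Status s = leveldb::RepairDB("/tmp/repair_demo", options);
  if (!s.ok()) {
    std::fprintf(stderr, "repair failed: %s\n", s.ToString().c_str());
    return 1;
  }
  return 0;
}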

View File

@ -27,17 +27,16 @@
//
// ... prev vs. next pointer ordering ...
#include <assert.h>
#include <stdlib.h>
#include "port/port.h"
#include <atomic>
#include <cassert>
#include <cstdlib>
#include "util/arena.h"
#include "util/random.h"
namespace leveldb {
class Arena;
template<typename Key, class Comparator>
template <typename Key, class Comparator>
class SkipList {
private:
struct Node;
@ -48,6 +47,9 @@ class SkipList {
// must remain allocated for the lifetime of the skiplist object.
explicit SkipList(Comparator cmp, Arena* arena);
SkipList(const SkipList&) = delete;
SkipList& operator=(const SkipList&) = delete;
// Insert key into the list.
// REQUIRES: nothing that compares equal to key is currently in the list.
void Insert(const Key& key);
@ -97,24 +99,10 @@ class SkipList {
private:
enum { kMaxHeight = 12 };
// Immutable after construction
Comparator const compare_;
Arena* const arena_; // Arena used for allocations of nodes
Node* const head_;
// Modified only by Insert(). Read racily by readers, but stale
// values are ok.
port::AtomicPointer max_height_; // Height of the entire list
inline int GetMaxHeight() const {
return static_cast<int>(
reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
return max_height_.load(std::memory_order_relaxed);
}
// Read/written only by Insert().
Random rnd_;
Node* NewNode(const Key& key, int height);
int RandomHeight();
bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
@ -123,9 +111,9 @@ class SkipList {
bool KeyIsAfterNode(const Key& key, Node* n) const;
// Return the earliest node that comes at or after key.
// Return NULL if there is no such node.
// Return nullptr if there is no such node.
//
// If prev is non-NULL, fills prev[level] with pointer to previous
// If prev is non-null, fills prev[level] with pointer to previous
// node at "level" for every level in [0..max_height_-1].
Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
@ -137,15 +125,24 @@ class SkipList {
// Return head_ if list is empty.
Node* FindLast() const;
// No copying allowed
SkipList(const SkipList&);
void operator=(const SkipList&);
// Immutable after construction
Comparator const compare_;
Arena* const arena_; // Arena used for allocations of nodes
Node* const head_;
// Modified only by Insert(). Read racily by readers, but stale
// values are ok.
std::atomic<int> max_height_; // Height of the entire list
// Read/written only by Insert().
Random rnd_;
};
// Implementation details follow
template<typename Key, class Comparator>
struct SkipList<Key,Comparator>::Node {
explicit Node(const Key& k) : key(k) { }
template <typename Key, class Comparator>
struct SkipList<Key, Comparator>::Node {
explicit Node(const Key& k) : key(k) {}
Key const key;
@ -155,96 +152,96 @@ struct SkipList<Key,Comparator>::Node {
assert(n >= 0);
// Use an 'acquire load' so that we observe a fully initialized
// version of the returned Node.
return reinterpret_cast<Node*>(next_[n].Acquire_Load());
return next_[n].load(std::memory_order_acquire);
}
void SetNext(int n, Node* x) {
assert(n >= 0);
// Use a 'release store' so that anybody who reads through this
// pointer observes a fully initialized version of the inserted node.
next_[n].Release_Store(x);
next_[n].store(x, std::memory_order_release);
}
// No-barrier variants that can be safely used in a few locations.
Node* NoBarrier_Next(int n) {
assert(n >= 0);
return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
return next_[n].load(std::memory_order_relaxed);
}
void NoBarrier_SetNext(int n, Node* x) {
assert(n >= 0);
next_[n].NoBarrier_Store(x);
next_[n].store(x, std::memory_order_relaxed);
}
private:
// Array of length equal to the node height. next_[0] is lowest level link.
port::AtomicPointer next_[1];
std::atomic<Node*> next_[1];
};
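The acquire/release comments above describe the standard pointer-publication pattern: fully initialize the node, then publish the pointer with a release store, and read it back with an acquire load. A self-contained sketch of that pattern using plain std::atomic (illustrative only, independent of leveldb):
#include <atomic>
#include <cassert>
#include <thread>
struct Payload {
  int a;
  int b;
};
std::atomic<Payload*> published{nullptr};
void Producer() {
  Payload* p = new Payload{1, 2};                 // initialize first...
  published.store(p, std::memory_order_release);  // ...then publish
}
void Consumer() {
  Payload* p;
  while ((p = published.load(std::memory_order_acquire)) == nullptr) {
    // spin until the producer publishes
  }
  // The acquire load synchronizes-with the release store, so the writes to
  // *p made before the store are visible here.
  assert(p->a == 1 && p->b == 2);
  delete p;
}
int main() {
  std::thread producer(Producer);
  std::thread consumer(Consumer);
  producer.join();
  consumer.join();
  return 0;
}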
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node*
SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
char* mem = arena_->AllocateAligned(
sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
return new (mem) Node(key);
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::NewNode(
const Key& key, int height) {
char* const node_memory = arena_->AllocateAligned(
sizeof(Node) + sizeof(std::atomic<Node*>) * (height - 1));
return new (node_memory) Node(key);
}
template<typename Key, class Comparator>
inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
template <typename Key, class Comparator>
inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list) {
list_ = list;
node_ = NULL;
node_ = nullptr;
}
template<typename Key, class Comparator>
inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
return node_ != NULL;
template <typename Key, class Comparator>
inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
return node_ != nullptr;
}
template<typename Key, class Comparator>
inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
template <typename Key, class Comparator>
inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
assert(Valid());
return node_->key;
}
template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Next() {
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Next() {
assert(Valid());
node_ = node_->Next(0);
}
template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Prev() {
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Prev() {
// Instead of using explicit "prev" links, we just search for the
// last node that falls before key.
assert(Valid());
node_ = list_->FindLessThan(node_->key);
if (node_ == list_->head_) {
node_ = NULL;
node_ = nullptr;
}
}
template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
node_ = list_->FindGreaterOrEqual(target, NULL);
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
node_ = list_->FindGreaterOrEqual(target, nullptr);
}
template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
node_ = list_->head_->Next(0);
}
template<typename Key, class Comparator>
inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
node_ = list_->FindLast();
if (node_ == list_->head_) {
node_ = NULL;
node_ = nullptr;
}
}
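The iterator above exposes the usual Seek/Next/Prev interface. A minimal single-threaded usage sketch, modeled on the unit test further down (the uint64_t key type and trivial comparator are assumptions made for illustration):
#include <cstdint>
#include <cstdio>
#include "db/skiplist.h"
#include "util/arena.h"
struct IntComparator {
  int operator()(const uint64_t& a, const uint64_t& b) const {
    if (a < b) return -1;
    if (a > b) return +1;
    return 0;
  }
};
int main() {
  leveldb::Arena arena;
  leveldb::SkipList<uint64_t, IntComparator> list(IntComparator(), &arena);
  list.Insert(3);  // keys must be distinct; Insert() is externally synchronized
  list.Insert(1);
  list.Insert(2);
  leveldb::SkipList<uint64_t, IntComparator>::Iterator iter(&list);
  for (iter.SeekToFirst(); iter.Valid(); iter.Next()) {
    std::printf("%llu\n", (unsigned long long)iter.key());  // 1, 2, 3
  }
  iter.Seek(2);  // positions at the first key >= 2
  return list.Contains(2) ? 0 : 1;
}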
template<typename Key, class Comparator>
int SkipList<Key,Comparator>::RandomHeight() {
template <typename Key, class Comparator>
int SkipList<Key, Comparator>::RandomHeight() {
// Increase height with probability 1 in kBranching
static const unsigned int kBranching = 4;
int height = 1;
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
while (height < kMaxHeight && rnd_.OneIn(kBranching)) {
height++;
}
assert(height > 0);
@ -252,15 +249,16 @@ int SkipList<Key,Comparator>::RandomHeight() {
return height;
}
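RandomHeight() draws a geometric height: each additional level is added with probability 1/kBranching and the result is capped at kMaxHeight, so roughly three quarters of all nodes stay at height 1 and the expected number of next-pointers per node is about 4/3. A standalone sketch that samples the same distribution (using <random> instead of leveldb's Random, purely illustrative):
#include <cstdio>
#include <random>
int main() {
  constexpr int kMaxHeight = 12;
  constexpr unsigned int kBranching = 4;
  std::mt19937 rng(0xdeadbeef);
  std::uniform_int_distribution<unsigned int> coin(0, kBranching - 1);
  const int kSamples = 1000000;
  int histogram[kMaxHeight + 1] = {};
  for (int i = 0; i < kSamples; i++) {
    int height = 1;
    while (height < kMaxHeight && coin(rng) == 0) {
      height++;
    }
    histogram[height]++;
  }
  for (int h = 1; h <= kMaxHeight; h++) {
    // Expect roughly 75%, 18.75%, 4.7%, ... of samples at heights 1, 2, 3, ...
    std::printf("height %2d: %6.3f%%\n", h, 100.0 * histogram[h] / kSamples);
  }
  return 0;
}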
template<typename Key, class Comparator>
bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
// NULL n is considered infinite
return (n != NULL) && (compare_(n->key, key) < 0);
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
// null n is considered infinite
return (n != nullptr) && (compare_(n->key, key) < 0);
}
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
const {
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
Node** prev) const {
Node* x = head_;
int level = GetMaxHeight() - 1;
while (true) {
@ -269,7 +267,7 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOr
// Keep searching in this list
x = next;
} else {
if (prev != NULL) prev[level] = x;
if (prev != nullptr) prev[level] = x;
if (level == 0) {
return next;
} else {
@ -280,15 +278,15 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOr
}
}
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node*
SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindLessThan(const Key& key) const {
Node* x = head_;
int level = GetMaxHeight() - 1;
while (true) {
assert(x == head_ || compare_(x->key, key) < 0);
Node* next = x->Next(level);
if (next == NULL || compare_(next->key, key) >= 0) {
if (next == nullptr || compare_(next->key, key) >= 0) {
if (level == 0) {
return x;
} else {
@ -301,14 +299,14 @@ SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
}
}
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
const {
Node* x = head_;
int level = GetMaxHeight() - 1;
while (true) {
Node* next = x->Next(level);
if (next == NULL) {
if (next == nullptr) {
if (level == 0) {
return x;
} else {
@ -321,43 +319,41 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
}
}
template<typename Key, class Comparator>
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
template <typename Key, class Comparator>
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
: compare_(cmp),
arena_(arena),
head_(NewNode(0 /* any key will do */, kMaxHeight)),
max_height_(reinterpret_cast<void*>(1)),
max_height_(1),
rnd_(0xdeadbeef) {
for (int i = 0; i < kMaxHeight; i++) {
head_->SetNext(i, NULL);
head_->SetNext(i, nullptr);
}
}
template<typename Key, class Comparator>
void SkipList<Key,Comparator>::Insert(const Key& key) {
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
// TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
// here since Insert() is externally synchronized.
Node* prev[kMaxHeight];
Node* x = FindGreaterOrEqual(key, prev);
// Our data structure does not allow duplicate insertion
assert(x == NULL || !Equal(key, x->key));
assert(x == nullptr || !Equal(key, x->key));
int height = RandomHeight();
if (height > GetMaxHeight()) {
for (int i = GetMaxHeight(); i < height; i++) {
prev[i] = head_;
}
//fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
// It is ok to mutate max_height_ without any synchronization
// with concurrent readers. A concurrent reader that observes
// the new value of max_height_ will see either the old value of
// new level pointers from head_ (NULL), or a new value set in
// new level pointers from head_ (nullptr), or a new value set in
// the loop below. In the former case the reader will
// immediately drop to the next level since NULL sorts after all
// immediately drop to the next level since nullptr sorts after all
// keys. In the latter case the reader will use the new node.
max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
max_height_.store(height, std::memory_order_relaxed);
}
x = NewNode(key, height);
@ -369,10 +365,10 @@ void SkipList<Key,Comparator>::Insert(const Key& key) {
}
}
template<typename Key, class Comparator>
bool SkipList<Key,Comparator>::Contains(const Key& key) const {
Node* x = FindGreaterOrEqual(key, NULL);
if (x != NULL && Equal(key, x->key)) {
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::Contains(const Key& key) const {
Node* x = FindGreaterOrEqual(key, nullptr);
if (x != nullptr && Equal(key, x->key)) {
return true;
} else {
return false;

View File

@ -3,12 +3,18 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/skiplist.h"
#include <atomic>
#include <set>
#include "gtest/gtest.h"
#include "leveldb/env.h"
#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/arena.h"
#include "util/hash.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
@ -26,8 +32,6 @@ struct Comparator {
}
};
class SkipTest { };
TEST(SkipTest, Empty) {
Arena arena;
Comparator cmp;
@ -112,8 +116,7 @@ TEST(SkipTest, InsertAndLookup) {
// Compare against model iterator
for (std::set<Key>::reverse_iterator model_iter = keys.rbegin();
model_iter != keys.rend();
++model_iter) {
model_iter != keys.rend(); ++model_iter) {
ASSERT_TRUE(iter.Valid());
ASSERT_EQ(*model_iter, iter.key());
iter.Prev();
@ -126,7 +129,7 @@ TEST(SkipTest, InsertAndLookup) {
// concurrent readers (with no synchronization other than when a
// reader's iterator is created), the reader always observes all the
// data that was present in the skip list when the iterator was
// constructor. Because insertions are happening concurrently, we may
// constructed. Because insertions are happening concurrently, we may
// also observe new values that were inserted since the iterator was
// constructed, but we should never miss any values that were present
// at iterator construction time.
@ -148,19 +151,19 @@ TEST(SkipTest, InsertAndLookup) {
// been concurrently added since the iterator started.
class ConcurrentTest {
private:
static const uint32_t K = 4;
static constexpr uint32_t K = 4;
static uint64_t key(Key key) { return (key >> 40); }
static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; }
static uint64_t hash(Key key) { return key & 0xff; }
static uint64_t HashNumbers(uint64_t k, uint64_t g) {
uint64_t data[2] = { k, g };
uint64_t data[2] = {k, g};
return Hash(reinterpret_cast<char*>(data), sizeof(data), 0);
}
static Key MakeKey(uint64_t k, uint64_t g) {
assert(sizeof(Key) == sizeof(uint64_t));
static_assert(sizeof(Key) == sizeof(uint64_t), "");
assert(k <= K); // We sometimes pass K to seek to the end of the skiplist
assert(g <= 0xffffffffu);
return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
@ -186,13 +189,11 @@ class ConcurrentTest {
// Per-key generation
struct State {
port::AtomicPointer generation[K];
void Set(int k, intptr_t v) {
generation[k].Release_Store(reinterpret_cast<void*>(v));
}
intptr_t Get(int k) {
return reinterpret_cast<intptr_t>(generation[k].Acquire_Load());
std::atomic<int> generation[K];
void Set(int k, int v) {
generation[k].store(v, std::memory_order_release);
}
int Get(int k) { return generation[k].load(std::memory_order_acquire); }
State() {
for (int k = 0; k < K; k++) {
@ -211,7 +212,7 @@ class ConcurrentTest {
SkipList<Key, Comparator> list_;
public:
ConcurrentTest() : list_(Comparator(), &arena_) { }
ConcurrentTest() : list_(Comparator(), &arena_) {}
// REQUIRES: External synchronization
void WriteStep(Random* rnd) {
@ -250,11 +251,9 @@ class ConcurrentTest {
// Note that generation 0 is never inserted, so it is ok if
// <*,0,*> is missing.
ASSERT_TRUE((gen(pos) == 0) ||
(gen(pos) > initial_state.Get(key(pos)))
) << "key: " << key(pos)
<< "; gen: " << gen(pos)
<< "; initgen: "
<< initial_state.Get(key(pos));
(gen(pos) > static_cast<Key>(initial_state.Get(key(pos)))))
<< "key: " << key(pos) << "; gen: " << gen(pos)
<< "; initgen: " << initial_state.Get(key(pos));
// Advance to next key in the valid key space
if (key(pos) < key(current)) {
@ -281,7 +280,9 @@ class ConcurrentTest {
}
}
};
const uint32_t ConcurrentTest::K;
// Needed when building in C++11 mode.
constexpr uint32_t ConcurrentTest::K;
// Simple test that does single-threaded testing of the ConcurrentTest
// scaffolding.
@ -298,21 +299,14 @@ class TestState {
public:
ConcurrentTest t_;
int seed_;
port::AtomicPointer quit_flag_;
std::atomic<bool> quit_flag_;
enum ReaderState {
STARTING,
RUNNING,
DONE
};
enum ReaderState { STARTING, RUNNING, DONE };
explicit TestState(int s)
: seed_(s),
quit_flag_(NULL),
state_(STARTING),
state_cv_(&mu_) {}
: seed_(s), quit_flag_(false), state_(STARTING), state_cv_(&mu_) {}
void Wait(ReaderState s) {
void Wait(ReaderState s) LOCKS_EXCLUDED(mu_) {
mu_.Lock();
while (state_ != s) {
state_cv_.Wait();
@ -320,7 +314,7 @@ class TestState {
mu_.Unlock();
}
void Change(ReaderState s) {
void Change(ReaderState s) LOCKS_EXCLUDED(mu_) {
mu_.Lock();
state_ = s;
state_cv_.Signal();
@ -329,8 +323,8 @@ class TestState {
private:
port::Mutex mu_;
ReaderState state_;
port::CondVar state_cv_;
ReaderState state_ GUARDED_BY(mu_);
port::CondVar state_cv_ GUARDED_BY(mu_);
};
static void ConcurrentReader(void* arg) {
@ -338,7 +332,7 @@ static void ConcurrentReader(void* arg) {
Random rnd(state->seed_);
int64_t reads = 0;
state->Change(TestState::RUNNING);
while (!state->quit_flag_.Acquire_Load()) {
while (!state->quit_flag_.load(std::memory_order_acquire)) {
state->t_.ReadStep(&rnd);
++reads;
}
@ -352,7 +346,7 @@ static void RunConcurrent(int run) {
const int kSize = 1000;
for (int i = 0; i < N; i++) {
if ((i % 100) == 0) {
fprintf(stderr, "Run %d of %d\n", i, N);
std::fprintf(stderr, "Run %d of %d\n", i, N);
}
TestState state(seed + 1);
Env::Default()->Schedule(ConcurrentReader, &state);
@ -360,7 +354,7 @@ static void RunConcurrent(int run) {
for (int i = 0; i < kSize; i++) {
state.t_.WriteStep(&rnd);
}
state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do
state.quit_flag_.store(true, std::memory_order_release);
state.Wait(TestState::DONE);
}
}
@ -372,7 +366,3 @@ TEST(SkipTest, Concurrent4) { RunConcurrent(4); }
TEST(SkipTest, Concurrent5) { RunConcurrent(5); }
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

View File

@ -16,50 +16,78 @@ class SnapshotList;
// Each SnapshotImpl corresponds to a particular sequence number.
class SnapshotImpl : public Snapshot {
public:
SequenceNumber number_; // const after creation
SnapshotImpl(SequenceNumber sequence_number)
: sequence_number_(sequence_number) {}
SequenceNumber sequence_number() const { return sequence_number_; }
private:
friend class SnapshotList;
// SnapshotImpl is kept in a doubly-linked circular list
// SnapshotImpl is kept in a doubly-linked circular list. The SnapshotList
// implementation operates on the next/previous fields directly.
SnapshotImpl* prev_;
SnapshotImpl* next_;
SnapshotList* list_; // just for sanity checks
const SequenceNumber sequence_number_;
#if !defined(NDEBUG)
SnapshotList* list_ = nullptr;
#endif // !defined(NDEBUG)
};
class SnapshotList {
public:
SnapshotList() {
list_.prev_ = &list_;
list_.next_ = &list_;
SnapshotList() : head_(0) {
head_.prev_ = &head_;
head_.next_ = &head_;
}
bool empty() const { return list_.next_ == &list_; }
SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
const SnapshotImpl* New(SequenceNumber seq) {
SnapshotImpl* s = new SnapshotImpl;
s->number_ = seq;
s->list_ = this;
s->next_ = &list_;
s->prev_ = list_.prev_;
s->prev_->next_ = s;
s->next_->prev_ = s;
return s;
bool empty() const { return head_.next_ == &head_; }
SnapshotImpl* oldest() const {
assert(!empty());
return head_.next_;
}
SnapshotImpl* newest() const {
assert(!empty());
return head_.prev_;
}
void Delete(const SnapshotImpl* s) {
assert(s->list_ == this);
s->prev_->next_ = s->next_;
s->next_->prev_ = s->prev_;
delete s;
// Creates a SnapshotImpl and appends it to the end of the list.
SnapshotImpl* New(SequenceNumber sequence_number) {
assert(empty() || newest()->sequence_number_ <= sequence_number);
SnapshotImpl* snapshot = new SnapshotImpl(sequence_number);
#if !defined(NDEBUG)
snapshot->list_ = this;
#endif // !defined(NDEBUG)
snapshot->next_ = &head_;
snapshot->prev_ = head_.prev_;
snapshot->prev_->next_ = snapshot;
snapshot->next_->prev_ = snapshot;
return snapshot;
}
// Removes a SnapshotImpl from this list.
//
// The snapshot must have been created by calling New() on this list.
//
// The snapshot pointer should not be const, because its memory is
// deallocated. However, that would force us to change DB::ReleaseSnapshot(),
// which is in the API, and currently takes a const Snapshot.
void Delete(const SnapshotImpl* snapshot) {
#if !defined(NDEBUG)
assert(snapshot->list_ == this);
#endif // !defined(NDEBUG)
snapshot->prev_->next_ = snapshot->next_;
snapshot->next_->prev_ = snapshot->prev_;
delete snapshot;
}
private:
// Dummy head of doubly-linked list of snapshots
SnapshotImpl list_;
SnapshotImpl head_;
};
} // namespace leveldb
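SnapshotImpl and SnapshotList back the public snapshot API on DB. A hedged usage sketch through the public interface in include/leveldb/db.h (the database path and keys are placeholders):
#include <cassert>
#include <string>
#include "leveldb/db.h"
int main() {
  leveldb::DB* db = nullptr;
  leveldb::Options options;
  options.create_if_missing = true;
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/snapshot_demo", &db);
  assert(s.ok());
  db->Put(leveldb::WriteOptions(), "k", "v1");
  const leveldb::Snapshot* snap = db->GetSnapshot();  // pins a sequence number
  db->Put(leveldb::WriteOptions(), "k", "v2");
  std::string value;
  leveldb::ReadOptions snapshot_read;
  snapshot_read.snapshot = snap;
  db->Get(snapshot_read, "k", &value);           // sees "v1"
  db->Get(leveldb::ReadOptions(), "k", &value);  // sees "v2"
  db->ReleaseSnapshot(snap);  // unlinks and deletes the SnapshotImpl
  delete db;
  return 0;
}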

View File

@ -29,18 +29,14 @@ static void UnrefEntry(void* arg1, void* arg2) {
cache->Release(h);
}
TableCache::TableCache(const std::string& dbname,
const Options* options,
TableCache::TableCache(const std::string& dbname, const Options& options,
int entries)
: env_(options->env),
: env_(options.env),
dbname_(dbname),
options_(options),
cache_(NewLRUCache(entries)) {
}
cache_(NewLRUCache(entries)) {}
TableCache::~TableCache() {
delete cache_;
}
TableCache::~TableCache() { delete cache_; }
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
Cache::Handle** handle) {
@ -49,10 +45,10 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
EncodeFixed64(buf, file_number);
Slice key(buf, sizeof(buf));
*handle = cache_->Lookup(key);
if (*handle == NULL) {
if (*handle == nullptr) {
std::string fname = TableFileName(dbname_, file_number);
RandomAccessFile* file = NULL;
Table* table = NULL;
RandomAccessFile* file = nullptr;
Table* table = nullptr;
s = env_->NewRandomAccessFile(fname, &file);
if (!s.ok()) {
std::string old_fname = SSTTableFileName(dbname_, file_number);
@ -61,11 +57,11 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
}
}
if (s.ok()) {
s = Table::Open(*options_, file, file_size, &table);
s = Table::Open(options_, file, file_size, &table);
}
if (!s.ok()) {
assert(table == NULL);
assert(table == nullptr);
delete file;
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
@ -80,14 +76,13 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
}
Iterator* TableCache::NewIterator(const ReadOptions& options,
uint64_t file_number,
uint64_t file_size,
uint64_t file_number, uint64_t file_size,
Table** tableptr) {
if (tableptr != NULL) {
*tableptr = NULL;
if (tableptr != nullptr) {
*tableptr = nullptr;
}
Cache::Handle* handle = NULL;
Cache::Handle* handle = nullptr;
Status s = FindTable(file_number, file_size, &handle);
if (!s.ok()) {
return NewErrorIterator(s);
@ -96,23 +91,21 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
Iterator* result = table->NewIterator(options);
result->RegisterCleanup(&UnrefEntry, cache_, handle);
if (tableptr != NULL) {
if (tableptr != nullptr) {
*tableptr = table;
}
return result;
}
Status TableCache::Get(const ReadOptions& options,
uint64_t file_number,
uint64_t file_size,
const Slice& k,
void* arg,
void (*saver)(void*, const Slice&, const Slice&)) {
Cache::Handle* handle = NULL;
Status TableCache::Get(const ReadOptions& options, uint64_t file_number,
uint64_t file_size, const Slice& k, void* arg,
void (*handle_result)(void*, const Slice&,
const Slice&)) {
Cache::Handle* handle = nullptr;
Status s = FindTable(file_number, file_size, &handle);
if (s.ok()) {
Table* t = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
s = t->InternalGet(options, k, arg, saver);
s = t->InternalGet(options, k, arg, handle_result);
cache_->Release(handle);
}
return s;

View File

@ -7,8 +7,9 @@
#ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_
#define STORAGE_LEVELDB_DB_TABLE_CACHE_H_
#include <cstdint>
#include <string>
#include <stdint.h>
#include "db/dbformat.h"
#include "leveldb/cache.h"
#include "leveldb/table.h"
@ -20,40 +21,39 @@ class Env;
class TableCache {
public:
TableCache(const std::string& dbname, const Options* options, int entries);
TableCache(const std::string& dbname, const Options& options, int entries);
TableCache(const TableCache&) = delete;
TableCache& operator=(const TableCache&) = delete;
~TableCache();
// Return an iterator for the specified file number (the corresponding
// file length must be exactly "file_size" bytes). If "tableptr" is
// non-NULL, also sets "*tableptr" to point to the Table object
// underlying the returned iterator, or NULL if no Table object underlies
// the returned iterator. The returned "*tableptr" object is owned by
// the cache and should not be deleted, and is valid for as long as the
// non-null, also sets "*tableptr" to point to the Table object
// underlying the returned iterator, or to nullptr if no Table object
// underlies the returned iterator. The returned "*tableptr" object is owned
// by the cache and should not be deleted, and is valid for as long as the
// returned iterator is live.
Iterator* NewIterator(const ReadOptions& options,
uint64_t file_number,
uint64_t file_size,
Table** tableptr = NULL);
Iterator* NewIterator(const ReadOptions& options, uint64_t file_number,
uint64_t file_size, Table** tableptr = nullptr);
// If a seek to internal key "k" in specified file finds an entry,
// call (*handle_result)(arg, found_key, found_value).
Status Get(const ReadOptions& options,
uint64_t file_number,
uint64_t file_size,
const Slice& k,
void* arg,
Status Get(const ReadOptions& options, uint64_t file_number,
uint64_t file_size, const Slice& k, void* arg,
void (*handle_result)(void*, const Slice&, const Slice&));
// Evict any entry for the specified file number
void Evict(uint64_t file_number);
private:
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
Env* const env_;
const std::string dbname_;
const Options* options_;
const Options& options_;
Cache* cache_;
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
};
} // namespace leveldb
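TableCache is internal to the db/ layer, but the comments above spell out the contract for NewIterator() and its optional tableptr out-parameter. A minimal sketch under that contract (file number and size are placeholders; the Table stays owned by the cache, while deleting the iterator drops the cache handle via the registered cleanup):
#include <cstdint>
#include <cstdio>
#include "db/table_cache.h"
#include "leveldb/iterator.h"
#include "leveldb/options.h"
void DumpTable(leveldb::TableCache* cache, uint64_t file_number,
               uint64_t file_size) {
  leveldb::Table* table = nullptr;
  leveldb::Iterator* iter = cache->NewIterator(leveldb::ReadOptions(),
                                               file_number, file_size, &table);
  // "table" must not be deleted here; it is valid while "iter" is live.
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    // Keys at this level are internal keys (user key + sequence + type).
    std::printf("entry with %zu key bytes\n", iter->key().size());
  }
  if (!iter->status().ok()) {
    std::printf("iteration error: %s\n", iter->status().ToString().c_str());
  }
  delete iter;  // also releases the cache entry
}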

View File

@ -12,15 +12,15 @@ namespace leveldb {
// Tag numbers for serialized VersionEdit. These numbers are written to
// disk and should not be changed.
enum Tag {
kComparator = 1,
kLogNumber = 2,
kNextFileNumber = 3,
kLastSequence = 4,
kCompactPointer = 5,
kDeletedFile = 6,
kNewFile = 7,
kComparator = 1,
kLogNumber = 2,
kNextFileNumber = 3,
kLastSequence = 4,
kCompactPointer = 5,
kDeletedFile = 6,
kNewFile = 7,
// 8 was used for large value refs
kPrevLogNumber = 9
kPrevLogNumber = 9
};
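Every field in a serialized VersionEdit is one of these varint tags followed by the field's payload, as EncodeTo() below shows. A small round-trip sketch of a single kDeletedFile record using the helpers from util/coding.h (the level and file number values are arbitrary):
#include <cassert>
#include <cstdint>
#include <string>
#include "leveldb/slice.h"
#include "util/coding.h"
int main() {
  // Encode: tag, then level as a varint32, then file number as a varint64.
  std::string record;
  leveldb::PutVarint32(&record, 6);       // kDeletedFile
  leveldb::PutVarint32(&record, 2);       // level
  leveldb::PutVarint64(&record, 123456);  // file number
  // Decode it back the same way VersionEdit::DecodeFrom() consumes it.
  leveldb::Slice input(record);
  uint32_t tag = 0, level = 0;
  uint64_t number = 0;
  bool ok = leveldb::GetVarint32(&input, &tag);
  ok = ok && leveldb::GetVarint32(&input, &level);
  ok = ok && leveldb::GetVarint64(&input, &number);
  assert(ok && tag == 6 && level == 2 && number == 123456 && input.empty());
  return ok ? 0 : 1;
}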
void VersionEdit::Clear() {
@ -34,6 +34,7 @@ void VersionEdit::Clear() {
has_prev_log_number_ = false;
has_next_file_number_ = false;
has_last_sequence_ = false;
compact_pointers_.clear();
deleted_files_.clear();
new_files_.clear();
}
@ -66,12 +67,10 @@ void VersionEdit::EncodeTo(std::string* dst) const {
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
}
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
iter != deleted_files_.end();
++iter) {
for (const auto& deleted_file_kvp : deleted_files_) {
PutVarint32(dst, kDeletedFile);
PutVarint32(dst, iter->first); // level
PutVarint64(dst, iter->second); // file number
PutVarint32(dst, deleted_file_kvp.first); // level
PutVarint64(dst, deleted_file_kvp.second); // file number
}
for (size_t i = 0; i < new_files_.size(); i++) {
@ -88,8 +87,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
static bool GetInternalKey(Slice* input, InternalKey* dst) {
Slice str;
if (GetLengthPrefixedSlice(input, &str)) {
dst->DecodeFrom(str);
return true;
return dst->DecodeFrom(str);
} else {
return false;
}
@ -97,8 +95,7 @@ static bool GetInternalKey(Slice* input, InternalKey* dst) {
static bool GetLevel(Slice* input, int* level) {
uint32_t v;
if (GetVarint32(input, &v) &&
v < config::kNumLevels) {
if (GetVarint32(input, &v) && v < config::kNumLevels) {
*level = v;
return true;
} else {
@ -109,7 +106,7 @@ static bool GetLevel(Slice* input, int* level) {
Status VersionEdit::DecodeFrom(const Slice& src) {
Clear();
Slice input = src;
const char* msg = NULL;
const char* msg = nullptr;
uint32_t tag;
// Temporary storage for parsing
@ -119,7 +116,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
Slice str;
InternalKey key;
while (msg == NULL && GetVarint32(&input, &tag)) {
while (msg == nullptr && GetVarint32(&input, &tag)) {
switch (tag) {
case kComparator:
if (GetLengthPrefixedSlice(&input, &str)) {
@ -163,8 +160,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
break;
case kCompactPointer:
if (GetLevel(&input, &level) &&
GetInternalKey(&input, &key)) {
if (GetLevel(&input, &level) && GetInternalKey(&input, &key)) {
compact_pointers_.push_back(std::make_pair(level, key));
} else {
msg = "compaction pointer";
@ -172,8 +168,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
break;
case kDeletedFile:
if (GetLevel(&input, &level) &&
GetVarint64(&input, &number)) {
if (GetLevel(&input, &level) && GetVarint64(&input, &number)) {
deleted_files_.insert(std::make_pair(level, number));
} else {
msg = "deleted file";
@ -181,8 +176,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
break;
case kNewFile:
if (GetLevel(&input, &level) &&
GetVarint64(&input, &f.number) &&
if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) &&
GetVarint64(&input, &f.file_size) &&
GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest)) {
@ -198,12 +192,12 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
}
}
if (msg == NULL && !input.empty()) {
if (msg == nullptr && !input.empty()) {
msg = "invalid tag";
}
Status result;
if (msg != NULL) {
if (msg != nullptr) {
result = Status::Corruption("VersionEdit", msg);
}
return result;
@ -238,13 +232,11 @@ std::string VersionEdit::DebugString() const {
r.append(" ");
r.append(compact_pointers_[i].second.DebugString());
}
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
iter != deleted_files_.end();
++iter) {
r.append("\n DeleteFile: ");
AppendNumberTo(&r, iter->first);
for (const auto& deleted_files_kvp : deleted_files_) {
r.append("\n RemoveFile: ");
AppendNumberTo(&r, deleted_files_kvp.first);
r.append(" ");
AppendNumberTo(&r, iter->second);
AppendNumberTo(&r, deleted_files_kvp.second);
}
for (size_t i = 0; i < new_files_.size(); i++) {
const FileMetaData& f = new_files_[i].second;

View File

@ -8,6 +8,7 @@
#include <set>
#include <utility>
#include <vector>
#include "db/dbformat.h"
namespace leveldb {
@ -15,20 +16,20 @@ namespace leveldb {
class VersionSet;
struct FileMetaData {
int refs;
int allowed_seeks; // Seeks allowed until compaction
uint64_t number;
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) {}
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
int refs;
int allowed_seeks; // Seeks allowed until compaction
uint64_t number;
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table
};
class VersionEdit {
public:
VersionEdit() { Clear(); }
~VersionEdit() { }
~VersionEdit() = default;
void Clear();
@ -59,10 +60,8 @@ class VersionEdit {
// Add the specified file at the specified number.
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
void AddFile(int level, uint64_t file,
uint64_t file_size,
const InternalKey& smallest,
const InternalKey& largest) {
void AddFile(int level, uint64_t file, uint64_t file_size,
const InternalKey& smallest, const InternalKey& largest) {
FileMetaData f;
f.number = file;
f.file_size = file_size;
@ -72,7 +71,7 @@ class VersionEdit {
}
// Delete the specified "file" from the specified "level".
void DeleteFile(int level, uint64_t file) {
void RemoveFile(int level, uint64_t file) {
deleted_files_.insert(std::make_pair(level, file));
}
@ -84,7 +83,7 @@ class VersionEdit {
private:
friend class VersionSet;
typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
typedef std::set<std::pair<int, uint64_t>> DeletedFileSet;
std::string comparator_;
uint64_t log_number_;
@ -97,9 +96,9 @@ class VersionEdit {
bool has_next_file_number_;
bool has_last_sequence_;
std::vector< std::pair<int, InternalKey> > compact_pointers_;
std::vector<std::pair<int, InternalKey>> compact_pointers_;
DeletedFileSet deleted_files_;
std::vector< std::pair<int, FileMetaData> > new_files_;
std::vector<std::pair<int, FileMetaData>> new_files_;
};
} // namespace leveldb

View File

@ -3,7 +3,8 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/version_edit.h"
#include "util/testharness.h"
#include "gtest/gtest.h"
namespace leveldb {
@ -17,8 +18,6 @@ static void TestEncodeDecode(const VersionEdit& edit) {
ASSERT_EQ(encoded, encoded2);
}
class VersionEditTest { };
TEST(VersionEditTest, EncodeDecode) {
static const uint64_t kBig = 1ull << 50;
@ -28,7 +27,7 @@ TEST(VersionEditTest, EncodeDecode) {
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
InternalKey("foo", kBig + 500 + i, kTypeValue),
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
edit.DeleteFile(4, kBig + 700 + i);
edit.RemoveFile(4, kBig + 700 + i);
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
}
@ -40,7 +39,3 @@ TEST(VersionEditTest, EncodeDecode) {
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

File diff suppressed because it is too large.

View File

@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <vector>
#include "db/dbformat.h"
#include "db/version_edit.h"
#include "port/port.h"
@ -25,7 +26,9 @@
namespace leveldb {
namespace log { class Writer; }
namespace log {
class Writer;
}
class Compaction;
class Iterator;
@ -39,25 +42,28 @@ class WritableFile;
// Return the smallest index i such that files[i]->largest >= key.
// Return files.size() if there is no such file.
// REQUIRES: "files" contains a sorted list of non-overlapping files.
extern int FindFile(const InternalKeyComparator& icmp,
const std::vector<FileMetaData*>& files,
const Slice& key);
int FindFile(const InternalKeyComparator& icmp,
const std::vector<FileMetaData*>& files, const Slice& key);
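The comment above pins FindFile() down completely: a binary search over a sorted, non-overlapping file list, returning the first file whose largest key reaches the lookup key. A sketch consistent with that contract (the actual implementation lives in db/version_set.cc, whose diff is collapsed above as too large):
#include <cstddef>
#include <vector>
#include "db/dbformat.h"
#include "db/version_edit.h"
#include "leveldb/slice.h"
// Sketch only: smallest index i with files[i]->largest >= key,
// or files.size() if every file ends before key.
int FindFileSketch(const leveldb::InternalKeyComparator& icmp,
                   const std::vector<leveldb::FileMetaData*>& files,
                   const leveldb::Slice& key) {
  size_t left = 0;
  size_t right = files.size();
  while (left < right) {
    size_t mid = (left + right) / 2;
    if (icmp.Compare(files[mid]->largest.Encode(), key) < 0) {
      left = mid + 1;  // files[mid] lies entirely before key
    } else {
      right = mid;     // files[mid] may be the first file reaching key
    }
  }
  return static_cast<int>(left);
}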
// Returns true iff some file in "files" overlaps the user key range
// [*smallest,*largest].
// smallest==NULL represents a key smaller than all keys in the DB.
// largest==NULL represents a key larger than all keys in the DB.
// smallest==nullptr represents a key smaller than all keys in the DB.
// largest==nullptr represents a key larger than all keys in the DB.
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
// in sorted order.
extern bool SomeFileOverlapsRange(
const InternalKeyComparator& icmp,
bool disjoint_sorted_files,
const std::vector<FileMetaData*>& files,
const Slice* smallest_user_key,
const Slice* largest_user_key);
bool SomeFileOverlapsRange(const InternalKeyComparator& icmp,
bool disjoint_sorted_files,
const std::vector<FileMetaData*>& files,
const Slice* smallest_user_key,
const Slice* largest_user_key);
class Version {
public:
struct GetStats {
FileMetaData* seek_file;
int seek_file_level;
};
// Append to *iters a sequence of iterators that will
// yield the contents of this Version when merged together.
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
@ -66,10 +72,6 @@ class Version {
// Lookup the value for key. If found, store it in *val and
// return OK. Else return a non-OK status. Fills *stats.
// REQUIRES: lock is not held
struct GetStats {
FileMetaData* seek_file;
int seek_file_level;
};
Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
GetStats* stats);
@ -91,16 +93,15 @@ class Version {
void GetOverlappingInputs(
int level,
const InternalKey* begin, // NULL means before all keys
const InternalKey* end, // NULL means after all keys
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs);
// Returns true iff some file in the specified level overlaps
// some part of [*smallest_user_key,*largest_user_key].
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
// largest_user_key==NULL represents a key larger than all keys in the DB.
bool OverlapInLevel(int level,
const Slice* smallest_user_key,
// smallest_user_key==nullptr represents a key smaller than all the DB's keys.
// largest_user_key==nullptr represents a key larger than all the DB's keys.
bool OverlapInLevel(int level, const Slice* smallest_user_key,
const Slice* largest_user_key);
// Return the level at which we should place a new memtable compaction
@ -118,6 +119,22 @@ class Version {
friend class VersionSet;
class LevelFileNumIterator;
explicit Version(VersionSet* vset)
: vset_(vset),
next_(this),
prev_(this),
refs_(0),
file_to_compact_(nullptr),
file_to_compact_level_(-1),
compaction_score_(-1),
compaction_level_(-1) {}
Version(const Version&) = delete;
Version& operator=(const Version&) = delete;
~Version();
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
// Call func(arg, level, f) for every file that overlaps user_key in
@ -125,14 +142,13 @@ class Version {
// false, makes no more calls.
//
// REQUIRES: user portion of internal_key == user_key.
void ForEachOverlapping(Slice user_key, Slice internal_key,
void* arg,
void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg,
bool (*func)(void*, int, FileMetaData*));
VersionSet* vset_; // VersionSet to which this Version belongs
Version* next_; // Next version in linked list
Version* prev_; // Previous version in linked list
int refs_; // Number of live refs to this version
VersionSet* vset_; // VersionSet to which this Version belongs
Version* next_; // Next version in linked list
Version* prev_; // Previous version in linked list
int refs_; // Number of live refs to this version
// List of files per level
std::vector<FileMetaData*> files_[config::kNumLevels];
@ -146,28 +162,15 @@ class Version {
// are initialized by Finalize().
double compaction_score_;
int compaction_level_;
explicit Version(VersionSet* vset)
: vset_(vset), next_(this), prev_(this), refs_(0),
file_to_compact_(NULL),
file_to_compact_level_(-1),
compaction_score_(-1),
compaction_level_(-1) {
}
~Version();
// No copying allowed
Version(const Version&);
void operator=(const Version&);
};
class VersionSet {
public:
VersionSet(const std::string& dbname,
const Options* options,
TableCache* table_cache,
const InternalKeyComparator*);
VersionSet(const std::string& dbname, const Options* options,
TableCache* table_cache, const InternalKeyComparator*);
VersionSet(const VersionSet&) = delete;
VersionSet& operator=(const VersionSet&) = delete;
~VersionSet();
// Apply *edit to the current version to form a new descriptor that
@ -179,7 +182,7 @@ class VersionSet {
EXCLUSIVE_LOCKS_REQUIRED(mu);
// Recover the last saved descriptor from persistent storage.
Status Recover(bool *save_manifest);
Status Recover(bool* save_manifest);
// Return the current version.
Version* current() const { return current_; }
@ -225,19 +228,17 @@ class VersionSet {
uint64_t PrevLogNumber() const { return prev_log_number_; }
// Pick level and inputs for a new compaction.
// Returns NULL if there is no compaction to be done.
// Returns nullptr if there is no compaction to be done.
// Otherwise returns a pointer to a heap-allocated object that
// describes the compaction. Caller should delete the result.
Compaction* PickCompaction();
// Return a compaction object for compacting the range [begin,end] in
// the specified level. Returns NULL if there is nothing in that
// the specified level. Returns nullptr if there is nothing in that
// level that overlaps the specified range. Caller should delete
// the result.
Compaction* CompactRange(
int level,
const InternalKey* begin,
const InternalKey* end);
Compaction* CompactRange(int level, const InternalKey* begin,
const InternalKey* end);
// Return the maximum overlapping data (in bytes) at next level for any
// file at a level >= 1.
@ -250,7 +251,7 @@ class VersionSet {
// Returns true iff some level needs a compaction.
bool NeedsCompaction() const {
Version* v = current_;
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr);
}
// Add all files listed in any live version to *live.
@ -278,14 +279,12 @@ class VersionSet {
void Finalize(Version* v);
void GetRange(const std::vector<FileMetaData*>& inputs,
InternalKey* smallest,
void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest,
InternalKey* largest);
void GetRange2(const std::vector<FileMetaData*>& inputs1,
const std::vector<FileMetaData*>& inputs2,
InternalKey* smallest,
InternalKey* largest);
InternalKey* smallest, InternalKey* largest);
void SetupOtherInputs(Compaction* c);
@ -314,10 +313,6 @@ class VersionSet {
// Per-level key at which the next compaction at that level should start.
// Either an empty string, or a valid InternalKey.
std::string compact_pointer_[config::kNumLevels];
// No copying allowed
VersionSet(const VersionSet&);
void operator=(const VersionSet&);
};
// A Compaction encapsulates information about a compaction.
@ -366,7 +361,7 @@ class Compaction {
friend class Version;
friend class VersionSet;
explicit Compaction(int level);
Compaction(const Options* options, int level);
int level_;
uint64_t max_output_file_size_;
@ -374,9 +369,9 @@ class Compaction {
VersionEdit edit_;
// Each compaction reads inputs from "level_" and "level_+1"
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
// State used to check for number of of overlapping grandparent files
// State used to check for number of overlapping grandparent files
// (parent == level_ + 1, grandparent == level_ + 2)
std::vector<FileMetaData*> grandparents_;
size_t grandparent_index_; // Index in grandparent_starts_

View File

@ -3,18 +3,16 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/version_set.h"
#include "gtest/gtest.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
class FindFileTest {
class FindFileTest : public testing::Test {
public:
std::vector<FileMetaData*> files_;
bool disjoint_sorted_files_;
FindFileTest() : disjoint_sorted_files_(true) { }
FindFileTest() : disjoint_sorted_files_(true) {}
~FindFileTest() {
for (int i = 0; i < files_.size(); i++) {
@ -40,23 +38,28 @@ class FindFileTest {
bool Overlaps(const char* smallest, const char* largest) {
InternalKeyComparator cmp(BytewiseComparator());
Slice s(smallest != NULL ? smallest : "");
Slice l(largest != NULL ? largest : "");
Slice s(smallest != nullptr ? smallest : "");
Slice l(largest != nullptr ? largest : "");
return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_,
(smallest != NULL ? &s : NULL),
(largest != NULL ? &l : NULL));
(smallest != nullptr ? &s : nullptr),
(largest != nullptr ? &l : nullptr));
}
bool disjoint_sorted_files_;
private:
std::vector<FileMetaData*> files_;
};
TEST(FindFileTest, Empty) {
TEST_F(FindFileTest, Empty) {
ASSERT_EQ(0, Find("foo"));
ASSERT_TRUE(! Overlaps("a", "z"));
ASSERT_TRUE(! Overlaps(NULL, "z"));
ASSERT_TRUE(! Overlaps("a", NULL));
ASSERT_TRUE(! Overlaps(NULL, NULL));
ASSERT_TRUE(!Overlaps("a", "z"));
ASSERT_TRUE(!Overlaps(nullptr, "z"));
ASSERT_TRUE(!Overlaps("a", nullptr));
ASSERT_TRUE(!Overlaps(nullptr, nullptr));
}
TEST(FindFileTest, Single) {
TEST_F(FindFileTest, Single) {
Add("p", "q");
ASSERT_EQ(0, Find("a"));
ASSERT_EQ(0, Find("p"));
@ -65,8 +68,8 @@ TEST(FindFileTest, Single) {
ASSERT_EQ(1, Find("q1"));
ASSERT_EQ(1, Find("z"));
ASSERT_TRUE(! Overlaps("a", "b"));
ASSERT_TRUE(! Overlaps("z1", "z2"));
ASSERT_TRUE(!Overlaps("a", "b"));
ASSERT_TRUE(!Overlaps("z1", "z2"));
ASSERT_TRUE(Overlaps("a", "p"));
ASSERT_TRUE(Overlaps("a", "q"));
ASSERT_TRUE(Overlaps("a", "z"));
@ -78,16 +81,15 @@ TEST(FindFileTest, Single) {
ASSERT_TRUE(Overlaps("q", "q"));
ASSERT_TRUE(Overlaps("q", "q1"));
ASSERT_TRUE(! Overlaps(NULL, "j"));
ASSERT_TRUE(! Overlaps("r", NULL));
ASSERT_TRUE(Overlaps(NULL, "p"));
ASSERT_TRUE(Overlaps(NULL, "p1"));
ASSERT_TRUE(Overlaps("q", NULL));
ASSERT_TRUE(Overlaps(NULL, NULL));
ASSERT_TRUE(!Overlaps(nullptr, "j"));
ASSERT_TRUE(!Overlaps("r", nullptr));
ASSERT_TRUE(Overlaps(nullptr, "p"));
ASSERT_TRUE(Overlaps(nullptr, "p1"));
ASSERT_TRUE(Overlaps("q", nullptr));
ASSERT_TRUE(Overlaps(nullptr, nullptr));
}
TEST(FindFileTest, Multiple) {
TEST_F(FindFileTest, Multiple) {
Add("150", "200");
Add("200", "250");
Add("300", "350");
@ -110,10 +112,10 @@ TEST(FindFileTest, Multiple) {
ASSERT_EQ(3, Find("450"));
ASSERT_EQ(4, Find("451"));
ASSERT_TRUE(! Overlaps("100", "149"));
ASSERT_TRUE(! Overlaps("251", "299"));
ASSERT_TRUE(! Overlaps("451", "500"));
ASSERT_TRUE(! Overlaps("351", "399"));
ASSERT_TRUE(!Overlaps("100", "149"));
ASSERT_TRUE(!Overlaps("251", "299"));
ASSERT_TRUE(!Overlaps("451", "500"));
ASSERT_TRUE(!Overlaps("351", "399"));
ASSERT_TRUE(Overlaps("100", "150"));
ASSERT_TRUE(Overlaps("100", "200"));
@ -125,41 +127,41 @@ TEST(FindFileTest, Multiple) {
ASSERT_TRUE(Overlaps("450", "500"));
}
TEST(FindFileTest, MultipleNullBoundaries) {
TEST_F(FindFileTest, MultipleNullBoundaries) {
Add("150", "200");
Add("200", "250");
Add("300", "350");
Add("400", "450");
ASSERT_TRUE(! Overlaps(NULL, "149"));
ASSERT_TRUE(! Overlaps("451", NULL));
ASSERT_TRUE(Overlaps(NULL, NULL));
ASSERT_TRUE(Overlaps(NULL, "150"));
ASSERT_TRUE(Overlaps(NULL, "199"));
ASSERT_TRUE(Overlaps(NULL, "200"));
ASSERT_TRUE(Overlaps(NULL, "201"));
ASSERT_TRUE(Overlaps(NULL, "400"));
ASSERT_TRUE(Overlaps(NULL, "800"));
ASSERT_TRUE(Overlaps("100", NULL));
ASSERT_TRUE(Overlaps("200", NULL));
ASSERT_TRUE(Overlaps("449", NULL));
ASSERT_TRUE(Overlaps("450", NULL));
ASSERT_TRUE(!Overlaps(nullptr, "149"));
ASSERT_TRUE(!Overlaps("451", nullptr));
ASSERT_TRUE(Overlaps(nullptr, nullptr));
ASSERT_TRUE(Overlaps(nullptr, "150"));
ASSERT_TRUE(Overlaps(nullptr, "199"));
ASSERT_TRUE(Overlaps(nullptr, "200"));
ASSERT_TRUE(Overlaps(nullptr, "201"));
ASSERT_TRUE(Overlaps(nullptr, "400"));
ASSERT_TRUE(Overlaps(nullptr, "800"));
ASSERT_TRUE(Overlaps("100", nullptr));
ASSERT_TRUE(Overlaps("200", nullptr));
ASSERT_TRUE(Overlaps("449", nullptr));
ASSERT_TRUE(Overlaps("450", nullptr));
}
TEST(FindFileTest, OverlapSequenceChecks) {
TEST_F(FindFileTest, OverlapSequenceChecks) {
Add("200", "200", 5000, 3000);
ASSERT_TRUE(! Overlaps("199", "199"));
ASSERT_TRUE(! Overlaps("201", "300"));
ASSERT_TRUE(!Overlaps("199", "199"));
ASSERT_TRUE(!Overlaps("201", "300"));
ASSERT_TRUE(Overlaps("200", "200"));
ASSERT_TRUE(Overlaps("190", "200"));
ASSERT_TRUE(Overlaps("200", "210"));
}
TEST(FindFileTest, OverlappingFiles) {
TEST_F(FindFileTest, OverlappingFiles) {
Add("150", "600");
Add("400", "500");
disjoint_sorted_files_ = false;
ASSERT_TRUE(! Overlaps("100", "149"));
ASSERT_TRUE(! Overlaps("601", "700"));
ASSERT_TRUE(!Overlaps("100", "149"));
ASSERT_TRUE(!Overlaps("601", "700"));
ASSERT_TRUE(Overlaps("100", "150"));
ASSERT_TRUE(Overlaps("100", "200"));
ASSERT_TRUE(Overlaps("100", "300"));
@ -172,8 +174,158 @@ TEST(FindFileTest, OverlappingFiles) {
ASSERT_TRUE(Overlaps("600", "700"));
}
} // namespace leveldb
void AddBoundaryInputs(const InternalKeyComparator& icmp,
const std::vector<FileMetaData*>& level_files,
std::vector<FileMetaData*>* compaction_files);
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
class AddBoundaryInputsTest : public testing::Test {
public:
std::vector<FileMetaData*> level_files_;
std::vector<FileMetaData*> compaction_files_;
std::vector<FileMetaData*> all_files_;
InternalKeyComparator icmp_;
AddBoundaryInputsTest() : icmp_(BytewiseComparator()) {}
~AddBoundaryInputsTest() {
for (size_t i = 0; i < all_files_.size(); ++i) {
delete all_files_[i];
}
all_files_.clear();
}
FileMetaData* CreateFileMetaData(uint64_t number, InternalKey smallest,
InternalKey largest) {
FileMetaData* f = new FileMetaData();
f->number = number;
f->smallest = smallest;
f->largest = largest;
all_files_.push_back(f);
return f;
}
};
TEST_F(AddBoundaryInputsTest, TestEmptyFileSets) {
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_TRUE(compaction_files_.empty());
ASSERT_TRUE(level_files_.empty());
}
TEST_F(AddBoundaryInputsTest, TestEmptyLevelFiles) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
InternalKey(InternalKey("100", 1, kTypeValue)));
compaction_files_.push_back(f1);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_EQ(1, compaction_files_.size());
ASSERT_EQ(f1, compaction_files_[0]);
ASSERT_TRUE(level_files_.empty());
}
TEST_F(AddBoundaryInputsTest, TestEmptyCompactionFiles) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
InternalKey(InternalKey("100", 1, kTypeValue)));
level_files_.push_back(f1);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_TRUE(compaction_files_.empty());
ASSERT_EQ(1, level_files_.size());
ASSERT_EQ(f1, level_files_[0]);
}
TEST_F(AddBoundaryInputsTest, TestNoBoundaryFiles) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
InternalKey(InternalKey("100", 1, kTypeValue)));
FileMetaData* f2 =
CreateFileMetaData(1, InternalKey("200", 2, kTypeValue),
InternalKey(InternalKey("200", 1, kTypeValue)));
FileMetaData* f3 =
CreateFileMetaData(1, InternalKey("300", 2, kTypeValue),
InternalKey(InternalKey("300", 1, kTypeValue)));
level_files_.push_back(f3);
level_files_.push_back(f2);
level_files_.push_back(f1);
compaction_files_.push_back(f2);
compaction_files_.push_back(f3);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_EQ(2, compaction_files_.size());
}
TEST_F(AddBoundaryInputsTest, TestOneBoundaryFiles) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 3, kTypeValue),
InternalKey(InternalKey("100", 2, kTypeValue)));
FileMetaData* f2 =
CreateFileMetaData(1, InternalKey("100", 1, kTypeValue),
InternalKey(InternalKey("200", 3, kTypeValue)));
FileMetaData* f3 =
CreateFileMetaData(1, InternalKey("300", 2, kTypeValue),
InternalKey(InternalKey("300", 1, kTypeValue)));
level_files_.push_back(f3);
level_files_.push_back(f2);
level_files_.push_back(f1);
compaction_files_.push_back(f1);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_EQ(2, compaction_files_.size());
ASSERT_EQ(f1, compaction_files_[0]);
ASSERT_EQ(f2, compaction_files_[1]);
}
TEST_F(AddBoundaryInputsTest, TestTwoBoundaryFiles) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
InternalKey(InternalKey("100", 5, kTypeValue)));
FileMetaData* f2 =
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
InternalKey(InternalKey("300", 1, kTypeValue)));
FileMetaData* f3 =
CreateFileMetaData(1, InternalKey("100", 4, kTypeValue),
InternalKey(InternalKey("100", 3, kTypeValue)));
level_files_.push_back(f2);
level_files_.push_back(f3);
level_files_.push_back(f1);
compaction_files_.push_back(f1);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_EQ(3, compaction_files_.size());
ASSERT_EQ(f1, compaction_files_[0]);
ASSERT_EQ(f3, compaction_files_[1]);
ASSERT_EQ(f2, compaction_files_[2]);
}
TEST_F(AddBoundaryInputsTest, TestDisjoinFilePointers) {
FileMetaData* f1 =
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
InternalKey(InternalKey("100", 5, kTypeValue)));
FileMetaData* f2 =
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
InternalKey(InternalKey("100", 5, kTypeValue)));
FileMetaData* f3 =
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
InternalKey(InternalKey("300", 1, kTypeValue)));
FileMetaData* f4 =
CreateFileMetaData(1, InternalKey("100", 4, kTypeValue),
InternalKey(InternalKey("100", 3, kTypeValue)));
level_files_.push_back(f2);
level_files_.push_back(f3);
level_files_.push_back(f4);
compaction_files_.push_back(f1);
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
ASSERT_EQ(3, compaction_files_.size());
ASSERT_EQ(f1, compaction_files_[0]);
ASSERT_EQ(f4, compaction_files_[1]);
ASSERT_EQ(f3, compaction_files_[2]);
}
} // namespace leveldb

View File

@ -15,10 +15,10 @@
#include "leveldb/write_batch.h"
#include "leveldb/db.h"
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "leveldb/db.h"
#include "util/coding.h"
namespace leveldb {
@ -26,19 +26,19 @@ namespace leveldb {
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
static const size_t kHeader = 12;
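That 12-byte header is a fixed64 sequence number followed by a fixed32 record count, which is what WriteBatchInternal::Sequence()/Count() and their setters read and write with the fixed-width helpers from util/coding.h. A standalone sketch of the layout, operating on a plain std::string the same way rep_ is used:
#include <cassert>
#include <cstdint>
#include <string>
#include "util/coding.h"
int main() {
  // rep_ layout: [ sequence : fixed64 ][ count : fixed32 ][ records ... ]
  std::string rep(12, '\0');  // kHeader bytes, as after Clear()
  // Effectively what SetSequence() and SetCount() do:
  leveldb::EncodeFixed64(&rep[0], 100);  // sequence number of the batch
  leveldb::EncodeFixed32(&rep[8], 3);    // number of records in the batch
  // Effectively what Sequence() and Count() do:
  uint64_t sequence = leveldb::DecodeFixed64(rep.data());
  uint32_t count = leveldb::DecodeFixed32(rep.data() + 8);
  assert(sequence == 100 && count == 3);
  return 0;
}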
WriteBatch::WriteBatch() {
Clear();
}
WriteBatch::WriteBatch() { Clear(); }
WriteBatch::~WriteBatch() { }
WriteBatch::~WriteBatch() = default;
WriteBatch::Handler::~Handler() { }
WriteBatch::Handler::~Handler() = default;
void WriteBatch::Clear() {
rep_.clear();
rep_.resize(kHeader);
}
size_t WriteBatch::ApproximateSize() const { return rep_.size(); }
Status WriteBatch::Iterate(Handler* handler) const {
Slice input(rep_);
if (input.size() < kHeader) {
@ -108,25 +108,28 @@ void WriteBatch::Delete(const Slice& key) {
PutLengthPrefixedSlice(&rep_, key);
}
void WriteBatch::Append(const WriteBatch& source) {
WriteBatchInternal::Append(this, &source);
}
namespace {
class MemTableInserter : public WriteBatch::Handler {
public:
SequenceNumber sequence_;
MemTable* mem_;
virtual void Put(const Slice& key, const Slice& value) {
void Put(const Slice& key, const Slice& value) override {
mem_->Add(sequence_, kTypeValue, key, value);
sequence_++;
}
virtual void Delete(const Slice& key) {
void Delete(const Slice& key) override {
mem_->Add(sequence_, kTypeDeletion, key, Slice());
sequence_++;
}
};
} // namespace
Status WriteBatchInternal::InsertInto(const WriteBatch* b,
MemTable* memtable) {
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* memtable) {
MemTableInserter inserter;
inserter.sequence_ = WriteBatchInternal::Sequence(b);
inserter.mem_ = memtable;

View File

@ -29,13 +29,9 @@ class WriteBatchInternal {
// this batch.
static void SetSequence(WriteBatch* batch, SequenceNumber seq);
static Slice Contents(const WriteBatch* batch) {
return Slice(batch->rep_);
}
static Slice Contents(const WriteBatch* batch) { return Slice(batch->rep_); }
static size_t ByteSize(const WriteBatch* batch) {
return batch->rep_.size();
}
static size_t ByteSize(const WriteBatch* batch) { return batch->rep_.size(); }
static void SetContents(WriteBatch* batch, const Slice& contents);
@ -46,5 +42,4 @@ class WriteBatchInternal {
} // namespace leveldb
#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_

View File

@ -2,13 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "leveldb/db.h"
#include "gtest/gtest.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "util/logging.h"
#include "util/testharness.h"
namespace leveldb {
@ -22,7 +21,7 @@ static std::string PrintContents(WriteBatch* b) {
Iterator* iter = mem->NewIterator();
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ParsedInternalKey ikey;
ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey));
EXPECT_TRUE(ParseInternalKey(iter->key(), &ikey));
switch (ikey.type) {
case kTypeValue:
state.append("Put(");
@ -52,8 +51,6 @@ static std::string PrintContents(WriteBatch* b) {
return state;
}
class WriteBatchTest { };
TEST(WriteBatchTest, Empty) {
WriteBatch batch;
ASSERT_EQ("", PrintContents(&batch));
@ -68,10 +65,11 @@ TEST(WriteBatchTest, Multiple) {
WriteBatchInternal::SetSequence(&batch, 100);
ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
ASSERT_EQ(3, WriteBatchInternal::Count(&batch));
ASSERT_EQ("Put(baz, boo)@102"
"Delete(box)@101"
"Put(foo, bar)@100",
PrintContents(&batch));
ASSERT_EQ(
"Put(baz, boo)@102"
"Delete(box)@101"
"Put(foo, bar)@100",
PrintContents(&batch));
}
TEST(WriteBatchTest, Corruption) {
@ -81,40 +79,54 @@ TEST(WriteBatchTest, Corruption) {
WriteBatchInternal::SetSequence(&batch, 200);
Slice contents = WriteBatchInternal::Contents(&batch);
WriteBatchInternal::SetContents(&batch,
Slice(contents.data(),contents.size()-1));
ASSERT_EQ("Put(foo, bar)@200"
"ParseError()",
PrintContents(&batch));
Slice(contents.data(), contents.size() - 1));
ASSERT_EQ(
"Put(foo, bar)@200"
"ParseError()",
PrintContents(&batch));
}
TEST(WriteBatchTest, Append) {
WriteBatch b1, b2;
WriteBatchInternal::SetSequence(&b1, 200);
WriteBatchInternal::SetSequence(&b2, 300);
WriteBatchInternal::Append(&b1, &b2);
ASSERT_EQ("",
PrintContents(&b1));
b1.Append(b2);
ASSERT_EQ("", PrintContents(&b1));
b2.Put("a", "va");
WriteBatchInternal::Append(&b1, &b2);
ASSERT_EQ("Put(a, va)@200",
PrintContents(&b1));
b1.Append(b2);
ASSERT_EQ("Put(a, va)@200", PrintContents(&b1));
b2.Clear();
b2.Put("b", "vb");
WriteBatchInternal::Append(&b1, &b2);
ASSERT_EQ("Put(a, va)@200"
"Put(b, vb)@201",
PrintContents(&b1));
b1.Append(b2);
ASSERT_EQ(
"Put(a, va)@200"
"Put(b, vb)@201",
PrintContents(&b1));
b2.Delete("foo");
WriteBatchInternal::Append(&b1, &b2);
ASSERT_EQ("Put(a, va)@200"
"Put(b, vb)@202"
"Put(b, vb)@201"
"Delete(foo)@203",
PrintContents(&b1));
b1.Append(b2);
ASSERT_EQ(
"Put(a, va)@200"
"Put(b, vb)@202"
"Put(b, vb)@201"
"Delete(foo)@203",
PrintContents(&b1));
}
TEST(WriteBatchTest, ApproximateSize) {
WriteBatch batch;
size_t empty_size = batch.ApproximateSize();
batch.Put(Slice("foo"), Slice("bar"));
size_t one_key_size = batch.ApproximateSize();
ASSERT_LT(empty_size, one_key_size);
batch.Put(Slice("baz"), Slice("boo"));
size_t two_keys_size = batch.ApproximateSize();
ASSERT_LT(one_key_size, two_keys_size);
batch.Delete(Slice("box"));
size_t post_delete_size = batch.ApproximateSize();
ASSERT_LT(two_keys_size, post_delete_size);
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

View File

@ -83,23 +83,23 @@ div.bsql {
<p>Google, July 2011</p>
<hr>
<p>In order to test LevelDB's performance, we benchmark it against other well-established database implementations. We compare LevelDB (revision 39) against <a href="http://www.sqlite.org/">SQLite3</a> (version 3.7.6.3) and <a href="http://fallabs.com/kyotocabinet/spex.html">Kyoto Cabinet's</a> (version 1.2.67) TreeDB (a B+Tree based key-value store). We would like to acknowledge Scott Hess and Mikio Hirabayashi for their suggestions and contributions to the SQLite3 and Kyoto Cabinet benchmarks, respectively.</p>
<p>In order to test LevelDB's performance, we benchmark it against other well-established database implementations. We compare LevelDB (revision 39) against <a href="https://www.sqlite.org/">SQLite3</a> (version 3.7.6.3) and <a href="https://dbmx.net/kyotocabinet/spex.html">Kyoto Cabinet's</a> (version 1.2.67) TreeDB (a B+Tree based key-value store). We would like to acknowledge Scott Hess and Mikio Hirabayashi for their suggestions and contributions to the SQLite3 and Kyoto Cabinet benchmarks, respectively.</p>
<p>Benchmarks were all performed on a six-core Intel(R) Xeon(R) CPU X5650 @ 2.67GHz, with 12288 KB of total L3 cache and 12 GB of DDR3 RAM at 1333 MHz. (Note that LevelDB uses at most two CPUs since the benchmarks are single threaded: one to run the benchmark, and one for background compactions.) We ran the benchmarks on two machines (with identical processors), one with an Ext3 file system and one with an Ext4 file system. The machine with the Ext3 file system has a SATA Hitachi HDS721050CLA362 hard drive. The machine with the Ext4 file system has a SATA Samsung HD502HJ hard drive. Both hard drives spin at 7200 RPM and have hard drive write-caching enabled (using `hdparm -W 1 [device]`). The numbers reported below are the median of three measurements.</p>
<h4>Benchmark Source Code</h4>
<p>We wrote benchmark tools for SQLite and Kyoto TreeDB based on LevelDB's <span class="code">db_bench</span>. The code for each of the benchmarks resides here:</p>
<ul>
<li> <b>LevelDB:</b> <a href="http://code.google.com/p/leveldb/source/browse/trunk/db/db_bench.cc">db/db_bench.cc</a>.</li>
<li> <b>SQLite:</b> <a href="http://code.google.com/p/leveldb/source/browse/#svn%2Ftrunk%2Fdoc%2Fbench%2Fdb_bench_sqlite3.cc">doc/bench/db_bench_sqlite3.cc</a>.</li>
<li> <b>Kyoto TreeDB:</b> <a href="http://code.google.com/p/leveldb/source/browse/#svn%2Ftrunk%2Fdoc%2Fbench%2Fdb_bench_tree_db.cc">doc/bench/db_bench_tree_db.cc</a>.</li>
<li> <b>LevelDB:</b> <a href="https://github.com/google/leveldb/blob/main/benchmarks/db_bench.cc">benchmarks/db_bench.cc</a>.</li>
<li> <b>SQLite:</b> <a href="https://github.com/google/leveldb/blob/main/benchmarks/db_bench_sqlite3.cc">benchmarks/db_bench_sqlite3.cc</a>.</li>
<li> <b>Kyoto TreeDB:</b> <a href="https://github.com/google/leveldb/blob/main/benchmarks/db_bench_tree_db.cc">benchmarks/db_bench_tree_db.cc</a>.</li>
</ul>
<h4>Custom Build Specifications</h4>
<ul>
<li>LevelDB: LevelDB was compiled with the <a href="http://code.google.com/p/google-perftools">tcmalloc</a> library and the <a href="http://code.google.com/p/snappy/">Snappy</a> compression library (revision 33). Assertions were disabled.</li>
<li>TreeDB: TreeDB was compiled using the <a href="http://www.oberhumer.com/opensource/lzo/">LZO</a> compression library (version 2.03). Furthermore, we enabled the TSMALL and TLINEAR options when opening the database in order to reduce the footprint of each record.</li>
<li>SQLite: We tuned SQLite's performance, by setting its locking mode to exclusive. We also enabled SQLite's <a href="http://www.sqlite.org/draft/wal.html">write-ahead logging</a>.</li>
<li>LevelDB: LevelDB was compiled with the <a href="https://github.com/gperftools/gperftools">tcmalloc</a> library and the <a href="https://github.com/google/snappy">Snappy</a> compression library (revision 33). Assertions were disabled.</li>
<li>TreeDB: TreeDB was compiled using the <a href="https://www.oberhumer.com/opensource/lzo/">LZO</a> compression library (version 2.03). Furthermore, we enabled the TSMALL and TLINEAR options when opening the database in order to reduce the footprint of each record.</li>
<li>SQLite: We tuned SQLite's performance, by setting its locking mode to exclusive. We also enabled SQLite's <a href="https://www.sqlite.org/draft/wal.html">write-ahead logging</a>.</li>
</ul>
<h2>1. Baseline Performance</h2>
@ -451,7 +451,7 @@ performance may very well be better with compression if it allows more
of the working set to fit in memory.</p>
<h2>Note about Ext4 Filesystems</h2>
<p>The preceding numbers are for an ext3 file system. Synchronous writes are much slower under <a href="http://en.wikipedia.org/wiki/Ext4">ext4</a> (LevelDB drops to ~31 writes / second and TreeDB drops to ~5 writes / second; SQLite3's synchronous writes do not noticeably drop) due to ext4's different handling of <span class="code">fsync</span> / <span class="code">msync</span> calls. Even LevelDB's asynchronous write performance drops somewhat since it spreads its storage across multiple files and issues <span class="code">fsync</span> calls when switching to a new file.</p>
<p>The preceding numbers are for an ext3 file system. Synchronous writes are much slower under <a href="https://en.wikipedia.org/wiki/Ext4">ext4</a> (LevelDB drops to ~31 writes / second and TreeDB drops to ~5 writes / second; SQLite3's synchronous writes do not noticeably drop) due to ext4's different handling of <span class="code">fsync</span> / <span class="code">msync</span> calls. Even LevelDB's asynchronous write performance drops somewhat since it spreads its storage across multiple files and issues <span class="code">fsync</span> calls when switching to a new file.</p>
<h2>Acknowledgements</h2>
<p>Jeff Dean and Sanjay Ghemawat wrote LevelDB. Kevin Tseng wrote and compiled these benchmarks. Mikio Hirabayashi, Scott Hess, and Gabor Cselle provided help and advice.</p>

View File

@ -1,89 +0,0 @@
body {
margin-left: 0.5in;
margin-right: 0.5in;
background: white;
color: black;
}
h1 {
margin-left: -0.2in;
font-size: 14pt;
}
h2 {
margin-left: -0in;
font-size: 12pt;
}
h3 {
margin-left: -0in;
}
h4 {
margin-left: -0in;
}
hr {
margin-left: -0in;
}
/* Definition lists: definition term bold */
dt {
font-weight: bold;
}
address {
text-align: center;
}
code,samp,var {
color: blue;
}
kbd {
color: #600000;
}
div.note p {
float: right;
width: 3in;
margin-right: 0%;
padding: 1px;
border: 2px solid #6060a0;
background-color: #fffff0;
}
ul {
margin-top: -0em;
margin-bottom: -0em;
}
ol {
margin-top: -0em;
margin-bottom: -0em;
}
UL.nobullets {
list-style-type: none;
list-style-image: none;
margin-left: -1em;
}
p {
margin: 1em 0 1em 0;
padding: 0 0 0 0;
}
pre {
line-height: 1.3em;
padding: 0.4em 0 0.8em 0;
margin: 0 0 0 0;
border: 0 0 0 0;
color: blue;
}
.datatable {
margin-left: auto;
margin-right: auto;
margin-top: 2em;
margin-bottom: 2em;
border: 1px solid;
}
.datatable td,th {
padding: 0 0.5em 0 0.5em;
text-align: right;
}

View File

@ -1,213 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" type="text/css" href="doc.css" />
<title>Leveldb file layout and compactions</title>
</head>
<body>
<h1>Files</h1>
The implementation of leveldb is similar in spirit to the
representation of a single
<a href="http://research.google.com/archive/bigtable.html">
Bigtable tablet (section 5.3)</a>.
However the organization of the files that make up the representation
is somewhat different and is explained below.
<p>
Each database is represented by a set of files stored in a directory.
There are several different types of files as documented below:
<p>
<h2>Log files</h2>
<p>
A log file (*.log) stores a sequence of recent updates. Each update
is appended to the current log file. When the log file reaches a
pre-determined size (approximately 4MB by default), it is converted
to a sorted table (see below) and a new log file is created for future
updates.
<p>
A copy of the current log file is kept in an in-memory structure (the
<code>memtable</code>). This copy is consulted on every read so that read
operations reflect all logged updates.
<p>
<h2>Sorted tables</h2>
<p>
A sorted table (*.sst) stores a sequence of entries sorted by key.
Each entry is either a value for the key, or a deletion marker for the
key. (Deletion markers are kept around to hide obsolete values
present in older sorted tables).
<p>
The set of sorted tables are organized into a sequence of levels. The
sorted table generated from a log file is placed in a special <code>young</code>
level (also called level-0). When the number of young files exceeds a
certain threshold (currently four), all of the young files are merged
together with all of the overlapping level-1 files to produce a
sequence of new level-1 files (we create a new level-1 file for every
2MB of data.)
<p>
Files in the young level may contain overlapping keys. However files
in other levels have distinct non-overlapping key ranges. Consider
level number L where L >= 1. When the combined size of files in
level-L exceeds (10^L) MB (i.e., 10MB for level-1, 100MB for level-2,
...), one file in level-L, and all of the overlapping files in
level-(L+1) are merged to form a set of new files for level-(L+1).
These merges have the effect of gradually migrating new updates from
the young level to the largest level using only bulk reads and writes
(i.e., minimizing expensive seeks).
<h2>Manifest</h2>
<p>
A MANIFEST file lists the set of sorted tables that make up each
level, the corresponding key ranges, and other important metadata.
A new MANIFEST file (with a new number embedded in the file name)
is created whenever the database is reopened. The MANIFEST file is
formatted as a log, and changes made to the serving state (as files
are added or removed) are appended to this log.
<p>
<h2>Current</h2>
<p>
CURRENT is a simple text file that contains the name of the latest
MANIFEST file.
<p>
<h2>Info logs</h2>
<p>
Informational messages are printed to files named LOG and LOG.old.
<p>
<h2>Others</h2>
<p>
Other files used for miscellaneous purposes may also be present
(LOCK, *.dbtmp).
<h1>Level 0</h1>
When the log file grows above a certain size (1MB by default):
<ul>
<li>Create a brand new memtable and log file and direct future updates here
<li>In the background:
<ul>
<li>Write the contents of the previous memtable to an sstable
<li>Discard the memtable
<li>Delete the old log file and the old memtable
<li>Add the new sstable to the young (level-0) level.
</ul>
</ul>
<h1>Compactions</h1>
<p>
When the size of level L exceeds its limit, we compact it in a
background thread. The compaction picks a file from level L and all
overlapping files from the next level L+1. Note that if a level-L
file overlaps only part of a level-(L+1) file, the entire file at
level-(L+1) is used as an input to the compaction and will be
discarded after the compaction. Aside: because level-0 is special
(files in it may overlap each other), we treat compactions from
level-0 to level-1 specially: a level-0 compaction may pick more than
one level-0 file in case some of these files overlap each other.
<p>
A compaction merges the contents of the picked files to produce a
sequence of level-(L+1) files. We switch to producing a new
level-(L+1) file after the current output file has reached the target
file size (2MB). We also switch to a new output file when the key
range of the current output file has grown enough to overlap more than
ten level-(L+2) files. This last rule ensures that a later compaction
of a level-(L+1) file will not pick up too much data from level-(L+2).
<p>
The old files are discarded and the new files are added to the serving
state.
<p>
Compactions for a particular level rotate through the key space. In
more detail, for each level L, we remember the ending key of the last
compaction at level L. The next compaction for level L will pick the
first file that starts after this key (wrapping around to the
beginning of the key space if there is no such file).
<p>
Compactions drop overwritten values. They also drop deletion markers
if there are no higher numbered levels that contain a file whose range
overlaps the current key.
<h2>Timing</h2>
Level-0 compactions will read up to four 1MB files from level-0, and
at worst all the level-1 files (10MB). I.e., we will read 14MB and
write 14MB.
<p>
Other than the special level-0 compactions, we will pick one 2MB file
from level L. In the worst case, this will overlap ~ 12 files from
level L+1 (10 because level-(L+1) is ten times the size of level-L,
and another two at the boundaries since the file ranges at level-L
will usually not be aligned with the file ranges at level-L+1). The
compaction will therefore read 26MB and write 26MB. Assuming a disk
IO rate of 100MB/s (ballpark range for modern drives), the worst
compaction cost will be approximately 0.5 second.
<p>
If we throttle the background writing to something small, say 10% of
the full 100MB/s speed, a compaction may take up to 5 seconds. If the
user is writing at 10MB/s, we might build up lots of level-0 files
(~50 to hold the 5*10MB). This may significantly increase the cost of
reads due to the overhead of merging more files together on every
read.
<p>
Solution 1: To reduce this problem, we might want to increase the log
switching threshold when the number of level-0 files is large. Though
the downside is that the larger this threshold, the more memory we will
need to hold the corresponding memtable.
<p>
Solution 2: We might want to decrease write rate artificially when the
number of level-0 files goes up.
<p>
Solution 3: We work on reducing the cost of very wide merges.
Perhaps most of the level-0 files will have their blocks sitting
uncompressed in the cache and we will only need to worry about the
O(N) complexity in the merging iterator.
<h2>Number of files</h2>
Instead of always making 2MB files, we could make larger files for
larger levels to reduce the total file count, though at the expense of
more bursty compactions. Alternatively, we could shard the set of
files into multiple directories.
<p>
An experiment on an <code>ext3</code> filesystem on Feb 04, 2011 shows
the following timings to do 100K file opens in directories with
varying number of files:
<table class="datatable">
<tr><th>Files in directory</th><th>Microseconds to open a file</th></tr>
<tr><td>1000</td><td>9</td>
<tr><td>10000</td><td>10</td>
<tr><td>100000</td><td>16</td>
</table>
So maybe even the sharding is not necessary on modern filesystems?
<h1>Recovery</h1>
<ul>
<li> Read CURRENT to find name of the latest committed MANIFEST
<li> Read the named MANIFEST file
<li> Clean up stale files
<li> We could open all sstables here, but it is probably better to be lazy...
<li> Convert log chunk to a new level-0 sstable
<li> Start directing new writes to a new log file with recovered sequence#
</ul>
<h1>Garbage collection of files</h1>
<code>DeleteObsoleteFiles()</code> is called at the end of every
compaction and at the end of recovery. It finds the names of all
files in the database. It deletes all log files that are not the
current log file. It deletes all table files that are not referenced
from some level and are not the output of an active compaction.
</body>
</html>

172
doc/impl.md Normal file
View File

@ -0,0 +1,172 @@
## Files
The implementation of leveldb is similar in spirit to the representation of a
single [Bigtable tablet (section 5.3)](https://research.google/pubs/pub27898/).
However the organization of the files that make up the representation is
somewhat different and is explained below.
Each database is represented by a set of files stored in a directory. There are
several different types of files as documented below:
### Log files
A log file (*.log) stores a sequence of recent updates. Each update is appended
to the current log file. When the log file reaches a pre-determined size
(approximately 4MB by default), it is converted to a sorted table (see below)
and a new log file is created for future updates.
A copy of the current log file is kept in an in-memory structure (the
`memtable`). This copy is consulted on every read so that read operations
reflect all logged updates.
### Sorted tables
A sorted table (*.ldb) stores a sequence of entries sorted by key. Each entry is
either a value for the key, or a deletion marker for the key. (Deletion markers
are kept around to hide obsolete values present in older sorted tables).
The set of sorted tables are organized into a sequence of levels. The sorted
table generated from a log file is placed in a special **young** level (also
called level-0). When the number of young files exceeds a certain threshold
(currently four), all of the young files are merged together with all of the
overlapping level-1 files to produce a sequence of new level-1 files (we create
a new level-1 file for every 2MB of data.)
Files in the young level may contain overlapping keys. However files in other
levels have distinct non-overlapping key ranges. Consider level number L where
L >= 1. When the combined size of files in level-L exceeds (10^L) MB (i.e., 10MB
for level-1, 100MB for level-2, ...), one file in level-L, and all of the
overlapping files in level-(L+1) are merged to form a set of new files for
level-(L+1). These merges have the effect of gradually migrating new updates
from the young level to the largest level using only bulk reads and writes
(i.e., minimizing expensive seeks).
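As a rough sketch of the size rule above (simplified; the corresponding logic in the implementation lives in db/version_set.cc):
```c++
// Simplified sketch of the level size rule described above: level-L may hold
// roughly 10^L MB before a compaction into level-(L+1) is triggered.
#include <cstdint>

constexpr uint64_t kMB = 1048576;

uint64_t MaxBytesForLevel(int level) {
  uint64_t result = 10 * kMB;  // level-1 limit (level-0 is governed by file count)
  while (level > 1) {
    result *= 10;
    --level;
  }
  return result;  // 10MB for level-1, 100MB for level-2, ...
}
```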
### Manifest
A MANIFEST file lists the set of sorted tables that make up each level, the
corresponding key ranges, and other important metadata. A new MANIFEST file
(with a new number embedded in the file name) is created whenever the database
is reopened. The MANIFEST file is formatted as a log, and changes made to the
serving state (as files are added or removed) are appended to this log.
### Current
CURRENT is a simple text file that contains the name of the latest MANIFEST
file.
### Info logs
Informational messages are printed to files named LOG and LOG.old.
### Others
Other files used for miscellaneous purposes may also be present (LOCK, *.dbtmp).
## Level 0
When the log file grows above a certain size (4MB by default):
Create a brand new memtable and log file and direct future updates here.
In the background:
1. Write the contents of the previous memtable to an sstable.
2. Discard the memtable.
3. Delete the old log file and the old memtable.
4. Add the new sstable to the young (level-0) level.
## Compactions
When the size of level L exceeds its limit, we compact it in a background
thread. The compaction picks a file from level L and all overlapping files from
the next level L+1. Note that if a level-L file overlaps only part of a
level-(L+1) file, the entire file at level-(L+1) is used as an input to the
compaction and will be discarded after the compaction. Aside: because level-0
is special (files in it may overlap each other), we treat compactions from
level-0 to level-1 specially: a level-0 compaction may pick more than one
level-0 file in case some of these files overlap each other.
A compaction merges the contents of the picked files to produce a sequence of
level-(L+1) files. We switch to producing a new level-(L+1) file after the
current output file has reached the target file size (2MB). We also switch to a
new output file when the key range of the current output file has grown enough
to overlap more than ten level-(L+2) files. This last rule ensures that a later
compaction of a level-(L+1) file will not pick up too much data from
level-(L+2).
The old files are discarded and the new files are added to the serving state.
Compactions for a particular level rotate through the key space. In more detail,
for each level L, we remember the ending key of the last compaction at level L.
The next compaction for level L will pick the first file that starts after this
key (wrapping around to the beginning of the key space if there is no such
file).
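A minimal sketch of that rotation, assuming plain byte-wise key ordering (the real picker in db/version_set.cc compares against the remembered pointer with the internal key comparator):
```c++
// Illustrative rotation through the key space: pick the first file whose
// largest key is past the remembered compaction pointer, wrapping to the
// start of the level if no such file exists.
#include <string>
#include <vector>

struct FileRange {
  std::string smallest;
  std::string largest;
};

// `files` is sorted by key range; `compact_pointer` is the ending key of the
// previous compaction at this level (empty if none).
size_t PickRotatingFile(const std::vector<FileRange>& files,
                        const std::string& compact_pointer) {
  for (size_t i = 0; i < files.size(); ++i) {
    if (compact_pointer.empty() || files[i].largest > compact_pointer) {
      return i;
    }
  }
  return 0;  // wrap around to the beginning of the key space
}
```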
Compactions drop overwritten values. They also drop deletion markers if there
are no higher numbered levels that contain a file whose range overlaps the
current key.
### Timing
Level-0 compactions will read up to four 1MB files from level-0, and at worst
all the level-1 files (10MB). I.e., we will read 14MB and write 14MB.
Other than the special level-0 compactions, we will pick one 2MB file from level
L. In the worst case, this will overlap ~ 12 files from level L+1 (10 because
level-(L+1) is ten times the size of level-L, and another two at the boundaries
since the file ranges at level-L will usually not be aligned with the file
ranges at level-L+1). The compaction will therefore read 26MB and write 26MB.
Assuming a disk IO rate of 100MB/s (ballpark range for modern drives), the worst
compaction cost will be approximately 0.5 second.
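Spelled out as a small worked calculation (same assumptions as above: 2MB files, ~12 overlapping level-(L+1) files, 100MB/s of disk bandwidth):
```c++
// Worked version of the compaction cost estimate above.
constexpr int kInputMB = 2 + 12 * 2;   // one 2MB level-L file + ~12 2MB level-(L+1) files = 26MB read
constexpr int kOutputMB = kInputMB;    // roughly the same volume is written back = 26MB
constexpr double kDiskMBps = 100.0;    // ballpark sequential IO rate
constexpr double kSeconds = (kInputMB + kOutputMB) / kDiskMBps;  // ~0.52s
```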
If we throttle the background writing to something small, say 10% of the full
100MB/s speed, a compaction may take up to 5 seconds. If the user is writing at
10MB/s, we might build up lots of level-0 files (~50 to hold the 5*10MB). This
may significantly increase the cost of reads due to the overhead of merging more
files together on every read.
Solution 1: To reduce this problem, we might want to increase the log switching
threshold when the number of level-0 files is large. Though the downside is that
the larger this threshold, the more memory we will need to hold the
corresponding memtable.
Solution 2: We might want to decrease write rate artificially when the number of
level-0 files goes up.
Solution 3: We work on reducing the cost of very wide merges. Perhaps most of
the level-0 files will have their blocks sitting uncompressed in the cache and
we will only need to worry about the O(N) complexity in the merging iterator.
### Number of files
Instead of always making 2MB files, we could make larger files for larger levels
to reduce the total file count, though at the expense of more bursty
compactions. Alternatively, we could shard the set of files into multiple
directories.
An experiment on an ext3 filesystem on Feb 04, 2011 shows the following timings
to do 100K file opens in directories with varying number of files:
| Files in directory | Microseconds to open a file |
|-------------------:|----------------------------:|
| 1000 | 9 |
| 10000 | 10 |
| 100000 | 16 |
So maybe even the sharding is not necessary on modern filesystems?
## Recovery
* Read CURRENT to find name of the latest committed MANIFEST
* Read the named MANIFEST file
* Clean up stale files
* We could open all sstables here, but it is probably better to be lazy...
* Convert log chunk to a new level-0 sstable
* Start directing new writes to a new log file with recovered sequence#
## Garbage collection of files
`RemoveObsoleteFiles()` is called at the end of every compaction and at the end
of recovery. It finds the names of all files in the database. It deletes all log
files that are not the current log file. It deletes all table files that are not
referenced from some level and are not the output of an active compaction.
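A highly simplified sketch of that policy (the real `RemoveObsoleteFiles()` in db/db_impl.cc also tracks pending compaction outputs and the live file set through the version set; file classification here is only schematic):
```c++
// Schematic version of the garbage-collection rule described above: keep the
// current log file, keep table files that some level still references or that
// an active compaction is producing, delete everything else.
#include <set>
#include <string>
#include <vector>

std::vector<std::string> FilesToDelete(
    const std::vector<std::string>& all_files, const std::string& current_log,
    const std::set<std::string>& live_tables,
    const std::set<std::string>& compaction_outputs) {
  std::vector<std::string> doomed;
  for (const std::string& f : all_files) {
    const bool is_log = f.size() > 4 && f.compare(f.size() - 4, 4, ".log") == 0;
    const bool is_table = f.size() > 4 && f.compare(f.size() - 4, 4, ".ldb") == 0;
    if (is_log && f != current_log) {
      doomed.push_back(f);
    } else if (is_table && live_tables.count(f) == 0 &&
               compaction_outputs.count(f) == 0) {
      doomed.push_back(f);
    }
  }
  return doomed;
}
```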

View File

@ -1,549 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" type="text/css" href="doc.css" />
<title>Leveldb</title>
</head>
<body>
<h1>Leveldb</h1>
<address>Jeff Dean, Sanjay Ghemawat</address>
<p>
The <code>leveldb</code> library provides a persistent key value store. Keys and
values are arbitrary byte arrays. The keys are ordered within the key
value store according to a user-specified comparator function.
<p>
<h1>Opening A Database</h1>
<p>
A <code>leveldb</code> database has a name which corresponds to a file system
directory. All of the contents of database are stored in this
directory. The following example shows how to open a database,
creating it if necessary:
<p>
<pre>
#include &lt;assert&gt;
#include "leveldb/db.h"
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &amp;db);
assert(status.ok());
...
</pre>
If you want to raise an error if the database already exists, add
the following line before the <code>leveldb::DB::Open</code> call:
<pre>
options.error_if_exists = true;
</pre>
<h1>Status</h1>
<p>
You may have noticed the <code>leveldb::Status</code> type above. Values of this
type are returned by most functions in <code>leveldb</code> that may encounter an
error. You can check if such a result is ok, and also print an
associated error message:
<p>
<pre>
leveldb::Status s = ...;
if (!s.ok()) cerr &lt;&lt; s.ToString() &lt;&lt; endl;
</pre>
<h1>Closing A Database</h1>
<p>
When you are done with a database, just delete the database object.
Example:
<p>
<pre>
... open the db as described above ...
... do something with db ...
delete db;
</pre>
<h1>Reads And Writes</h1>
<p>
The database provides <code>Put</code>, <code>Delete</code>, and <code>Get</code> methods to
modify/query the database. For example, the following code
moves the value stored under key1 to key2.
<pre>
std::string value;
leveldb::Status s = db-&gt;Get(leveldb::ReadOptions(), key1, &amp;value);
if (s.ok()) s = db-&gt;Put(leveldb::WriteOptions(), key2, value);
if (s.ok()) s = db-&gt;Delete(leveldb::WriteOptions(), key1);
</pre>
<h1>Atomic Updates</h1>
<p>
Note that if the process dies after the Put of key2 but before the
delete of key1, the same value may be left stored under multiple keys.
Such problems can be avoided by using the <code>WriteBatch</code> class to
atomically apply a set of updates:
<p>
<pre>
#include "leveldb/write_batch.h"
...
std::string value;
leveldb::Status s = db-&gt;Get(leveldb::ReadOptions(), key1, &amp;value);
if (s.ok()) {
leveldb::WriteBatch batch;
batch.Delete(key1);
batch.Put(key2, value);
s = db-&gt;Write(leveldb::WriteOptions(), &amp;batch);
}
</pre>
The <code>WriteBatch</code> holds a sequence of edits to be made to the database,
and these edits within the batch are applied in order. Note that we
called <code>Delete</code> before <code>Put</code> so that if <code>key1</code> is identical to <code>key2</code>,
we do not end up erroneously dropping the value entirely.
<p>
Apart from its atomicity benefits, <code>WriteBatch</code> may also be used to
speed up bulk updates by placing lots of individual mutations into the
same batch.
<h1>Synchronous Writes</h1>
By default, each write to <code>leveldb</code> is asynchronous: it
returns after pushing the write from the process into the operating
system. The transfer from operating system memory to the underlying
persistent storage happens asynchronously. The <code>sync</code> flag
can be turned on for a particular write to make the write operation
not return until the data being written has been pushed all the way to
persistent storage. (On Posix systems, this is implemented by calling
either <code>fsync(...)</code> or <code>fdatasync(...)</code> or
<code>msync(..., MS_SYNC)</code> before the write operation returns.)
<pre>
leveldb::WriteOptions write_options;
write_options.sync = true;
db-&gt;Put(write_options, ...);
</pre>
Asynchronous writes are often more than a thousand times as fast as
synchronous writes. The downside of asynchronous writes is that a
crash of the machine may cause the last few updates to be lost. Note
that a crash of just the writing process (i.e., not a reboot) will not
cause any loss since even when <code>sync</code> is false, an update
is pushed from the process memory into the operating system before it
is considered done.
<p>
Asynchronous writes can often be used safely. For example, when
loading a large amount of data into the database you can handle lost
updates by restarting the bulk load after a crash. A hybrid scheme is
also possible where every Nth write is synchronous, and in the event
of a crash, the bulk load is restarted just after the last synchronous
write finished by the previous run. (The synchronous write can update
a marker that describes where to restart on a crash.)
<p>
<code>WriteBatch</code> provides an alternative to asynchronous writes.
Multiple updates may be placed in the same <code>WriteBatch</code> and
applied together using a synchronous write (i.e.,
<code>write_options.sync</code> is set to true). The extra cost of
the synchronous write will be amortized across all of the writes in
the batch.
<p>
<h1>Concurrency</h1>
<p>
A database may only be opened by one process at a time.
The <code>leveldb</code> implementation acquires a lock from the
operating system to prevent misuse. Within a single process, the
same <code>leveldb::DB</code> object may be safely shared by multiple
concurrent threads. I.e., different threads may write into or fetch
iterators or call <code>Get</code> on the same database without any
external synchronization (the leveldb implementation will
automatically do the required synchronization). However other objects
(like Iterator and WriteBatch) may require external synchronization.
If two threads share such an object, they must protect access to it
using their own locking protocol. More details are available in
the public header files.
<p>
<h1>Iteration</h1>
<p>
The following example demonstrates how to print all key,value pairs
in a database.
<p>
<pre>
leveldb::Iterator* it = db-&gt;NewIterator(leveldb::ReadOptions());
for (it-&gt;SeekToFirst(); it-&gt;Valid(); it-&gt;Next()) {
cout &lt;&lt; it-&gt;key().ToString() &lt;&lt; ": " &lt;&lt; it-&gt;value().ToString() &lt;&lt; endl;
}
assert(it-&gt;status().ok()); // Check for any errors found during the scan
delete it;
</pre>
The following variation shows how to process just the keys in the
range <code>[start,limit)</code>:
<p>
<pre>
for (it-&gt;Seek(start);
it-&gt;Valid() &amp;&amp; it-&gt;key().ToString() &lt; limit;
it-&gt;Next()) {
...
}
</pre>
You can also process entries in reverse order. (Caveat: reverse
iteration may be somewhat slower than forward iteration.)
<p>
<pre>
for (it-&gt;SeekToLast(); it-&gt;Valid(); it-&gt;Prev()) {
...
}
</pre>
<h1>Snapshots</h1>
<p>
Snapshots provide consistent read-only views over the entire state of
the key-value store. <code>ReadOptions::snapshot</code> may be non-NULL to indicate
that a read should operate on a particular version of the DB state.
If <code>ReadOptions::snapshot</code> is NULL, the read will operate on an
implicit snapshot of the current state.
<p>
Snapshots are created by the DB::GetSnapshot() method:
<p>
<pre>
leveldb::ReadOptions options;
options.snapshot = db-&gt;GetSnapshot();
... apply some updates to db ...
leveldb::Iterator* iter = db-&gt;NewIterator(options);
... read using iter to view the state when the snapshot was created ...
delete iter;
db-&gt;ReleaseSnapshot(options.snapshot);
</pre>
Note that when a snapshot is no longer needed, it should be released
using the DB::ReleaseSnapshot interface. This allows the
implementation to get rid of state that was being maintained just to
support reading as of that snapshot.
<h1>Slice</h1>
<p>
The return value of the <code>it->key()</code> and <code>it->value()</code> calls above
are instances of the <code>leveldb::Slice</code> type. <code>Slice</code> is a simple
structure that contains a length and a pointer to an external byte
array. Returning a <code>Slice</code> is a cheaper alternative to returning a
<code>std::string</code> since we do not need to copy potentially large keys and
values. In addition, <code>leveldb</code> methods do not return null-terminated
C-style strings since <code>leveldb</code> keys and values are allowed to
contain '\0' bytes.
<p>
C++ strings and null-terminated C-style strings can be easily converted
to a Slice:
<p>
<pre>
leveldb::Slice s1 = "hello";
std::string str("world");
leveldb::Slice s2 = str;
</pre>
A Slice can be easily converted back to a C++ string:
<pre>
std::string str = s1.ToString();
assert(str == std::string("hello"));
</pre>
Be careful when using Slices since it is up to the caller to ensure that
the external byte array into which the Slice points remains live while
the Slice is in use. For example, the following is buggy:
<p>
<pre>
leveldb::Slice slice;
if (...) {
std::string str = ...;
slice = str;
}
Use(slice);
</pre>
When the <code>if</code> statement goes out of scope, <code>str</code> will be destroyed and the
backing storage for <code>slice</code> will disappear.
<p>
<h1>Comparators</h1>
<p>
The preceding examples used the default ordering function for key,
which orders bytes lexicographically. You can however supply a custom
comparator when opening a database. For example, suppose each
database key consists of two numbers and we should sort by the first
number, breaking ties by the second number. First, define a proper
subclass of <code>leveldb::Comparator</code> that expresses these rules:
<p>
<pre>
class TwoPartComparator : public leveldb::Comparator {
public:
// Three-way comparison function:
// if a &lt; b: negative result
// if a &gt; b: positive result
// else: zero result
int Compare(const leveldb::Slice&amp; a, const leveldb::Slice&amp; b) const {
int a1, a2, b1, b2;
ParseKey(a, &amp;a1, &amp;a2);
ParseKey(b, &amp;b1, &amp;b2);
if (a1 &lt; b1) return -1;
if (a1 &gt; b1) return +1;
if (a2 &lt; b2) return -1;
if (a2 &gt; b2) return +1;
return 0;
}
// Ignore the following methods for now:
const char* Name() const { return "TwoPartComparator"; }
void FindShortestSeparator(std::string*, const leveldb::Slice&amp;) const { }
void FindShortSuccessor(std::string*) const { }
};
</pre>
Now create a database using this custom comparator:
<p>
<pre>
TwoPartComparator cmp;
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
options.comparator = &amp;cmp;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &amp;db);
...
</pre>
<h2>Backwards compatibility</h2>
<p>
The result of the comparator's <code>Name</code> method is attached to the
database when it is created, and is checked on every subsequent
database open. If the name changes, the <code>leveldb::DB::Open</code> call will
fail. Therefore, change the name if and only if the new key format
and comparison function are incompatible with existing databases, and
it is ok to discard the contents of all existing databases.
<p>
You can however still gradually evolve your key format over time with
a little bit of pre-planning. For example, you could store a version
number at the end of each key (one byte should suffice for most uses).
When you wish to switch to a new key format (e.g., adding an optional
third part to the keys processed by <code>TwoPartComparator</code>),
(a) keep the same comparator name (b) increment the version number
for new keys (c) change the comparator function so it uses the
version numbers found in the keys to decide how to interpret them.
<p>
<h1>Performance</h1>
<p>
Performance can be tuned by changing the default values of the
types defined in <code>include/leveldb/options.h</code>.
<p>
<h2>Block size</h2>
<p>
<code>leveldb</code> groups adjacent keys together into the same block and such a
block is the unit of transfer to and from persistent storage. The
default block size is approximately 4096 uncompressed bytes.
Applications that mostly do bulk scans over the contents of the
database may wish to increase this size. Applications that do a lot
of point reads of small values may wish to switch to a smaller block
size if performance measurements indicate an improvement. There isn't
much benefit in using blocks smaller than one kilobyte, or larger than
a few megabytes. Also note that compression will be more effective
with larger block sizes.
<p>
<h2>Compression</h2>
<p>
Each block is individually compressed before being written to
persistent storage. Compression is on by default since the default
compression method is very fast, and is automatically disabled for
uncompressible data. In rare cases, applications may want to disable
compression entirely, but should only do so if benchmarks show a
performance improvement:
<p>
<pre>
leveldb::Options options;
options.compression = leveldb::kNoCompression;
... leveldb::DB::Open(options, name, ...) ....
</pre>
<h2>Cache</h2>
<p>
The contents of the database are stored in a set of files in the
filesystem and each file stores a sequence of compressed blocks. If
<code>options.cache</code> is non-NULL, it is used to cache frequently used
uncompressed block contents.
<p>
<pre>
#include "leveldb/cache.h"
leveldb::Options options;
options.cache = leveldb::NewLRUCache(100 * 1048576); // 100MB cache
leveldb::DB* db;
leveldb::DB::Open(options, name, &db);
... use the db ...
delete db;
delete options.cache;
</pre>
Note that the cache holds uncompressed data, and therefore it should
be sized according to application level data sizes, without any
reduction from compression. (Caching of compressed blocks is left to
the operating system buffer cache, or any custom <code>Env</code>
implementation provided by the client.)
<p>
When performing a bulk read, the application may wish to disable
caching so that the data processed by the bulk read does not end up
displacing most of the cached contents. A per-iterator option can be
used to achieve this:
<p>
<pre>
leveldb::ReadOptions options;
options.fill_cache = false;
leveldb::Iterator* it = db-&gt;NewIterator(options);
for (it-&gt;SeekToFirst(); it-&gt;Valid(); it-&gt;Next()) {
...
}
</pre>
<h2>Key Layout</h2>
<p>
Note that the unit of disk transfer and caching is a block. Adjacent
keys (according to the database sort order) will usually be placed in
the same block. Therefore the application can improve its performance
by placing keys that are accessed together near each other and placing
infrequently used keys in a separate region of the key space.
<p>
For example, suppose we are implementing a simple file system on top
of <code>leveldb</code>. The types of entries we might wish to store are:
<p>
<pre>
filename -&gt; permission-bits, length, list of file_block_ids
file_block_id -&gt; data
</pre>
We might want to prefix <code>filename</code> keys with one letter (say '/') and the
<code>file_block_id</code> keys with a different letter (say '0') so that scans
over just the metadata do not force us to fetch and cache bulky file
contents.
<p>
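A small illustration of that prefixing scheme (MetadataKey and BlockKey are hypothetical helpers, not part of leveldb):
```c++
// Hypothetical key construction for the file-system example above: metadata
// keys share the '/' prefix and bulky block keys share the '0' prefix, so a
// scan over metadata never touches file contents.
#include <string>

std::string MetadataKey(const std::string& filename) {
  return "/" + filename;        // "/<filename>" -> permission bits, length, block ids
}

std::string BlockKey(const std::string& file_block_id) {
  return "0" + file_block_id;   // "0<block-id>" -> raw block data
}
```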
<h2>Filters</h2>
<p>
Because of the way <code>leveldb</code> data is organized on disk,
a single <code>Get()</code> call may involve multiple reads from disk.
The optional <code>FilterPolicy</code> mechanism can be used to reduce
the number of disk reads substantially.
<pre>
leveldb::Options options;
options.filter_policy = NewBloomFilterPolicy(10);
leveldb::DB* db;
leveldb::DB::Open(options, "/tmp/testdb", &amp;db);
... use the database ...
delete db;
delete options.filter_policy;
</pre>
The preceding code associates a
<a href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</a>
based filtering policy with the database. Bloom filter based
filtering relies on keeping some number of bits of data in memory per
key (in this case 10 bits per key since that is the argument we passed
to NewBloomFilterPolicy). This filter will reduce the number of unnecessary
disk reads needed for <code>Get()</code> calls by a factor of
approximately 100. Increasing the bits per key will lead to a
larger reduction at the cost of more memory usage. We recommend that
applications whose working set does not fit in memory and that do a
lot of random reads set a filter policy.
<p>
If you are using a custom comparator, you should ensure that the filter
policy you are using is compatible with your comparator. For example,
consider a comparator that ignores trailing spaces when comparing keys.
<code>NewBloomFilterPolicy</code> must not be used with such a comparator.
Instead, the application should provide a custom filter policy that
also ignores trailing spaces. For example:
<pre>
class CustomFilterPolicy : public leveldb::FilterPolicy {
private:
FilterPolicy* builtin_policy_;
public:
CustomFilterPolicy() : builtin_policy_(NewBloomFilterPolicy(10)) { }
~CustomFilterPolicy() { delete builtin_policy_; }
const char* Name() const { return "IgnoreTrailingSpacesFilter"; }
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
// Use builtin bloom filter code after removing trailing spaces
std::vector&lt;Slice&gt; trimmed(n);
for (int i = 0; i &lt; n; i++) {
trimmed[i] = RemoveTrailingSpaces(keys[i]);
}
return builtin_policy_-&gt;CreateFilter(trimmed.data(), n, dst);
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
// Use builtin bloom filter code after removing trailing spaces
return builtin_policy_-&gt;KeyMayMatch(RemoveTrailingSpaces(key), filter);
}
};
</pre>
<p>
Advanced applications may provide a filter policy that does not use
a bloom filter but uses some other mechanism for summarizing a set
of keys. See <code>leveldb/filter_policy.h</code> for detail.
<p>
<h1>Checksums</h1>
<p>
<code>leveldb</code> associates checksums with all data it stores in the file system.
There are two separate controls provided over how aggressively these
checksums are verified:
<p>
<ul>
<li> <code>ReadOptions::verify_checksums</code> may be set to true to force
checksum verification of all data that is read from the file system on
behalf of a particular read. By default, no such verification is
done.
<p>
<li> <code>Options::paranoid_checks</code> may be set to true before opening a
database to make the database implementation raise an error as soon as
it detects an internal corruption. Depending on which portion of the
database has been corrupted, the error may be raised when the database
is opened, or later by another database operation. By default,
paranoid checking is off so that the database can be used even if
parts of its persistent storage have been corrupted.
<p>
If a database is corrupted (perhaps it cannot be opened when
paranoid checking is turned on), the <code>leveldb::RepairDB</code> function
may be used to recover as much of the data as possible
<p>
</ul>
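For concreteness, a hedged sketch that turns both controls on (the function name and key are illustrative; RepairDB is the last-resort recovery path mentioned above):
```c++
#include <string>
#include "leveldb/db.h"

void OpenWithChecksumChecking(const std::string& name) {
  leveldb::Options options;
  options.paranoid_checks = true;  // surface internal corruption as errors

  leveldb::DB* db = nullptr;
  leveldb::Status status = leveldb::DB::Open(options, name, &db);
  if (!status.ok()) {
    // Last resort: try to salvage what RepairDB can recover.
    status = leveldb::RepairDB(name, leveldb::Options());
  }

  leveldb::ReadOptions read_options;
  read_options.verify_checksums = true;  // verify data read for this request
  std::string value;
  if (db != nullptr) db->Get(read_options, "some-key", &value);
  delete db;
}
```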
<h1>Approximate Sizes</h1>
<p>
The <code>GetApproximateSizes</code> method can be used to get the approximate
number of bytes of file system space used by one or more key ranges.
<p>
<pre>
leveldb::Range ranges[2];
ranges[0] = leveldb::Range("a", "c");
ranges[1] = leveldb::Range("x", "z");
uint64_t sizes[2];
leveldb::Status s = db-&gt;GetApproximateSizes(ranges, 2, sizes);
</pre>
The preceding call will set <code>sizes[0]</code> to the approximate number of
bytes of file system space used by the key range <code>[a..c)</code> and
<code>sizes[1]</code> to the approximate number of bytes used by the key range
<code>[x..z)</code>.
<p>
<h1>Environment</h1>
<p>
All file operations (and other operating system calls) issued by the
<code>leveldb</code> implementation are routed through a <code>leveldb::Env</code> object.
Sophisticated clients may wish to provide their own <code>Env</code>
implementation to get better control. For example, an application may
introduce artificial delays in the file IO paths to limit the impact
of <code>leveldb</code> on other activities in the system.
<p>
<pre>
class SlowEnv : public leveldb::Env {
.. implementation of the Env interface ...
};
SlowEnv env;
leveldb::Options options;
options.env = &amp;env;
Status s = leveldb::DB::Open(options, ...);
</pre>
<h1>Porting</h1>
<p>
<code>leveldb</code> may be ported to a new platform by providing platform
specific implementations of the types/methods/functions exported by
<code>leveldb/port/port.h</code>. See <code>leveldb/port/port_example.h</code> for more
details.
<p>
In addition, the new platform may need a new default <code>leveldb::Env</code>
implementation. See <code>leveldb/util/env_posix.h</code> for an example.
<h1>Other Information</h1>
<p>
Details about the <code>leveldb</code> implementation may be found in
the following documents:
<ul>
<li> <a href="impl.html">Implementation notes</a>
<li> <a href="table_format.txt">Format of an immutable Table file</a>
<li> <a href="log_format.txt">Format of a log file</a>
</ul>
</body>
</html>

524
doc/index.md Normal file
View File

@ -0,0 +1,524 @@
leveldb
=======
_Jeff Dean, Sanjay Ghemawat_
The leveldb library provides a persistent key value store. Keys and values are
arbitrary byte arrays. The keys are ordered within the key value store
according to a user-specified comparator function.
## Opening A Database
A leveldb database has a name which corresponds to a file system directory. All
of the contents of database are stored in this directory. The following example
shows how to open a database, creating it if necessary:
```c++
#include <cassert>
#include "leveldb/db.h"
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
assert(status.ok());
...
```
If you want to raise an error if the database already exists, add the following
line before the `leveldb::DB::Open` call:
```c++
options.error_if_exists = true;
```
## Status
You may have noticed the `leveldb::Status` type above. Values of this type are
returned by most functions in leveldb that may encounter an error. You can check
if such a result is ok, and also print an associated error message:
```c++
leveldb::Status s = ...;
if (!s.ok()) cerr << s.ToString() << endl;
```
## Closing A Database
When you are done with a database, just delete the database object. Example:
```c++
... open the db as described above ...
... do something with db ...
delete db;
```
## Reads And Writes
The database provides Put, Delete, and Get methods to modify/query the database.
For example, the following code moves the value stored under key1 to key2.
```c++
std::string value;
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
if (s.ok()) s = db->Put(leveldb::WriteOptions(), key2, value);
if (s.ok()) s = db->Delete(leveldb::WriteOptions(), key1);
```
## Atomic Updates
Note that if the process dies after the Put of key2 but before the delete of
key1, the same value may be left stored under multiple keys. Such problems can
be avoided by using the `WriteBatch` class to atomically apply a set of updates:
```c++
#include "leveldb/write_batch.h"
...
std::string value;
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
if (s.ok()) {
leveldb::WriteBatch batch;
batch.Delete(key1);
batch.Put(key2, value);
s = db->Write(leveldb::WriteOptions(), &batch);
}
```
The `WriteBatch` holds a sequence of edits to be made to the database, and these
edits within the batch are applied in order. Note that we called Delete before
Put so that if key1 is identical to key2, we do not end up erroneously dropping
the value entirely.
Apart from its atomicity benefits, `WriteBatch` may also be used to speed up
bulk updates by placing lots of individual mutations into the same batch.
## Synchronous Writes
By default, each write to leveldb is asynchronous: it returns after pushing the
write from the process into the operating system. The transfer from operating
system memory to the underlying persistent storage happens asynchronously. The
sync flag can be turned on for a particular write to make the write operation
not return until the data being written has been pushed all the way to
persistent storage. (On Posix systems, this is implemented by calling either
`fsync(...)` or `fdatasync(...)` or `msync(..., MS_SYNC)` before the write
operation returns.)
```c++
leveldb::WriteOptions write_options;
write_options.sync = true;
db->Put(write_options, ...);
```
Asynchronous writes are often more than a thousand times as fast as synchronous
writes. The downside of asynchronous writes is that a crash of the machine may
cause the last few updates to be lost. Note that a crash of just the writing
process (i.e., not a reboot) will not cause any loss since even when sync is
false, an update is pushed from the process memory into the operating system
before it is considered done.
Asynchronous writes can often be used safely. For example, when loading a large
amount of data into the database you can handle lost updates by restarting the
bulk load after a crash. A hybrid scheme is also possible where every Nth write
is synchronous, and in the event of a crash, the bulk load is restarted just
after the last synchronous write finished by the previous run. (The synchronous
write can update a marker that describes where to restart on a crash.)
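A sketch of that hybrid scheme (the marker key, the value of `kSyncEvery`, and the function itself are hypothetical):
```c++
// Hedged sketch of the hybrid bulk-load scheme above: every Nth write is
// synchronous and also records a resume marker, so a crash only forces the
// load to restart from the last synchronous write.
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include "leveldb/db.h"

void BulkLoad(leveldb::DB* db,
              const std::vector<std::pair<std::string, std::string>>& rows) {
  const uint64_t kSyncEvery = 1000;  // assumption: tune for your workload
  leveldb::WriteOptions async_opts;  // sync is false by default
  leveldb::WriteOptions sync_opts;
  sync_opts.sync = true;

  for (uint64_t i = 0; i < rows.size(); ++i) {
    const bool checkpoint = (i + 1) % kSyncEvery == 0;
    db->Put(checkpoint ? sync_opts : async_opts, rows[i].first, rows[i].second);
    if (checkpoint) {
      // Hypothetical marker describing where to restart after a crash.
      db->Put(sync_opts, "!bulk_load_progress", std::to_string(i));
    }
  }
}
```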
`WriteBatch` provides an alternative to asynchronous writes. Multiple updates
may be placed in the same WriteBatch and applied together using a synchronous
write (i.e., `write_options.sync` is set to true). The extra cost of the
synchronous write will be amortized across all of the writes in the batch.
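The batched alternative described above can look like this minimal sketch, where one synchronous write covers several updates:
```c++
// Many updates, one synchronous commit: the cost of write_options.sync is
// amortized across the whole batch.
#include "leveldb/db.h"
#include "leveldb/write_batch.h"

leveldb::Status WriteManySynced(leveldb::DB* db) {
  leveldb::WriteBatch batch;
  batch.Put("key1", "value1");
  batch.Put("key2", "value2");
  batch.Delete("stale-key");

  leveldb::WriteOptions write_options;
  write_options.sync = true;  // single synchronous write for all three updates
  return db->Write(write_options, &batch);
}
```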
## Concurrency
A database may only be opened by one process at a time. The leveldb
implementation acquires a lock from the operating system to prevent misuse.
Within a single process, the same `leveldb::DB` object may be safely shared by
multiple concurrent threads. I.e., different threads may write into or fetch
iterators or call Get on the same database without any external synchronization
(the leveldb implementation will automatically do the required synchronization).
However other objects (like Iterator and `WriteBatch`) may require external
synchronization. If two threads share such an object, they must protect access
to it using their own locking protocol. More details are available in the public
header files.
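For the objects that do need external synchronization, a minimal sketch of the caller-side locking meant here (the `SharedBatch` wrapper is hypothetical):
```c++
// The DB itself is internally synchronized, but a WriteBatch shared between
// threads needs the caller's own lock, e.g. a mutex around every access.
#include <mutex>
#include "leveldb/db.h"
#include "leveldb/write_batch.h"

struct SharedBatch {
  std::mutex mu;
  leveldb::WriteBatch batch;

  void Add(const leveldb::Slice& key, const leveldb::Slice& value) {
    std::lock_guard<std::mutex> lock(mu);
    batch.Put(key, value);
  }

  leveldb::Status Flush(leveldb::DB* db) {
    std::lock_guard<std::mutex> lock(mu);
    leveldb::Status s = db->Write(leveldb::WriteOptions(), &batch);
    batch.Clear();
    return s;
  }
};
```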
## Iteration
The following example demonstrates how to print all key,value pairs in a
database.
```c++
leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
for (it->SeekToFirst(); it->Valid(); it->Next()) {
cout << it->key().ToString() << ": " << it->value().ToString() << endl;
}
assert(it->status().ok()); // Check for any errors found during the scan
delete it;
```
The following variation shows how to process just the keys in the range
[start,limit):
```c++
for (it->Seek(start);
it->Valid() && it->key().ToString() < limit;
it->Next()) {
...
}
```
You can also process entries in reverse order. (Caveat: reverse iteration may be
somewhat slower than forward iteration.)
```c++
for (it->SeekToLast(); it->Valid(); it->Prev()) {
...
}
```
## Snapshots
Snapshots provide consistent read-only views over the entire state of the
key-value store. `ReadOptions::snapshot` may be non-NULL to indicate that a
read should operate on a particular version of the DB state. If
`ReadOptions::snapshot` is NULL, the read will operate on an implicit snapshot
of the current state.
Snapshots are created by the `DB::GetSnapshot()` method:
```c++
leveldb::ReadOptions options;
options.snapshot = db->GetSnapshot();
... apply some updates to db ...
leveldb::Iterator* iter = db->NewIterator(options);
... read using iter to view the state when the snapshot was created ...
delete iter;
db->ReleaseSnapshot(options.snapshot);
```
Note that when a snapshot is no longer needed, it should be released using the
`DB::ReleaseSnapshot` interface. This allows the implementation to get rid of
state that was being maintained just to support reading as of that snapshot.
## Slice
The return value of the `it->key()` and `it->value()` calls above are instances
of the `leveldb::Slice` type. Slice is a simple structure that contains a length
and a pointer to an external byte array. Returning a Slice is a cheaper
alternative to returning a `std::string` since we do not need to copy
potentially large keys and values. In addition, leveldb methods do not return
null-terminated C-style strings since leveldb keys and values are allowed to
contain `'\0'` bytes.
C++ strings and null-terminated C-style strings can be easily converted to a
Slice:
```c++
leveldb::Slice s1 = "hello";
std::string str("world");
leveldb::Slice s2 = str;
```
A Slice can be easily converted back to a C++ string:
```c++
std::string str = s1.ToString();
assert(str == std::string("hello"));
```
Be careful when using Slices since it is up to the caller to ensure that the
external byte array into which the Slice points remains live while the Slice is
in use. For example, the following is buggy:
```c++
leveldb::Slice slice;
if (...) {
std::string str = ...;
slice = str;
}
Use(slice);
```
When the if statement goes out of scope, str will be destroyed and the backing
storage for slice will disappear.
## Comparators
The preceding examples used the default ordering function for key, which orders
bytes lexicographically. You can however supply a custom comparator when opening
a database. For example, suppose each database key consists of two numbers and
we should sort by the first number, breaking ties by the second number. First,
define a proper subclass of `leveldb::Comparator` that expresses these rules:
```c++
class TwoPartComparator : public leveldb::Comparator {
 public:
  // Three-way comparison function:
  //   if a < b: negative result
  //   if a > b: positive result
  //   else: zero result
  int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const {
    int a1, a2, b1, b2;
    ParseKey(a, &a1, &a2);
    ParseKey(b, &b1, &b2);
    if (a1 < b1) return -1;
    if (a1 > b1) return +1;
    if (a2 < b2) return -1;
    if (a2 > b2) return +1;
    return 0;
  }

  // Ignore the following methods for now:
  const char* Name() const { return "TwoPartComparator"; }
  void FindShortestSeparator(std::string*, const leveldb::Slice&) const {}
  void FindShortSuccessor(std::string*) const {}
};
```
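`ParseKey` above is not part of leveldb; it stands for whatever decoding matches
your key format. As one possible sketch, assuming each key stores the two numbers
as 4-byte big-endian integers:
```c++
#include <cassert>
#include <cstdint>

// Hypothetical key layout: [4-byte big-endian first][4-byte big-endian second].
static void ParseKey(const leveldb::Slice& key, int* first, int* second) {
  assert(key.size() == 8);
  const uint8_t* p = reinterpret_cast<const uint8_t*>(key.data());
  uint32_t a = (uint32_t{p[0]} << 24) | (uint32_t{p[1]} << 16) |
               (uint32_t{p[2]} << 8) | p[3];
  uint32_t b = (uint32_t{p[4]} << 24) | (uint32_t{p[5]} << 16) |
               (uint32_t{p[6]} << 8) | p[7];
  *first = static_cast<int>(a);
  *second = static_cast<int>(b);
}
```
Any other encoding works as well, as long as `Compare` and `ParseKey` agree on it.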
Now create a database using this custom comparator:
```c++
TwoPartComparator cmp;
leveldb::DB* db;
leveldb::Options options;
options.create_if_missing = true;
options.comparator = &cmp;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
...
```
### Backwards compatibility
The result of the comparator's Name method is attached to the database when it
is created, and is checked on every subsequent database open. If the name
changes, the `leveldb::DB::Open` call will fail. Therefore, change the name if
and only if the new key format and comparison function are incompatible with
existing databases, and it is ok to discard the contents of all existing
databases.
You can, however, still gradually evolve your key format over time with a little
bit of pre-planning. For example, you could store a version number at the end of
each key (one byte should suffice for most uses). When you wish to switch to a
new key format (e.g., adding an optional third part to the keys processed by
`TwoPartComparator`), (a) keep the same comparator name (b) increment the
version number for new keys (c) change the comparator function so it uses the
version numbers found in the keys to decide how to interpret them.
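A sketch of what step (c) might look like, assuming the version byte is the last
byte of every key and `ParseVersionedKey` is an illustrative helper that fills a
third component with zero for old-format keys:
```c++
int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const {
  int a1, a2, a3, b1, b2, b3;
  ParseVersionedKey(a, &a1, &a2, &a3);  // reads the trailing version byte;
  ParseVersionedKey(b, &b1, &b2, &b3);  // fills the third part with 0 for v1 keys
  if (a1 != b1) return a1 < b1 ? -1 : +1;
  if (a2 != b2) return a2 < b2 ? -1 : +1;
  if (a3 != b3) return a3 < b3 ? -1 : +1;
  return 0;
}
```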
## Performance
Performance can be tuned by changing the default values of the types defined in
`include/leveldb/options.h`.
### Block size
leveldb groups adjacent keys together into the same block and such a block is
the unit of transfer to and from persistent storage. The default block size is
approximately 4096 uncompressed bytes. Applications that mostly do bulk scans
over the contents of the database may wish to increase this size. Applications
that do a lot of point reads of small values may wish to switch to a smaller
block size if performance measurements indicate an improvement. There isn't much
benefit in using blocks smaller than one kilobyte, or larger than a few
megabytes. Also note that compression will be more effective with larger block
sizes.
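For example, a scan-heavy application might raise the block size when opening the
database (the specific value below is only illustrative):
```c++
leveldb::Options options;
options.block_size = 64 * 1024;  // 64KB blocks instead of the ~4KB default
options.create_if_missing = true;
leveldb::DB* db;
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
```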
### Compression
Each block is individually compressed before being written to persistent
storage. Compression is on by default since the default compression method is
very fast, and is automatically disabled for uncompressible data. In rare cases,
applications may want to disable compression entirely, but should only do so if
benchmarks show a performance improvement:
```c++
leveldb::Options options;
options.compression = leveldb::kNoCompression;
... leveldb::DB::Open(options, name, ...) ....
```
### Cache
The contents of the database are stored in a set of files in the filesystem and
each file stores a sequence of compressed blocks. If options.block_cache is
non-NULL, it is used to cache frequently used uncompressed block contents.
```c++
#include "leveldb/cache.h"
leveldb::Options options;
options.block_cache = leveldb::NewLRUCache(100 * 1048576); // 100MB cache
leveldb::DB* db;
leveldb::DB::Open(options, name, &db);
... use the db ...
delete db;
delete options.block_cache;
```
Note that the cache holds uncompressed data, and therefore it should be sized
according to application level data sizes, without any reduction from
compression. (Caching of compressed blocks is left to the operating system
buffer cache, or any custom Env implementation provided by the client.)
When performing a bulk read, the application may wish to disable caching so that
the data processed by the bulk read does not end up displacing most of the
cached contents. A per-iterator option can be used to achieve this:
```c++
leveldb::ReadOptions options;
options.fill_cache = false;
leveldb::Iterator* it = db->NewIterator(options);
for (it->SeekToFirst(); it->Valid(); it->Next()) {
...
}
delete it;
```
### Key Layout
Note that the unit of disk transfer and caching is a block. Adjacent keys
(according to the database sort order) will usually be placed in the same block.
Therefore the application can improve its performance by placing keys that are
accessed together near each other and placing infrequently used keys in a
separate region of the key space.
For example, suppose we are implementing a simple file system on top of leveldb.
The types of entries we might wish to store are:

    filename -> permission-bits, length, list of file_block_ids
    file_block_id -> data

We might want to prefix filename keys with one letter (say '/') and the
`file_block_id` keys with a different letter (say '0') so that scans over just
the metadata do not force us to fetch and cache bulky file contents.
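A minimal sketch of such key construction (the prefixes and helper names are only
illustrative):
```c++
#include <string>

// Metadata keys all sort together under '/', bulky block data under '0'.
std::string MetadataKey(const std::string& filename) {
  return "/" + filename;
}

std::string DataKey(const std::string& file_block_id) {
  return "0" + file_block_id;
}
```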
### Filters
Because of the way leveldb data is organized on disk, a single `Get()` call may
involve multiple reads from disk. The optional FilterPolicy mechanism can be
used to reduce the number of disk reads substantially.
```c++
leveldb::Options options;
options.filter_policy = NewBloomFilterPolicy(10);
leveldb::DB* db;
leveldb::DB::Open(options, "/tmp/testdb", &db);
... use the database ...
delete db;
delete options.filter_policy;
```
The preceding code associates a Bloom filter based filtering policy with the
database. Bloom filter based filtering relies on keeping some number of bits of
data in memory per key (in this case 10 bits per key since that is the argument
we passed to `NewBloomFilterPolicy`). This filter will reduce the number of
unnecessary disk reads needed for `Get()` calls by a factor of approximately
100. Increasing the bits per key will lead to a larger reduction at the cost
of more memory usage. We recommend that applications whose working set does not
fit in memory and that do a lot of random reads set a filter policy.
If you are using a custom comparator, you should ensure that the filter policy
you are using is compatible with your comparator. For example, consider a
comparator that ignores trailing spaces when comparing keys.
`NewBloomFilterPolicy` must not be used with such a comparator. Instead, the
application should provide a custom filter policy that also ignores trailing
spaces. For example:
```c++
class CustomFilterPolicy : public leveldb::FilterPolicy {
 private:
  leveldb::FilterPolicy* builtin_policy_;

 public:
  CustomFilterPolicy() : builtin_policy_(leveldb::NewBloomFilterPolicy(10)) {}
  ~CustomFilterPolicy() { delete builtin_policy_; }

  const char* Name() const { return "IgnoreTrailingSpacesFilter"; }

  void CreateFilter(const leveldb::Slice* keys, int n, std::string* dst) const {
    // Use builtin bloom filter code after removing trailing spaces
    std::vector<leveldb::Slice> trimmed(n);
    for (int i = 0; i < n; i++) {
      trimmed[i] = RemoveTrailingSpaces(keys[i]);
    }
    builtin_policy_->CreateFilter(trimmed.data(), n, dst);
  }
};
```
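`RemoveTrailingSpaces` is not provided by leveldb; a minimal sketch is shown
below, together with a matching `KeyMayMatch` member (the other method required
by `leveldb::FilterPolicy`) that trims the probed key the same way before
delegating to the built-in policy:
```c++
// Illustrative helper: a Slice over the same bytes, minus trailing spaces.
static leveldb::Slice RemoveTrailingSpaces(leveldb::Slice key) {
  size_t len = key.size();
  while (len > 0 && key[len - 1] == ' ') {
    len--;
  }
  return leveldb::Slice(key.data(), len);
}

// Member of CustomFilterPolicy: probe the built-in filter with a trimmed key.
bool KeyMayMatch(const leveldb::Slice& key, const leveldb::Slice& filter) const {
  return builtin_policy_->KeyMayMatch(RemoveTrailingSpaces(key), filter);
}
```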
Advanced applications may provide a filter policy that does not use a bloom
filter but uses some other mechanism for summarizing a set of keys. See
`leveldb/filter_policy.h` for detail.
## Checksums
leveldb associates checksums with all data it stores in the file system. There
are two separate controls provided over how aggressively these checksums are
verified:
`ReadOptions::verify_checksums` may be set to true to force checksum
verification of all data that is read from the file system on behalf of a
particular read. By default, no such verification is done.
`Options::paranoid_checks` may be set to true before opening a database to make
the database implementation raise an error as soon as it detects an internal
corruption. Depending on which portion of the database has been corrupted, the
error may be raised when the database is opened, or later by another database
operation. By default, paranoid checking is off so that the database can be used
even if parts of its persistent storage have been corrupted.
If a database is corrupted (perhaps it cannot be opened when paranoid checking
is turned on), the `leveldb::RepairDB` function may be used to recover as much
of the data as possible.
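A minimal repair attempt looks like the following; how much data comes back
depends on the nature of the corruption:
```c++
#include "leveldb/db.h"

leveldb::Options options;
leveldb::Status status = leveldb::RepairDB("/tmp/testdb", options);
if (!status.ok()) {
  // The repair itself failed; status describes why.
}
```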
## Approximate Sizes
The `GetApproximateSizes` method can be used to get the approximate number of bytes
of file system space used by one or more key ranges.
```c++
leveldb::Range ranges[2];
ranges[0] = leveldb::Range("a", "c");
ranges[1] = leveldb::Range("x", "z");
uint64_t sizes[2];
db->GetApproximateSizes(ranges, 2, sizes);
```
The preceding call will set `sizes[0]` to the approximate number of bytes of
file system space used by the key range `[a..c)` and `sizes[1]` to the
approximate number of bytes used by the key range `[x..z)`.
## Environment
All file operations (and other operating system calls) issued by the leveldb
implementation are routed through a `leveldb::Env` object. Sophisticated clients
may wish to provide their own Env implementation to get better control.
For example, an application may introduce artificial delays in the file IO
paths to limit the impact of leveldb on other activities in the system.
```c++
class SlowEnv : public leveldb::Env {
  ... implementation of the Env interface ...
};
SlowEnv env;
leveldb::Options options;
options.env = &env;
Status s = leveldb::DB::Open(options, ...);
```
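One convenient way to build such an Env is to subclass `leveldb::EnvWrapper`,
which forwards every call to a wrapped Env, and override only the operations you
want to slow down. A sketch (the delay and the chosen method are only
illustrative):
```c++
#include "leveldb/env.h"

class SlowWritesEnv : public leveldb::EnvWrapper {
 public:
  explicit SlowWritesEnv(leveldb::Env* base) : leveldb::EnvWrapper(base) {}

  // Delay file creation a little; everything else falls through to the
  // wrapped Env via EnvWrapper.
  leveldb::Status NewWritableFile(const std::string& fname,
                                  leveldb::WritableFile** result) override {
    target()->SleepForMicroseconds(1000);  // illustrative 1ms delay
    return target()->NewWritableFile(fname, result);
  }
};

SlowWritesEnv env(leveldb::Env::Default());
leveldb::Options options;
options.env = &env;
... leveldb::DB::Open(options, ...) ...
```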
## Porting
leveldb may be ported to a new platform by providing platform specific
implementations of the types/methods/functions exported by
`leveldb/port/port.h`. See `leveldb/port/port_example.h` for more details.
In addition, the new platform may need a new default `leveldb::Env`
implementation. See `leveldb/util/env_posix.h` for an example.
## Other Information
Details about the leveldb implementation may be found in the following
documents:
1. [Implementation notes](impl.md)
2. [Format of an immutable Table file](table_format.md)
3. [Format of a log file](log_format.md)

doc/log_format.md Normal file

@ -0,0 +1,75 @@
leveldb Log format
==================
The log file contents are a sequence of 32KB blocks. The only exception is that
the tail of the file may contain a partial block.
Each block consists of a sequence of records:

    block := record* trailer?
    record :=
      checksum: uint32     // crc32c of type and data[] ; little-endian
      length: uint16       // little-endian
      type: uint8          // One of FULL, FIRST, MIDDLE, LAST
      data: uint8[length]

A record never starts within the last six bytes of a block (since it won't fit).
Any leftover bytes here form the trailer, which must consist entirely of zero
bytes and must be skipped by readers.
Aside: if exactly seven bytes are left in the current block, and a new non-zero
length record is added, the writer must emit a FIRST record (which contains zero
bytes of user data) to fill up the trailing seven bytes of the block and then
emit all of the user data in subsequent blocks.
More types may be added in the future. Some Readers may skip record types they
do not understand, others may report that some data was skipped.

    FULL == 1
    FIRST == 2
    MIDDLE == 3
    LAST == 4

The FULL record contains the contents of an entire user record.
FIRST, MIDDLE, LAST are types used for user records that have been split into
multiple fragments (typically because of block boundaries). FIRST is the type
of the first fragment of a user record, LAST is the type of the last fragment of
a user record, and MIDDLE is the type of all interior fragments of a user
record.
Example: consider a sequence of user records:

    A: length 1000
    B: length 97270
    C: length 8000

**A** will be stored as a FULL record in the first block.
**B** will be split into three fragments: first fragment occupies the rest of
the first block, second fragment occupies the entirety of the second block, and
the third fragment occupies a prefix of the third block. This will leave six
bytes free in the third block, which will be left empty as the trailer.
**C** will be stored as a FULL record in the fourth block.
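A sketch of the writer-side fragmentation rule that produces the layout above
(the emit function is a stand-in; the real logic lives in `db/log_writer.cc`):
```c++
#include <cstddef>

// Record types from the table above.
enum RecordType { FULL = 1, FIRST = 2, MIDDLE = 3, LAST = 4 };

const size_t kBlockSize = 32768;
const size_t kHeaderSize = 4 + 2 + 1;  // checksum + length + type

// Stand-in: the real writer computes the crc32c and appends header + data.
void EmitPhysicalRecord(RecordType type, size_t length) {}

// Split a record of `left` bytes across blocks, starting at `block_offset`.
void AddRecord(size_t left, size_t block_offset) {
  bool begin = true;
  do {
    if (kBlockSize - block_offset < kHeaderSize) {
      // Not even a 7-byte header fits: the leftover bytes become the
      // zero-filled trailer and writing switches to a fresh block.
      block_offset = 0;
    }
    const size_t avail = kBlockSize - block_offset - kHeaderSize;
    const size_t fragment = left < avail ? left : avail;
    const bool end = (left == fragment);
    RecordType type = (begin && end) ? FULL : begin ? FIRST : end ? LAST : MIDDLE;
    EmitPhysicalRecord(type, fragment);
    block_offset += kHeaderSize + fragment;
    left -= fragment;
    begin = false;
  } while (left > 0);
}
```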
----
## Some benefits over the recordio format:
1. We do not need any heuristics for resyncing - just go to next block boundary
and scan. If there is a corruption, skip to the next block. As a
side-benefit, we do not get confused when part of the contents of one log
file are embedded as a record inside another log file.
2. Splitting at approximate boundaries (e.g., for mapreduce) is simple: find the
next block boundary and skip records until we hit a FULL or FIRST record.
3. We do not need extra buffering for large records.
## Some downsides compared to recordio format:
1. No packing of tiny records. This could be fixed by adding a new record type,
so it is a shortcoming of the current implementation, not necessarily the
format.
2. No compression. Again, this could be fixed by adding new record types.


@ -1,75 +0,0 @@
The log file contents are a sequence of 32KB blocks. The only
exception is that the tail of the file may contain a partial block.
Each block consists of a sequence of records:
block := record* trailer?
record :=
checksum: uint32 // crc32c of type and data[] ; little-endian
length: uint16 // little-endian
type: uint8 // One of FULL, FIRST, MIDDLE, LAST
data: uint8[length]
A record never starts within the last six bytes of a block (since it
won't fit). Any leftover bytes here form the trailer, which must
consist entirely of zero bytes and must be skipped by readers.
Aside: if exactly seven bytes are left in the current block, and a new
non-zero length record is added, the writer must emit a FIRST record
(which contains zero bytes of user data) to fill up the trailing seven
bytes of the block and then emit all of the user data in subsequent
blocks.
More types may be added in the future. Some Readers may skip record
types they do not understand, others may report that some data was
skipped.
FULL == 1
FIRST == 2
MIDDLE == 3
LAST == 4
The FULL record contains the contents of an entire user record.
FIRST, MIDDLE, LAST are types used for user records that have been
split into multiple fragments (typically because of block boundaries).
FIRST is the type of the first fragment of a user record, LAST is the
type of the last fragment of a user record, and MIDDLE is the type of
all interior fragments of a user record.
Example: consider a sequence of user records:
A: length 1000
B: length 97270
C: length 8000
A will be stored as a FULL record in the first block.
B will be split into three fragments: first fragment occupies the rest
of the first block, second fragment occupies the entirety of the
second block, and the third fragment occupies a prefix of the third
block. This will leave six bytes free in the third block, which will
be left empty as the trailer.
C will be stored as a FULL record in the fourth block.
===================
Some benefits over the recordio format:
(1) We do not need any heuristics for resyncing - just go to next
block boundary and scan. If there is a corruption, skip to the next
block. As a side-benefit, we do not get confused when part of the
contents of one log file are embedded as a record inside another log
file.
(2) Splitting at approximate boundaries (e.g., for mapreduce) is
simple: find the next block boundary and skip records until we
hit a FULL or FIRST record.
(3) We do not need extra buffering for large records.
Some downsides compared to recordio format:
(1) No packing of tiny records. This could be fixed by adding a new
record type, so it is a shortcoming of the current implementation,
not necessarily the format.
(2) No compression. Again, this could be fixed by adding new record types.

doc/table_format.md Normal file

@ -0,0 +1,107 @@
leveldb File format
===================

    <beginning_of_file>
    [data block 1]
    [data block 2]
    ...
    [data block N]
    [meta block 1]
    ...
    [meta block K]
    [metaindex block]
    [index block]
    [Footer]        (fixed size; starts at file_size - sizeof(Footer))
    <end_of_file>

The file contains internal pointers. Each such pointer is called
a BlockHandle and contains the following information:

    offset: varint64
    size:   varint64

See [varints](https://developers.google.com/protocol-buffers/docs/encoding#varints)
for an explanation of varint64 format.
1. The sequence of key/value pairs in the file are stored in sorted
order and partitioned into a sequence of data blocks. These blocks
come one after another at the beginning of the file. Each data block
is formatted according to the code in `block_builder.cc`, and then
optionally compressed.
2. After the data blocks we store a bunch of meta blocks. The
supported meta block types are described below. More meta block types
may be added in the future. Each meta block is again formatted using
`block_builder.cc` and then optionally compressed.
3. A "metaindex" block. It contains one entry for every other meta
block where the key is the name of the meta block and the value is a
BlockHandle pointing to that meta block.
4. An "index" block. This block contains one entry per data block,
where the key is a string >= last key in that data block and before
the first key in the successive data block. The value is the
BlockHandle for the data block.
5. At the very end of the file is a fixed length footer that contains
the BlockHandle of the metaindex and index blocks as well as a magic number.

    metaindex_handle: char[p];      // Block handle for metaindex
    index_handle:     char[q];      // Block handle for index
    padding:          char[40-p-q]; // zeroed bytes to make fixed length
                                    // (40==2*BlockHandle::kMaxEncodedLength)
    magic:            fixed64;      // == 0xdb4775248b80fb57 (little-endian)

## "filter" Meta Block
If a `FilterPolicy` was specified when the database was opened, a
filter block is stored in each table. The "metaindex" block contains
an entry that maps from `filter.<N>` to the BlockHandle for the filter
block where `<N>` is the string returned by the filter policy's
`Name()` method.
The filter block stores a sequence of filters, where filter i contains
the output of `FilterPolicy::CreateFilter()` on all keys that are stored
in a block whose file offset falls within the range

    [ i*base ... (i+1)*base-1 ]

Currently, "base" is 2KB. So for example, if blocks X and Y start in
the range `[ 0KB .. 2KB-1 ]`, all of the keys in X and Y will be
converted to a filter by calling `FilterPolicy::CreateFilter()`, and the
resulting filter will be stored as the first filter in the filter
block.
The filter block is formatted as follows:

    [filter 0]
    [filter 1]
    [filter 2]
    ...
    [filter N-1]

    [offset of filter 0]                  : 4 bytes
    [offset of filter 1]                  : 4 bytes
    [offset of filter 2]                  : 4 bytes
    ...
    [offset of filter N-1]                : 4 bytes

    [offset of beginning of offset array] : 4 bytes
    lg(base)                              : 1 byte

The offset array at the end of the filter block allows efficient
mapping from a data block offset to the corresponding filter.
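A reader can therefore locate the filter for a data block with a shift and one
lookup in that offset array. A sketch, assuming the offsets (including the final
offset of the offset array itself) have already been decoded into memory; the
names here are illustrative, not the actual table code:
```c++
#include <cstdint>
#include <vector>

#include "leveldb/slice.h"

// offsets[i] is the start of filter i; the last entry is the offset of the
// offset array, i.e. the end of the last filter.
leveldb::Slice FilterForBlock(uint64_t block_offset, int lg_base,
                              const char* filter_data,
                              const std::vector<uint32_t>& offsets) {
  const uint64_t index = block_offset >> lg_base;  // block_offset / base
  if (index + 1 >= offsets.size()) {
    return leveldb::Slice();  // no filter covers this offset
  }
  const uint32_t start = offsets[index];
  const uint32_t limit = offsets[index + 1];
  return leveldb::Slice(filter_data + start, limit - start);
}
```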
## "stats" Meta Block
This meta block contains a bunch of stats. The key is the name
of the statistic. The value contains the statistic.
TODO(postrelease): record following stats.

    data size
    index size
    key size (uncompressed)
    value size (uncompressed)
    number of entries
    number of data blocks



@ -1,104 +0,0 @@
File format
===========
<beginning_of_file>
[data block 1]
[data block 2]
...
[data block N]
[meta block 1]
...
[meta block K]
[metaindex block]
[index block]
[Footer] (fixed size; starts at file_size - sizeof(Footer))
<end_of_file>
The file contains internal pointers. Each such pointer is called
a BlockHandle and contains the following information:
offset: varint64
size: varint64
See https://developers.google.com/protocol-buffers/docs/encoding#varints
for an explanation of varint64 format.
(1) The sequence of key/value pairs in the file are stored in sorted
order and partitioned into a sequence of data blocks. These blocks
come one after another at the beginning of the file. Each data block
is formatted according to the code in block_builder.cc, and then
optionally compressed.
(2) After the data blocks we store a bunch of meta blocks. The
supported meta block types are described below. More meta block types
may be added in the future. Each meta block is again formatted using
block_builder.cc and then optionally compressed.
(3) A "metaindex" block. It contains one entry for every other meta
block where the key is the name of the meta block and the value is a
BlockHandle pointing to that meta block.
(4) An "index" block. This block contains one entry per data block,
where the key is a string >= last key in that data block and before
the first key in the successive data block. The value is the
BlockHandle for the data block.
(6) At the very end of the file is a fixed length footer that contains
the BlockHandle of the metaindex and index blocks as well as a magic number.
metaindex_handle: char[p]; // Block handle for metaindex
index_handle: char[q]; // Block handle for index
padding: char[40-p-q]; // zeroed bytes to make fixed length
// (40==2*BlockHandle::kMaxEncodedLength)
magic: fixed64; // == 0xdb4775248b80fb57 (little-endian)
"filter" Meta Block
-------------------
If a "FilterPolicy" was specified when the database was opened, a
filter block is stored in each table. The "metaindex" block contains
an entry that maps from "filter.<N>" to the BlockHandle for the filter
block where "<N>" is the string returned by the filter policy's
"Name()" method.
The filter block stores a sequence of filters, where filter i contains
the output of FilterPolicy::CreateFilter() on all keys that are stored
in a block whose file offset falls within the range
[ i*base ... (i+1)*base-1 ]
Currently, "base" is 2KB. So for example, if blocks X and Y start in
the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be
converted to a filter by calling FilterPolicy::CreateFilter(), and the
resulting filter will be stored as the first filter in the filter
block.
The filter block is formatted as follows:
[filter 0]
[filter 1]
[filter 2]
...
[filter N-1]
[offset of filter 0] : 4 bytes
[offset of filter 1] : 4 bytes
[offset of filter 2] : 4 bytes
...
[offset of filter N-1] : 4 bytes
[offset of beginning of offset array] : 4 bytes
lg(base) : 1 byte
The offset array at the end of the filter block allows efficient
mapping from a data block offset to the corresponding filter.
"stats" Meta Block
------------------
This meta block contains a bunch of stats. The key is the name
of the statistic. The value contains the statistic.
TODO(postrelease): record following stats.
data size
index size
key size (uncompressed)
value size (uncompressed)
number of entries
number of data blocks


@ -4,14 +4,17 @@
#include "helpers/memenv/memenv.h"
#include <cstring>
#include <limits>
#include <map>
#include <string>
#include <vector>
#include "leveldb/env.h"
#include "leveldb/status.h"
#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/mutexlock.h"
#include <map>
#include <string.h>
#include <string>
#include <vector>
namespace leveldb {
@ -23,6 +26,10 @@ class FileState {
// and the caller must call Ref() at least once.
FileState() : refs_(0), size_(0) {}
// No copying allowed.
FileState(const FileState&) = delete;
FileState& operator=(const FileState&) = delete;
// Increase the reference count.
void Ref() {
MutexLock lock(&refs_mutex_);
@ -47,9 +54,22 @@ class FileState {
}
}
uint64_t Size() const { return size_; }
uint64_t Size() const {
MutexLock lock(&blocks_mutex_);
return size_;
}
void Truncate() {
MutexLock lock(&blocks_mutex_);
for (char*& block : blocks_) {
delete[] block;
}
blocks_.clear();
size_ = 0;
}
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const {
MutexLock lock(&blocks_mutex_);
if (offset > size_) {
return Status::IOError("Offset greater than file size.");
}
@ -62,16 +82,9 @@ class FileState {
return Status::OK();
}
assert(offset / kBlockSize <= SIZE_MAX);
assert(offset / kBlockSize <= std::numeric_limits<size_t>::max());
size_t block = static_cast<size_t>(offset / kBlockSize);
size_t block_offset = offset % kBlockSize;
if (n <= kBlockSize - block_offset) {
// The requested bytes are all in the first block.
*result = Slice(blocks_[block] + block_offset, n);
return Status::OK();
}
size_t bytes_to_copy = n;
char* dst = scratch;
@ -80,7 +93,7 @@ class FileState {
if (avail > bytes_to_copy) {
avail = bytes_to_copy;
}
memcpy(dst, blocks_[block] + block_offset, avail);
std::memcpy(dst, blocks_[block] + block_offset, avail);
bytes_to_copy -= avail;
dst += avail;
@ -96,6 +109,7 @@ class FileState {
const char* src = data.data();
size_t src_len = data.size();
MutexLock lock(&blocks_mutex_);
while (src_len > 0) {
size_t avail;
size_t offset = size_ % kBlockSize;
@ -112,7 +126,7 @@ class FileState {
if (avail > src_len) {
avail = src_len;
}
memcpy(blocks_.back() + offset, src, avail);
std::memcpy(blocks_.back() + offset, src, avail);
src_len -= avail;
src += avail;
size_ += avail;
@ -122,28 +136,17 @@ class FileState {
}
private:
// Private since only Unref() should be used to delete it.
~FileState() {
for (std::vector<char*>::iterator i = blocks_.begin(); i != blocks_.end();
++i) {
delete [] *i;
}
}
enum { kBlockSize = 8 * 1024 };
// No copying allowed.
FileState(const FileState&);
void operator=(const FileState&);
// Private since only Unref() should be used to delete it.
~FileState() { Truncate(); }
port::Mutex refs_mutex_;
int refs_; // Protected by refs_mutex_;
int refs_ GUARDED_BY(refs_mutex_);
// The following fields are not protected by any mutex. They are only mutable
// while the file is being written, and concurrent access is not allowed
// to writable files.
std::vector<char*> blocks_;
uint64_t size_;
enum { kBlockSize = 8 * 1024 };
mutable port::Mutex blocks_mutex_;
std::vector<char*> blocks_ GUARDED_BY(blocks_mutex_);
uint64_t size_ GUARDED_BY(blocks_mutex_);
};
class SequentialFileImpl : public SequentialFile {
@ -152,11 +155,9 @@ class SequentialFileImpl : public SequentialFile {
file_->Ref();
}
~SequentialFileImpl() {
file_->Unref();
}
~SequentialFileImpl() override { file_->Unref(); }
virtual Status Read(size_t n, Slice* result, char* scratch) {
Status Read(size_t n, Slice* result, char* scratch) override {
Status s = file_->Read(pos_, n, result, scratch);
if (s.ok()) {
pos_ += result->size();
@ -164,7 +165,7 @@ class SequentialFileImpl : public SequentialFile {
return s;
}
virtual Status Skip(uint64_t n) {
Status Skip(uint64_t n) override {
if (pos_ > file_->Size()) {
return Status::IOError("pos_ > file_->Size()");
}
@ -183,16 +184,12 @@ class SequentialFileImpl : public SequentialFile {
class RandomAccessFileImpl : public RandomAccessFile {
public:
explicit RandomAccessFileImpl(FileState* file) : file_(file) {
file_->Ref();
}
explicit RandomAccessFileImpl(FileState* file) : file_(file) { file_->Ref(); }
~RandomAccessFileImpl() {
file_->Unref();
}
~RandomAccessFileImpl() override { file_->Unref(); }
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
return file_->Read(offset, n, result, scratch);
}
@ -202,21 +199,15 @@ class RandomAccessFileImpl : public RandomAccessFile {
class WritableFileImpl : public WritableFile {
public:
WritableFileImpl(FileState* file) : file_(file) {
file_->Ref();
}
WritableFileImpl(FileState* file) : file_(file) { file_->Ref(); }
~WritableFileImpl() {
file_->Unref();
}
~WritableFileImpl() override { file_->Unref(); }
virtual Status Append(const Slice& data) {
return file_->Append(data);
}
Status Append(const Slice& data) override { return file_->Append(data); }
virtual Status Close() { return Status::OK(); }
virtual Status Flush() { return Status::OK(); }
virtual Status Sync() { return Status::OK(); }
Status Close() override { return Status::OK(); }
Status Flush() override { return Status::OK(); }
Status Sync() override { return Status::OK(); }
private:
FileState* file_;
@ -224,25 +215,25 @@ class WritableFileImpl : public WritableFile {
class NoOpLogger : public Logger {
public:
virtual void Logv(const char* format, va_list ap) { }
void Logv(const char* format, std::va_list ap) override {}
};
class InMemoryEnv : public EnvWrapper {
public:
explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { }
explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) {}
virtual ~InMemoryEnv() {
for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){
i->second->Unref();
~InMemoryEnv() override {
for (const auto& kvp : file_map_) {
kvp.second->Unref();
}
}
// Partial implementation of the Env interface.
virtual Status NewSequentialFile(const std::string& fname,
SequentialFile** result) {
Status NewSequentialFile(const std::string& fname,
SequentialFile** result) override {
MutexLock lock(&mutex_);
if (file_map_.find(fname) == file_map_.end()) {
*result = NULL;
*result = nullptr;
return Status::IOError(fname, "File not found");
}
@ -250,11 +241,11 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
virtual Status NewRandomAccessFile(const std::string& fname,
RandomAccessFile** result) {
Status NewRandomAccessFile(const std::string& fname,
RandomAccessFile** result) override {
MutexLock lock(&mutex_);
if (file_map_.find(fname) == file_map_.end()) {
*result = NULL;
*result = nullptr;
return Status::IOError(fname, "File not found");
}
@ -262,27 +253,32 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result) {
Status NewWritableFile(const std::string& fname,
WritableFile** result) override {
MutexLock lock(&mutex_);
if (file_map_.find(fname) != file_map_.end()) {
DeleteFileInternal(fname);
}
FileSystem::iterator it = file_map_.find(fname);
FileState* file = new FileState();
file->Ref();
file_map_[fname] = file;
FileState* file;
if (it == file_map_.end()) {
// File is not currently open.
file = new FileState();
file->Ref();
file_map_[fname] = file;
} else {
file = it->second;
file->Truncate();
}
*result = new WritableFileImpl(file);
return Status::OK();
}
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result) {
Status NewAppendableFile(const std::string& fname,
WritableFile** result) override {
MutexLock lock(&mutex_);
FileState** sptr = &file_map_[fname];
FileState* file = *sptr;
if (file == NULL) {
if (file == nullptr) {
file = new FileState();
file->Ref();
}
@ -290,18 +286,18 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
virtual bool FileExists(const std::string& fname) {
bool FileExists(const std::string& fname) override {
MutexLock lock(&mutex_);
return file_map_.find(fname) != file_map_.end();
}
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) {
Status GetChildren(const std::string& dir,
std::vector<std::string>* result) override {
MutexLock lock(&mutex_);
result->clear();
for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){
const std::string& filename = i->first;
for (const auto& kvp : file_map_) {
const std::string& filename = kvp.first;
if (filename.size() >= dir.size() + 1 && filename[dir.size()] == '/' &&
Slice(filename).starts_with(Slice(dir))) {
@ -312,7 +308,8 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
void DeleteFileInternal(const std::string& fname) {
void RemoveFileInternal(const std::string& fname)
EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
if (file_map_.find(fname) == file_map_.end()) {
return;
}
@ -321,25 +318,21 @@ class InMemoryEnv : public EnvWrapper {
file_map_.erase(fname);
}
virtual Status DeleteFile(const std::string& fname) {
Status RemoveFile(const std::string& fname) override {
MutexLock lock(&mutex_);
if (file_map_.find(fname) == file_map_.end()) {
return Status::IOError(fname, "File not found");
}
DeleteFileInternal(fname);
RemoveFileInternal(fname);
return Status::OK();
}
virtual Status CreateDir(const std::string& dirname) {
return Status::OK();
}
Status CreateDir(const std::string& dirname) override { return Status::OK(); }
virtual Status DeleteDir(const std::string& dirname) {
return Status::OK();
}
Status RemoveDir(const std::string& dirname) override { return Status::OK(); }
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) {
Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
MutexLock lock(&mutex_);
if (file_map_.find(fname) == file_map_.end()) {
return Status::IOError(fname, "File not found");
@ -349,35 +342,35 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK();
}
virtual Status RenameFile(const std::string& src,
const std::string& target) {
Status RenameFile(const std::string& src,
const std::string& target) override {
MutexLock lock(&mutex_);
if (file_map_.find(src) == file_map_.end()) {
return Status::IOError(src, "File not found");
}
DeleteFileInternal(target);
RemoveFileInternal(target);
file_map_[target] = file_map_[src];
file_map_.erase(src);
return Status::OK();
}
virtual Status LockFile(const std::string& fname, FileLock** lock) {
Status LockFile(const std::string& fname, FileLock** lock) override {
*lock = new FileLock;
return Status::OK();
}
virtual Status UnlockFile(FileLock* lock) {
Status UnlockFile(FileLock* lock) override {
delete lock;
return Status::OK();
}
virtual Status GetTestDirectory(std::string* path) {
Status GetTestDirectory(std::string* path) override {
*path = "/test";
return Status::OK();
}
virtual Status NewLogger(const std::string& fname, Logger** result) {
Status NewLogger(const std::string& fname, Logger** result) override {
*result = new NoOpLogger;
return Status::OK();
}
@ -385,14 +378,13 @@ class InMemoryEnv : public EnvWrapper {
private:
// Map from filenames to FileState objects, representing a simple file system.
typedef std::map<std::string, FileState*> FileSystem;
port::Mutex mutex_;
FileSystem file_map_; // Protected by mutex_.
FileSystem file_map_ GUARDED_BY(mutex_);
};
} // namespace
Env* NewMemEnv(Env* base_env) {
return new InMemoryEnv(base_env);
}
Env* NewMemEnv(Env* base_env) { return new InMemoryEnv(base_env); }
} // namespace leveldb


@ -5,6 +5,8 @@
#ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
#define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
#include "leveldb/export.h"
namespace leveldb {
class Env;
@ -13,7 +15,7 @@ class Env;
// all non-file-storage tasks to base_env. The caller must delete the result
// when it is no longer needed.
// *base_env must remain live while the result is in use.
Env* NewMemEnv(Env* base_env);
LEVELDB_EXPORT Env* NewMemEnv(Env* base_env);
} // namespace leveldb


@ -4,76 +4,74 @@
#include "helpers/memenv/memenv.h"
#include "db/db_impl.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "util/testharness.h"
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "db/db_impl.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "util/testutil.h"
namespace leveldb {
class MemEnvTest {
class MemEnvTest : public testing::Test {
public:
Env* env_;
MemEnvTest() : env_(NewMemEnv(Env::Default())) {}
~MemEnvTest() { delete env_; }
MemEnvTest()
: env_(NewMemEnv(Env::Default())) {
}
~MemEnvTest() {
delete env_;
}
Env* env_;
};
TEST(MemEnvTest, Basics) {
TEST_F(MemEnvTest, Basics) {
uint64_t file_size;
WritableFile* writable_file;
std::vector<std::string> children;
ASSERT_OK(env_->CreateDir("/dir"));
ASSERT_LEVELDB_OK(env_->CreateDir("/dir"));
// Check that the directory is empty.
ASSERT_TRUE(!env_->FileExists("/dir/non_existent"));
ASSERT_TRUE(!env_->GetFileSize("/dir/non_existent", &file_size).ok());
ASSERT_OK(env_->GetChildren("/dir", &children));
ASSERT_LEVELDB_OK(env_->GetChildren("/dir", &children));
ASSERT_EQ(0, children.size());
// Create a file.
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_LEVELDB_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_LEVELDB_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(0, file_size);
delete writable_file;
// Check that the file exists.
ASSERT_TRUE(env_->FileExists("/dir/f"));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_LEVELDB_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(0, file_size);
ASSERT_OK(env_->GetChildren("/dir", &children));
ASSERT_LEVELDB_OK(env_->GetChildren("/dir", &children));
ASSERT_EQ(1, children.size());
ASSERT_EQ("f", children[0]);
// Write to the file.
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_OK(writable_file->Append("abc"));
ASSERT_LEVELDB_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_LEVELDB_OK(writable_file->Append("abc"));
delete writable_file;
// Check that append works.
ASSERT_OK(env_->NewAppendableFile("/dir/f", &writable_file));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_LEVELDB_OK(env_->NewAppendableFile("/dir/f", &writable_file));
ASSERT_LEVELDB_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(3, file_size);
ASSERT_OK(writable_file->Append("hello"));
ASSERT_LEVELDB_OK(writable_file->Append("hello"));
delete writable_file;
// Check for expected size.
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_LEVELDB_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(8, file_size);
// Check that renaming works.
ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok());
ASSERT_OK(env_->RenameFile("/dir/f", "/dir/g"));
ASSERT_LEVELDB_OK(env_->RenameFile("/dir/f", "/dir/g"));
ASSERT_TRUE(!env_->FileExists("/dir/f"));
ASSERT_TRUE(env_->FileExists("/dir/g"));
ASSERT_OK(env_->GetFileSize("/dir/g", &file_size));
ASSERT_LEVELDB_OK(env_->GetFileSize("/dir/g", &file_size));
ASSERT_EQ(8, file_size);
// Check that opening non-existent file fails.
@ -85,49 +83,50 @@ TEST(MemEnvTest, Basics) {
ASSERT_TRUE(!rand_file);
// Check that deleting works.
ASSERT_TRUE(!env_->DeleteFile("/dir/non_existent").ok());
ASSERT_OK(env_->DeleteFile("/dir/g"));
ASSERT_TRUE(!env_->RemoveFile("/dir/non_existent").ok());
ASSERT_LEVELDB_OK(env_->RemoveFile("/dir/g"));
ASSERT_TRUE(!env_->FileExists("/dir/g"));
ASSERT_OK(env_->GetChildren("/dir", &children));
ASSERT_LEVELDB_OK(env_->GetChildren("/dir", &children));
ASSERT_EQ(0, children.size());
ASSERT_OK(env_->DeleteDir("/dir"));
ASSERT_LEVELDB_OK(env_->RemoveDir("/dir"));
}
TEST(MemEnvTest, ReadWrite) {
TEST_F(MemEnvTest, ReadWrite) {
WritableFile* writable_file;
SequentialFile* seq_file;
RandomAccessFile* rand_file;
Slice result;
char scratch[100];
ASSERT_OK(env_->CreateDir("/dir"));
ASSERT_LEVELDB_OK(env_->CreateDir("/dir"));
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_OK(writable_file->Append("hello "));
ASSERT_OK(writable_file->Append("world"));
ASSERT_LEVELDB_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_LEVELDB_OK(writable_file->Append("hello "));
ASSERT_LEVELDB_OK(writable_file->Append("world"));
delete writable_file;
// Read sequentially.
ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file));
ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello".
ASSERT_LEVELDB_OK(env_->NewSequentialFile("/dir/f", &seq_file));
ASSERT_LEVELDB_OK(seq_file->Read(5, &result, scratch)); // Read "hello".
ASSERT_EQ(0, result.compare("hello"));
ASSERT_OK(seq_file->Skip(1));
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world".
ASSERT_LEVELDB_OK(seq_file->Skip(1));
ASSERT_LEVELDB_OK(seq_file->Read(1000, &result, scratch)); // Read "world".
ASSERT_EQ(0, result.compare("world"));
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF.
ASSERT_LEVELDB_OK(
seq_file->Read(1000, &result, scratch)); // Try reading past EOF.
ASSERT_EQ(0, result.size());
ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file.
ASSERT_OK(seq_file->Read(1000, &result, scratch));
ASSERT_LEVELDB_OK(seq_file->Skip(100)); // Try to skip past end of file.
ASSERT_LEVELDB_OK(seq_file->Read(1000, &result, scratch));
ASSERT_EQ(0, result.size());
delete seq_file;
// Random reads.
ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file));
ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world".
ASSERT_LEVELDB_OK(env_->NewRandomAccessFile("/dir/f", &rand_file));
ASSERT_LEVELDB_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world".
ASSERT_EQ(0, result.compare("world"));
ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello".
ASSERT_LEVELDB_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello".
ASSERT_EQ(0, result.compare("hello"));
ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d".
ASSERT_LEVELDB_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d".
ASSERT_EQ(0, result.compare("d"));
// Too high offset.
@ -135,30 +134,30 @@ TEST(MemEnvTest, ReadWrite) {
delete rand_file;
}
TEST(MemEnvTest, Locks) {
TEST_F(MemEnvTest, Locks) {
FileLock* lock;
// These are no-ops, but we test they return success.
ASSERT_OK(env_->LockFile("some file", &lock));
ASSERT_OK(env_->UnlockFile(lock));
ASSERT_LEVELDB_OK(env_->LockFile("some file", &lock));
ASSERT_LEVELDB_OK(env_->UnlockFile(lock));
}
TEST(MemEnvTest, Misc) {
TEST_F(MemEnvTest, Misc) {
std::string test_dir;
ASSERT_OK(env_->GetTestDirectory(&test_dir));
ASSERT_LEVELDB_OK(env_->GetTestDirectory(&test_dir));
ASSERT_TRUE(!test_dir.empty());
WritableFile* writable_file;
ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file));
ASSERT_LEVELDB_OK(env_->NewWritableFile("/a/b", &writable_file));
// These are no-ops, but we test they return success.
ASSERT_OK(writable_file->Sync());
ASSERT_OK(writable_file->Flush());
ASSERT_OK(writable_file->Close());
ASSERT_LEVELDB_OK(writable_file->Sync());
ASSERT_LEVELDB_OK(writable_file->Flush());
ASSERT_LEVELDB_OK(writable_file->Close());
delete writable_file;
}
TEST(MemEnvTest, LargeWrite) {
TEST_F(MemEnvTest, LargeWrite) {
const size_t kWriteSize = 300 * 1024;
char* scratch = new char[kWriteSize * 2];
@ -168,30 +167,53 @@ TEST(MemEnvTest, LargeWrite) {
}
WritableFile* writable_file;
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_OK(writable_file->Append("foo"));
ASSERT_OK(writable_file->Append(write_data));
ASSERT_LEVELDB_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_LEVELDB_OK(writable_file->Append("foo"));
ASSERT_LEVELDB_OK(writable_file->Append(write_data));
delete writable_file;
SequentialFile* seq_file;
Slice result;
ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file));
ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo".
ASSERT_LEVELDB_OK(env_->NewSequentialFile("/dir/f", &seq_file));
ASSERT_LEVELDB_OK(seq_file->Read(3, &result, scratch)); // Read "foo".
ASSERT_EQ(0, result.compare("foo"));
size_t read = 0;
std::string read_data;
while (read < kWriteSize) {
ASSERT_OK(seq_file->Read(kWriteSize - read, &result, scratch));
ASSERT_LEVELDB_OK(seq_file->Read(kWriteSize - read, &result, scratch));
read_data.append(result.data(), result.size());
read += result.size();
}
ASSERT_TRUE(write_data == read_data);
delete seq_file;
delete [] scratch;
delete[] scratch;
}
TEST(MemEnvTest, DBTest) {
TEST_F(MemEnvTest, OverwriteOpenFile) {
const char kWrite1Data[] = "Write #1 data";
const size_t kFileDataLen = sizeof(kWrite1Data) - 1;
const std::string kTestFileName = testing::TempDir() + "leveldb-TestFile.dat";
ASSERT_LEVELDB_OK(WriteStringToFile(env_, kWrite1Data, kTestFileName));
RandomAccessFile* rand_file;
ASSERT_LEVELDB_OK(env_->NewRandomAccessFile(kTestFileName, &rand_file));
const char kWrite2Data[] = "Write #2 data";
ASSERT_LEVELDB_OK(WriteStringToFile(env_, kWrite2Data, kTestFileName));
// Verify that overwriting an open file will result in the new file data
// being read from files opened before the write.
Slice result;
char scratch[kFileDataLen];
ASSERT_LEVELDB_OK(rand_file->Read(0, kFileDataLen, &result, scratch));
ASSERT_EQ(0, result.compare(kWrite2Data));
delete rand_file;
}
TEST_F(MemEnvTest, DBTest) {
Options options;
options.create_if_missing = true;
options.env = env_;
@ -200,14 +222,14 @@ TEST(MemEnvTest, DBTest) {
const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
ASSERT_OK(DB::Open(options, "/dir/db", &db));
ASSERT_LEVELDB_OK(DB::Open(options, "/dir/db", &db));
for (size_t i = 0; i < 3; ++i) {
ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
ASSERT_LEVELDB_OK(db->Put(WriteOptions(), keys[i], vals[i]));
}
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_LEVELDB_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
@ -223,11 +245,11 @@ TEST(MemEnvTest, DBTest) {
delete iterator;
DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
ASSERT_OK(dbi->TEST_CompactMemTable());
ASSERT_LEVELDB_OK(dbi->TEST_CompactMemTable());
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_LEVELDB_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
@ -235,7 +257,3 @@ TEST(MemEnvTest, DBTest) {
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}


@ -32,7 +32,7 @@
On failure, leveldb frees the old value of *errptr and
set *errptr to a malloc()ed error message.
(4) Bools have the type unsigned char (0 == false; rest == true)
(4) Bools have the type uint8_t (0 == false; rest == true)
(5) All of the pointer arguments must be non-NULL.
*/
@ -40,233 +40,213 @@
#ifndef STORAGE_LEVELDB_INCLUDE_C_H_
#define STORAGE_LEVELDB_INCLUDE_C_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include "leveldb/export.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Exported types */
typedef struct leveldb_t leveldb_t;
typedef struct leveldb_cache_t leveldb_cache_t;
typedef struct leveldb_comparator_t leveldb_comparator_t;
typedef struct leveldb_env_t leveldb_env_t;
typedef struct leveldb_filelock_t leveldb_filelock_t;
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
typedef struct leveldb_iterator_t leveldb_iterator_t;
typedef struct leveldb_logger_t leveldb_logger_t;
typedef struct leveldb_options_t leveldb_options_t;
typedef struct leveldb_randomfile_t leveldb_randomfile_t;
typedef struct leveldb_readoptions_t leveldb_readoptions_t;
typedef struct leveldb_seqfile_t leveldb_seqfile_t;
typedef struct leveldb_snapshot_t leveldb_snapshot_t;
typedef struct leveldb_writablefile_t leveldb_writablefile_t;
typedef struct leveldb_writebatch_t leveldb_writebatch_t;
typedef struct leveldb_writeoptions_t leveldb_writeoptions_t;
typedef struct leveldb_t leveldb_t;
typedef struct leveldb_cache_t leveldb_cache_t;
typedef struct leveldb_comparator_t leveldb_comparator_t;
typedef struct leveldb_env_t leveldb_env_t;
typedef struct leveldb_filelock_t leveldb_filelock_t;
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
typedef struct leveldb_iterator_t leveldb_iterator_t;
typedef struct leveldb_logger_t leveldb_logger_t;
typedef struct leveldb_options_t leveldb_options_t;
typedef struct leveldb_randomfile_t leveldb_randomfile_t;
typedef struct leveldb_readoptions_t leveldb_readoptions_t;
typedef struct leveldb_seqfile_t leveldb_seqfile_t;
typedef struct leveldb_snapshot_t leveldb_snapshot_t;
typedef struct leveldb_writablefile_t leveldb_writablefile_t;
typedef struct leveldb_writebatch_t leveldb_writebatch_t;
typedef struct leveldb_writeoptions_t leveldb_writeoptions_t;
/* DB operations */
extern leveldb_t* leveldb_open(
const leveldb_options_t* options,
const char* name,
char** errptr);
LEVELDB_EXPORT leveldb_t* leveldb_open(const leveldb_options_t* options,
const char* name, char** errptr);
extern void leveldb_close(leveldb_t* db);
LEVELDB_EXPORT void leveldb_close(leveldb_t* db);
extern void leveldb_put(
leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr);
LEVELDB_EXPORT void leveldb_put(leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen, const char* val,
size_t vallen, char** errptr);
extern void leveldb_delete(
leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen,
char** errptr);
LEVELDB_EXPORT void leveldb_delete(leveldb_t* db,
const leveldb_writeoptions_t* options,
const char* key, size_t keylen,
char** errptr);
extern void leveldb_write(
leveldb_t* db,
const leveldb_writeoptions_t* options,
leveldb_writebatch_t* batch,
char** errptr);
LEVELDB_EXPORT void leveldb_write(leveldb_t* db,
const leveldb_writeoptions_t* options,
leveldb_writebatch_t* batch, char** errptr);
/* Returns NULL if not found. A malloc()ed array otherwise.
Stores the length of the array in *vallen. */
extern char* leveldb_get(
leveldb_t* db,
const leveldb_readoptions_t* options,
const char* key, size_t keylen,
size_t* vallen,
char** errptr);
LEVELDB_EXPORT char* leveldb_get(leveldb_t* db,
const leveldb_readoptions_t* options,
const char* key, size_t keylen, size_t* vallen,
char** errptr);
extern leveldb_iterator_t* leveldb_create_iterator(
leveldb_t* db,
const leveldb_readoptions_t* options);
LEVELDB_EXPORT leveldb_iterator_t* leveldb_create_iterator(
leveldb_t* db, const leveldb_readoptions_t* options);
extern const leveldb_snapshot_t* leveldb_create_snapshot(
leveldb_t* db);
LEVELDB_EXPORT const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db);
extern void leveldb_release_snapshot(
leveldb_t* db,
const leveldb_snapshot_t* snapshot);
LEVELDB_EXPORT void leveldb_release_snapshot(
leveldb_t* db, const leveldb_snapshot_t* snapshot);
/* Returns NULL if property name is unknown.
Else returns a pointer to a malloc()-ed null-terminated value. */
extern char* leveldb_property_value(
leveldb_t* db,
const char* propname);
LEVELDB_EXPORT char* leveldb_property_value(leveldb_t* db,
const char* propname);
extern void leveldb_approximate_sizes(
leveldb_t* db,
int num_ranges,
const char* const* range_start_key, const size_t* range_start_key_len,
const char* const* range_limit_key, const size_t* range_limit_key_len,
uint64_t* sizes);
LEVELDB_EXPORT void leveldb_approximate_sizes(
leveldb_t* db, int num_ranges, const char* const* range_start_key,
const size_t* range_start_key_len, const char* const* range_limit_key,
const size_t* range_limit_key_len, uint64_t* sizes);
extern void leveldb_compact_range(
leveldb_t* db,
const char* start_key, size_t start_key_len,
const char* limit_key, size_t limit_key_len);
LEVELDB_EXPORT void leveldb_compact_range(leveldb_t* db, const char* start_key,
size_t start_key_len,
const char* limit_key,
size_t limit_key_len);
/* Management operations */
extern void leveldb_destroy_db(
const leveldb_options_t* options,
const char* name,
char** errptr);
LEVELDB_EXPORT void leveldb_destroy_db(const leveldb_options_t* options,
const char* name, char** errptr);
extern void leveldb_repair_db(
const leveldb_options_t* options,
const char* name,
char** errptr);
LEVELDB_EXPORT void leveldb_repair_db(const leveldb_options_t* options,
const char* name, char** errptr);
/* Iterator */
extern void leveldb_iter_destroy(leveldb_iterator_t*);
extern unsigned char leveldb_iter_valid(const leveldb_iterator_t*);
extern void leveldb_iter_seek_to_first(leveldb_iterator_t*);
extern void leveldb_iter_seek_to_last(leveldb_iterator_t*);
extern void leveldb_iter_seek(leveldb_iterator_t*, const char* k, size_t klen);
extern void leveldb_iter_next(leveldb_iterator_t*);
extern void leveldb_iter_prev(leveldb_iterator_t*);
extern const char* leveldb_iter_key(const leveldb_iterator_t*, size_t* klen);
extern const char* leveldb_iter_value(const leveldb_iterator_t*, size_t* vlen);
extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr);
LEVELDB_EXPORT void leveldb_iter_destroy(leveldb_iterator_t*);
LEVELDB_EXPORT uint8_t leveldb_iter_valid(const leveldb_iterator_t*);
LEVELDB_EXPORT void leveldb_iter_seek_to_first(leveldb_iterator_t*);
LEVELDB_EXPORT void leveldb_iter_seek_to_last(leveldb_iterator_t*);
LEVELDB_EXPORT void leveldb_iter_seek(leveldb_iterator_t*, const char* k,
size_t klen);
LEVELDB_EXPORT void leveldb_iter_next(leveldb_iterator_t*);
LEVELDB_EXPORT void leveldb_iter_prev(leveldb_iterator_t*);
LEVELDB_EXPORT const char* leveldb_iter_key(const leveldb_iterator_t*,
size_t* klen);
LEVELDB_EXPORT const char* leveldb_iter_value(const leveldb_iterator_t*,
size_t* vlen);
LEVELDB_EXPORT void leveldb_iter_get_error(const leveldb_iterator_t*,
char** errptr);
/* Write batch */
extern leveldb_writebatch_t* leveldb_writebatch_create();
extern void leveldb_writebatch_destroy(leveldb_writebatch_t*);
extern void leveldb_writebatch_clear(leveldb_writebatch_t*);
extern void leveldb_writebatch_put(
leveldb_writebatch_t*,
const char* key, size_t klen,
const char* val, size_t vlen);
extern void leveldb_writebatch_delete(
leveldb_writebatch_t*,
const char* key, size_t klen);
extern void leveldb_writebatch_iterate(
leveldb_writebatch_t*,
void* state,
LEVELDB_EXPORT leveldb_writebatch_t* leveldb_writebatch_create(void);
LEVELDB_EXPORT void leveldb_writebatch_destroy(leveldb_writebatch_t*);
LEVELDB_EXPORT void leveldb_writebatch_clear(leveldb_writebatch_t*);
LEVELDB_EXPORT void leveldb_writebatch_put(leveldb_writebatch_t*,
const char* key, size_t klen,
const char* val, size_t vlen);
LEVELDB_EXPORT void leveldb_writebatch_delete(leveldb_writebatch_t*,
const char* key, size_t klen);
LEVELDB_EXPORT void leveldb_writebatch_iterate(
const leveldb_writebatch_t*, void* state,
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
void (*deleted)(void*, const char* k, size_t klen));
LEVELDB_EXPORT void leveldb_writebatch_append(
leveldb_writebatch_t* destination, const leveldb_writebatch_t* source);
/* Options */
extern leveldb_options_t* leveldb_options_create();
extern void leveldb_options_destroy(leveldb_options_t*);
extern void leveldb_options_set_comparator(
leveldb_options_t*,
leveldb_comparator_t*);
extern void leveldb_options_set_filter_policy(
leveldb_options_t*,
leveldb_filterpolicy_t*);
extern void leveldb_options_set_create_if_missing(
leveldb_options_t*, unsigned char);
extern void leveldb_options_set_error_if_exists(
leveldb_options_t*, unsigned char);
extern void leveldb_options_set_paranoid_checks(
leveldb_options_t*, unsigned char);
extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*);
extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*);
extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t);
extern void leveldb_options_set_max_open_files(leveldb_options_t*, int);
extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*);
extern void leveldb_options_set_block_size(leveldb_options_t*, size_t);
extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int);
LEVELDB_EXPORT leveldb_options_t* leveldb_options_create(void);
LEVELDB_EXPORT void leveldb_options_destroy(leveldb_options_t*);
LEVELDB_EXPORT void leveldb_options_set_comparator(leveldb_options_t*,
leveldb_comparator_t*);
LEVELDB_EXPORT void leveldb_options_set_filter_policy(leveldb_options_t*,
leveldb_filterpolicy_t*);
LEVELDB_EXPORT void leveldb_options_set_create_if_missing(leveldb_options_t*,
uint8_t);
LEVELDB_EXPORT void leveldb_options_set_error_if_exists(leveldb_options_t*,
uint8_t);
LEVELDB_EXPORT void leveldb_options_set_paranoid_checks(leveldb_options_t*,
uint8_t);
LEVELDB_EXPORT void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*);
LEVELDB_EXPORT void leveldb_options_set_info_log(leveldb_options_t*,
leveldb_logger_t*);
LEVELDB_EXPORT void leveldb_options_set_write_buffer_size(leveldb_options_t*,
size_t);
LEVELDB_EXPORT void leveldb_options_set_max_open_files(leveldb_options_t*, int);
LEVELDB_EXPORT void leveldb_options_set_cache(leveldb_options_t*,
leveldb_cache_t*);
LEVELDB_EXPORT void leveldb_options_set_block_size(leveldb_options_t*, size_t);
LEVELDB_EXPORT void leveldb_options_set_block_restart_interval(
leveldb_options_t*, int);
LEVELDB_EXPORT void leveldb_options_set_max_file_size(leveldb_options_t*,
size_t);
enum {
leveldb_no_compression = 0,
leveldb_snappy_compression = 1
};
extern void leveldb_options_set_compression(leveldb_options_t*, int);
enum { leveldb_no_compression = 0, leveldb_snappy_compression = 1 };
LEVELDB_EXPORT void leveldb_options_set_compression(leveldb_options_t*, int);
/* Comparator */
extern leveldb_comparator_t* leveldb_comparator_create(
void* state,
void (*destructor)(void*),
int (*compare)(
void*,
const char* a, size_t alen,
const char* b, size_t blen),
LEVELDB_EXPORT leveldb_comparator_t* leveldb_comparator_create(
void* state, void (*destructor)(void*),
int (*compare)(void*, const char* a, size_t alen, const char* b,
size_t blen),
const char* (*name)(void*));
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
LEVELDB_EXPORT void leveldb_comparator_destroy(leveldb_comparator_t*);
/* Filter policy */
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
void* state,
void (*destructor)(void*),
char* (*create_filter)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length),
unsigned char (*key_may_match)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length),
LEVELDB_EXPORT leveldb_filterpolicy_t* leveldb_filterpolicy_create(
void* state, void (*destructor)(void*),
char* (*create_filter)(void*, const char* const* key_array,
const size_t* key_length_array, int num_keys,
size_t* filter_length),
uint8_t (*key_may_match)(void*, const char* key, size_t length,
const char* filter, size_t filter_length),
const char* (*name)(void*));
extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
LEVELDB_EXPORT void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
LEVELDB_EXPORT leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
int bits_per_key);
/* Read options */
extern leveldb_readoptions_t* leveldb_readoptions_create();
extern void leveldb_readoptions_destroy(leveldb_readoptions_t*);
extern void leveldb_readoptions_set_verify_checksums(
leveldb_readoptions_t*,
unsigned char);
extern void leveldb_readoptions_set_fill_cache(
leveldb_readoptions_t*, unsigned char);
extern void leveldb_readoptions_set_snapshot(
leveldb_readoptions_t*,
const leveldb_snapshot_t*);
LEVELDB_EXPORT leveldb_readoptions_t* leveldb_readoptions_create(void);
LEVELDB_EXPORT void leveldb_readoptions_destroy(leveldb_readoptions_t*);
LEVELDB_EXPORT void leveldb_readoptions_set_verify_checksums(
leveldb_readoptions_t*, uint8_t);
LEVELDB_EXPORT void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t*,
uint8_t);
LEVELDB_EXPORT void leveldb_readoptions_set_snapshot(leveldb_readoptions_t*,
const leveldb_snapshot_t*);
/* Write options */
extern leveldb_writeoptions_t* leveldb_writeoptions_create();
extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*);
extern void leveldb_writeoptions_set_sync(
leveldb_writeoptions_t*, unsigned char);
LEVELDB_EXPORT leveldb_writeoptions_t* leveldb_writeoptions_create(void);
LEVELDB_EXPORT void leveldb_writeoptions_destroy(leveldb_writeoptions_t*);
LEVELDB_EXPORT void leveldb_writeoptions_set_sync(leveldb_writeoptions_t*,
uint8_t);
/* Cache */
extern leveldb_cache_t* leveldb_cache_create_lru(size_t capacity);
extern void leveldb_cache_destroy(leveldb_cache_t* cache);
LEVELDB_EXPORT leveldb_cache_t* leveldb_cache_create_lru(size_t capacity);
LEVELDB_EXPORT void leveldb_cache_destroy(leveldb_cache_t* cache);
/* Env */
extern leveldb_env_t* leveldb_create_default_env();
extern void leveldb_env_destroy(leveldb_env_t*);
LEVELDB_EXPORT leveldb_env_t* leveldb_create_default_env(void);
LEVELDB_EXPORT void leveldb_env_destroy(leveldb_env_t*);
/* If not NULL, the returned buffer must be released using leveldb_free(). */
LEVELDB_EXPORT char* leveldb_env_get_test_directory(leveldb_env_t*);
/* Utility */
@ -275,16 +255,16 @@ extern void leveldb_env_destroy(leveldb_env_t*);
in this file. Note that in certain cases (typically on Windows), you
may need to call this routine instead of free(ptr) to dispose of
malloc()-ed memory returned by this library. */
extern void leveldb_free(void* ptr);
LEVELDB_EXPORT void leveldb_free(void* ptr);
/* Return the major version number for this release. */
extern int leveldb_major_version();
LEVELDB_EXPORT int leveldb_major_version(void);
/* Return the minor version number for this release. */
extern int leveldb_minor_version();
LEVELDB_EXPORT int leveldb_minor_version(void);
#ifdef __cplusplus
} /* end extern "C" */
} /* end extern "C" */
#endif
#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */
#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */
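
Taken together, these C bindings mirror the C++ API. Below is a minimal sketch of how a client might use the options, read-options and write-options calls shown above; the database path is an arbitrary placeholder, and every error string or returned buffer is released with leveldb_free().

#include <cstddef>
#include <cstdio>

#include "leveldb/c.h"

int main() {
  char* err = nullptr;
  leveldb_options_t* options = leveldb_options_create();
  leveldb_options_set_create_if_missing(options, 1);
  leveldb_options_set_compression(options, leveldb_snappy_compression);

  leveldb_t* db = leveldb_open(options, "/tmp/leveldb_c_example", &err);
  if (err != nullptr) {
    std::fprintf(stderr, "open failed: %s\n", err);
    leveldb_free(err);
    return 1;
  }

  leveldb_writeoptions_t* woptions = leveldb_writeoptions_create();
  leveldb_put(db, woptions, "key", 3, "value", 5, &err);
  if (err != nullptr) { leveldb_free(err); err = nullptr; }

  leveldb_readoptions_t* roptions = leveldb_readoptions_create();
  std::size_t vlen = 0;
  char* value = leveldb_get(db, roptions, "key", 3, &vlen, &err);
  if (value != nullptr) leveldb_free(value);  /* returned buffer is caller-owned */
  if (err != nullptr) { leveldb_free(err); err = nullptr; }

  leveldb_readoptions_destroy(roptions);
  leveldb_writeoptions_destroy(woptions);
  leveldb_close(db);
  leveldb_options_destroy(options);
  return 0;
}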


@ -18,27 +18,32 @@
#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_
#define STORAGE_LEVELDB_INCLUDE_CACHE_H_
#include <stdint.h>
#include <cstdint>
#include "leveldb/export.h"
#include "leveldb/slice.h"
namespace leveldb {
class Cache;
class LEVELDB_EXPORT Cache;
// Create a new cache with a fixed size capacity. This implementation
// of Cache uses a least-recently-used eviction policy.
extern Cache* NewLRUCache(size_t capacity);
LEVELDB_EXPORT Cache* NewLRUCache(size_t capacity);
class Cache {
class LEVELDB_EXPORT Cache {
public:
Cache() { }
Cache() = default;
Cache(const Cache&) = delete;
Cache& operator=(const Cache&) = delete;
// Destroys all existing entries by calling the "deleter"
// function that was passed to the constructor.
virtual ~Cache();
// Opaque handle to an entry stored in the cache.
struct Handle { };
struct Handle {};
// Insert a mapping from key->value into the cache and assign it
// the specified charge against the total cache capacity.
@ -52,7 +57,7 @@ class Cache {
virtual Handle* Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value)) = 0;
// If the cache has no mapping for "key", returns NULL.
// If the cache has no mapping for "key", returns nullptr.
//
// Else return a handle that corresponds to the mapping. The caller
// must call this->Release(handle) when the returned mapping is no
@ -91,18 +96,6 @@ class Cache {
// Return an estimate of the combined charges of all elements stored in the
// cache.
virtual size_t TotalCharge() const = 0;
private:
void LRU_Remove(Handle* e);
void LRU_Append(Handle* e);
void Unref(Handle* e);
struct Rep;
Rep* rep_;
// No copying allowed
Cache(const Cache&);
void operator=(const Cache&);
};
} // namespace leveldb
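
A minimal sketch of using the Cache interface directly (most clients simply pass the result of NewLRUCache() to Options::block_cache); the key, value type and 8 MB capacity below are illustrative choices:

#include <string>

#include "leveldb/cache.h"

// Deleter invoked when an entry is evicted or the cache is destroyed.
static void DeleteStringValue(const leveldb::Slice& /*key*/, void* value) {
  delete static_cast<std::string*>(value);
}

void CacheSketch() {
  leveldb::Cache* cache = leveldb::NewLRUCache(8 * 1024 * 1024);  // 8 MB capacity

  auto* value = new std::string("cached payload");
  leveldb::Cache::Handle* inserted =
      cache->Insert("some-key", value, value->size(), &DeleteStringValue);
  cache->Release(inserted);  // done with the handle; the entry stays cached

  if (leveldb::Cache::Handle* h = cache->Lookup("some-key")) {
    auto* v = static_cast<std::string*>(cache->Value(h));
    // ... use *v while the handle is held ...
    cache->Release(h);
  }

  delete cache;  // remaining entries are destroyed via the deleter
}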


@ -7,6 +7,8 @@
#include <string>
#include "leveldb/export.h"
namespace leveldb {
class Slice;
@ -15,7 +17,7 @@ class Slice;
// used as keys in an sstable or a database. A Comparator implementation
// must be thread-safe since leveldb may invoke its methods concurrently
// from multiple threads.
class Comparator {
class LEVELDB_EXPORT Comparator {
public:
virtual ~Comparator();
@ -43,9 +45,8 @@ class Comparator {
// If *start < limit, changes *start to a short string in [start,limit).
// Simple comparator implementations may return with *start unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortestSeparator(
std::string* start,
const Slice& limit) const = 0;
virtual void FindShortestSeparator(std::string* start,
const Slice& limit) const = 0;
// Changes *key to a short string >= *key.
// Simple comparator implementations may return with *key unchanged,
@ -56,7 +57,7 @@ class Comparator {
// Return a builtin comparator that uses lexicographic byte-wise
// ordering. The result remains the property of this module and
// must not be deleted.
extern const Comparator* BytewiseComparator();
LEVELDB_EXPORT const Comparator* BytewiseComparator();
} // namespace leveldb
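
As a sketch of this interface, a custom comparator only needs to implement the four methods above. The reverse-bytewise ordering and the name "example.ReverseBytewise" are made up for illustration; leaving the key-shortening hooks as no-ops is always correct, just not optimal.

#include <string>

#include "leveldb/comparator.h"
#include "leveldb/slice.h"

class ReverseBytewiseComparator : public leveldb::Comparator {
 public:
  int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const override {
    return -a.compare(b);  // invert the built-in bytewise order
  }
  const char* Name() const override { return "example.ReverseBytewise"; }
  // Returning with *start / *key unchanged is a valid implementation.
  void FindShortestSeparator(std::string* /*start*/,
                             const leveldb::Slice& /*limit*/) const override {}
  void FindShortSuccessor(std::string* /*key*/) const override {}
};

// Usage sketch: the comparator must outlive the DB and must never change
// for an existing database.
//   static ReverseBytewiseComparator reverse_cmp;
//   options.comparator = &reverse_cmp;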


@ -5,16 +5,18 @@
#ifndef STORAGE_LEVELDB_INCLUDE_DB_H_
#define STORAGE_LEVELDB_INCLUDE_DB_H_
#include <stdint.h>
#include <stdio.h>
#include <cstdint>
#include <cstdio>
#include "leveldb/export.h"
#include "leveldb/iterator.h"
#include "leveldb/options.h"
namespace leveldb {
// Update Makefile if you change these
// Update CMakeLists.txt if you change these
static const int kMajorVersion = 1;
static const int kMinorVersion = 18;
static const int kMinorVersion = 23;
struct Options;
struct ReadOptions;
@ -24,42 +26,44 @@ class WriteBatch;
// Abstract handle to particular state of a DB.
// A Snapshot is an immutable object and can therefore be safely
// accessed from multiple threads without any external synchronization.
class Snapshot {
class LEVELDB_EXPORT Snapshot {
protected:
virtual ~Snapshot();
};
// A range of keys
struct Range {
Slice start; // Included in the range
Slice limit; // Not included in the range
struct LEVELDB_EXPORT Range {
Range() = default;
Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
Range() { }
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
Slice start; // Included in the range
Slice limit; // Not included in the range
};
// A DB is a persistent ordered map from keys to values.
// A DB is safe for concurrent access from multiple threads without
// any external synchronization.
class DB {
class LEVELDB_EXPORT DB {
public:
// Open the database with the specified "name".
// Stores a pointer to a heap-allocated database in *dbptr and returns
// OK on success.
// Stores NULL in *dbptr and returns a non-OK status on error.
// Stores nullptr in *dbptr and returns a non-OK status on error.
// Caller should delete *dbptr when it is no longer needed.
static Status Open(const Options& options,
const std::string& name,
static Status Open(const Options& options, const std::string& name,
DB** dbptr);
DB() { }
DB() = default;
DB(const DB&) = delete;
DB& operator=(const DB&) = delete;
virtual ~DB();
// Set the database entry for "key" to "value". Returns OK on success,
// and a non-OK status on error.
// Note: consider setting options.sync = true.
virtual Status Put(const WriteOptions& options,
const Slice& key,
virtual Status Put(const WriteOptions& options, const Slice& key,
const Slice& value) = 0;
// Remove the database entry (if any) for "key". Returns OK on
@ -80,8 +84,8 @@ class DB {
// a status for which Status::IsNotFound() returns true.
//
// May return some other Status on an error.
virtual Status Get(const ReadOptions& options,
const Slice& key, std::string* value) = 0;
virtual Status Get(const ReadOptions& options, const Slice& key,
std::string* value) = 0;
// Return a heap-allocated iterator over the contents of the database.
// The result of NewIterator() is initially invalid (caller must
@ -136,27 +140,27 @@ class DB {
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// begin==NULL is treated as a key before all keys in the database.
// end==NULL is treated as a key after all keys in the database.
// begin==nullptr is treated as a key before all keys in the database.
// end==nullptr is treated as a key after all keys in the database.
// Therefore the following call will compact the entire database:
// db->CompactRange(NULL, NULL);
// db->CompactRange(nullptr, nullptr);
virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
private:
// No copying allowed
DB(const DB&);
void operator=(const DB&);
};
// Destroy the contents of the specified database.
// Be very careful using this method.
Status DestroyDB(const std::string& name, const Options& options);
//
// Note: For backwards compatibility, if DestroyDB is unable to list the
// database files, Status::OK() will still be returned masking this failure.
LEVELDB_EXPORT Status DestroyDB(const std::string& name,
const Options& options);
// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
Status RepairDB(const std::string& dbname, const Options& options);
LEVELDB_EXPORT Status RepairDB(const std::string& dbname,
const Options& options);
} // namespace leveldb
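
A minimal open/put/get sketch against this interface; the path /tmp/leveldb_example is a placeholder:

#include <string>

#include "leveldb/db.h"

void DbSketch() {
  leveldb::DB* db = nullptr;
  leveldb::Options options;
  options.create_if_missing = true;

  leveldb::Status s = leveldb::DB::Open(options, "/tmp/leveldb_example", &db);
  if (s.ok()) s = db->Put(leveldb::WriteOptions(), "answer", "42");

  std::string value;
  if (s.ok()) s = db->Get(leveldb::ReadOptions(), "answer", &value);

  // nullptr bounds compact the entire key range, as documented above.
  if (s.ok()) db->CompactRange(nullptr, nullptr);

  delete db;  // closes the database
}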


@ -6,7 +6,9 @@
#define STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_
#include <string>
#include "leveldb/env.h"
#include "leveldb/export.h"
#include "leveldb/status.h"
namespace leveldb {
@ -18,7 +20,8 @@ namespace leveldb {
//
// Returns a non-OK result if fname does not name a leveldb storage
// file, or if the file cannot be read.
Status DumpFile(Env* env, const std::string& fname, WritableFile* dst);
LEVELDB_EXPORT Status DumpFile(Env* env, const std::string& fname,
WritableFile* dst);
} // namespace leveldb
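
A sketch of dumping an sstable or log file to a text file; both file names below are hypothetical:

#include "leveldb/dumpfile.h"
#include "leveldb/env.h"

leveldb::Status DumpSketch() {
  leveldb::Env* env = leveldb::Env::Default();
  leveldb::WritableFile* dst = nullptr;
  leveldb::Status s = env->NewWritableFile("/tmp/000005_dump.txt", &dst);
  if (s.ok()) s = leveldb::DumpFile(env, "/path/to/db/000005.ldb", dst);
  if (dst != nullptr) {
    dst->Close();  // ignoring the Close() status in this sketch
    delete dst;
  }
  return s;
}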


@ -13,12 +13,32 @@
#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_
#define STORAGE_LEVELDB_INCLUDE_ENV_H_
#include <cstdarg>
#include <cstdint>
#include <string>
#include <vector>
#include <stdarg.h>
#include <stdint.h>
#include "leveldb/export.h"
#include "leveldb/status.h"
// This workaround can be removed when leveldb::Env::DeleteFile is removed.
#if defined(_WIN32)
// On Windows, the method name DeleteFile (below) introduces the risk of
// triggering undefined behavior by exposing the compiler to different
// declarations of the Env class in different translation units.
//
// This is because <windows.h>, a fairly popular header file for Windows
// applications, defines a DeleteFile macro. So, files that include the Windows
// header before this header will contain an altered Env declaration.
//
// This workaround ensures that the compiler sees the same Env declaration,
// independently of whether <windows.h> was included.
#if defined(DeleteFile)
#undef DeleteFile
#define LEVELDB_DELETEFILE_UNDEFINED
#endif // defined(DeleteFile)
#endif // defined(_WIN32)
namespace leveldb {
class FileLock;
@ -28,9 +48,13 @@ class SequentialFile;
class Slice;
class WritableFile;
class Env {
class LEVELDB_EXPORT Env {
public:
Env() { }
Env();
Env(const Env&) = delete;
Env& operator=(const Env&) = delete;
virtual ~Env();
// Return a default environment suitable for the current operating
@ -40,20 +64,22 @@ class Env {
// The result of Default() belongs to leveldb and must never be deleted.
static Env* Default();
// Create a brand new sequentially-readable file with the specified name.
// Create an object that sequentially reads the file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK.
// On failure stores NULL in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
// On failure stores nullptr in *result and returns non-OK. If the file does
// not exist, returns a non-OK status. Implementations should return a
// NotFound status when the file does not exist.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname,
SequentialFile** result) = 0;
// Create a brand new random access read-only file with the
// Create an object supporting random-access reads from the file with the
// specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores NULL in *result and
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK. If the file does not exist, returns a non-OK
// status.
// status. Implementations should return a NotFound status when the file does
// not exist.
//
// The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname,
@ -62,7 +88,7 @@ class Env {
// Create an object that writes to a new file with the specified
// name. Deletes any existing file with the same name and creates a
// new file. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores NULL in *result and
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
@ -72,7 +98,7 @@ class Env {
// Create an object that either appends to an existing file, or
// writes to a new file (if the file does not exist to begin with).
// On success, stores a pointer to the new file in *result and
// returns OK. On failure stores NULL in *result and returns
// returns OK. On failure stores nullptr in *result and returns
// non-OK.
//
// The returned file will only be accessed by one thread at a time.
@ -92,15 +118,48 @@ class Env {
// Original contents of *results are dropped.
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0;
// Delete the named file.
virtual Status DeleteFile(const std::string& fname) = 0;
//
// The default implementation calls DeleteFile, to support legacy Env
// implementations. Updated Env implementations must override RemoveFile and
// ignore the existence of DeleteFile. Updated code calling into the Env API
// must call RemoveFile instead of DeleteFile.
//
// A future release will remove DeleteDir and the default implementation of
// RemoveDir.
virtual Status RemoveFile(const std::string& fname);
// DEPRECATED: Modern Env implementations should override RemoveFile instead.
//
// The default implementation calls RemoveFile, to support legacy Env user
// code that calls this method on modern Env implementations. Modern Env user
// code should call RemoveFile.
//
// A future release will remove this method.
virtual Status DeleteFile(const std::string& fname);
// Create the specified directory.
virtual Status CreateDir(const std::string& dirname) = 0;
// Delete the specified directory.
virtual Status DeleteDir(const std::string& dirname) = 0;
//
// The default implementation calls DeleteDir, to support legacy Env
// implementations. Updated Env implementations must override RemoveDir and
// ignore the existence of DeleteDir. Modern code calling into the Env API
// must call RemoveDir instead of DeleteDir.
//
// A future release will remove DeleteDir and the default implementation of
// RemoveDir.
virtual Status RemoveDir(const std::string& dirname);
// DEPRECATED: Modern Env implementations should override RemoveDir instead.
//
// The default implementation calls RemoveDir, to support legacy Env user
// code that calls this method on modern Env implementations. Modern Env user
// code should call RemoveDir.
//
// A future release will remove this method.
virtual Status DeleteDir(const std::string& dirname);
// Store the size of fname in *file_size.
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
@ -110,7 +169,7 @@ class Env {
const std::string& target) = 0;
// Lock the specified file. Used to prevent concurrent access to
// the same db by multiple processes. On failure, stores NULL in
// the same db by multiple processes. On failure, stores nullptr in
// *lock and returns non-OK.
//
// On success, stores a pointer to the object that represents the
@ -136,16 +195,14 @@ class Env {
// added to the same Env may run concurrently in different threads.
// I.e., the caller may not assume that background work items are
// serialized.
virtual void Schedule(
void (*function)(void* arg),
void* arg) = 0;
virtual void Schedule(void (*function)(void* arg), void* arg) = 0;
// Start a new thread, invoking "function(arg)" within the new thread.
// When "function(arg)" returns, the thread will be destroyed.
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
// *path is set to a temporary directory that can be used for testing. It may
// or many not have just been created. The directory may or may not differ
// or may not have just been created. The directory may or may not differ
// between runs of the same process, but subsequent calls will return the
// same directory.
virtual Status GetTestDirectory(std::string* path) = 0;
@ -159,17 +216,16 @@ class Env {
// Sleep/delay the thread for the prescribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
private:
// No copying allowed
Env(const Env&);
void operator=(const Env&);
};
// A file abstraction for reading sequentially through a file
class SequentialFile {
class LEVELDB_EXPORT SequentialFile {
public:
SequentialFile() { }
SequentialFile() = default;
SequentialFile(const SequentialFile&) = delete;
SequentialFile& operator=(const SequentialFile&) = delete;
virtual ~SequentialFile();
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
@ -190,17 +246,16 @@ class SequentialFile {
//
// REQUIRES: External synchronization
virtual Status Skip(uint64_t n) = 0;
private:
// No copying allowed
SequentialFile(const SequentialFile&);
void operator=(const SequentialFile&);
};
// A file abstraction for randomly reading the contents of a file.
class RandomAccessFile {
class LEVELDB_EXPORT RandomAccessFile {
public:
RandomAccessFile() { }
RandomAccessFile() = default;
RandomAccessFile(const RandomAccessFile&) = delete;
RandomAccessFile& operator=(const RandomAccessFile&) = delete;
virtual ~RandomAccessFile();
// Read up to "n" bytes from the file starting at "offset".
@ -214,138 +269,149 @@ class RandomAccessFile {
// Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
private:
// No copying allowed
RandomAccessFile(const RandomAccessFile&);
void operator=(const RandomAccessFile&);
};
// A file abstraction for sequential writing. The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class WritableFile {
class LEVELDB_EXPORT WritableFile {
public:
WritableFile() { }
WritableFile() = default;
WritableFile(const WritableFile&) = delete;
WritableFile& operator=(const WritableFile&) = delete;
virtual ~WritableFile();
virtual Status Append(const Slice& data) = 0;
virtual Status Close() = 0;
virtual Status Flush() = 0;
virtual Status Sync() = 0;
private:
// No copying allowed
WritableFile(const WritableFile&);
void operator=(const WritableFile&);
};
// An interface for writing log messages.
class Logger {
class LEVELDB_EXPORT Logger {
public:
Logger() { }
Logger() = default;
Logger(const Logger&) = delete;
Logger& operator=(const Logger&) = delete;
virtual ~Logger();
// Write an entry to the log file with the specified format.
virtual void Logv(const char* format, va_list ap) = 0;
private:
// No copying allowed
Logger(const Logger&);
void operator=(const Logger&);
virtual void Logv(const char* format, std::va_list ap) = 0;
};
// Identifies a locked file.
class FileLock {
class LEVELDB_EXPORT FileLock {
public:
FileLock() { }
FileLock() = default;
FileLock(const FileLock&) = delete;
FileLock& operator=(const FileLock&) = delete;
virtual ~FileLock();
private:
// No copying allowed
FileLock(const FileLock&);
void operator=(const FileLock&);
};
// Log the specified data to *info_log if info_log is non-NULL.
extern void Log(Logger* info_log, const char* format, ...)
# if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__ (__printf__, 2, 3)))
# endif
// Log the specified data to *info_log if info_log is non-null.
void Log(Logger* info_log, const char* format, ...)
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__(__printf__, 2, 3)))
#endif
;
// A utility routine: write "data" to the named file.
extern Status WriteStringToFile(Env* env, const Slice& data,
const std::string& fname);
LEVELDB_EXPORT Status WriteStringToFile(Env* env, const Slice& data,
const std::string& fname);
// A utility routine: read contents of named file into *data
extern Status ReadFileToString(Env* env, const std::string& fname,
std::string* data);
LEVELDB_EXPORT Status ReadFileToString(Env* env, const std::string& fname,
std::string* data);
// An implementation of Env that forwards all calls to another Env.
// May be useful to clients who wish to override just part of the
// functionality of another Env.
class EnvWrapper : public Env {
class LEVELDB_EXPORT EnvWrapper : public Env {
public:
// Initialize an EnvWrapper that delegates all calls to *t
explicit EnvWrapper(Env* t) : target_(t) { }
// Initialize an EnvWrapper that delegates all calls to *t.
explicit EnvWrapper(Env* t) : target_(t) {}
virtual ~EnvWrapper();
// Return the target to which this Env forwards all calls
// Return the target to which this Env forwards all calls.
Env* target() const { return target_; }
// The following text is boilerplate that forwards all methods to target()
Status NewSequentialFile(const std::string& f, SequentialFile** r) {
// The following text is boilerplate that forwards all methods to target().
Status NewSequentialFile(const std::string& f, SequentialFile** r) override {
return target_->NewSequentialFile(f, r);
}
Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
Status NewRandomAccessFile(const std::string& f,
RandomAccessFile** r) override {
return target_->NewRandomAccessFile(f, r);
}
Status NewWritableFile(const std::string& f, WritableFile** r) {
Status NewWritableFile(const std::string& f, WritableFile** r) override {
return target_->NewWritableFile(f, r);
}
Status NewAppendableFile(const std::string& f, WritableFile** r) {
Status NewAppendableFile(const std::string& f, WritableFile** r) override {
return target_->NewAppendableFile(f, r);
}
bool FileExists(const std::string& f) { return target_->FileExists(f); }
Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
bool FileExists(const std::string& f) override {
return target_->FileExists(f);
}
Status GetChildren(const std::string& dir,
std::vector<std::string>* r) override {
return target_->GetChildren(dir, r);
}
Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); }
Status CreateDir(const std::string& d) { return target_->CreateDir(d); }
Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); }
Status GetFileSize(const std::string& f, uint64_t* s) {
Status RemoveFile(const std::string& f) override {
return target_->RemoveFile(f);
}
Status CreateDir(const std::string& d) override {
return target_->CreateDir(d);
}
Status RemoveDir(const std::string& d) override {
return target_->RemoveDir(d);
}
Status GetFileSize(const std::string& f, uint64_t* s) override {
return target_->GetFileSize(f, s);
}
Status RenameFile(const std::string& s, const std::string& t) {
Status RenameFile(const std::string& s, const std::string& t) override {
return target_->RenameFile(s, t);
}
Status LockFile(const std::string& f, FileLock** l) {
Status LockFile(const std::string& f, FileLock** l) override {
return target_->LockFile(f, l);
}
Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); }
void Schedule(void (*f)(void*), void* a) {
Status UnlockFile(FileLock* l) override { return target_->UnlockFile(l); }
void Schedule(void (*f)(void*), void* a) override {
return target_->Schedule(f, a);
}
void StartThread(void (*f)(void*), void* a) {
void StartThread(void (*f)(void*), void* a) override {
return target_->StartThread(f, a);
}
virtual Status GetTestDirectory(std::string* path) {
Status GetTestDirectory(std::string* path) override {
return target_->GetTestDirectory(path);
}
virtual Status NewLogger(const std::string& fname, Logger** result) {
Status NewLogger(const std::string& fname, Logger** result) override {
return target_->NewLogger(fname, result);
}
uint64_t NowMicros() {
return target_->NowMicros();
}
void SleepForMicroseconds(int micros) {
uint64_t NowMicros() override { return target_->NowMicros(); }
void SleepForMicroseconds(int micros) override {
target_->SleepForMicroseconds(micros);
}
private:
Env* target_;
};
} // namespace leveldb
// This workaround can be removed when leveldb::Env::DeleteFile is removed.
// Redefine DeleteFile if it was undefined earlier.
#if defined(_WIN32) && defined(LEVELDB_DELETEFILE_UNDEFINED)
#if defined(UNICODE)
#define DeleteFile DeleteFileW
#else
#define DeleteFile DeleteFileA
#endif // defined(UNICODE)
#endif // defined(_WIN32) && defined(LEVELDB_DELETEFILE_UNDEFINED)
#endif // STORAGE_LEVELDB_INCLUDE_ENV_H_
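
EnvWrapper makes it straightforward to adjust one piece of Env behavior while forwarding everything else. A hedged sketch that overrides the new RemoveFile() entry point (the class name LoggingEnv is made up for illustration):

#include <cstdio>
#include <string>

#include "leveldb/env.h"

class LoggingEnv : public leveldb::EnvWrapper {
 public:
  explicit LoggingEnv(leveldb::Env* base) : leveldb::EnvWrapper(base) {}

  // Override the modern entry point; legacy DeleteFile() callers reach this
  // method through the default implementations described above.
  leveldb::Status RemoveFile(const std::string& fname) override {
    std::fprintf(stderr, "removing %s\n", fname.c_str());
    return leveldb::EnvWrapper::RemoveFile(fname);
  }
};

// Usage sketch: keep the wrapper alive for the lifetime of the DB.
//   static LoggingEnv logging_env(leveldb::Env::Default());
//   options.env = &logging_env;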

include/leveldb/export.h Normal file (+33 lines)

@ -0,0 +1,33 @@
// Copyright (c) 2017 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_INCLUDE_EXPORT_H_
#define STORAGE_LEVELDB_INCLUDE_EXPORT_H_
#if !defined(LEVELDB_EXPORT)
#if defined(LEVELDB_SHARED_LIBRARY)
#if defined(_WIN32)
#if defined(LEVELDB_COMPILE_LIBRARY)
#define LEVELDB_EXPORT __declspec(dllexport)
#else
#define LEVELDB_EXPORT __declspec(dllimport)
#endif // defined(LEVELDB_COMPILE_LIBRARY)
#else // defined(_WIN32)
#if defined(LEVELDB_COMPILE_LIBRARY)
#define LEVELDB_EXPORT __attribute__((visibility("default")))
#else
#define LEVELDB_EXPORT
#endif
#endif // defined(_WIN32)
#else // defined(LEVELDB_SHARED_LIBRARY)
#define LEVELDB_EXPORT
#endif
#endif // !defined(LEVELDB_EXPORT)
#endif // STORAGE_LEVELDB_INCLUDE_EXPORT_H_


@ -18,11 +18,13 @@
#include <string>
#include "leveldb/export.h"
namespace leveldb {
class Slice;
class FilterPolicy {
class LEVELDB_EXPORT FilterPolicy {
public:
virtual ~FilterPolicy();
@ -38,8 +40,8 @@ class FilterPolicy {
//
// Warning: do not change the initial contents of *dst. Instead,
// append the newly constructed filter to *dst.
virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
const = 0;
virtual void CreateFilter(const Slice* keys, int n,
std::string* dst) const = 0;
// "filter" contains the data appended by a preceding call to
// CreateFilter() on this class. This method must return true if
@ -63,8 +65,8 @@ class FilterPolicy {
// ignores trailing spaces, it would be incorrect to use a
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
LEVELDB_EXPORT const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
}
} // namespace leveldb
#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
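
The typical use is to hand a Bloom filter policy to Options; a short sketch (10 bits per key is the commonly suggested setting, not a requirement):

#include "leveldb/filter_policy.h"
#include "leveldb/options.h"

void FilterSketch() {
  const leveldb::FilterPolicy* policy = leveldb::NewBloomFilterPolicy(10);

  leveldb::Options options;
  options.filter_policy = policy;  // the DB does not take ownership
  // ... open and use the database with these options ...

  // Delete the policy only after every DB using it has been closed.
  delete policy;
}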


@ -15,14 +15,19 @@
#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
#include "leveldb/export.h"
#include "leveldb/slice.h"
#include "leveldb/status.h"
namespace leveldb {
class Iterator {
class LEVELDB_EXPORT Iterator {
public:
Iterator();
Iterator(const Iterator&) = delete;
Iterator& operator=(const Iterator&) = delete;
virtual ~Iterator();
// An iterator is either positioned at a key/value pair, or
@ -72,28 +77,35 @@ class Iterator {
//
// Note that unlike all of the preceding methods, this method is
// not abstract and therefore clients should not override it.
typedef void (*CleanupFunction)(void* arg1, void* arg2);
using CleanupFunction = void (*)(void* arg1, void* arg2);
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
private:
struct Cleanup {
// Cleanup functions are stored in a single-linked list.
// The list's head node is inlined in the iterator.
struct CleanupNode {
// True if the node is not used. Only head nodes might be unused.
bool IsEmpty() const { return function == nullptr; }
// Invokes the cleanup function.
void Run() {
assert(function != nullptr);
(*function)(arg1, arg2);
}
// The head node is used if the function pointer is not null.
CleanupFunction function;
void* arg1;
void* arg2;
Cleanup* next;
CleanupNode* next;
};
Cleanup cleanup_;
// No copying allowed
Iterator(const Iterator&);
void operator=(const Iterator&);
CleanupNode cleanup_head_;
};
// Return an empty iterator (yields nothing).
extern Iterator* NewEmptyIterator();
LEVELDB_EXPORT Iterator* NewEmptyIterator();
// Return an empty iterator with the specified status.
extern Iterator* NewErrorIterator(const Status& status);
LEVELDB_EXPORT Iterator* NewErrorIterator(const Status& status);
} // namespace leveldb
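
A sketch of the usual iteration loop; db is assumed to be an already-open leveldb::DB*:

#include <cassert>

#include "leveldb/db.h"
#include "leveldb/iterator.h"

void IterateSketch(leveldb::DB* db) {
  leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // it->key() and it->value() return Slices that stay valid only until the
    // iterator is moved again.
    leveldb::Slice key = it->key();
    leveldb::Slice value = it->value();
    (void)key;
    (void)value;
  }
  assert(it->status().ok());  // check for errors encountered during the scan
  delete it;
}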


@ -5,7 +5,9 @@
#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
#include <stddef.h>
#include <cstddef>
#include "leveldb/export.h"
namespace leveldb {
@ -23,12 +25,16 @@ class Snapshot;
enum CompressionType {
// NOTE: do not change the values of existing entries, as these are
// part of the persistent format on disk.
kNoCompression = 0x0,
kSnappyCompression = 0x1
kNoCompression = 0x0,
kSnappyCompression = 0x1,
kZstdCompression = 0x2,
};
// Options to control the behavior of a database (passed to DB::Open)
struct Options {
struct LEVELDB_EXPORT Options {
// Create an Options object with default values for all fields.
Options();
// -------------------
// Parameters that affect behavior
@ -41,20 +47,17 @@ struct Options {
const Comparator* comparator;
// If true, the database will be created if it is missing.
// Default: false
bool create_if_missing;
bool create_if_missing = false;
// If true, an error is raised if the database already exists.
// Default: false
bool error_if_exists;
bool error_if_exists = false;
// If true, the implementation will do aggressive checking of the
// data it is processing and will stop early if it detects any
// errors. This may have unforeseen ramifications: for example, a
// corruption of one DB entry may cause a large number of entries to
// become unreadable or for the entire DB to become unopenable.
// Default: false
bool paranoid_checks;
bool paranoid_checks = false;
// Use the specified object to interact with the environment,
// e.g. to read/write files, schedule background work, etc.
@ -62,10 +65,9 @@ struct Options {
Env* env;
// Any internal progress/error information generated by the db will
// be written to info_log if it is non-NULL, or to a file stored
// in the same directory as the DB contents if info_log is NULL.
// Default: NULL
Logger* info_log;
// be written to info_log if it is non-null, or to a file stored
// in the same directory as the DB contents if info_log is null.
Logger* info_log = nullptr;
// -------------------
// Parameters that affect performance
@ -78,39 +80,40 @@ struct Options {
// so you may wish to adjust this parameter to control memory usage.
// Also, a larger write buffer will result in a longer recovery time
// the next time the database is opened.
//
// Default: 4MB
size_t write_buffer_size;
size_t write_buffer_size = 4 * 1024 * 1024;
// Number of open files that can be used by the DB. You may need to
// increase this if your database has a large working set (budget
// one open file per 2MB of working set).
//
// Default: 1000
int max_open_files;
int max_open_files = 1000;
// Control over blocks (user data is stored in a set of blocks, and
// a block is the unit of reading from disk).
// If non-NULL, use the specified cache for blocks.
// If NULL, leveldb will automatically create and use an 8MB internal cache.
// Default: NULL
Cache* block_cache;
// If non-null, use the specified cache for blocks.
// If null, leveldb will automatically create and use an 8MB internal cache.
Cache* block_cache = nullptr;
// Approximate size of user data packed per block. Note that the
// block size specified here corresponds to uncompressed data. The
// actual size of the unit read from disk may be smaller if
// compression is enabled. This parameter can be changed dynamically.
//
// Default: 4K
size_t block_size;
size_t block_size = 4 * 1024;
// Number of keys between restart points for delta encoding of keys.
// This parameter can be changed dynamically. Most clients should
// leave this parameter alone.
//
// Default: 16
int block_restart_interval;
int block_restart_interval = 16;
// Leveldb will write up to this amount of bytes to a file before
// switching to a new one.
// Most clients should leave this parameter alone. However if your
// filesystem is more efficient with larger files, you could
// consider increasing the value. The downside will be longer
// compactions and hence longer latency/performance hiccups.
// Another reason to increase this parameter might be when you are
// initially populating a large database.
size_t max_file_size = 2 * 1024 * 1024;
// Compress blocks using the specified compression algorithm. This
// parameter can be changed dynamically.
@ -126,53 +129,45 @@ struct Options {
// worth switching to kNoCompression. Even if the input data is
// incompressible, the kSnappyCompression implementation will
// efficiently detect that and will switch to uncompressed mode.
CompressionType compression;
CompressionType compression = kSnappyCompression;
// Compression level for zstd.
// Currently only the range [-5,22] is supported. Default is 1.
int zstd_compression_level = 1;
// EXPERIMENTAL: If true, append to existing MANIFEST and log files
// when a database is opened. This can significantly speed up open.
//
// Default: currently false, but may become true later.
bool reuse_logs;
bool reuse_logs = false;
// If non-NULL, use the specified filter policy to reduce disk reads.
// If non-null, use the specified filter policy to reduce disk reads.
// Many applications will benefit from passing the result of
// NewBloomFilterPolicy() here.
//
// Default: NULL
const FilterPolicy* filter_policy;
// Create an Options object with default values for all fields.
Options();
const FilterPolicy* filter_policy = nullptr;
};
// Options that control read operations
struct ReadOptions {
struct LEVELDB_EXPORT ReadOptions {
// If true, all data read from underlying storage will be
// verified against corresponding checksums.
// Default: false
bool verify_checksums;
bool verify_checksums = false;
// Should the data read for this iteration be cached in memory?
// Callers may wish to set this field to false for bulk scans.
// Default: true
bool fill_cache;
bool fill_cache = true;
// If "snapshot" is non-NULL, read as of the supplied snapshot
// If "snapshot" is non-null, read as of the supplied snapshot
// (which must belong to the DB that is being read and which must
// not have been released). If "snapshot" is NULL, use an implicit
// not have been released). If "snapshot" is null, use an implicit
// snapshot of the state at the beginning of this read operation.
// Default: NULL
const Snapshot* snapshot;
ReadOptions()
: verify_checksums(false),
fill_cache(true),
snapshot(NULL) {
}
const Snapshot* snapshot = nullptr;
};
// Options that control write operations
struct WriteOptions {
struct LEVELDB_EXPORT WriteOptions {
WriteOptions() = default;
// If true, the write will be flushed from the operating system
// buffer cache (by calling WritableFile::Sync()) before the write
// is considered complete. If this flag is true, writes will be
@ -187,13 +182,7 @@ struct WriteOptions {
// crash semantics as the "write()" system call. A DB write
// with sync==true has similar crash semantics to a "write()"
// system call followed by "fsync()".
//
// Default: false
bool sync;
WriteOptions()
: sync(false) {
}
bool sync = false;
};
} // namespace leveldb
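
A sketch of configuring the new zstd path alongside the other tunables; it assumes the library was built with zstd support, and the specific values are illustrative:

#include "leveldb/options.h"

leveldb::Options MakeOptions() {
  leveldb::Options options;
  options.create_if_missing = true;

  // Compression settings added by this change; assumes a zstd-enabled build.
  options.compression = leveldb::kZstdCompression;
  options.zstd_compression_level = 3;  // must stay within the supported [-5,22]

  // The other knobs keep their documented defaults unless set explicitly.
  options.write_buffer_size = 8 * 1024 * 1024;  // 8 MB memtable
  options.block_size = 16 * 1024;               // 16 KB uncompressed blocks
  return options;
}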


@ -15,26 +15,32 @@
#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_
#define STORAGE_LEVELDB_INCLUDE_SLICE_H_
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>
#include "leveldb/export.h"
namespace leveldb {
class Slice {
class LEVELDB_EXPORT Slice {
public:
// Create an empty slice.
Slice() : data_(""), size_(0) { }
Slice() : data_(""), size_(0) {}
// Create a slice that refers to d[0,n-1].
Slice(const char* d, size_t n) : data_(d), size_(n) { }
Slice(const char* d, size_t n) : data_(d), size_(n) {}
// Create a slice that refers to the contents of "s"
Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }
Slice(const std::string& s) : data_(s.data()), size_(s.size()) {}
// Create a slice that refers to s[0,strlen(s)-1]
Slice(const char* s) : data_(s), size_(strlen(s)) { }
Slice(const char* s) : data_(s), size_(strlen(s)) {}
// Intentionally copyable.
Slice(const Slice&) = default;
Slice& operator=(const Slice&) = default;
// Return a pointer to the beginning of the referenced data
const char* data() const { return data_; }
@ -53,7 +59,10 @@ class Slice {
}
// Change this slice to refer to an empty array
void clear() { data_ = ""; size_ = 0; }
void clear() {
data_ = "";
size_ = 0;
}
// Drop the first "n" bytes from this slice.
void remove_prefix(size_t n) {
@ -73,15 +82,12 @@ class Slice {
// Return true iff "x" is a prefix of "*this"
bool starts_with(const Slice& x) const {
return ((size_ >= x.size_) &&
(memcmp(data_, x.data_, x.size_) == 0));
return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0));
}
private:
const char* data_;
size_t size_;
// Intentionally copyable
};
inline bool operator==(const Slice& x, const Slice& y) {
@ -89,21 +95,20 @@ inline bool operator==(const Slice& x, const Slice& y) {
(memcmp(x.data(), y.data(), x.size()) == 0));
}
inline bool operator!=(const Slice& x, const Slice& y) {
return !(x == y);
}
inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); }
inline int Slice::compare(const Slice& b) const {
const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
int r = memcmp(data_, b.data_, min_len);
if (r == 0) {
if (size_ < b.size_) r = -1;
else if (size_ > b.size_) r = +1;
if (size_ < b.size_)
r = -1;
else if (size_ > b.size_)
r = +1;
}
return r;
}
} // namespace leveldb
#endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_
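
Slice is a non-owning (pointer, length) view, so the backing storage must outlive it; a small sketch of the helpers above:

#include <cassert>
#include <string>

#include "leveldb/slice.h"

void SliceSketch() {
  std::string backing = "config/option";  // must outlive the slices below
  leveldb::Slice s(backing);              // refers to backing's bytes, no copy
  leveldb::Slice prefix("config/");

  if (s.starts_with(prefix)) {
    s.remove_prefix(prefix.size());       // s now views "option"
  }
  assert(s == leveldb::Slice("option"));
  assert(s.compare(leveldb::Slice("zzz")) < 0);  // bytewise comparison
}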


@ -13,20 +13,25 @@
#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_
#define STORAGE_LEVELDB_INCLUDE_STATUS_H_
#include <algorithm>
#include <string>
#include "leveldb/export.h"
#include "leveldb/slice.h"
namespace leveldb {
class Status {
class LEVELDB_EXPORT Status {
public:
// Create a success status.
Status() : state_(NULL) { }
Status() noexcept : state_(nullptr) {}
~Status() { delete[] state_; }
// Copy the specified status.
Status(const Status& s);
void operator=(const Status& s);
Status(const Status& rhs);
Status& operator=(const Status& rhs);
Status(Status&& rhs) noexcept : state_(rhs.state_) { rhs.state_ = nullptr; }
Status& operator=(Status&& rhs) noexcept;
// Return a success status.
static Status OK() { return Status(); }
@ -49,7 +54,7 @@ class Status {
}
// Returns true iff the status indicates success.
bool ok() const { return (state_ == NULL); }
bool ok() const { return (state_ == nullptr); }
// Returns true iff the status indicates a NotFound error.
bool IsNotFound() const { return code() == kNotFound; }
@ -63,18 +68,14 @@ class Status {
// Returns true iff the status indicates a NotSupportedError.
bool IsNotSupportedError() const { return code() == kNotSupported; }
// Returns true iff the status indicates an InvalidArgument.
bool IsInvalidArgument() const { return code() == kInvalidArgument; }
// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
std::string ToString() const;
private:
// OK status has a NULL state_. Otherwise, state_ is a new[] array
// of the following form:
// state_[0..3] == length of message
// state_[4] == code
// state_[5..] == message
const char* state_;
enum Code {
kOk = 0,
kNotFound = 1,
@ -85,23 +86,35 @@ class Status {
};
Code code() const {
return (state_ == NULL) ? kOk : static_cast<Code>(state_[4]);
return (state_ == nullptr) ? kOk : static_cast<Code>(state_[4]);
}
Status(Code code, const Slice& msg, const Slice& msg2);
static const char* CopyState(const char* s);
// OK status has a null state_. Otherwise, state_ is a new[] array
// of the following form:
// state_[0..3] == length of message
// state_[4] == code
// state_[5..] == message
const char* state_;
};
inline Status::Status(const Status& s) {
state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
inline Status::Status(const Status& rhs) {
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
}
inline void Status::operator=(const Status& s) {
// The following condition catches both aliasing (when this == &s),
// and the common case where both s and *this are ok.
if (state_ != s.state_) {
inline Status& Status::operator=(const Status& rhs) {
// The following condition catches both aliasing (when this == &rhs),
// and the common case where both rhs and *this are ok.
if (state_ != rhs.state_) {
delete[] state_;
state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
}
return *this;
}
inline Status& Status::operator=(Status&& rhs) noexcept {
std::swap(state_, rhs.state_);
return *this;
}
} // namespace leveldb
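
Status is the error-handling currency of the API; a sketch of producing and inspecting one (the helper name LookupSketch and the message text are illustrative):

#include <cstdio>
#include <string>
#include <utility>

#include "leveldb/status.h"

leveldb::Status LookupSketch(bool found) {
  if (!found) {
    return leveldb::Status::NotFound("key is not in the table");
  }
  return leveldb::Status::OK();
}

void StatusSketch() {
  leveldb::Status s = LookupSketch(false);
  if (s.IsNotFound()) {
    // Expected case: handle the miss.
  } else if (!s.ok()) {
    std::fprintf(stderr, "unexpected error: %s\n", s.ToString().c_str());
  }

  // The new move operations transfer the heap-allocated state, if any.
  leveldb::Status moved = std::move(s);
  (void)moved;
}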


@ -5,7 +5,9 @@
#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_
#define STORAGE_LEVELDB_INCLUDE_TABLE_H_
#include <stdint.h>
#include <cstdint>
#include "leveldb/export.h"
#include "leveldb/iterator.h"
namespace leveldb {
@ -21,7 +23,7 @@ class TableCache;
// A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization.
class Table {
class LEVELDB_EXPORT Table {
public:
// Attempt to open the table that is stored in bytes [0..file_size)
// of "file", and read the metadata entries necessary to allow
@ -30,15 +32,16 @@ class Table {
// If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed.
// If there was an error while initializing the table, sets "*table"
// to NULL and returns a non-ok status. Does not take ownership of
// to nullptr and returns a non-ok status. Does not take ownership of
// "*source", but the client must ensure that "source" remains live
// for the duration of the returned table's lifetime.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options,
RandomAccessFile* file,
uint64_t file_size,
Table** table);
static Status Open(const Options& options, RandomAccessFile* file,
uint64_t file_size, Table** table);
Table(const Table&) = delete;
Table& operator=(const Table&) = delete;
~Table();
@ -56,28 +59,24 @@ class Table {
uint64_t ApproximateOffsetOf(const Slice& key) const;
private:
friend class TableCache;
struct Rep;
Rep* rep_;
explicit Table(Rep* rep) { rep_ = rep; }
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);
explicit Table(Rep* rep) : rep_(rep) {}
// Calls (*handle_result)(arg, ...) with the entry found after a call
// to Seek(key). May not make such a call if filter policy says
// that key is not present.
friend class TableCache;
Status InternalGet(
const ReadOptions&, const Slice& key,
void* arg,
void (*handle_result)(void* arg, const Slice& k, const Slice& v));
Status InternalGet(const ReadOptions&, const Slice& key, void* arg,
void (*handle_result)(void* arg, const Slice& k,
const Slice& v));
void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value);
// No copying allowed
Table(const Table&);
void operator=(const Table&);
Rep* const rep_;
};
} // namespace leveldb
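
Reading a table directly is unusual (the DB goes through TableCache), but a sketch under the documented lifetime rules looks roughly like this; the file name is hypothetical, and keys stored by a DB are internal keys rather than user keys:

#include <cstdint>
#include <string>

#include "leveldb/env.h"
#include "leveldb/options.h"
#include "leveldb/table.h"

leveldb::Status TableReadSketch() {
  const std::string fname = "/path/to/db/000123.ldb";  // hypothetical
  leveldb::Env* env = leveldb::Env::Default();

  uint64_t file_size = 0;
  leveldb::RandomAccessFile* file = nullptr;
  leveldb::Status s = env->GetFileSize(fname, &file_size);
  if (s.ok()) s = env->NewRandomAccessFile(fname, &file);

  leveldb::Options options;
  leveldb::Table* table = nullptr;
  if (s.ok()) s = leveldb::Table::Open(options, file, file_size, &table);

  if (s.ok()) {
    leveldb::Iterator* it = table->NewIterator(leveldb::ReadOptions());
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      // it->key() / it->value() expose the raw table contents.
    }
    s = it->status();
    delete it;
  }

  delete table;  // *file must stay live while the table is in use
  delete file;
  return s;
}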


@ -13,7 +13,9 @@
#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_
#define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_
#include <stdint.h>
#include <cstdint>
#include "leveldb/export.h"
#include "leveldb/options.h"
#include "leveldb/status.h"
@ -23,13 +25,16 @@ class BlockBuilder;
class BlockHandle;
class WritableFile;
class TableBuilder {
class LEVELDB_EXPORT TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish().
TableBuilder(const Options& options, WritableFile* file);
TableBuilder(const TableBuilder&) = delete;
TableBuilder& operator=(const TableBuilder&) = delete;
// REQUIRES: Either Finish() or Abandon() has been called.
~TableBuilder();
@ -81,10 +86,6 @@ class TableBuilder {
struct Rep;
Rep* rep_;
// No copying allowed
TableBuilder(const TableBuilder&);
void operator=(const TableBuilder&);
};
} // namespace leveldb
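
A sketch of building a standalone table file; the output path is hypothetical, and keys must be added in increasing order under options.comparator:

#include <string>

#include "leveldb/env.h"
#include "leveldb/options.h"
#include "leveldb/table_builder.h"

leveldb::Status TableBuildSketch() {
  leveldb::Env* env = leveldb::Env::Default();
  leveldb::WritableFile* file = nullptr;
  leveldb::Status s = env->NewWritableFile("/tmp/example_table.ldb", &file);
  if (!s.ok()) return s;

  leveldb::Options options;
  {
    leveldb::TableBuilder builder(options, file);
    builder.Add("apple", "1");   // sorted order is required
    builder.Add("banana", "2");
    s = builder.Finish();        // Finish() or Abandon() must be called
  }

  if (s.ok()) s = file->Sync();
  if (s.ok()) s = file->Close();
  delete file;  // the builder neither closes nor owns the file
  return s;
}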


@ -22,15 +22,29 @@
#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
#include <string>
#include "leveldb/export.h"
#include "leveldb/status.h"
namespace leveldb {
class Slice;
class WriteBatch {
class LEVELDB_EXPORT WriteBatch {
public:
class LEVELDB_EXPORT Handler {
public:
virtual ~Handler();
virtual void Put(const Slice& key, const Slice& value) = 0;
virtual void Delete(const Slice& key) = 0;
};
WriteBatch();
// Intentionally copyable.
WriteBatch(const WriteBatch&) = default;
WriteBatch& operator=(const WriteBatch&) = default;
~WriteBatch();
// Store the mapping "key->value" in the database.
@ -42,21 +56,26 @@ class WriteBatch {
// Clear all updates buffered in this batch.
void Clear();
// The size of the database changes caused by this batch.
//
// This number is tied to implementation details, and may change across
// releases. It is intended for LevelDB usage metrics.
size_t ApproximateSize() const;
// Copies the operations in "source" to this batch.
//
// This runs in O(source size) time. However, the constant factor is better
// than calling Iterate() over the source batch with a Handler that replicates
// the operations into this batch.
void Append(const WriteBatch& source);
// Support for iterating over the contents of a batch.
class Handler {
public:
virtual ~Handler();
virtual void Put(const Slice& key, const Slice& value) = 0;
virtual void Delete(const Slice& key) = 0;
};
Status Iterate(Handler* handler) const;
private:
friend class WriteBatchInternal;
std::string rep_; // See comment in write_batch.cc for the format of rep_
// Intentionally copyable
};
} // namespace leveldb
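
A sketch of batching updates, including the newly documented Append(); db is assumed to be an open leveldb::DB*:

#include "leveldb/db.h"
#include "leveldb/write_batch.h"

leveldb::Status BatchSketch(leveldb::DB* db) {
  leveldb::WriteBatch batch;
  batch.Put("name", "leveldb");
  batch.Delete("obsolete-key");

  leveldb::WriteBatch other;
  other.Put("version", "1.23");
  batch.Append(other);  // copies other's operations into batch

  // All operations in the batch are applied atomically.
  return db->Write(leveldb::WriteOptions(), &batch);
}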


@ -3,13 +3,14 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Test for issue 178: a manual compaction causes deleted data to reappear.
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include "gtest/gtest.h"
#include "leveldb/db.h"
#include "leveldb/write_batch.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace {
@ -17,19 +18,15 @@ const int kNumKeys = 1100000;
std::string Key1(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "my_key_%d", i);
std::snprintf(buf, sizeof(buf), "my_key_%d", i);
return buf;
}
std::string Key2(int i) {
return Key1(i) + "_xxx";
}
class Issue178 { };
std::string Key2(int i) { return Key1(i) + "_xxx"; }
TEST(Issue178, Test) {
// Get rid of any state from an old run.
std::string dbpath = leveldb::test::TmpDir() + "/leveldb_cbug_test";
std::string dbpath = testing::TempDir() + "leveldb_cbug_test";
DestroyDB(dbpath, leveldb::Options());
// Open database. Disable compression since it affects the creation
@ -39,28 +36,28 @@ TEST(Issue178, Test) {
leveldb::Options db_options;
db_options.create_if_missing = true;
db_options.compression = leveldb::kNoCompression;
ASSERT_OK(leveldb::DB::Open(db_options, dbpath, &db));
ASSERT_LEVELDB_OK(leveldb::DB::Open(db_options, dbpath, &db));
// create first key range
leveldb::WriteBatch batch;
for (size_t i = 0; i < kNumKeys; i++) {
batch.Put(Key1(i), "value for range 1 key");
}
ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
ASSERT_LEVELDB_OK(db->Write(leveldb::WriteOptions(), &batch));
// create second key range
batch.Clear();
for (size_t i = 0; i < kNumKeys; i++) {
batch.Put(Key2(i), "value for range 2 key");
}
ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
ASSERT_LEVELDB_OK(db->Write(leveldb::WriteOptions(), &batch));
// delete second key range
batch.Clear();
for (size_t i = 0; i < kNumKeys; i++) {
batch.Delete(Key2(i));
}
ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
ASSERT_LEVELDB_OK(db->Write(leveldb::WriteOptions(), &batch));
// compact database
std::string start_key = Key1(0);
@ -86,7 +83,3 @@ TEST(Issue178, Test) {
}
} // anonymous namespace
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}


@ -6,35 +6,34 @@
// to forward, the current key can be yielded unexpectedly if a new
// mutation has been added just before the current key.
#include "gtest/gtest.h"
#include "leveldb/db.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
class Issue200 { };
TEST(Issue200, Test) {
// Get rid of any state from an old run.
std::string dbpath = test::TmpDir() + "/leveldb_issue200_test";
std::string dbpath = testing::TempDir() + "leveldb_issue200_test";
DestroyDB(dbpath, Options());
DB *db;
DB* db;
Options options;
options.create_if_missing = true;
ASSERT_OK(DB::Open(options, dbpath, &db));
ASSERT_LEVELDB_OK(DB::Open(options, dbpath, &db));
WriteOptions write_options;
ASSERT_OK(db->Put(write_options, "1", "b"));
ASSERT_OK(db->Put(write_options, "2", "c"));
ASSERT_OK(db->Put(write_options, "3", "d"));
ASSERT_OK(db->Put(write_options, "4", "e"));
ASSERT_OK(db->Put(write_options, "5", "f"));
ASSERT_LEVELDB_OK(db->Put(write_options, "1", "b"));
ASSERT_LEVELDB_OK(db->Put(write_options, "2", "c"));
ASSERT_LEVELDB_OK(db->Put(write_options, "3", "d"));
ASSERT_LEVELDB_OK(db->Put(write_options, "4", "e"));
ASSERT_LEVELDB_OK(db->Put(write_options, "5", "f"));
ReadOptions read_options;
Iterator *iter = db->NewIterator(read_options);
Iterator* iter = db->NewIterator(read_options);
// Add an element that should not be reflected in the iterator.
ASSERT_OK(db->Put(write_options, "25", "cd"));
ASSERT_LEVELDB_OK(db->Put(write_options, "25", "cd"));
iter->Seek("5");
ASSERT_EQ(iter->key().ToString(), "5");
@ -53,7 +52,3 @@ TEST(Issue200, Test) {
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

issues/issue320_test.cc Normal file (+126 lines)

@ -0,0 +1,126 @@
// Copyright (c) 2019 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "leveldb/db.h"
#include "leveldb/write_batch.h"
#include "util/testutil.h"
namespace leveldb {
namespace {
// Creates a random number in the range of [0, max).
int GenerateRandomNumber(int max) { return std::rand() % max; }
std::string CreateRandomString(int32_t index) {
static const size_t len = 1024;
char bytes[len];
size_t i = 0;
while (i < 8) {
bytes[i] = 'a' + ((index >> (4 * i)) & 0xf);
++i;
}
while (i < sizeof(bytes)) {
bytes[i] = 'a' + GenerateRandomNumber(26);
++i;
}
return std::string(bytes, sizeof(bytes));
}
} // namespace
TEST(Issue320, Test) {
std::srand(0);
bool delete_before_put = false;
bool keep_snapshots = true;
std::vector<std::unique_ptr<std::pair<std::string, std::string>>> test_map(
10000);
std::vector<Snapshot const*> snapshots(100, nullptr);
DB* db;
Options options;
options.create_if_missing = true;
std::string dbpath = testing::TempDir() + "leveldb_issue320_test";
ASSERT_LEVELDB_OK(DB::Open(options, dbpath, &db));
uint32_t target_size = 10000;
uint32_t num_items = 0;
uint32_t count = 0;
std::string key;
std::string value, old_value;
WriteOptions writeOptions;
ReadOptions readOptions;
while (count < 200000) {
if ((++count % 1000) == 0) {
std::cout << "count: " << count << std::endl;
}
int index = GenerateRandomNumber(test_map.size());
WriteBatch batch;
if (test_map[index] == nullptr) {
num_items++;
test_map[index].reset(new std::pair<std::string, std::string>(
CreateRandomString(index), CreateRandomString(index)));
batch.Put(test_map[index]->first, test_map[index]->second);
} else {
ASSERT_LEVELDB_OK(
db->Get(readOptions, test_map[index]->first, &old_value));
if (old_value != test_map[index]->second) {
std::cout << "ERROR incorrect value returned by Get" << std::endl;
std::cout << " count=" << count << std::endl;
std::cout << " old value=" << old_value << std::endl;
std::cout << " test_map[index]->second=" << test_map[index]->second
<< std::endl;
std::cout << " test_map[index]->first=" << test_map[index]->first
<< std::endl;
std::cout << " index=" << index << std::endl;
ASSERT_EQ(old_value, test_map[index]->second);
}
if (num_items >= target_size && GenerateRandomNumber(100) > 30) {
batch.Delete(test_map[index]->first);
test_map[index] = nullptr;
--num_items;
} else {
test_map[index]->second = CreateRandomString(index);
if (delete_before_put) batch.Delete(test_map[index]->first);
batch.Put(test_map[index]->first, test_map[index]->second);
}
}
ASSERT_LEVELDB_OK(db->Write(writeOptions, &batch));
if (keep_snapshots && GenerateRandomNumber(10) == 0) {
int i = GenerateRandomNumber(snapshots.size());
if (snapshots[i] != nullptr) {
db->ReleaseSnapshot(snapshots[i]);
}
snapshots[i] = db->GetSnapshot();
}
}
for (Snapshot const* snapshot : snapshots) {
if (snapshot) {
db->ReleaseSnapshot(snapshot);
}
}
delete db;
DestroyDB(dbpath, options);
}
} // namespace leveldb


@ -5,6 +5,6 @@ Code in the rest of the package includes "port.h" from this directory.
"port.h" in turn includes a platform specific "port_<platform>.h" file
that provides the platform specific implementation.
See port_posix.h for an example of what must be provided in a platform
See port_stdcxx.h for an example of what must be provided in a platform
specific header file.


@ -1,233 +0,0 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// AtomicPointer provides storage for a lock-free pointer.
// Platform-dependent implementation of AtomicPointer:
// - If the platform provides a cheap barrier, we use it with raw pointers
// - If <atomic> is present (on newer versions of gcc, it is), we use
// a <atomic>-based AtomicPointer. However we prefer the memory
// barrier based version, because at least on a gcc 4.4 32-bit build
// on linux, we have encountered a buggy <atomic> implementation.
// Also, some <atomic> implementations are much slower than a memory-barrier
// based implementation (~16ns for <atomic> based acquire-load vs. ~1ns for
// a barrier based acquire-load).
// This code is based on atomicops-internals-* in Google's perftools:
// http://code.google.com/p/google-perftools/source/browse/#svn%2Ftrunk%2Fsrc%2Fbase
#ifndef PORT_ATOMIC_POINTER_H_
#define PORT_ATOMIC_POINTER_H_
#include <stdint.h>
#ifdef LEVELDB_ATOMIC_PRESENT
#include <atomic>
#endif
#ifdef OS_WIN
#include <windows.h>
#endif
#ifdef OS_MACOSX
#include <libkern/OSAtomic.h>
#endif
#if defined(_M_X64) || defined(__x86_64__)
#define ARCH_CPU_X86_FAMILY 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#define ARCH_CPU_X86_FAMILY 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
#elif defined(__aarch64__)
#define ARCH_CPU_ARM64_FAMILY 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
#define ARCH_CPU_PPC_FAMILY 1
#endif
namespace leveldb {
namespace port {
// Define MemoryBarrier() if available
// Windows on x86
#if defined(OS_WIN) && defined(COMPILER_MSVC) && defined(ARCH_CPU_X86_FAMILY)
// windows.h already provides a MemoryBarrier(void) macro
// http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx
#define LEVELDB_HAVE_MEMORY_BARRIER
// Mac OS
#elif defined(OS_MACOSX)
inline void MemoryBarrier() {
OSMemoryBarrier();
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// Gcc on x86
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
// See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
// this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
__asm__ __volatile__("" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// Sun Studio
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__SUNPRO_CC)
inline void MemoryBarrier() {
// See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
// this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
asm volatile("" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// ARM Linux
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__)
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
// The Linux ARM kernel provides a highly optimized device-specific memory
// barrier function at a fixed memory address that is mapped in every
// user-level process.
//
// This beats using CPU-specific instructions which are, on single-core
// devices, un-necessary and very costly (e.g. ARMv7-A "dmb" takes more
// than 180ns on a Cortex-A8 like the one on a Nexus One). Benchmarking
// shows that the extra function call cost is completely negligible on
// multi-core devices.
//
inline void MemoryBarrier() {
(*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)();
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// ARM64
#elif defined(ARCH_CPU_ARM64_FAMILY)
inline void MemoryBarrier() {
asm volatile("dmb sy" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// PPC
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
// TODO for some powerpc expert: is there a cheaper suitable variant?
// Perhaps by having separate barriers for acquire and release ops.
asm volatile("sync" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
#endif
// AtomicPointer built using platform-specific MemoryBarrier()
#if defined(LEVELDB_HAVE_MEMORY_BARRIER)
class AtomicPointer {
private:
void* rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* p) : rep_(p) {}
inline void* NoBarrier_Load() const { return rep_; }
inline void NoBarrier_Store(void* v) { rep_ = v; }
inline void* Acquire_Load() const {
void* result = rep_;
MemoryBarrier();
return result;
}
inline void Release_Store(void* v) {
MemoryBarrier();
rep_ = v;
}
};
// AtomicPointer based on <cstdatomic>
#elif defined(LEVELDB_ATOMIC_PRESENT)
class AtomicPointer {
private:
std::atomic<void*> rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* v) : rep_(v) { }
inline void* Acquire_Load() const {
return rep_.load(std::memory_order_acquire);
}
inline void Release_Store(void* v) {
rep_.store(v, std::memory_order_release);
}
inline void* NoBarrier_Load() const {
return rep_.load(std::memory_order_relaxed);
}
inline void NoBarrier_Store(void* v) {
rep_.store(v, std::memory_order_relaxed);
}
};
// Atomic pointer based on sparc memory barriers
#elif defined(__sparcv9) && defined(__GNUC__)
class AtomicPointer {
private:
void* rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* v) : rep_(v) { }
inline void* Acquire_Load() const {
void* val;
__asm__ __volatile__ (
"ldx [%[rep_]], %[val] \n\t"
"membar #LoadLoad|#LoadStore \n\t"
: [val] "=r" (val)
: [rep_] "r" (&rep_)
: "memory");
return val;
}
inline void Release_Store(void* v) {
__asm__ __volatile__ (
"membar #LoadStore|#StoreStore \n\t"
"stx %[v], [%[rep_]] \n\t"
:
: [rep_] "r" (&rep_), [v] "r" (v)
: "memory");
}
inline void* NoBarrier_Load() const { return rep_; }
inline void NoBarrier_Store(void* v) { rep_ = v; }
};
// Atomic pointer based on ia64 acq/rel
#elif defined(__ia64) && defined(__GNUC__)
class AtomicPointer {
private:
void* rep_;
public:
AtomicPointer() { }
explicit AtomicPointer(void* v) : rep_(v) { }
inline void* Acquire_Load() const {
void* val ;
__asm__ __volatile__ (
"ld8.acq %[val] = [%[rep_]] \n\t"
: [val] "=r" (val)
: [rep_] "r" (&rep_)
: "memory"
);
return val;
}
inline void Release_Store(void* v) {
__asm__ __volatile__ (
"st8.rel [%[rep_]] = %[v] \n\t"
:
: [rep_] "r" (&rep_), [v] "r" (v)
: "memory"
);
}
inline void* NoBarrier_Load() const { return rep_; }
inline void NoBarrier_Store(void* v) { rep_ = v; }
};
// We have neither MemoryBarrier(), nor <atomic>
#else
#error Please implement AtomicPointer for this platform.
#endif
#undef LEVELDB_HAVE_MEMORY_BARRIER
#undef ARCH_CPU_X86_FAMILY
#undef ARCH_CPU_ARM_FAMILY
#undef ARCH_CPU_ARM64_FAMILY
#undef ARCH_CPU_PPC_FAMILY
} // namespace port
} // namespace leveldb
#endif // PORT_ATOMIC_POINTER_H_
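
The deleted atomic_pointer.h above hand-rolled acquire/release ordering with per-platform memory barriers; C++11's std::atomic expresses the same contract directly, which is why the port layer no longer needs this file. A minimal sketch of the equivalent (AtomicPointerSketch is a hypothetical name, shown for illustration only, not code from this change):

// Illustration only: the acquire/release contract the deleted AtomicPointer
// documented, expressed with std::atomic.
#include <atomic>

class AtomicPointerSketch {
 private:
  std::atomic<void*> rep_;

 public:
  AtomicPointerSketch() = default;
  explicit AtomicPointerSketch(void* v) : rep_(v) {}

  // No later memory access by this thread may be reordered before this load.
  void* Acquire_Load() const { return rep_.load(std::memory_order_acquire); }

  // No earlier memory access by this thread may be reordered after this store.
  void Release_Store(void* v) { rep_.store(v, std::memory_order_release); }

  // Relaxed accessors: atomicity only, no ordering guarantees.
  void* NoBarrier_Load() const { return rep_.load(std::memory_order_relaxed); }
  void NoBarrier_Store(void* v) { rep_.store(v, std::memory_order_relaxed); }
};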

@ -10,10 +10,10 @@
// Include the appropriate platform specific file below. If you are
// porting to a new platform, see "port_example.h" for documentation
// of what the new port_<platform>.h file must provide.
#if defined(LEVELDB_PLATFORM_POSIX)
# include "port/port_posix.h"
#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_WINDOWS)
#include "port/port_stdcxx.h"
#elif defined(LEVELDB_PLATFORM_CHROMIUM)
# include "port/port_chromium.h"
#include "port/port_chromium.h"
#endif
#endif // STORAGE_LEVELDB_PORT_PORT_H_

port/port_config.h.in Normal file (38 lines)

@ -0,0 +1,38 @@
// Copyright 2017 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
#define STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
// Define to 1 if you have a definition for fdatasync() in <unistd.h>.
#if !defined(HAVE_FDATASYNC)
#cmakedefine01 HAVE_FDATASYNC
#endif // !defined(HAVE_FDATASYNC)
// Define to 1 if you have a definition for F_FULLFSYNC in <fcntl.h>.
#if !defined(HAVE_FULLFSYNC)
#cmakedefine01 HAVE_FULLFSYNC
#endif // !defined(HAVE_FULLFSYNC)
// Define to 1 if you have a definition for O_CLOEXEC in <fcntl.h>.
#if !defined(HAVE_O_CLOEXEC)
#cmakedefine01 HAVE_O_CLOEXEC
#endif // !defined(HAVE_O_CLOEXEC)
// Define to 1 if you have Google CRC32C.
#if !defined(HAVE_CRC32C)
#cmakedefine01 HAVE_CRC32C
#endif // !defined(HAVE_CRC32C)
// Define to 1 if you have Google Snappy.
#if !defined(HAVE_SNAPPY)
#cmakedefine01 HAVE_SNAPPY
#endif // !defined(HAVE_SNAPPY)
// Define to 1 if you have Zstd.
#if !defined(HAVE_ZSTD)
#cmakedefine01 HAVE_ZSTD
#endif // !defined(HAVE_ZSTD)
#endif // STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
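
During the build, CMake's configure_file() expands each #cmakedefine01 line to #define NAME 0 or #define NAME 1, depending on what was detected. A hedged illustration of what a generated port/port_config.h might look like; the 0/1 values below are examples only and depend on the target system:

// Illustrative configure_file() output; the 0/1 values are examples only.
#ifndef STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
#define STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
#if !defined(HAVE_FDATASYNC)
#define HAVE_FDATASYNC 1
#endif  // !defined(HAVE_FDATASYNC)
#if !defined(HAVE_FULLFSYNC)
#define HAVE_FULLFSYNC 0
#endif  // !defined(HAVE_FULLFSYNC)
#if !defined(HAVE_O_CLOEXEC)
#define HAVE_O_CLOEXEC 1
#endif  // !defined(HAVE_O_CLOEXEC)
#if !defined(HAVE_CRC32C)
#define HAVE_CRC32C 0
#endif  // !defined(HAVE_CRC32C)
#if !defined(HAVE_SNAPPY)
#define HAVE_SNAPPY 1
#endif  // !defined(HAVE_SNAPPY)
#if !defined(HAVE_ZSTD)
#define HAVE_ZSTD 1
#endif  // !defined(HAVE_ZSTD)
#endif  // STORAGE_LEVELDB_PORT_PORT_CONFIG_H_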

@ -10,36 +10,34 @@
#ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_
#define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_
#include "port/thread_annotations.h"
namespace leveldb {
namespace port {
// TODO(jorlow): Many of these belong more in the environment class rather than
// here. We should try moving them and see if it affects perf.
// The following boolean constant must be true on a little-endian machine
// and false otherwise.
static const bool kLittleEndian = true /* or some other expression */;
// ------------------ Threading -------------------
// A Mutex represents an exclusive lock.
class Mutex {
class LOCKABLE Mutex {
public:
Mutex();
~Mutex();
// Lock the mutex. Waits until other lockers have exited.
// Will deadlock if the mutex is already locked by this thread.
void Lock();
void Lock() EXCLUSIVE_LOCK_FUNCTION();
// Unlock the mutex.
// REQUIRES: This mutex was locked by this thread.
void Unlock();
void Unlock() UNLOCK_FUNCTION();
// Optionally crash if this thread does not hold this mutex.
// The implementation must be fast, especially if NDEBUG is
// defined. The implementation is allowed to skip all checks.
void AssertHeld();
void AssertHeld() ASSERT_EXCLUSIVE_LOCK();
};
class CondVar {
@ -57,77 +55,64 @@ class CondVar {
void Signal();
// Wake up all waiting threads.
void SignallAll();
};
// Thread-safe initialization.
// Used as follows:
// static port::OnceType init_control = LEVELDB_ONCE_INIT;
// static void Initializer() { ... do something ...; }
// ...
// port::InitOnce(&init_control, &Initializer);
typedef intptr_t OnceType;
#define LEVELDB_ONCE_INIT 0
extern void InitOnce(port::OnceType*, void (*initializer)());
// A type that holds a pointer that can be read or written atomically
// (i.e., without word-tearing.)
class AtomicPointer {
private:
intptr_t rep_;
public:
// Initialize to arbitrary value
AtomicPointer();
// Initialize to hold v
explicit AtomicPointer(void* v) : rep_(v) { }
// Read and return the stored pointer with the guarantee that no
// later memory access (read or write) by this thread can be
// reordered ahead of this read.
void* Acquire_Load() const;
// Set v as the stored pointer with the guarantee that no earlier
// memory access (read or write) by this thread can be reordered
// after this store.
void Release_Store(void* v);
// Read the stored pointer with no ordering guarantees.
void* NoBarrier_Load() const;
// Set v as the stored pointer with no ordering guarantees.
void NoBarrier_Store(void* v);
void SignalAll();
};
// ------------------ Compression -------------------
// Store the snappy compression of "input[0,input_length-1]" in *output.
// Returns false if snappy is not supported by this port.
extern bool Snappy_Compress(const char* input, size_t input_length,
std::string* output);
bool Snappy_Compress(const char* input, size_t input_length,
std::string* output);
// If input[0,input_length-1] looks like a valid snappy compressed
// buffer, store the size of the uncompressed data in *result and
// return true. Else return false.
extern bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result);
bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result);
// Attempt to snappy uncompress input[0,input_length-1] into *output.
// Returns true if successful, false if the input is invalid lightweight
// Returns true if successful, false if the input is invalid snappy
// compressed data.
//
// REQUIRES: at least the first "n" bytes of output[] must be writable
// where "n" is the result of a successful call to
// Snappy_GetUncompressedLength.
extern bool Snappy_Uncompress(const char* input_data, size_t input_length,
char* output);
bool Snappy_Uncompress(const char* input_data, size_t input_length,
char* output);
// Store the zstd compression of "input[0,input_length-1]" in *output.
// Returns false if zstd is not supported by this port.
bool Zstd_Compress(int level, const char* input, size_t input_length,
std::string* output);
// If input[0,input_length-1] looks like a valid zstd compressed
// buffer, store the size of the uncompressed data in *result and
// return true. Else return false.
bool Zstd_GetUncompressedLength(const char* input, size_t length,
size_t* result);
// Attempt to zstd uncompress input[0,input_length-1] into *output.
// Returns true if successful, false if the input is invalid zstd
// compressed data.
//
// REQUIRES: at least the first "n" bytes of output[] must be writable
// where "n" is the result of a successful call to
// Zstd_GetUncompressedLength.
bool Zstd_Uncompress(const char* input_data, size_t input_length, char* output);
// ------------------ Miscellaneous -------------------
// If heap profiling is not supported, returns false.
// Else repeatedly calls (*func)(arg, data, n) and then returns true.
// The concatenation of all "data[0,n-1]" fragments is the heap profile.
extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
// Extend the CRC to include the first n bytes of buf.
//
// Returns zero if the CRC cannot be extended using acceleration, else returns
// the newly extended CRC value (which may also be zero).
uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);
} // namespace port
} // namespace leveldb
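
The compression API documented above is a two-step contract: a caller first asks for the uncompressed length, then supplies an output buffer of at least that size. A minimal sketch of a caller honoring that contract for snappy (UncompressSnappyToString is a hypothetical helper, not part of the port API):

// Hypothetical helper following the documented contract: query the
// uncompressed length, size the buffer, then uncompress into it.
#include <cstddef>
#include <string>

#include "port/port.h"

bool UncompressSnappyToString(const char* input, size_t input_length,
                              std::string* result) {
  size_t ulen;
  if (!leveldb::port::Snappy_GetUncompressedLength(input, input_length,
                                                   &ulen)) {
    return false;  // Not valid snappy data, or snappy support not compiled in.
  }
  result->resize(ulen);  // Satisfies the REQUIRES clause on output[].
  return leveldb::port::Snappy_Uncompress(input, input_length,
                                          &(*result)[0]);
}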

@ -1,54 +0,0 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "port/port_posix.h"
#include <cstdlib>
#include <stdio.h>
#include <string.h>
#include "util/logging.h"
namespace leveldb {
namespace port {
static void PthreadCall(const char* label, int result) {
if (result != 0) {
fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
abort();
}
}
Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); }
Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }
void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); }
void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); }
CondVar::CondVar(Mutex* mu)
: mu_(mu) {
PthreadCall("init cv", pthread_cond_init(&cv_, NULL));
}
CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); }
void CondVar::Wait() {
PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
}
void CondVar::Signal() {
PthreadCall("signal", pthread_cond_signal(&cv_));
}
void CondVar::SignalAll() {
PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
}
void InitOnce(OnceType* once, void (*initializer)()) {
PthreadCall("once", pthread_once(once, initializer));
}
} // namespace port
} // namespace leveldb

@ -1,154 +0,0 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// See port_example.h for documentation for the following types/functions.
#ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_
#define STORAGE_LEVELDB_PORT_PORT_POSIX_H_
#undef PLATFORM_IS_LITTLE_ENDIAN
#if defined(OS_MACOSX)
#include <machine/endian.h>
#if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER)
#define PLATFORM_IS_LITTLE_ENDIAN \
(__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN)
#endif
#elif defined(OS_SOLARIS)
#include <sys/isa_defs.h>
#ifdef _LITTLE_ENDIAN
#define PLATFORM_IS_LITTLE_ENDIAN true
#else
#define PLATFORM_IS_LITTLE_ENDIAN false
#endif
#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) ||\
defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD)
#include <sys/types.h>
#include <sys/endian.h>
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
#elif defined(OS_HPUX)
#define PLATFORM_IS_LITTLE_ENDIAN false
#elif defined(OS_ANDROID)
// Due to a bug in the NDK x86 <sys/endian.h> definition,
// _BYTE_ORDER must be used instead of __BYTE_ORDER on Android.
// See http://code.google.com/p/android/issues/detail?id=39824
#include <endian.h>
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
#else
#include <endian.h>
#endif
#include <pthread.h>
#ifdef SNAPPY
#include <snappy.h>
#endif
#include <stdint.h>
#include <string>
#include "port/atomic_pointer.h"
#ifndef PLATFORM_IS_LITTLE_ENDIAN
#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN)
#endif
#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\
defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\
defined(OS_ANDROID) || defined(OS_HPUX) || defined(CYGWIN)
// Use fread/fwrite/fflush on platforms without _unlocked variants
#define fread_unlocked fread
#define fwrite_unlocked fwrite
#define fflush_unlocked fflush
#endif
#if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\
defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD)
// Use fsync() on platforms without fdatasync()
#define fdatasync fsync
#endif
#if defined(OS_ANDROID) && __ANDROID_API__ < 9
// fdatasync() was only introduced in API level 9 on Android. Use fsync()
// when targetting older platforms.
#define fdatasync fsync
#endif
namespace leveldb {
namespace port {
static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN;
#undef PLATFORM_IS_LITTLE_ENDIAN
class CondVar;
class Mutex {
public:
Mutex();
~Mutex();
void Lock();
void Unlock();
void AssertHeld() { }
private:
friend class CondVar;
pthread_mutex_t mu_;
// No copying
Mutex(const Mutex&);
void operator=(const Mutex&);
};
class CondVar {
public:
explicit CondVar(Mutex* mu);
~CondVar();
void Wait();
void Signal();
void SignalAll();
private:
pthread_cond_t cv_;
Mutex* mu_;
};
typedef pthread_once_t OnceType;
#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT
extern void InitOnce(OnceType* once, void (*initializer)());
inline bool Snappy_Compress(const char* input, size_t length,
::std::string* output) {
#ifdef SNAPPY
output->resize(snappy::MaxCompressedLength(length));
size_t outlen;
snappy::RawCompress(input, length, &(*output)[0], &outlen);
output->resize(outlen);
return true;
#endif
return false;
}
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result) {
#ifdef SNAPPY
return snappy::GetUncompressedLength(input, length, result);
#else
return false;
#endif
}
inline bool Snappy_Uncompress(const char* input, size_t length,
char* output) {
#ifdef SNAPPY
return snappy::RawUncompress(input, length, output);
#else
return false;
#endif
}
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
return false;
}
} // namespace port
} // namespace leveldb
#endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_

port/port_stdcxx.h Normal file (223 lines)

@ -0,0 +1,223 @@
// Copyright (c) 2018 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
#define STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
// port/port_config.h availability is automatically detected via __has_include
// in newer compilers. If LEVELDB_HAS_PORT_CONFIG_H is defined, it overrides the
// configuration detection.
#if defined(LEVELDB_HAS_PORT_CONFIG_H)
#if LEVELDB_HAS_PORT_CONFIG_H
#include "port/port_config.h"
#endif // LEVELDB_HAS_PORT_CONFIG_H
#elif defined(__has_include)
#if __has_include("port/port_config.h")
#include "port/port_config.h"
#endif // __has_include("port/port_config.h")
#endif // defined(LEVELDB_HAS_PORT_CONFIG_H)
#if HAVE_CRC32C
#include <crc32c/crc32c.h>
#endif // HAVE_CRC32C
#if HAVE_SNAPPY
#include <snappy.h>
#endif // HAVE_SNAPPY
#if HAVE_ZSTD
#define ZSTD_STATIC_LINKING_ONLY // For ZSTD_compressionParameters.
#include <zstd.h>
#endif // HAVE_ZSTD
#include <cassert>
#include <condition_variable> // NOLINT
#include <cstddef>
#include <cstdint>
#include <mutex> // NOLINT
#include <string>
#include "port/thread_annotations.h"
namespace leveldb {
namespace port {
class CondVar;
// Thinly wraps std::mutex.
class LOCKABLE Mutex {
public:
Mutex() = default;
~Mutex() = default;
Mutex(const Mutex&) = delete;
Mutex& operator=(const Mutex&) = delete;
void Lock() EXCLUSIVE_LOCK_FUNCTION() { mu_.lock(); }
void Unlock() UNLOCK_FUNCTION() { mu_.unlock(); }
void AssertHeld() ASSERT_EXCLUSIVE_LOCK() {}
private:
friend class CondVar;
std::mutex mu_;
};
// Thinly wraps std::condition_variable.
class CondVar {
public:
explicit CondVar(Mutex* mu) : mu_(mu) { assert(mu != nullptr); }
~CondVar() = default;
CondVar(const CondVar&) = delete;
CondVar& operator=(const CondVar&) = delete;
void Wait() {
std::unique_lock<std::mutex> lock(mu_->mu_, std::adopt_lock);
cv_.wait(lock);
lock.release();
}
void Signal() { cv_.notify_one(); }
void SignalAll() { cv_.notify_all(); }
private:
std::condition_variable cv_;
Mutex* const mu_;
};
inline bool Snappy_Compress(const char* input, size_t length,
std::string* output) {
#if HAVE_SNAPPY
output->resize(snappy::MaxCompressedLength(length));
size_t outlen;
snappy::RawCompress(input, length, &(*output)[0], &outlen);
output->resize(outlen);
return true;
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)output;
#endif // HAVE_SNAPPY
return false;
}
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result) {
#if HAVE_SNAPPY
return snappy::GetUncompressedLength(input, length, result);
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)result;
return false;
#endif // HAVE_SNAPPY
}
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#if HAVE_SNAPPY
return snappy::RawUncompress(input, length, output);
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)output;
return false;
#endif // HAVE_SNAPPY
}
inline bool Zstd_Compress(int level, const char* input, size_t length,
std::string* output) {
#if HAVE_ZSTD
// Get the MaxCompressedLength.
size_t outlen = ZSTD_compressBound(length);
if (ZSTD_isError(outlen)) {
return false;
}
output->resize(outlen);
ZSTD_CCtx* ctx = ZSTD_createCCtx();
ZSTD_compressionParameters parameters =
ZSTD_getCParams(level, std::max(length, size_t{1}), /*dictSize=*/0);
ZSTD_CCtx_setCParams(ctx, parameters);
outlen = ZSTD_compress2(ctx, &(*output)[0], output->size(), input, length);
ZSTD_freeCCtx(ctx);
if (ZSTD_isError(outlen)) {
return false;
}
output->resize(outlen);
return true;
#else
// Silence compiler warnings about unused arguments.
(void)level;
(void)input;
(void)length;
(void)output;
return false;
#endif // HAVE_ZSTD
}
inline bool Zstd_GetUncompressedLength(const char* input, size_t length,
size_t* result) {
#if HAVE_ZSTD
size_t size = ZSTD_getFrameContentSize(input, length);
if (size == 0) return false;
*result = size;
return true;
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)result;
return false;
#endif // HAVE_ZSTD
}
inline bool Zstd_Uncompress(const char* input, size_t length, char* output) {
#if HAVE_ZSTD
size_t outlen;
if (!Zstd_GetUncompressedLength(input, length, &outlen)) {
return false;
}
ZSTD_DCtx* ctx = ZSTD_createDCtx();
outlen = ZSTD_decompressDCtx(ctx, output, outlen, input, length);
ZSTD_freeDCtx(ctx);
if (ZSTD_isError(outlen)) {
return false;
}
return true;
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)output;
return false;
#endif // HAVE_ZSTD
}
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
// Silence compiler warnings about unused arguments.
(void)func;
(void)arg;
return false;
}
inline uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if HAVE_CRC32C
return ::crc32c::Extend(crc, reinterpret_cast<const uint8_t*>(buf), size);
#else
// Silence compiler warnings about unused arguments.
(void)crc;
(void)buf;
(void)size;
return 0;
#endif // HAVE_CRC32C
}
} // namespace port
} // namespace leveldb
#endif // STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
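
CondVar::Wait() above adopts the already-held std::mutex into a std::unique_lock, waits, and then release()s it so ownership stays with the caller's Mutex. A short usage sketch of the wrapper pair with the usual wait-in-a-loop pattern (the WorkQueue class and its fields are hypothetical, shown only to illustrate the API):

// Sketch: typical producer/consumer use of port::Mutex and port::CondVar.
#include <deque>

#include "port/port.h"

class WorkQueue {
 public:
  WorkQueue() : cv_(&mu_) {}

  void Push(int item) {
    mu_.Lock();
    items_.push_back(item);
    mu_.Unlock();
    cv_.Signal();
  }

  int BlockingPop() {
    mu_.Lock();
    while (items_.empty()) {
      cv_.Wait();  // Releases mu_ while waiting, re-acquires it on wakeup.
    }
    int item = items_.front();
    items_.pop_front();
    mu_.Unlock();
    return item;
  }

 private:
  leveldb::port::Mutex mu_;
  leveldb::port::CondVar cv_;
  std::deque<int> items_;
};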

@ -5,56 +5,104 @@
#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
#define STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
// Some environments provide custom macros to aid in static thread-safety
// analysis. Provide empty definitions of such macros unless they are already
// defined.
// Use Clang's thread safety analysis annotations when available. In other
// environments, the macros receive empty definitions.
// Usage documentation: https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
#if !defined(THREAD_ANNOTATION_ATTRIBUTE__)
#if defined(__clang__)
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
#endif
#endif // !defined(THREAD_ANNOTATION_ATTRIBUTE__)
#ifndef GUARDED_BY
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
#endif
#ifndef PT_GUARDED_BY
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
#endif
#ifndef ACQUIRED_AFTER
#define ACQUIRED_AFTER(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
#endif
#ifndef ACQUIRED_BEFORE
#define ACQUIRED_BEFORE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
#endif
#ifndef EXCLUSIVE_LOCKS_REQUIRED
#define EXCLUSIVE_LOCKS_REQUIRED(...)
#define EXCLUSIVE_LOCKS_REQUIRED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__))
#endif
#ifndef SHARED_LOCKS_REQUIRED
#define SHARED_LOCKS_REQUIRED(...)
#define SHARED_LOCKS_REQUIRED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__))
#endif
#ifndef LOCKS_EXCLUDED
#define LOCKS_EXCLUDED(...)
#define LOCKS_EXCLUDED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
#endif
#ifndef LOCK_RETURNED
#define LOCK_RETURNED(x)
#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
#endif
#ifndef LOCKABLE
#define LOCKABLE
#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable)
#endif
#ifndef SCOPED_LOCKABLE
#define SCOPED_LOCKABLE
#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
#endif
#ifndef EXCLUSIVE_LOCK_FUNCTION
#define EXCLUSIVE_LOCK_FUNCTION(...)
#define EXCLUSIVE_LOCK_FUNCTION(...) \
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__))
#endif
#ifndef SHARED_LOCK_FUNCTION
#define SHARED_LOCK_FUNCTION(...)
#define SHARED_LOCK_FUNCTION(...) \
THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__))
#endif
#ifndef EXCLUSIVE_TRYLOCK_FUNCTION
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__))
#endif
#ifndef SHARED_TRYLOCK_FUNCTION
#define SHARED_TRYLOCK_FUNCTION(...)
#define SHARED_TRYLOCK_FUNCTION(...) \
THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__))
#endif
#ifndef UNLOCK_FUNCTION
#define UNLOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...) \
THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__))
#endif
#ifndef NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS \
THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
#endif
#ifndef ASSERT_EXCLUSIVE_LOCK
#define ASSERT_EXCLUSIVE_LOCK(...) \
THREAD_ANNOTATION_ATTRIBUTE__(assert_exclusive_lock(__VA_ARGS__))
#endif
#ifndef ASSERT_SHARED_LOCK
#define ASSERT_SHARED_LOCK(...) \
THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_lock(__VA_ARGS__))
#endif
#endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
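
With Clang and -Wthread-safety, these macros expand to attributes that let the compiler check lock discipline at compile time; on other compilers they expand to nothing. A minimal sketch of how they are typically applied (the Counter class is hypothetical):

// Sketch: annotations let clang verify that counter_ is only touched with
// mu_ held.
#include "port/port.h"
#include "port/thread_annotations.h"

class Counter {
 public:
  void Increment() LOCKS_EXCLUDED(mu_) {
    mu_.Lock();
    IncrementLocked();
    mu_.Unlock();
  }

  int Get() LOCKS_EXCLUDED(mu_) {
    mu_.Lock();
    int result = counter_;
    mu_.Unlock();
    return result;
  }

 private:
  // May only be called with mu_ held; the analysis enforces this at call sites.
  void IncrementLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { ++counter_; }

  leveldb::port::Mutex mu_;
  int counter_ GUARDED_BY(mu_) = 0;
};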

@ -1,24 +0,0 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// MSVC didn't ship with this file until the 2010 version.
#ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_
#define STORAGE_LEVELDB_PORT_WIN_STDINT_H_
#if !defined(_MSC_VER)
#error This file should only be included when compiling with MSVC.
#endif
// Define C99 equivalent types.
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long long int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_

@ -6,8 +6,10 @@
#include "table/block.h"
#include <vector>
#include <algorithm>
#include <cstdint>
#include <vector>
#include "leveldb/comparator.h"
#include "table/format.h"
#include "util/coding.h"
@ -27,7 +29,7 @@ Block::Block(const BlockContents& contents)
if (size_ < sizeof(uint32_t)) {
size_ = 0; // Error marker
} else {
size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t);
size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t);
if (NumRestarts() > max_restarts_allowed) {
// The size is too small for NumRestarts()
size_ = 0;
@ -48,27 +50,26 @@ Block::~Block() {
// and the length of the value in "*shared", "*non_shared", and
// "*value_length", respectively. Will not dereference past "limit".
//
// If any errors are detected, returns NULL. Otherwise, returns a
// If any errors are detected, returns nullptr. Otherwise, returns a
// pointer to the key delta (just past the three decoded values).
static inline const char* DecodeEntry(const char* p, const char* limit,
uint32_t* shared,
uint32_t* non_shared,
uint32_t* shared, uint32_t* non_shared,
uint32_t* value_length) {
if (limit - p < 3) return NULL;
*shared = reinterpret_cast<const unsigned char*>(p)[0];
*non_shared = reinterpret_cast<const unsigned char*>(p)[1];
*value_length = reinterpret_cast<const unsigned char*>(p)[2];
if (limit - p < 3) return nullptr;
*shared = reinterpret_cast<const uint8_t*>(p)[0];
*non_shared = reinterpret_cast<const uint8_t*>(p)[1];
*value_length = reinterpret_cast<const uint8_t*>(p)[2];
if ((*shared | *non_shared | *value_length) < 128) {
// Fast path: all three values are encoded in one byte each
p += 3;
} else {
if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL;
if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL;
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr;
}
if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
return NULL;
return nullptr;
}
return p;
}
@ -76,9 +77,9 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
class Block::Iter : public Iterator {
private:
const Comparator* const comparator_;
const char* const data_; // underlying block contents
uint32_t const restarts_; // Offset of restart array (list of fixed32)
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
const char* const data_; // underlying block contents
uint32_t const restarts_; // Offset of restart array (list of fixed32)
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
uint32_t current_;
@ -112,9 +113,7 @@ class Block::Iter : public Iterator {
}
public:
Iter(const Comparator* comparator,
const char* data,
uint32_t restarts,
Iter(const Comparator* comparator, const char* data, uint32_t restarts,
uint32_t num_restarts)
: comparator_(comparator),
data_(data),
@ -125,23 +124,23 @@ class Block::Iter : public Iterator {
assert(num_restarts_ > 0);
}
virtual bool Valid() const { return current_ < restarts_; }
virtual Status status() const { return status_; }
virtual Slice key() const {
bool Valid() const override { return current_ < restarts_; }
Status status() const override { return status_; }
Slice key() const override {
assert(Valid());
return key_;
}
virtual Slice value() const {
Slice value() const override {
assert(Valid());
return value_;
}
virtual void Next() {
void Next() override {
assert(Valid());
ParseNextKey();
}
virtual void Prev() {
void Prev() override {
assert(Valid());
// Scan backwards to a restart point before current_
@ -162,19 +161,37 @@ class Block::Iter : public Iterator {
} while (ParseNextKey() && NextEntryOffset() < original);
}
virtual void Seek(const Slice& target) {
void Seek(const Slice& target) override {
// Binary search in restart array to find the last restart point
// with a key < target
uint32_t left = 0;
uint32_t right = num_restarts_ - 1;
int current_key_compare = 0;
if (Valid()) {
// If we're already scanning, use the current position as a starting
// point. This is beneficial if the key we're seeking to is ahead of the
// current position.
current_key_compare = Compare(key_, target);
if (current_key_compare < 0) {
// key_ is smaller than target
left = restart_index_;
} else if (current_key_compare > 0) {
right = restart_index_;
} else {
// We're seeking to the key we're already at.
return;
}
}
while (left < right) {
uint32_t mid = (left + right + 1) / 2;
uint32_t region_offset = GetRestartPoint(mid);
uint32_t shared, non_shared, value_length;
const char* key_ptr = DecodeEntry(data_ + region_offset,
data_ + restarts_,
&shared, &non_shared, &value_length);
if (key_ptr == NULL || (shared != 0)) {
const char* key_ptr =
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
&non_shared, &value_length);
if (key_ptr == nullptr || (shared != 0)) {
CorruptionError();
return;
}
@ -190,8 +207,15 @@ class Block::Iter : public Iterator {
}
}
// We might be able to use our current position within the restart block.
// This is true if we determined the key we desire is in the current block
// and is after the current key.
assert(current_key_compare == 0 || Valid());
bool skip_seek = left == restart_index_ && current_key_compare < 0;
if (!skip_seek) {
SeekToRestartPoint(left);
}
// Linear search (within restart block) for first key >= target
SeekToRestartPoint(left);
while (true) {
if (!ParseNextKey()) {
return;
@ -202,12 +226,12 @@ class Block::Iter : public Iterator {
}
}
virtual void SeekToFirst() {
void SeekToFirst() override {
SeekToRestartPoint(0);
ParseNextKey();
}
virtual void SeekToLast() {
void SeekToLast() override {
SeekToRestartPoint(num_restarts_ - 1);
while (ParseNextKey() && NextEntryOffset() < restarts_) {
// Keep skipping
@ -237,7 +261,7 @@ class Block::Iter : public Iterator {
// Decode next entry
uint32_t shared, non_shared, value_length;
p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
if (p == NULL || key_.size() < shared) {
if (p == nullptr || key_.size() < shared) {
CorruptionError();
return false;
} else {
@ -253,7 +277,7 @@ class Block::Iter : public Iterator {
}
};
Iterator* Block::NewIterator(const Comparator* cmp) {
Iterator* Block::NewIterator(const Comparator* comparator) {
if (size_ < sizeof(uint32_t)) {
return NewErrorIterator(Status::Corruption("bad block contents"));
}
@ -261,7 +285,7 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
if (num_restarts == 0) {
return NewEmptyIterator();
} else {
return new Iter(cmp, data_, restart_offset_, num_restarts);
return new Iter(comparator, data_, restart_offset_, num_restarts);
}
}
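
Each block entry that DecodeEntry() parses is a key delta against the previous key: three varint32 header fields (shared prefix length, non-shared length, value length, stored in one byte each on the fast path when all are below 128), followed by the non-shared key bytes and then the value. A sketch of the inverse operation, conceptually what BlockBuilder::Add() does when writing an entry (AppendBlockEntry is a hypothetical helper shown for illustration; restart-point bookkeeping is omitted):

// Hypothetical helper: appends one prefix-compressed entry in the layout
// that DecodeEntry() parses.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>

#include "leveldb/slice.h"
#include "util/coding.h"

void AppendBlockEntry(std::string* buffer, const leveldb::Slice& last_key,
                      const leveldb::Slice& key, const leveldb::Slice& value) {
  // Length of the prefix this key shares with the previous key.
  size_t shared = 0;
  const size_t min_len = std::min(last_key.size(), key.size());
  while (shared < min_len && last_key[shared] == key[shared]) {
    shared++;
  }
  const size_t non_shared = key.size() - shared;

  // Header: <shared><non_shared><value_length>, each a varint32. When all
  // three are < 128 they occupy one byte each (DecodeEntry's fast path).
  leveldb::PutVarint32(buffer, static_cast<uint32_t>(shared));
  leveldb::PutVarint32(buffer, static_cast<uint32_t>(non_shared));
  leveldb::PutVarint32(buffer, static_cast<uint32_t>(value.size()));

  // Payload: the non-shared key suffix, then the value bytes.
  buffer->append(key.data() + shared, non_shared);
  buffer->append(value.data(), value.size());
}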

Some files were not shown because too many files have changed in this diff.