leveldb/port/port_stdcxx.h

152 lines
3.7 KiB
C
Raw Normal View History

// Copyright (c) 2018 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
#define STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
// port/port_config.h availability is automatically detected via __has_include
// in newer compilers. If LEVELDB_HAS_PORT_CONFIG_H is defined, it overrides the
// configuration detection.
#if defined(LEVELDB_HAS_PORT_CONFIG_H)
#if LEVELDB_HAS_PORT_CONFIG_H
#include "port/port_config.h"
#endif // LEVELDB_HAS_PORT_CONFIG_H
#elif defined(__has_include)
#if __has_include("port/port_config.h")
#include "port/port_config.h"
#endif // __has_include("port/port_config.h")
#endif // defined(LEVELDB_HAS_PORT_CONFIG_H)
#if HAVE_CRC32C
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
#include <crc32c/crc32c.h>
#endif // HAVE_CRC32C
#if HAVE_SNAPPY
#include <snappy.h>
#endif // HAVE_SNAPPY
#include <cassert>
#include <condition_variable> // NOLINT
#include <cstddef>
#include <cstdint>
#include <mutex> // NOLINT
#include <string>
#include "port/thread_annotations.h"
namespace leveldb {
namespace port {
class CondVar;
// Thinly wraps std::mutex.
class LOCKABLE Mutex {
public:
Mutex() = default;
~Mutex() = default;
Mutex(const Mutex&) = delete;
Mutex& operator=(const Mutex&) = delete;
void Lock() EXCLUSIVE_LOCK_FUNCTION() { mu_.lock(); }
void Unlock() UNLOCK_FUNCTION() { mu_.unlock(); }
void AssertHeld() ASSERT_EXCLUSIVE_LOCK() {}
private:
friend class CondVar;
std::mutex mu_;
};
// Thinly wraps std::condition_variable.
class CondVar {
public:
explicit CondVar(Mutex* mu) : mu_(mu) { assert(mu != nullptr); }
~CondVar() = default;
CondVar(const CondVar&) = delete;
CondVar& operator=(const CondVar&) = delete;
void Wait() {
std::unique_lock<std::mutex> lock(mu_->mu_, std::adopt_lock);
cv_.wait(lock);
lock.release();
}
void Signal() { cv_.notify_one(); }
void SignalAll() { cv_.notify_all(); }
private:
std::condition_variable cv_;
Mutex* const mu_;
};
inline bool Snappy_Compress(const char* input, size_t length,
std::string* output) {
#if HAVE_SNAPPY
output->resize(snappy::MaxCompressedLength(length));
size_t outlen;
snappy::RawCompress(input, length, &(*output)[0], &outlen);
output->resize(outlen);
return true;
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)output;
#endif // HAVE_SNAPPY
return false;
}
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result) {
#if HAVE_SNAPPY
return snappy::GetUncompressedLength(input, length, result);
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)result;
return false;
#endif // HAVE_SNAPPY
}
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#if HAVE_SNAPPY
return snappy::RawUncompress(input, length, output);
#else
// Silence compiler warnings about unused arguments.
(void)input;
(void)length;
(void)output;
return false;
#endif // HAVE_SNAPPY
}
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
// Silence compiler warnings about unused arguments.
(void)func;
(void)arg;
return false;
}
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
inline uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if HAVE_CRC32C
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
return ::crc32c::Extend(crc, reinterpret_cast<const uint8_t*>(buf), size);
#else
// Silence compiler warnings about unused arguments.
(void)crc;
(void)buf;
(void)size;
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
return 0;
#endif // HAVE_CRC32C
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
}
Replace SSE-optimized CRC32C in POSIX port with external library. Maintaining a hardware-accelerated CRC32C implementation tailored for all modern platforms deserves a repository of its own. We extracted the implementation here into https://github.com/google/crc32c and improved it in that repository. This CL removes the SSE-optimized implementation from this codebase, and adds the ability to use the google/crc32c library, if it is present on the system. The benchmarks below show the performance impact of the change. In summary, open source builds that use the google/crc32c library can expect a 3x improvement in CRC32C throughput, whereas builds that do not use the library will see a 50% drop in CRC32C throughput. This translates in much smaller changes in overall leveldb performance. Baseline, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.064 micros/op; 36.1 MB/s fillsync : 57.861 micros/op; 1.9 MB/s (1000 ops) fillrandom : 3.887 micros/op; 28.5 MB/s overwrite : 4.140 micros/op; 26.7 MB/s readrandom : 7.433 micros/op; (1000000 of 1000000 found) readrandom : 6.825 micros/op; (1000000 of 1000000 found) readseq : 0.244 micros/op; 453.4 MB/s readreverse : 0.387 micros/op; 285.8 MB/s compact : 449707.000 micros/op; readrandom : 4.196 micros/op; (1000000 of 1000000 found) readseq : 0.228 micros/op; 485.8 MB/s readreverse : 0.320 micros/op; 345.2 MB/s fill100K : 562.556 micros/op; 169.6 MB/s (1000 ops) crc32c : 0.768 micros/op; 5085.0 MB/s (4K per op) snappycomp : 4.220 micros/op; 925.7 MB/s (output: 55.1%) snappyuncomp : 0.635 micros/op; 6155.7 MB/s acquireload : 13.054 micros/op; (each op is 1000 loads) New with crc32c, MacBookPro13,3 with Core i7 6920HQ: LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.820 micros/op; 39.2 MB/s fillsync : 51.988 micros/op; 2.1 MB/s (1000 ops) fillrandom : 3.747 micros/op; 29.5 MB/s overwrite : 4.047 micros/op; 27.3 MB/s readrandom : 7.287 micros/op; (1000000 of 1000000 found) readrandom : 6.927 micros/op; (1000000 of 1000000 found) readseq : 0.253 micros/op; 437.5 MB/s readreverse : 0.411 micros/op; 269.2 MB/s compact : 440405.000 micros/op; readrandom : 4.159 micros/op; (1000000 of 1000000 found) readseq : 0.230 micros/op; 481.1 MB/s readreverse : 0.320 micros/op; 345.9 MB/s fill100K : 558.222 micros/op; 170.9 MB/s (1000 ops) crc32c : 0.214 micros/op; 18263.5 MB/s (4K per op) snappycomp : 4.471 micros/op; 873.7 MB/s (output: 55.1%) snappyuncomp : 0.833 micros/op; 4688.5 MB/s acquireload : 13.289 micros/op; (each op is 1000 loads) New without crc32c, MacBookPro13,3 with Core i7 6920HQ LevelDB: version 1.20 Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 3.094 micros/op; 35.8 MB/s fillsync : 52.160 micros/op; 2.1 MB/s (1000 ops) fillrandom : 4.090 micros/op; 27.0 MB/s overwrite : 4.006 micros/op; 27.6 MB/s readrandom : 6.584 micros/op; (1000000 of 1000000 found) readrandom : 6.676 micros/op; (1000000 of 1000000 found) readseq : 0.280 micros/op; 395.2 MB/s readreverse : 0.391 micros/op; 283.2 MB/s compact : 433911.000 micros/op; readrandom : 4.261 micros/op; (1000000 of 1000000 found) readseq : 0.251 micros/op; 440.5 MB/s readreverse : 0.356 micros/op; 310.9 MB/s fill100K : 584.023 micros/op; 163.3 MB/s (1000 ops) crc32c : 1.384 micros/op; 2822.3 MB/s (4K per op) snappycomp : 4.763 micros/op; 820.1 MB/s (output: 55.1%) snappyuncomp : 0.766 micros/op; 5098.6 MB/s acquireload : 12.931 micros/op; (each op is 1000 loads) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171667771
2017-10-10 21:05:17 +08:00
} // namespace port
} // namespace leveldb
#endif // STORAGE_LEVELDB_PORT_PORT_STDCXX_H_