leveldb/port/port_posix.h
costan 5c39524f36 Replace SSE-optimized CRC32C in POSIX port with external library.
Maintaining a hardware-accelerated CRC32C implementation tailored for
all modern platforms deserves a repository of its own. We extracted the
implementation here into https://github.com/google/crc32c and improved
it in that repository. This CL removes the SSE-optimized implementation
from this codebase, and adds the ability to use the google/crc32c
library, if it is present on the system.

The benchmarks below show the performance impact of the change. In
summary, open source builds that use the google/crc32c library can
expect a 3x improvement in CRC32C throughput, whereas builds that do not
use the library will see a 50% drop in CRC32C throughput. This
translates in much smaller changes in overall leveldb performance.

Baseline, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       3.064 micros/op;   36.1 MB/s
fillsync     :      57.861 micros/op;    1.9 MB/s (1000 ops)
fillrandom   :       3.887 micros/op;   28.5 MB/s
overwrite    :       4.140 micros/op;   26.7 MB/s
readrandom   :       7.433 micros/op; (1000000 of 1000000 found)
readrandom   :       6.825 micros/op; (1000000 of 1000000 found)
readseq      :       0.244 micros/op;  453.4 MB/s
readreverse  :       0.387 micros/op;  285.8 MB/s
compact      :  449707.000 micros/op;
readrandom   :       4.196 micros/op; (1000000 of 1000000 found)
readseq      :       0.228 micros/op;  485.8 MB/s
readreverse  :       0.320 micros/op;  345.2 MB/s
fill100K     :     562.556 micros/op;  169.6 MB/s (1000 ops)
crc32c       :       0.768 micros/op; 5085.0 MB/s (4K per op)
snappycomp   :       4.220 micros/op;  925.7 MB/s (output: 55.1%)
snappyuncomp :       0.635 micros/op; 6155.7 MB/s
acquireload  :      13.054 micros/op; (each op is 1000 loads)

New with crc32c, MacBookPro13,3 with Core i7 6920HQ:
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       2.820 micros/op;   39.2 MB/s
fillsync     :      51.988 micros/op;    2.1 MB/s (1000 ops)
fillrandom   :       3.747 micros/op;   29.5 MB/s
overwrite    :       4.047 micros/op;   27.3 MB/s
readrandom   :       7.287 micros/op; (1000000 of 1000000 found)
readrandom   :       6.927 micros/op; (1000000 of 1000000 found)
readseq      :       0.253 micros/op;  437.5 MB/s
readreverse  :       0.411 micros/op;  269.2 MB/s
compact      :  440405.000 micros/op;
readrandom   :       4.159 micros/op; (1000000 of 1000000 found)
readseq      :       0.230 micros/op;  481.1 MB/s
readreverse  :       0.320 micros/op;  345.9 MB/s
fill100K     :     558.222 micros/op;  170.9 MB/s (1000 ops)
crc32c       :       0.214 micros/op; 18263.5 MB/s (4K per op)
snappycomp   :       4.471 micros/op;  873.7 MB/s (output: 55.1%)
snappyuncomp :       0.833 micros/op; 4688.5 MB/s
acquireload  :      13.289 micros/op; (each op is 1000 loads)

New without crc32c, MacBookPro13,3 with Core i7 6920HQ
LevelDB:    version 1.20
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1000000
RawSize:    110.6 MB (estimated)
FileSize:   62.9 MB (estimated)
------------------------------------------------
fillseq      :       3.094 micros/op;   35.8 MB/s
fillsync     :      52.160 micros/op;    2.1 MB/s (1000 ops)
fillrandom   :       4.090 micros/op;   27.0 MB/s
overwrite    :       4.006 micros/op;   27.6 MB/s
readrandom   :       6.584 micros/op; (1000000 of 1000000 found)
readrandom   :       6.676 micros/op; (1000000 of 1000000 found)
readseq      :       0.280 micros/op;  395.2 MB/s
readreverse  :       0.391 micros/op;  283.2 MB/s
compact      :  433911.000 micros/op;
readrandom   :       4.261 micros/op; (1000000 of 1000000 found)
readseq      :       0.251 micros/op;  440.5 MB/s
readreverse  :       0.356 micros/op;  310.9 MB/s
fill100K     :     584.023 micros/op;  163.3 MB/s (1000 ops)
crc32c       :       1.384 micros/op; 2822.3 MB/s (4K per op)
snappycomp   :       4.763 micros/op;  820.1 MB/s (output: 55.1%)
snappyuncomp :       0.766 micros/op; 5098.6 MB/s
acquireload  :      12.931 micros/op; (each op is 1000 loads)

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=171667771
2017-10-10 11:46:40 -07:00

157 lines
4.1 KiB
C++

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// See port_example.h for documentation for the following types/functions.
#ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_
#define STORAGE_LEVELDB_PORT_PORT_POSIX_H_
#undef PLATFORM_IS_LITTLE_ENDIAN
#if defined(__APPLE__)
#include <machine/endian.h>
#if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER)
#define PLATFORM_IS_LITTLE_ENDIAN \
(__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN)
#endif
#elif defined(OS_SOLARIS)
#include <sys/isa_defs.h>
#ifdef _LITTLE_ENDIAN
#define PLATFORM_IS_LITTLE_ENDIAN true
#else
#define PLATFORM_IS_LITTLE_ENDIAN false
#endif
#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) ||\
defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD)
#include <sys/types.h>
#include <sys/endian.h>
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
#elif defined(OS_HPUX)
#define PLATFORM_IS_LITTLE_ENDIAN false
#elif defined(OS_ANDROID)
// Due to a bug in the NDK x86 <sys/endian.h> definition,
// _BYTE_ORDER must be used instead of __BYTE_ORDER on Android.
// See http://code.google.com/p/android/issues/detail?id=39824
#include <endian.h>
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
#else
#include <endian.h>
#endif
#include <pthread.h>
#if defined(HAVE_CRC32C)
#include <crc32c/crc32c.h>
#endif // defined(HAVE_CRC32C)
#ifdef HAVE_SNAPPY
#include <snappy.h>
#endif // defined(HAVE_SNAPPY)
#include <stdint.h>
#include <string>
#include "port/atomic_pointer.h"
#ifndef PLATFORM_IS_LITTLE_ENDIAN
#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN)
#endif
#if defined(__APPLE__) || defined(OS_FREEBSD) ||\
defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD)
// Use fsync() on platforms without fdatasync()
#define fdatasync fsync
#endif
#if defined(OS_ANDROID) && __ANDROID_API__ < 9
// fdatasync() was only introduced in API level 9 on Android. Use fsync()
// when targetting older platforms.
#define fdatasync fsync
#endif
namespace leveldb {
namespace port {
static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN;
#undef PLATFORM_IS_LITTLE_ENDIAN
class CondVar;
class Mutex {
public:
Mutex();
~Mutex();
void Lock();
void Unlock();
void AssertHeld() { }
private:
friend class CondVar;
pthread_mutex_t mu_;
// No copying
Mutex(const Mutex&);
void operator=(const Mutex&);
};
class CondVar {
public:
explicit CondVar(Mutex* mu);
~CondVar();
void Wait();
void Signal();
void SignalAll();
private:
pthread_cond_t cv_;
Mutex* mu_;
};
typedef pthread_once_t OnceType;
#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT
extern void InitOnce(OnceType* once, void (*initializer)());
inline bool Snappy_Compress(const char* input, size_t length,
::std::string* output) {
#ifdef HAVE_SNAPPY
output->resize(snappy::MaxCompressedLength(length));
size_t outlen;
snappy::RawCompress(input, length, &(*output)[0], &outlen);
output->resize(outlen);
return true;
#endif // defined(HAVE_SNAPPY)
return false;
}
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
size_t* result) {
#ifdef HAVE_SNAPPY
return snappy::GetUncompressedLength(input, length, result);
#else
return false;
#endif // defined(HAVE_SNAPPY)
}
inline bool Snappy_Uncompress(const char* input, size_t length,
char* output) {
#ifdef HAVE_SNAPPY
return snappy::RawUncompress(input, length, output);
#else
return false;
#endif // defined(HAVE_SNAPPY)
}
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
return false;
}
inline uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if defined(HAVE_CRC32C)
return ::crc32c::Extend(crc, reinterpret_cast<const uint8_t*>(buf), size);
#else
return 0;
#endif // defined(HAVE_CRC32C)
}
} // namespace port
} // namespace leveldb
#endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_