fix: breakpad use miniz
Some checks failed
sm-rpc / build (Debug, arm-linux-gnueabihf) (push) Successful in 1m34s
sm-rpc / build (Debug, aarch64-linux-gnu) (push) Successful in 2m46s
sm-rpc / build (Debug, host.gcc) (push) Failing after 1m28s
sm-rpc / build (Release, aarch64-linux-gnu) (push) Successful in 2m14s
sm-rpc / build (Release, arm-linux-gnueabihf) (push) Successful in 2m8s
sm-rpc / build (Debug, mipsel-linux-gnu) (push) Successful in 5m35s
sm-rpc / build (Release, host.gcc) (push) Failing after 1m55s
sm-rpc / build (Release, mipsel-linux-gnu) (push) Successful in 7m21s
Some checks failed
sm-rpc / build (Debug, arm-linux-gnueabihf) (push) Successful in 1m34s
sm-rpc / build (Debug, aarch64-linux-gnu) (push) Successful in 2m46s
sm-rpc / build (Debug, host.gcc) (push) Failing after 1m28s
sm-rpc / build (Release, aarch64-linux-gnu) (push) Successful in 2m14s
sm-rpc / build (Release, arm-linux-gnueabihf) (push) Successful in 2m8s
sm-rpc / build (Debug, mipsel-linux-gnu) (push) Successful in 5m35s
sm-rpc / build (Release, host.gcc) (push) Failing after 1m55s
sm-rpc / build (Release, mipsel-linux-gnu) (push) Successful in 7m21s
This commit is contained in:
116
third_party/zlib-ng/test/benchmarks/CMakeLists.txt
vendored
Normal file
116
third_party/zlib-ng/test/benchmarks/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
if(NOT DEFINED CMAKE_CXX_STANDARD)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
endif()
|
||||
if(NOT DEFINED CMAKE_CXX_STANDARD_REQUIRED)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
endif()
|
||||
if(NOT DEFINED CMAKE_CXX_EXTENSIONS)
|
||||
set(CMAKE_CXX_EXTENSIONS ON)
|
||||
endif()
|
||||
|
||||
enable_language(CXX)
|
||||
|
||||
# Locate Google Benchmark. An already-installed package is preferred; when
# none is found the sources are fetched and built as part of this tree.
find_package(benchmark QUIET)
if(NOT benchmark_FOUND)
    # Set before the fetched project is added so that it sees this value
    # (presumably switches off Google Benchmark's own test targets - confirm
    # against the fetched version).
    set(BENCHMARK_ENABLE_TESTING OFF)

    # Both the repository and the tag can be overridden by defining
    # GBENCHMARK_REPOSITORY / GBENCHMARK_TAG before this file is processed.
    if(NOT DEFINED GBENCHMARK_REPOSITORY)
        set(GBENCHMARK_REPOSITORY "https://github.com/google/benchmark.git")
    endif()
    if(NOT DEFINED GBENCHMARK_TAG)
        set(GBENCHMARK_TAG "v1.7.1")
    endif()

    FetchContent_Declare(benchmark
        GIT_REPOSITORY "${GBENCHMARK_REPOSITORY}"
        GIT_TAG "${GBENCHMARK_TAG}")

    # Populate only once, then add the fetched tree. EXCLUDE_FROM_ALL keeps
    # its targets out of the default build unless something depends on them.
    FetchContent_GetProperties(benchmark)
    if(NOT benchmark_POPULATED)
        FetchContent_Populate(benchmark)
        add_subdirectory("${benchmark_SOURCE_DIR}" "${benchmark_BINARY_DIR}" EXCLUDE_FROM_ALL)
    endif()
endif()
|
||||
|
||||
# Microbenchmark executable: one translation unit per benchmarked routine
# plus the shared entry point in benchmark_main.cc.
add_executable(benchmark_zlib
    benchmark_adler32.cc
    benchmark_adler32_copy.cc
    benchmark_compare256.cc
    benchmark_compare256_rle.cc
    benchmark_compress.cc
    benchmark_crc32.cc
    benchmark_main.cc
    benchmark_slidehash.cc
    benchmark_uncompress.cc
    )

# CMake strips a leading -D from definitions, so the bare name is equivalent.
target_compile_definitions(benchmark_zlib PRIVATE BENCHMARK_STATIC_DEFINE)

# The sources include headers from the project source tree and the build tree
# (PROJECT_SOURCE_DIR / PROJECT_BINARY_DIR) as well as <benchmark/benchmark.h>.
target_include_directories(benchmark_zlib PRIVATE
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_BINARY_DIR}
    ${benchmark_SOURCE_DIR}/benchmark/include)

target_link_libraries(benchmark_zlib zlibstatic benchmark::benchmark)
if(WIN32)
    # Extra system library needed on Windows
    target_link_libraries(benchmark_zlib shlwapi)
endif()

# Register the benchmark binary as a test; when cross-compiling it is run
# through CMAKE_CROSSCOMPILING_EMULATOR (empty otherwise).
add_test(NAME benchmark_zlib
    COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $<TARGET_FILE:benchmark_zlib>)
|
||||
|
||||
# Optional application-level benchmarks (PNG encode/decode built on top of zlib).
if(WITH_BENCHMARK_APPS)
    # Opt-in switch for the additional benchmark_zlib_apps_alt executable, which
    # links the same sources against an alternative zlib (see ZLIB_ALT_LIB).
    option(BUILD_ALT_BENCH "Link against alternative zlib implementation" OFF)

    # Search for libpng package
    find_package(PNG QUIET)

    if(NOT PNG_FOUND)
        # NOTE(review): no GIT_TAG is given here, so the fetched revision is not pinned.
        FetchContent_Declare(PNG
            GIT_REPOSITORY https://github.com/glennrp/libpng.git)

        # FetchContent publishes its result variables under the lower-cased
        # content name, hence png_POPULATED / png_SOURCE_DIR / png_BINARY_DIR.
        FetchContent_GetProperties(PNG)
        if(NOT png_POPULATED)
            FetchContent_Populate(PNG)
            set(PNG_INCLUDE_DIR ${png_SOURCE_DIR})
            add_subdirectory(${png_SOURCE_DIR} ${png_BINARY_DIR})
        endif()
    endif()

    set(BENCH_APP_SRCS
        benchmark_png_encode.cc
        benchmark_png_decode.cc
        benchmark_main.cc
        )

    add_executable(benchmark_zlib_apps ${BENCH_APP_SRCS})

    # Test the option's value, not whether it is defined: option() always
    # defines the variable, so if(DEFINED ...) would ignore the OFF default.
    if(BUILD_ALT_BENCH)
        set(ZLIB_ALT_LIB "libz.a" CACHE FILEPATH "Optional alternative zlib implementation (defaults to stock zlib)")
        add_executable(benchmark_zlib_apps_alt ${BENCH_APP_SRCS})
        target_link_libraries(benchmark_zlib_apps_alt libpng.a ${ZLIB_ALT_LIB} benchmark::benchmark)
        # BUILD_ALT makes benchmark_main.cc skip the zlib-ng specific CPU feature setup
        target_compile_definitions(benchmark_zlib_apps_alt PRIVATE BUILD_ALT=1)
        target_include_directories(benchmark_zlib_apps_alt PRIVATE
            ${PROJECT_SOURCE_DIR}
            ${PROJECT_BINARY_DIR}
            ${PNG_INCLUDE_DIR}
            ${benchmark_SOURCE_DIR}/benchmark/include)
    endif()

    target_include_directories(benchmark_zlib_apps PRIVATE
        ${PROJECT_SOURCE_DIR}
        ${PROJECT_BINARY_DIR}
        ${PNG_INCLUDE_DIR}
        ${benchmark_SOURCE_DIR}/benchmark/include)

    # We need the static png library if we're statically linking to zlib,
    # otherwise it will resolve these things in the system provided dynamic
    # libraries (likely linked to stock zlib)
    target_link_libraries(benchmark_zlib_apps libpng.a zlibstatic benchmark::benchmark)
endif()
|
47
third_party/zlib-ng/test/benchmarks/README.md
vendored
Normal file
47
third_party/zlib-ng/test/benchmarks/README.md
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
## Benchmarks
|
||||
These benchmarks are written using [Google Benchmark](https://github.com/google/benchmark).
|
||||
|
||||
*Repetitions*
|
||||
|
||||
To increase the number of times each benchmark iteration is run use:
|
||||
|
||||
```
|
||||
--benchmark_repetitions=20
|
||||
```
|
||||
|
||||
*Filters*
|
||||
|
||||
To filter out which benchmarks are performed use:
|
||||
|
||||
```
|
||||
--benchmark_filter="adler32*"
|
||||
```
|
||||
|
||||
There are two different benchmarks, micro and macro.
|
||||
|
||||
### Benchmark benchmark_zlib
|
||||
These are microbenchmarks intended to test lower level subfunctions of the library.
|
||||
|
||||
Benchmarks include implementations of:
|
||||
- Adler32
|
||||
- CRC
|
||||
- 256 byte comparisons
|
||||
- SIMD accelerated "slide hash" routine
|
||||
|
||||
By default these benchmarks report things on the nanosecond scale and are small enough
|
||||
to measure very minute differences.
|
||||
|
||||
### Benchmark benchmark_zlib_apps
|
||||
These benchmarks measure applications of zlib as a whole. Currently the only examples
|
||||
are PNG encoding and decoding. The PNG encode and decode tests leverage procedurally
|
||||
generated and highly compressible image data.
|
||||
|
||||
Additionally, there is a test called `png_decode_realistic` that will decode any RGB 8 BPP encoded
|
||||
set of PNGs in the working directory under a directory named "test_pngs" with files named
|
||||
{0..9}.png. If these images do not exist, the test will error out and the benchmark will move
|
||||
on to the next set of benchmarks.
|
||||
|
||||
*benchmark_zlib_apps_alt*
|
||||
|
||||
The user can compile a comparison benchmark application linking to any zlib-compatible
|
||||
implementation of his or her choosing.
|
100
third_party/zlib-ng/test/benchmarks/benchmark_adler32.cc
vendored
Normal file
100
third_party/zlib-ng/test/benchmarks/benchmark_adler32.cc
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
/* benchmark_adler32.cc -- benchmark adler32 variants
|
||||
* Copyright (C) 2022 Nathan Moinvaziri, Adam Stylinski
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "arch_functions.h"
|
||||
# include "../test_cpu_features.h"
|
||||
}
|
||||
|
||||
#define MAX_RANDOM_INTS (1024 * 1024)
|
||||
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
|
||||
|
||||
class adler32: public benchmark::Fixture {
|
||||
private:
|
||||
uint32_t *random_ints;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
/* Control the alignment so that we have the best case scenario for loads. With
|
||||
* AVX512, unaligned loads can mean we're crossing a cacheline boundary at every load.
|
||||
* And while this is a realistic scenario, it makes it difficult to compare benchmark
|
||||
* to benchmark because one allocation could have been aligned perfectly for the loads
|
||||
* while the subsequent one happened to not be. This is not to be advantageous to AVX512
|
||||
* (indeed, all lesser SIMD implementations benefit from this aligned allocation), but to
|
||||
* control the _consistency_ of the results */
|
||||
random_ints = (uint32_t *)zng_alloc(MAX_RANDOM_INTS_SIZE);
|
||||
assert(random_ints != NULL);
|
||||
|
||||
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
|
||||
random_ints[i] = rand();
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, adler32_func adler32) {
|
||||
uint32_t hash = 0;
|
||||
|
||||
for (auto _ : state) {
|
||||
hash = adler32(hash, (const unsigned char *)random_ints, (size_t)state.range(0));
|
||||
}
|
||||
|
||||
benchmark::DoNotOptimize(hash);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(random_ints);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_ADLER32(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(adler32, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, fptr); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(adler32, name)->Arg(1)->Arg(8)->Arg(12)->Arg(16)->Arg(32)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(32<<10)->Arg(256<<10)->Arg(4096<<10)
|
||||
|
||||
BENCHMARK_ADLER32(c, adler32_c, 1);
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
BENCHMARK_ADLER32(native, native_adler32, 1);
|
||||
#else
|
||||
|
||||
#ifdef ARM_NEON
|
||||
BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon);
|
||||
#endif
|
||||
|
||||
#ifdef PPC_VMX
|
||||
BENCHMARK_ADLER32(vmx, adler32_vmx, test_cpu_features.power.has_altivec);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
BENCHMARK_ADLER32(power8, adler32_power8, test_cpu_features.power.has_arch_2_07);
|
||||
#endif
|
||||
|
||||
#ifdef RISCV_RVV
|
||||
BENCHMARK_ADLER32(rvv, adler32_rvv, test_cpu_features.riscv.has_rvv);
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSSE3
|
||||
BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
|
||||
#endif
|
||||
|
||||
#endif
|
130
third_party/zlib-ng/test/benchmarks/benchmark_adler32_copy.cc
vendored
Normal file
130
third_party/zlib-ng/test/benchmarks/benchmark_adler32_copy.cc
vendored
Normal file
@@ -0,0 +1,130 @@
|
||||
/* benchmark_adler32_copy.cc -- benchmark adler32 (elided copy) variants
|
||||
* Copyright (C) 2022 Nathan Moinvaziri, Adam Stylinski
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "arch_functions.h"
|
||||
# include "../test_cpu_features.h"
|
||||
}
|
||||
|
||||
#define MAX_RANDOM_INTS (1024 * 1024)
|
||||
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
|
||||
|
||||
typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, size_t len);
|
||||
|
||||
class adler32_copy: public benchmark::Fixture {
|
||||
private:
|
||||
uint32_t *random_ints_src;
|
||||
uint32_t *random_ints_dst;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
/* Control the alignment so that we have the best case scenario for loads. With
|
||||
* AVX512, unaligned loads can mean we're crossing a cacheline boundary at every load.
|
||||
* And while this is a realistic scenario, it makes it difficult to compare benchmark
|
||||
* to benchmark because one allocation could have been aligned perfectly for the loads
|
||||
* while the subsequent one happened to not be. This is not to be advantageous to AVX512
|
||||
* (indeed, all lesser SIMD implementations benefit from this aligned allocation), but to
|
||||
* control the _consistency_ of the results */
|
||||
random_ints_src = (uint32_t *)zng_alloc(MAX_RANDOM_INTS_SIZE);
|
||||
random_ints_dst = (uint32_t *)zng_alloc(MAX_RANDOM_INTS_SIZE);
|
||||
assert(random_ints_src != NULL);
|
||||
assert(random_ints_dst != NULL);
|
||||
|
||||
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
|
||||
random_ints_src[i] = rand();
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, adler32_cpy_func adler32_func) {
|
||||
uint32_t hash = 0;
|
||||
|
||||
for (auto _ : state) {
|
||||
hash = adler32_func(hash, (unsigned char *)random_ints_dst,
|
||||
(const unsigned char*)random_ints_src, (size_t)state.range(0));
|
||||
}
|
||||
|
||||
benchmark::DoNotOptimize(hash);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(random_ints_src);
|
||||
zng_free(random_ints_dst);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_ADLER32_COPY(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(adler32_copy, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, fptr); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(adler32_copy, name)->Range(8192, MAX_RANDOM_INTS_SIZE);
|
||||
|
||||
#define BENCHMARK_ADLER32_BASELINE_COPY(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(adler32_copy, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, [](uint32_t init_sum, unsigned char *dst, \
|
||||
const uint8_t *buf, size_t len) -> uint32_t { \
|
||||
memcpy(dst, buf, (size_t)len); \
|
||||
return fptr(init_sum, buf, len); \
|
||||
}); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(adler32_copy, name)->Range(8192, MAX_RANDOM_INTS_SIZE);
|
||||
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(native, native_adler32, 1);
|
||||
#else
|
||||
|
||||
#ifdef ARM_NEON
|
||||
/* If we inline this copy for neon, the function would go here */
|
||||
//BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon);
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, test_cpu_features.arm.has_neon);
|
||||
#endif
|
||||
|
||||
#ifdef PPC_VMX
|
||||
//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, test_cpu_features.power.has_altivec);
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, test_cpu_features.power.has_altivec);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, test_cpu_features.power.has_arch_2_07);
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, test_cpu_features.power.has_arch_2_07);
|
||||
#endif
|
||||
|
||||
#ifdef RISCV_RVV
|
||||
//BENCHMARK_ADLER32_COPY(rvv, adler32_rvv, test_cpu_features.riscv.has_rvv);
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(rvv, adler32_rvv, test_cpu_features.riscv.has_rvv);
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSE42
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3);
|
||||
BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2);
|
||||
BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512_common);
|
||||
BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512_common);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
|
||||
BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
|
||||
#endif
|
||||
|
||||
#endif
|
93
third_party/zlib-ng/test/benchmarks/benchmark_compare256.cc
vendored
Normal file
93
third_party/zlib-ng/test/benchmarks/benchmark_compare256.cc
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
/* benchmark_compare256.cc -- benchmark compare256 variants
|
||||
* Copyright (C) 2022 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "arch_functions.h"
|
||||
# include "../test_cpu_features.h"
|
||||
# include "arch/generic/compare256_p.h"
|
||||
}
|
||||
|
||||
#define MAX_COMPARE_SIZE (256)
|
||||
|
||||
class compare256: public benchmark::Fixture {
|
||||
private:
|
||||
uint8_t *str1;
|
||||
uint8_t *str2;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
str1 = (uint8_t *)zng_alloc(MAX_COMPARE_SIZE);
|
||||
assert(str1 != NULL);
|
||||
memset(str1, 'a', MAX_COMPARE_SIZE);
|
||||
|
||||
str2 = (uint8_t *)zng_alloc(MAX_COMPARE_SIZE);
|
||||
assert(str2 != NULL);
|
||||
memset(str2, 'a', MAX_COMPARE_SIZE);
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, compare256_func compare256) {
|
||||
int32_t match_len = (int32_t)state.range(0) - 1;
|
||||
uint32_t len = 0;
|
||||
|
||||
str2[match_len] = 0;
|
||||
for (auto _ : state) {
|
||||
len = compare256((const uint8_t *)str1, (const uint8_t *)str2);
|
||||
}
|
||||
str2[match_len] = 'a';
|
||||
|
||||
benchmark::DoNotOptimize(len);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(str1);
|
||||
zng_free(str2);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_COMPARE256(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(compare256, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, fptr); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(compare256, name)->Range(1, MAX_COMPARE_SIZE);
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
BENCHMARK_COMPARE256(native, native_compare256, 1);
|
||||
#else
|
||||
|
||||
BENCHMARK_COMPARE256(8, compare256_8, 1);
|
||||
BENCHMARK_COMPARE256(16, compare256_16, 1);
|
||||
#if defined(HAVE_BUILTIN_CTZ)
|
||||
BENCHMARK_COMPARE256(32, compare256_32, 1);
|
||||
#endif
|
||||
#if defined(HAVE_BUILTIN_CTZLL)
|
||||
BENCHMARK_COMPARE256(64, compare256_64, 1);
|
||||
#endif
|
||||
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
BENCHMARK_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
BENCHMARK_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv);
|
||||
#endif
|
||||
|
||||
#endif
|
69
third_party/zlib-ng/test/benchmarks/benchmark_compare256_rle.cc
vendored
Normal file
69
third_party/zlib-ng/test/benchmarks/benchmark_compare256_rle.cc
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
/* benchmark_compare256_rle.cc -- benchmark compare256_rle variants
|
||||
* Copyright (C) 2022 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "compare256_rle.h"
|
||||
}
|
||||
|
||||
#define MAX_COMPARE_SIZE (256)
|
||||
|
||||
class compare256_rle: public benchmark::Fixture {
|
||||
private:
|
||||
uint8_t *str1;
|
||||
uint8_t *str2;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
str1 = (uint8_t *)zng_alloc(MAX_COMPARE_SIZE);
|
||||
assert(str1 != NULL);
|
||||
memset(str1, 'a', MAX_COMPARE_SIZE);
|
||||
|
||||
str2 = (uint8_t *)zng_alloc(MAX_COMPARE_SIZE);
|
||||
assert(str2 != NULL);
|
||||
memset(str2, 'a', MAX_COMPARE_SIZE);
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, compare256_rle_func compare256_rle) {
|
||||
int32_t match_len = (int32_t)state.range(0) - 1;
|
||||
uint32_t len = 0;
|
||||
|
||||
str2[match_len] = 0;
|
||||
for (auto _ : state) {
|
||||
len = compare256_rle((const uint8_t *)str1, (const uint8_t *)str2);
|
||||
}
|
||||
str2[match_len] = 'a';
|
||||
|
||||
benchmark::DoNotOptimize(len);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(str1);
|
||||
zng_free(str2);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_COMPARE256_RLE(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(compare256_rle, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, fptr); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(compare256_rle, name)->Range(1, MAX_COMPARE_SIZE);
|
||||
|
||||
BENCHMARK_COMPARE256_RLE(8, compare256_rle_8, 1);
|
||||
BENCHMARK_COMPARE256_RLE(16, compare256_rle_16, 1);
|
||||
#if defined(HAVE_BUILTIN_CTZ)
|
||||
BENCHMARK_COMPARE256_RLE(32, compare256_rle_32, 1);
|
||||
#endif
|
||||
#if defined(HAVE_BUILTIN_CTZLL)
|
||||
BENCHMARK_COMPARE256_RLE(64, compare256_rle_64, 1);
|
||||
#endif
|
67
third_party/zlib-ng/test/benchmarks/benchmark_compress.cc
vendored
Normal file
67
third_party/zlib-ng/test/benchmarks/benchmark_compress.cc
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
/* benchmark_compress.cc -- benchmark compress()
|
||||
* Copyright (C) 2024 Hans Kristian Rosbach
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# if defined(ZLIB_COMPAT)
|
||||
# include "zlib.h"
|
||||
# else
|
||||
# include "zlib-ng.h"
|
||||
# endif
|
||||
}
|
||||
|
||||
#define MAX_SIZE (32 * 1024)
|
||||
|
||||
class compress_bench: public benchmark::Fixture {
|
||||
private:
|
||||
size_t maxlen;
|
||||
uint8_t *inbuff;
|
||||
uint8_t *outbuff;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
const char teststr[42] = "Hello hello World broken Test tast mello.";
|
||||
maxlen = MAX_SIZE;
|
||||
|
||||
inbuff = (uint8_t *)zng_alloc(MAX_SIZE + 1);
|
||||
assert(inbuff != NULL);
|
||||
|
||||
outbuff = (uint8_t *)zng_alloc(MAX_SIZE + 1);
|
||||
assert(outbuff != NULL);
|
||||
|
||||
int pos = 0;
|
||||
for (int32_t i = 0; i < MAX_SIZE - 42 ; i+=42){
|
||||
pos += sprintf((char *)inbuff+pos, "%s", teststr);
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state) {
|
||||
int err = 0;
|
||||
|
||||
for (auto _ : state) {
|
||||
err = PREFIX(compress)(outbuff, &maxlen, inbuff, (size_t)state.range(0));
|
||||
}
|
||||
|
||||
benchmark::DoNotOptimize(err);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(inbuff);
|
||||
zng_free(outbuff);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_COMPRESS(name) \
|
||||
BENCHMARK_DEFINE_F(compress_bench, name)(benchmark::State& state) { \
|
||||
Bench(state); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(compress_bench, name)->Arg(1)->Arg(8)->Arg(16)->Arg(32)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(32<<10);
|
||||
|
||||
BENCHMARK_COMPRESS(compress_bench);
|
83
third_party/zlib-ng/test/benchmarks/benchmark_crc32.cc
vendored
Normal file
83
third_party/zlib-ng/test/benchmarks/benchmark_crc32.cc
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
/* benchmark_crc32.cc -- benchmark crc32 variants
|
||||
* Copyright (C) 2022 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "arch_functions.h"
|
||||
# include "../test_cpu_features.h"
|
||||
}
|
||||
|
||||
#define MAX_RANDOM_INTS (1024 * 1024)
|
||||
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
|
||||
|
||||
class crc32: public benchmark::Fixture {
|
||||
private:
|
||||
uint32_t *random_ints;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
random_ints = (uint32_t *)zng_alloc(MAX_RANDOM_INTS_SIZE);
|
||||
assert(random_ints != NULL);
|
||||
|
||||
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
|
||||
random_ints[i] = rand();
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, crc32_func crc32) {
|
||||
uint32_t hash = 0;
|
||||
|
||||
for (auto _ : state) {
|
||||
hash = crc32(hash, (const unsigned char *)random_ints, (size_t)state.range(0));
|
||||
}
|
||||
|
||||
benchmark::DoNotOptimize(hash);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(random_ints);
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_CRC32(name, fptr, support_flag) \
|
||||
BENCHMARK_DEFINE_F(crc32, name)(benchmark::State& state) { \
|
||||
if (!support_flag) { \
|
||||
state.SkipWithError("CPU does not support " #name); \
|
||||
} \
|
||||
Bench(state, fptr); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(crc32, name)->Arg(1)->Arg(8)->Arg(12)->Arg(16)->Arg(32)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(32<<10)->Arg(256<<10)->Arg(4096<<10);
|
||||
|
||||
BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
BENCHMARK_CRC32(native, native_crc32, 1);
|
||||
#else
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32);
|
||||
#endif
|
||||
#ifdef POWER8_VSX_CRC32
|
||||
BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07);
|
||||
#endif
|
||||
#ifdef S390_CRC32_VX
|
||||
BENCHMARK_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx);
|
||||
#endif
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
/* CRC32 fold does a memory copy while hashing */
|
||||
BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq);
|
||||
#endif
|
||||
#ifdef X86_VPCLMULQDQ_CRC
|
||||
/* CRC32 fold does a memory copy while hashing */
|
||||
BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq));
|
||||
#endif
|
||||
|
||||
#endif
|
32
third_party/zlib-ng/test/benchmarks/benchmark_main.cc
vendored
Normal file
32
third_party/zlib-ng/test/benchmarks/benchmark_main.cc
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
/* benchmark_main.cc -- benchmark suite main entry point
|
||||
* Copyright (C) 2022 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#ifndef BUILD_ALT
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "../test_cpu_features.h"
|
||||
|
||||
# ifndef DISABLE_RUNTIME_CPU_DETECTION
|
||||
struct cpu_features test_cpu_features;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Entry point of the benchmark suite: fills in the CPU feature table (unless
 * this is the BUILD_ALT variant or runtime detection is disabled), then hands
 * the command line to Google Benchmark and runs the selected benchmarks. */
int main(int argc, char** argv) {
#if !defined(BUILD_ALT) && !defined(DISABLE_RUNTIME_CPU_DETECTION)
    /* Must happen before the benchmarks run: they consult test_cpu_features */
    cpu_check_features(&test_cpu_features);
#endif

    ::benchmark::Initialize(&argc, argv);
    ::benchmark::RunSpecifiedBenchmarks();

    return EXIT_SUCCESS;
}
|
126
third_party/zlib-ng/test/benchmarks/benchmark_png_decode.cc
vendored
Normal file
126
third_party/zlib-ng/test/benchmarks/benchmark_png_decode.cc
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
#include <stdio.h>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "benchmark_png_shared.h"
|
||||
#include <assert.h>
|
||||
|
||||
class png_decode: public benchmark::Fixture {
|
||||
protected:
|
||||
png_dat inpng[10];
|
||||
|
||||
/* Backing this on the heap is a more realistic benchmark */
|
||||
uint8_t *output_img_buf = NULL;
|
||||
|
||||
public:
|
||||
/* Let's make the vanilla version have something extremely compressible */
|
||||
virtual void init_img(png_bytep img_bytes, size_t width, size_t height) {
|
||||
init_compressible(img_bytes, width*height);
|
||||
}
|
||||
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
output_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
|
||||
assert(output_img_buf != NULL);
|
||||
init_img(output_img_buf, IMWIDTH, IMHEIGHT);
|
||||
|
||||
/* First we need to author the png bytes to be decoded */
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
inpng[i] = {NULL, 0, 0};
|
||||
encode_png(output_img_buf, &inpng[i], i, IMWIDTH, IMHEIGHT);
|
||||
}
|
||||
}
|
||||
|
||||
/* State in this circumstance will convey the compression level */
|
||||
void Bench(benchmark::State &state) {
|
||||
for (auto _ : state) {
|
||||
int compress_lvl = state.range(0);
|
||||
png_parse_dat in = { inpng[compress_lvl].buf };
|
||||
uint32_t width, height;
|
||||
decode_png(&in, (png_bytepp)&output_img_buf, IMWIDTH * IMHEIGHT * 3, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State &state) {
|
||||
free(output_img_buf);
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
free(inpng[i].buf);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class png_decode_realistic: public png_decode {
|
||||
private:
|
||||
bool test_files_found = false;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State &state) {
|
||||
output_img_buf = NULL;
|
||||
output_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
|
||||
/* Let's take all the images at different compression levels and jam their bytes into buffers */
|
||||
char test_fname[25];
|
||||
FILE *files[10];
|
||||
|
||||
/* Set all to NULL */
|
||||
memset(files, 0, sizeof(FILE*));
|
||||
|
||||
for (size_t i = 0; i < 10; ++i) {
|
||||
sprintf(test_fname, "test_pngs/%1lu.png", i);
|
||||
FILE *in_img = fopen(test_fname, "r");
|
||||
if (in_img == NULL) {
|
||||
for (size_t j = 0; j < i; ++j) {
|
||||
if (files[j])
|
||||
fclose(files[j]);
|
||||
}
|
||||
|
||||
/* For proper cleanup */
|
||||
for (size_t j = i; j < 10; ++j) {
|
||||
inpng[i] = { NULL, 0, 0 };
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
files[i] = in_img;
|
||||
}
|
||||
|
||||
test_files_found = true;
|
||||
/* Now that we've established we have all the png files, let's read all of their bytes into buffers */
|
||||
for (size_t i = 0; i < 10; ++i) {
|
||||
FILE *in_file = files[i];
|
||||
fseek(in_file, 0, SEEK_END);
|
||||
size_t num_bytes = ftell(in_file);
|
||||
rewind(in_file);
|
||||
|
||||
uint8_t *raw_file = (uint8_t*)malloc(num_bytes);
|
||||
if (raw_file == NULL)
|
||||
abort();
|
||||
|
||||
inpng[i].buf = raw_file;
|
||||
inpng[i].len = num_bytes;
|
||||
inpng[i].buf_rem = 0;
|
||||
|
||||
size_t bytes_read = fread(raw_file, 1, num_bytes, in_file);
|
||||
if (bytes_read != num_bytes) {
|
||||
fprintf(stderr, "couldn't read all of the bytes for file test_pngs/%lu.png", i);
|
||||
abort();
|
||||
}
|
||||
|
||||
fclose(in_file);
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State &state) {
|
||||
if (!test_files_found) {
|
||||
state.SkipWithError("Test imagery in test_pngs not found");
|
||||
}
|
||||
|
||||
png_decode::Bench(state);
|
||||
}
|
||||
};
|
||||
|
||||
/* Define the png_decode benchmark case and register it once for each
 * integer argument in the range 0..9, reporting times in microseconds. */
BENCHMARK_DEFINE_F(png_decode, png_decode)(benchmark::State &state) {
    Bench(state);
}
BENCHMARK_REGISTER_F(png_decode, png_decode)
    ->DenseRange(0, 9, 1)
    ->Unit(benchmark::kMicrosecond);
|
||||
|
||||
/* Define the png_decode_realistic benchmark case and register it once for
 * each integer argument in the range 0..9, reporting times in microseconds. */
BENCHMARK_DEFINE_F(png_decode_realistic, png_decode_realistic)(benchmark::State &state) {
    Bench(state);
}
BENCHMARK_REGISTER_F(png_decode_realistic, png_decode_realistic)
    ->DenseRange(0, 9, 1)
    ->Unit(benchmark::kMicrosecond);
|
54
third_party/zlib-ng/test/benchmarks/benchmark_png_encode.cc
vendored
Normal file
54
third_party/zlib-ng/test/benchmarks/benchmark_png_encode.cc
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "benchmark_png_shared.h"
|
||||
|
||||
#define IMWIDTH 1024
|
||||
#define IMHEIGHT 1024
|
||||
|
||||
class png_encode: public benchmark::Fixture {
|
||||
private:
|
||||
png_dat outpng;
|
||||
|
||||
/* Backing this on the heap is a more realistic benchmark */
|
||||
uint8_t *input_img_buf = NULL;
|
||||
|
||||
public:
|
||||
/* Let's make the vanilla version have something extremely compressible */
|
||||
virtual void init_img(png_bytep img_bytes, size_t width, size_t height) {
|
||||
init_compressible(img_bytes, width * height);
|
||||
}
|
||||
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
input_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
|
||||
outpng.buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
|
||||
/* Using malloc rather than zng_alloc so that we can call realloc.
|
||||
* IMWIDTH * IMHEIGHT is likely to be more than enough bytes, though,
|
||||
* given that a simple run length encoding already pretty much can
|
||||
* reduce to this */
|
||||
outpng.len = 0;
|
||||
outpng.buf_rem = IMWIDTH * IMHEIGHT * 3;
|
||||
assert(input_img_buf != NULL);
|
||||
assert(outpng.buf != NULL);
|
||||
init_img(input_img_buf, IMWIDTH, IMHEIGHT);
|
||||
}
|
||||
|
||||
/* State in this circumstance will convey the compression level */
|
||||
void Bench(benchmark::State &state) {
|
||||
for (auto _ : state) {
|
||||
encode_png((png_bytep)input_img_buf, &outpng, state.range(0), IMWIDTH, IMHEIGHT);
|
||||
outpng.buf_rem = outpng.len;
|
||||
outpng.len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State &state) {
|
||||
free(input_img_buf);
|
||||
free(outpng.buf);
|
||||
}
|
||||
};
|
||||
|
||||
/* Define the encode_compressible benchmark case and register it once for
 * each integer argument in the range 0..9, reporting times in microseconds. */
BENCHMARK_DEFINE_F(png_encode, encode_compressible)(benchmark::State &state) {
    Bench(state);
}
BENCHMARK_REGISTER_F(png_encode, encode_compressible)
    ->DenseRange(0, 9, 1)
    ->Unit(benchmark::kMicrosecond);
|
146
third_party/zlib-ng/test/benchmarks/benchmark_png_shared.h
vendored
Normal file
146
third_party/zlib-ng/test/benchmarks/benchmark_png_shared.h
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define IMWIDTH 1024
|
||||
#define IMHEIGHT 1024
|
||||
|
||||
extern "C" {
|
||||
# include <png.h>
|
||||
}
|
||||
|
||||
/* In-memory PNG byte buffer shared by the encode and decode benchmarks. */
typedef struct _png_dat {
    uint8_t *buf;    /* start of the heap allocation holding the PNG bytes */
    int64_t len;     /* number of bytes currently stored in buf */
    size_t buf_rem;  /* bytes of buf still unused after the first len bytes */
} png_dat;
|
||||
|
||||
/* Read cursor handed to libpng as the I/O pointer when decoding from memory. */
typedef struct _png_parse_dat {
    uint8_t *cur_pos;  /* next byte to be consumed by read_from_pngdat() */
} png_parse_dat;
|
||||
|
||||
/* Write a customized write callback so that we write back to an in-memory buffer.
|
||||
* This allows the testing to not involve disk IO */
|
||||
static void png_write_cb(png_structp pngp, png_bytep data, png_size_t len) {
|
||||
png_dat *dat = (png_dat*)png_get_io_ptr(pngp);
|
||||
size_t curSize = dat->len + len;
|
||||
|
||||
/* realloc double the requested buffer size to prevent excessive reallocs */
|
||||
if (dat->buf_rem < len) {
|
||||
dat->buf = (uint8_t*)realloc(dat->buf, dat->len + dat->buf_rem + 2 * len);
|
||||
|
||||
if (!dat->buf) {
|
||||
/* Pretty unlikely but we'll put it here just in case */
|
||||
fprintf(stderr, "realloc failed, exiting\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
dat->buf_rem += 2 * len;
|
||||
}
|
||||
|
||||
memcpy(dat->buf + dat->len, data, len);
|
||||
dat->len = curSize;
|
||||
dat->buf_rem -= len;
|
||||
}
|
||||
|
||||
/* Fill an RGB8 image (3 bytes per pixel, no alpha) with three solid stripes.
 *
 * buf     - destination, must hold at least 3 * num_pix bytes
 * num_pix - number of pixels to write
 *
 * It doesn't actually matter what the contents are, but for the sake of a
 * reasonable test image the first third of the pixels is pure red, the second
 * third pure green and the remainder pure blue.  All indexing is done in
 * size_t so large pixel counts are neither truncated nor overflowed. */
static void init_compressible(png_bytep buf, size_t num_pix) {
    const size_t red_stop = num_pix / 3;        /* first pixel of the green stripe */
    const size_t green_stop = 2 * num_pix / 3;  /* first pixel of the blue stripe */

    for (size_t pix = 0; pix < num_pix; ++pix) {
        png_bytep px = buf + 3 * pix;

        px[0] = (pix < red_stop) ? 255 : 0;
        px[1] = (pix >= red_stop && pix < green_stop) ? 255 : 0;
        px[2] = (pix >= green_stop) ? 255 : 0;
    }
}
|
||||
|
||||
/* Encode an RGB8 image to PNG, appending the stream to outpng via png_write_cb.
 *
 * buf        - source pixels, width * height * 3 bytes, rows stored contiguously
 * outpng     - destination buffer state handed to the write callback
 * comp_level - compression level passed to png_set_compression_level()
 * width      - image width in pixels
 * height     - image height in pixels
 *
 * Aborts if libpng cannot create its write or info structures. */
static inline void encode_png(png_bytep buf, png_dat *outpng, int32_t comp_level, uint32_t width, uint32_t height) {
    png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);

    /* Most of this error handling is _likely_ not necessary. Likewise it's likely
     * a lot of this stuff can be done in the setup function to avoid measuring this
     * fixed setup time, but for now we'll do it here */
    if (!png) abort();

    png_infop info = png_create_info_struct(png);
    if (!info) abort();

    png_set_write_fn(png, outpng, png_write_cb, NULL);

    /* One pointer per row.  The loop bound is the height argument, which is
     * also the size of the array, and the offset is computed in size_t so it
     * cannot wrap in 32-bit arithmetic. */
    const size_t row_bytes = (size_t)width * 3;
    png_bytep *png_row_ptrs = new png_bytep[height];
    for (uint32_t row = 0; row < height; ++row) {
        png_row_ptrs[row] = buf + row * row_bytes;
    }

    /* Describe the image using the dimensions that were actually passed in */
    png_set_IHDR(png, info, width, height, 8, PNG_COLOR_TYPE_RGB,
                 PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT,
                 PNG_FILTER_TYPE_DEFAULT);

    png_write_info(png, info);
    png_set_compression_level(png, comp_level);
    png_set_filter(png, 0, PNG_FILTER_NONE);
    png_write_image(png, (png_bytepp)png_row_ptrs);
    png_write_end(png, NULL);
    png_destroy_write_struct(&png, &info);

    delete[] png_row_ptrs;
}
|
||||
|
||||
/* libpng read callback that serves bytes from memory.
 *
 * Copies bytes_to_read bytes from the png_parse_dat cursor stored as the I/O
 * pointer of png into out, and advances the cursor by the same amount.  No
 * bounds checking is performed. */
static void read_from_pngdat(png_structp png, png_bytep out, png_size_t bytes_to_read) {
    png_parse_dat *src = (png_parse_dat*)png_get_io_ptr(png);
    uint8_t *chunk = src->cur_pos;

    src->cur_pos = chunk + bytes_to_read;
    memcpy(out, chunk, bytes_to_read);
}
|
||||
|
||||
/* Decode an RGB PNG held in memory.
 *
 * dat       - read cursor positioned at the start of the PNG stream
 * out_bytes - in/out: destination buffer; reallocated when it is too small
 * in_size   - current size of *out_bytes in bytes
 * width     - out: image width in pixels
 * height    - out: image height in pixels
 *
 * Returns the size of the decoded image in bytes.  Aborts when libpng cannot
 * create its structures, when the image is not RGB, or when the output
 * buffer cannot be grown. */
static inline int decode_png(png_parse_dat *dat, png_bytepp out_bytes, size_t in_size, uint32_t &width, uint32_t &height) {
    png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (!png) abort();

    png_infop info = png_create_info_struct(png);
    if (!info) abort();

    png_set_read_fn(png, dat, read_from_pngdat);
    png_read_info(png, info);

    int bit_depth = 0, color_type = -1;
    png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type, NULL, NULL, NULL);

    if (color_type != PNG_COLOR_TYPE_RGB) {
        fprintf(stderr, "expected an 8 bpp RGB image\n");
        abort();
    }

    /* Three samples per pixel, bit_depth / 8 bytes per sample.  Computed in
     * size_t so that large images do not wrap in 32-bit arithmetic. */
    const size_t row_bytes = (size_t)width * 3 * (size_t)(bit_depth / 8);
    const size_t im_size = row_bytes * height;

    if (im_size > in_size) {
        /* Keep the old pointer until the new one is known to be valid */
        png_bytep grown = (png_bytep)realloc(*out_bytes, im_size);
        if (!grown) {
            fprintf(stderr, "realloc failed, exiting\n");
            abort();
        }
        *out_bytes = grown;
    }

    /* Row stride matches the number of bytes libpng writes per row, so rows
     * never overlap regardless of the sample depth */
    png_bytep *out_rows = new png_bytep[height];
    for (size_t row = 0; row < height; ++row)
        out_rows[row] = *out_bytes + row * row_bytes;

    png_read_rows(png, out_rows, NULL, height);
    png_destroy_read_struct(&png, &info, NULL);
    delete[] out_rows;

    return (int)im_size;
}
|
98
third_party/zlib-ng/test/benchmarks/benchmark_slidehash.cc
vendored
Normal file
98
third_party/zlib-ng/test/benchmarks/benchmark_slidehash.cc
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
/* benchmark_slidehash.cc -- benchmark slide_hash variants
|
||||
* Copyright (C) 2022 Adam Stylinski, Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# include "deflate.h"
|
||||
# include "arch_functions.h"
|
||||
# include "../test_cpu_features.h"
|
||||
}
|
||||
|
||||
#define MAX_RANDOM_INTS 32768
|
||||
|
||||
class slide_hash: public benchmark::Fixture {
|
||||
private:
|
||||
uint16_t *l0;
|
||||
uint16_t *l1;
|
||||
deflate_state *s_g;
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
l0 = (uint16_t *)zng_alloc(HASH_SIZE * sizeof(uint16_t));
|
||||
|
||||
for (uint32_t i = 0; i < HASH_SIZE; i++) {
|
||||
l0[i] = rand();
|
||||
}
|
||||
|
||||
l1 = (uint16_t *)zng_alloc(MAX_RANDOM_INTS * sizeof(uint16_t));
|
||||
|
||||
for (int32_t i = 0; i < MAX_RANDOM_INTS; i++) {
|
||||
l1[i] = rand();
|
||||
}
|
||||
|
||||
deflate_state *s = (deflate_state*)malloc(sizeof(deflate_state));
|
||||
s->head = l0;
|
||||
s->prev = l1;
|
||||
s_g = s;
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state, slide_hash_func slide_hash) {
|
||||
s_g->w_size = (uint32_t)state.range(0);
|
||||
|
||||
for (auto _ : state) {
|
||||
slide_hash(s_g);
|
||||
benchmark::DoNotOptimize(s_g);
|
||||
}
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(l0);
|
||||
zng_free(l1);
|
||||
}
|
||||
};
|
||||
|
||||
/* Define and register one slide_hash benchmark.
 *
 * name         - suffix used for the benchmark case
 * fptr         - slide_hash implementation to time
 * support_flag - non-zero when the implementation may run on this CPU;
 *                otherwise the case is reported as skipped
 *
 * Each case is registered for window sizes 1024, 2048, ... MAX_RANDOM_INTS. */
#define BENCHMARK_SLIDEHASH(name, fptr, support_flag)                         \
    BENCHMARK_DEFINE_F(slide_hash, name)(benchmark::State& state) {           \
        if (!(support_flag)) {                                                \
            state.SkipWithError("CPU does not support " #name);               \
        }                                                                     \
        Bench(state, fptr);                                                   \
    }                                                                         \
    BENCHMARK_REGISTER_F(slide_hash, name)                                    \
        ->RangeMultiplier(2)                                                  \
        ->Range(1024, MAX_RANDOM_INTS);
|
||||
|
||||
/* Generic C implementation: registered unconditionally. */
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION

/* Runtime CPU detection is disabled: benchmark native_slide_hash unconditionally. */
BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);

#else /* runtime CPU detection enabled */

/* Each variant below is compiled in only when its feature macro is defined,
 * and is skipped at run time when test_cpu_features reports no support. */
#  ifdef ARM_SIMD
BENCHMARK_SLIDEHASH(armv6, slide_hash_armv6, test_cpu_features.arm.has_simd);
#  endif
#  ifdef ARM_NEON
BENCHMARK_SLIDEHASH(neon, slide_hash_neon, test_cpu_features.arm.has_neon);
#  endif
#  ifdef POWER8_VSX
BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_2_07);
#  endif
#  ifdef PPC_VMX
BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, test_cpu_features.power.has_altivec);
#  endif
#  ifdef RISCV_RVV
BENCHMARK_SLIDEHASH(rvv, slide_hash_rvv, test_cpu_features.riscv.has_rvv);
#  endif
#  ifdef X86_SSE2
BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
#  endif
#  ifdef X86_AVX2
BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2);
#  endif

#endif /* DISABLE_RUNTIME_CPU_DETECTION */
|
94
third_party/zlib-ng/test/benchmarks/benchmark_uncompress.cc
vendored
Normal file
94
third_party/zlib-ng/test/benchmarks/benchmark_uncompress.cc
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
/* benchmark_uncompress.cc -- benchmark uncompress()
|
||||
* Copyright (C) 2024 Hans Kristian Rosbach
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
extern "C" {
|
||||
# include "zbuild.h"
|
||||
# include "zutil_p.h"
|
||||
# if defined(ZLIB_COMPAT)
|
||||
# include "zlib.h"
|
||||
# else
|
||||
# include "zlib-ng.h"
|
||||
# endif
|
||||
}
|
||||
|
||||
#define MAX_SIZE (1024 * 1024)
|
||||
#define NUM_TESTS 6
|
||||
|
||||
class uncompress_bench: public benchmark::Fixture {
|
||||
private:
|
||||
size_t maxlen;
|
||||
uint8_t *inbuff;
|
||||
uint8_t *outbuff;
|
||||
uint8_t *compressed_buff[NUM_TESTS];
|
||||
uLong compressed_sizes[NUM_TESTS];
|
||||
int64_t sizes[NUM_TESTS] = {1, 64, 1024, 16384, 128*1024, 1024*1024};
|
||||
|
||||
public:
|
||||
void SetUp(const ::benchmark::State& state) {
|
||||
const char teststr[42] = "Hello hello World broken Test tast mello.";
|
||||
maxlen = MAX_SIZE;
|
||||
|
||||
inbuff = (uint8_t *)zng_alloc(MAX_SIZE + 1);
|
||||
assert(inbuff != NULL);
|
||||
|
||||
outbuff = (uint8_t *)zng_alloc(MAX_SIZE + 1);
|
||||
assert(outbuff != NULL);
|
||||
|
||||
// Initialize input buffer
|
||||
int pos = 0;
|
||||
for (int32_t i = 0; i < MAX_SIZE - 42 ; i+=42){
|
||||
pos += sprintf((char *)inbuff+pos, "%s", teststr);
|
||||
}
|
||||
|
||||
// Compress data into different buffers
|
||||
for (size_t i = 0; i < NUM_TESTS; ++i) {
|
||||
compressed_buff[i] = (uint8_t *)zng_alloc(MAX_SIZE + 1);
|
||||
assert(compressed_buff[i] != NULL);
|
||||
|
||||
uLong compressed_size = maxlen;
|
||||
int err = PREFIX(compress)(compressed_buff[i], &compressed_size, inbuff, sizes[i]);
|
||||
if (err != Z_OK) {
|
||||
fprintf(stderr, "Compression failed with error %d\n", err);
|
||||
abort();
|
||||
}
|
||||
compressed_sizes[i] = compressed_size;
|
||||
}
|
||||
}
|
||||
|
||||
void Bench(benchmark::State& state) {
|
||||
int err = 0;
|
||||
|
||||
for (auto _ : state) {
|
||||
int index = 0;
|
||||
while (sizes[index] != state.range(0)) ++index;
|
||||
|
||||
uLong out_size = maxlen;
|
||||
err = PREFIX(uncompress)(outbuff, &out_size, compressed_buff[index], compressed_sizes[index]);
|
||||
}
|
||||
|
||||
benchmark::DoNotOptimize(err);
|
||||
}
|
||||
|
||||
void TearDown(const ::benchmark::State& state) {
|
||||
zng_free(inbuff);
|
||||
zng_free(outbuff);
|
||||
|
||||
for (size_t i = 0; i < NUM_TESTS; ++i) {
|
||||
zng_free(compressed_buff[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#define BENCHMARK_UNCOMPRESS(name) \
|
||||
BENCHMARK_DEFINE_F(uncompress_bench, name)(benchmark::State& state) { \
|
||||
Bench(state); \
|
||||
} \
|
||||
BENCHMARK_REGISTER_F(uncompress_bench, name)->Arg(1)->Arg(64)->Arg(1024)->Arg(16<<10)->Arg(128<<10)->Arg(1024<<10);
|
||||
|
||||
BENCHMARK_UNCOMPRESS(uncompress_bench);
|
Reference in New Issue
Block a user