fix: breakpad use miniz
Some checks failed
sm-rpc / build (Debug, arm-linux-gnueabihf) (push) Successful in 1m34s
sm-rpc / build (Debug, aarch64-linux-gnu) (push) Successful in 2m46s
sm-rpc / build (Debug, host.gcc) (push) Failing after 1m28s
sm-rpc / build (Release, aarch64-linux-gnu) (push) Successful in 2m14s
sm-rpc / build (Release, arm-linux-gnueabihf) (push) Successful in 2m8s
sm-rpc / build (Debug, mipsel-linux-gnu) (push) Successful in 5m35s
sm-rpc / build (Release, host.gcc) (push) Failing after 1m55s
sm-rpc / build (Release, mipsel-linux-gnu) (push) Successful in 7m21s

tqcq
2025-08-25 15:24:22 +08:00
parent a58517497b
commit 68b2e7f763
728 changed files with 489652 additions and 1211 deletions


@@ -0,0 +1,75 @@
# Makefile for zlib-ng
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
# Copyright (C) 2024 Hans Kristian Rosbach
# For conditions of distribution and use, see copyright notice in zlib.h
CC=
CFLAGS=
SFLAGS=
INCLUDES=
SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all: \
    adler32_c.o adler32_c.lo \
    adler32_fold_c.o adler32_fold_c.lo \
    chunkset_c.o chunkset_c.lo \
    compare256_c.o compare256_c.lo \
    crc32_braid_c.o crc32_braid_c.lo \
    crc32_fold_c.o crc32_fold_c.lo \
    slide_hash_c.o slide_hash_c.lo

adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c

adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c

adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c

adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c

chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c

chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c

compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c

compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c

crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c

crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c

crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c

crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c

slide_hash_c.o: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c

slide_hash_c.lo: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c

mostlyclean: clean
clean:
	rm -f *.o *.lo *~
	rm -rf objs
	rm -f *.gcda *.gcno *.gcov

distclean: clean
	rm -f Makefile


@@ -0,0 +1,54 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2011, 2016 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "functable.h"
#include "adler32_p.h"
/* ========================================================================= */
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
    uint32_t sum2;
    unsigned n;

    /* split Adler-32 into component sums */
    sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* in case user likes doing a byte at a time, keep it fast */
    if (UNLIKELY(len == 1))
        return adler32_len_1(adler, buf, sum2);

    /* initial Adler-32 value (deferred check for len == 1 speed) */
    if (UNLIKELY(buf == NULL))
        return 1L;

    /* in case short lengths are provided, keep it somewhat fast */
    if (UNLIKELY(len < 16))
        return adler32_len_16(adler, buf, len, sum2);

    /* do length NMAX blocks -- requires just one modulo operation */
    while (len >= NMAX) {
        len -= NMAX;
#ifdef UNROLL_MORE
        n = NMAX / 16;          /* NMAX is divisible by 16 */
#else
        n = NMAX / 8;           /* NMAX is divisible by 8 */
#endif
        do {
#ifdef UNROLL_MORE
            DO16(adler, sum2, buf);   /* 16 sums unrolled */
            buf += 16;
#else
            DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
            buf += 8;
#endif
        } while (--n);
        adler %= BASE;
        sum2 %= BASE;
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
    return adler32_len_64(adler, buf, len, sum2);
}
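For reference, the checksum the unrolled loop above computes is just two running sums reduced modulo BASE = 65521; NMAX = 5552 only bounds how many bytes can be accumulated before the modulo must be taken to avoid 32-bit overflow. A minimal, self-contained sketch of that reference form (not part of the vendored sources; adler32_ref is an illustrative name):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Straightforward Adler-32: s1 is the running byte sum, s2 the running sum of
 * the s1 values. The vendored code above computes the same result, but defers
 * the modulo to once per NMAX-byte block and unrolls the inner loop 8/16-wide. */
static uint32_t adler32_ref(uint32_t adler, const uint8_t *buf, size_t len) {
    const uint32_t BASE = 65521;      /* largest prime smaller than 65536 */
    uint32_t s1 = adler & 0xffff;
    uint32_t s2 = (adler >> 16) & 0xffff;
    while (len--) {
        s1 = (s1 + *buf++) % BASE;
        s2 = (s2 + s1) % BASE;
    }
    return (s2 << 16) | s1;
}

int main(void) {
    const uint8_t msg[] = "hello";
    printf("%08x\n", adler32_ref(1, msg, 5));   /* 062c0215, the Adler-32 of "hello" */
    return 0;
}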


@@ -0,0 +1,15 @@
/* adler32_fold.c -- adler32 folding interface
* Copyright (C) 2022 Adam Stylinski
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "functable.h"
#include <limits.h>
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
    adler = FUNCTABLE_CALL(adler32)(adler, src, len);
    memcpy(dst, src, len);
    return adler;
}


@@ -0,0 +1,53 @@
/* chunk_permute_table.h - shared AVX/SSSE3 permutation table for use with chunkmemset family of functions.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef CHUNK_PERMUTE_TABLE_H_
#define CHUNK_PERMUTE_TABLE_H_
#include "zbuild.h"
/* Need entries for all numbers not an even modulus for 1, 2, 4, 8, 16 & 32 */
static const ALIGNED_(32) uint8_t permute_table[26*32] = {
0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, /* dist 3 */
0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, /* dist 5 */
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, /* dist 6 */
0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, /* dist 7 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, /* dist 9 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, /* dist 10 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* dist 11 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, /* dist 12 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, 3, 4, 5, /* dist 13 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 2, 3, /* dist 14 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, /* dist 15 */
/* Beyond dists of 15 means we have to permute from a vector > len(m128i). Because AVX couldn't permute
* beyond 128 bit lanes until AVX512 for sub 4-byte sequences, we have to do some math here for an eventual
* blend with a comparison. That means we need to wrap the indices with yet another derived table. For simplicity,
* we'll use absolute indexing here to derive a blend vector. This is actually a lot simpler with ARM's TBL, but,
* this is what we're dealt.
*/
16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* dist 17 */
16, 17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, /* dist 18 */
16, 17, 18, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, /* dist 19 */
16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, /* dist 20 */
16, 17, 18, 19, 20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, /* dist 21 */
16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* dist 22 */
16, 17, 18, 19, 20, 21, 22, 0, 1, 2, 3, 4, 5, 6, 7, 8, /* dist 23 */
16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, /* dist 24 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 1, 2, 3, 4, 5, 6, /* dist 25 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 1, 2, 3, 4, 5, /* dist 26 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 0, 1, 2, 3, 4, /* dist 27 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, /* dist 28 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 0, 1, 2, /* dist 29 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 1, /* dist 30 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, /* dist 31 */
};
typedef struct lut_rem_pair_s {
uint16_t idx;
uint16_t remval;
} lut_rem_pair;
#endif
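The rows above are shuffle indices: for a match distance that does not evenly divide the vector width, one pshufb/TBL lookup with the matching row replicates the dist-byte pattern across a whole vector register. A scalar stand-in showing what the dist-3 row encodes (illustrative only; the real code feeds these rows to SSSE3/AVX/NEON shuffles):

#include <stdint.h>
#include <stdio.h>

/* Scalar equivalent of shuffling with the dist-3 row: the row holds the
 * indices 0,1,2 repeated, so applying it to the 3-byte pattern fills every
 * output lane with pattern[i % 3]. */
int main(void) {
    const uint8_t pattern[3] = {'a', 'b', 'c'};   /* match distance 3 */
    uint8_t out[32];
    for (int i = 0; i < 32; i++)
        out[i] = pattern[i % 3];                  /* same mapping as the dist-3 row above */
    fwrite(out, 1, sizeof(out), stdout);          /* abcabcabc... */
    putchar('\n');
    return 0;
}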


@@ -0,0 +1,42 @@
/* chunkset.c -- inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zmemory.h"
typedef uint64_t chunk_t;
#define CHUNK_SIZE 8
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    uint32_t tmp = zng_memread_4(from);
    *chunk = tmp | ((chunk_t)tmp << 32);
}

static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    *chunk = zng_memread_8(from);
}

static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    *chunk = zng_memread_8(s);
}

static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    zng_memwrite_8(out, *chunk);
}
#define CHUNKSIZE chunksize_c
#define CHUNKCOPY chunkcopy_c
#define CHUNKUNROLL chunkunroll_c
#define CHUNKMEMSET chunkmemset_c
#define CHUNKMEMSET_SAFE chunkmemset_safe_c
#include "chunkset_tpl.h"
#define INFLATE_FAST inflate_fast_c
#include "inffast_tpl.h"


@@ -0,0 +1,31 @@
/* compare256.c -- 256 byte memory comparison with match length return
* Copyright (C) 2020 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "compare256_p.h"
// Set optimal COMPARE256 function variant
#if OPTIMAL_CMP == 8
# define COMPARE256 compare256_8
#elif defined(HAVE_BUILTIN_CTZLL)
# define COMPARE256 compare256_64
#elif defined(HAVE_BUILTIN_CTZ)
# define COMPARE256 compare256_32
#else
# define COMPARE256 compare256_16
#endif
Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
    return COMPARE256(src0, src1);
}
// Generate longest_match_c
#define LONGEST_MATCH longest_match_c
#include "match_tpl.h"
// Generate longest_match_slow_c
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_c
#include "match_tpl.h"


@@ -0,0 +1,123 @@
/* compare256_p.h -- 256 byte memory comparison with match length return
* Copyright (C) 2020 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
/* 8-bit integer comparison */
static inline uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
        if (*src0 != *src1)
            return len;
        src0 += 1, src1 += 1, len += 1;
    } while (len < 256);

    return 256;
}

/* 16-bit integer comparison */
static inline uint32_t compare256_16(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        if (zng_memcmp_2(src0, src1) != 0)
            return len + (*src0 == *src1);
        src0 += 2, src1 += 2, len += 2;
        if (zng_memcmp_2(src0, src1) != 0)
            return len + (*src0 == *src1);
        src0 += 2, src1 += 2, len += 2;
        if (zng_memcmp_2(src0, src1) != 0)
            return len + (*src0 == *src1);
        src0 += 2, src1 += 2, len += 2;
        if (zng_memcmp_2(src0, src1) != 0)
            return len + (*src0 == *src1);
        src0 += 2, src1 += 2, len += 2;
    } while (len < 256);

    return 256;
}

#ifdef HAVE_BUILTIN_CTZ
/* 32-bit integer comparison */
static inline uint32_t compare256_32(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        uint32_t sv, mv, diff;

        sv = zng_memread_4(src0);
        mv = zng_memread_4(src1);

        diff = sv ^ mv;
        if (diff) {
# if BYTE_ORDER == LITTLE_ENDIAN
            uint32_t match_byte = __builtin_ctz(diff) / 8;
# else
            uint32_t match_byte = __builtin_clz(diff) / 8;
# endif
            return len + match_byte;
        }

        src0 += 4, src1 += 4, len += 4;
    } while (len < 256);

    return 256;
}
#endif

#ifdef HAVE_BUILTIN_CTZLL
/* 64-bit integer comparison */
static inline uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        uint64_t sv, mv, diff;

        sv = zng_memread_8(src0);
        mv = zng_memread_8(src1);

        diff = sv ^ mv;
        if (diff) {
# if BYTE_ORDER == LITTLE_ENDIAN
            uint64_t match_byte = __builtin_ctzll(diff) / 8;
# else
            uint64_t match_byte = __builtin_clzll(diff) / 8;
# endif
            return len + (uint32_t)match_byte;
        }

        src0 += 8, src1 += 8, len += 8;
    } while (len < 256);

    return 256;
}
#endif
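The compare256_32/compare256_64 variants above rest on one trick: XOR two words loaded from the two inputs; the lowest set bit of the result marks the first differing byte, so count-trailing-zeros divided by 8 is the byte offset of the mismatch. A small self-contained sketch of that trick, assuming GCC/Clang builtins and a little-endian load (the big-endian path above uses count-leading-zeros instead):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return the index of the first byte that differs in the next 8 bytes of a and b,
 * or 8 if they are all equal. Unaligned-safe via memcpy, like zng_memread_8. */
static uint32_t first_mismatch8(const uint8_t *a, const uint8_t *b) {
    uint64_t va, vb;
    memcpy(&va, a, 8);
    memcpy(&vb, b, 8);
    uint64_t diff = va ^ vb;
    if (diff == 0)
        return 8;                                  /* all 8 bytes equal */
    return (uint32_t)(__builtin_ctzll(diff) / 8);  /* little-endian: low bits = low bytes */
}

int main(void) {
    const uint8_t x[] = "abcdefgh";
    const uint8_t y[] = "abcdeXgh";
    printf("%u\n", first_mismatch8(x, y));         /* prints 5 */
    return 0;
}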


@@ -0,0 +1,216 @@
/* crc32_braid.c -- compute the CRC-32 of a data stream
* Copyright (C) 1995-2022 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
* This interleaved implementation of a CRC makes use of pipelined multiple
* arithmetic-logic units, commonly found in modern CPU cores. It is due to
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
*/
#include "zbuild.h"
#include "crc32_braid_p.h"
#include "crc32_braid_tbl.h"
/*
A CRC of a message is computed on N braids of words in the message, where
each word consists of W bytes (4 or 8). If N is 3, for example, then three
running sparse CRCs are calculated respectively on each braid, at these
indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
This is done starting at a word boundary, and continues until as many blocks
of N * W bytes as are available have been processed. The results are combined
into a single CRC at the end. For this code, N must be in the range 1..6 and
W must be 4 or 8. The upper limit on N can be increased if desired by adding
more #if blocks, extending the patterns apparent in the code. In addition,
crc32 tables would need to be regenerated, if the maximum N value is increased.
N and W are chosen empirically by benchmarking the execution time on a given
processor. The choices for N and W below were based on testing on Intel Kaby
Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
Octeon II processors. The Intel, AMD, and ARM processors were all fastest
with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
They were all tested with either gcc or clang, all using the -O3 optimization
level. Your mileage may vary.
*/
/* ========================================================================= */
#ifdef W
/*
Return the CRC of the W bytes in the word_t data, taking the
least-significant byte of the word as the first byte of data, without any pre
or post conditioning. This is used to combine the CRCs of each braid.
*/
#if BYTE_ORDER == LITTLE_ENDIAN
static uint32_t crc_word(z_word_t data) {
    int k;
    for (k = 0; k < W; k++)
        data = (data >> 8) ^ crc_table[data & 0xff];
    return (uint32_t)data;
}
#elif BYTE_ORDER == BIG_ENDIAN
static z_word_t crc_word(z_word_t data) {
    int k;
    for (k = 0; k < W; k++)
        data = (data << 8) ^
            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
    return data;
}
#endif /* BYTE_ORDER */
#endif /* W */
/* ========================================================================= */
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) {
    uint32_t c;

    /* Pre-condition the CRC */
    c = (~crc) & 0xffffffff;

#ifdef W
    /* If provided enough bytes, do a braided CRC calculation. */
    if (len >= N * W + W - 1) {
        size_t blks;
        z_word_t const *words;
        int k;

        /* Compute the CRC up to a z_word_t boundary. */
        while (len && ((uintptr_t)buf & (W - 1)) != 0) {
            len--;
            DO1;
        }

        /* Compute the CRC on as many N z_word_t blocks as are available. */
        blks = len / (N * W);
        len -= blks * N * W;
        words = (z_word_t const *)buf;

        z_word_t crc0, word0, comb;
#if N > 1
        z_word_t crc1, word1;
#if N > 2
        z_word_t crc2, word2;
#if N > 3
        z_word_t crc3, word3;
#if N > 4
        z_word_t crc4, word4;
#if N > 5
        z_word_t crc5, word5;
#endif
#endif
#endif
#endif
#endif

        /* Initialize the CRC for each braid. */
        crc0 = ZSWAPWORD(c);
#if N > 1
        crc1 = 0;
#if N > 2
        crc2 = 0;
#if N > 3
        crc3 = 0;
#if N > 4
        crc4 = 0;
#if N > 5
        crc5 = 0;
#endif
#endif
#endif
#endif
#endif

        /* Process the first blks-1 blocks, computing the CRCs on each braid independently. */
        while (--blks) {
            /* Load the word for each braid into registers. */
            word0 = crc0 ^ words[0];
#if N > 1
            word1 = crc1 ^ words[1];
#if N > 2
            word2 = crc2 ^ words[2];
#if N > 3
            word3 = crc3 ^ words[3];
#if N > 4
            word4 = crc4 ^ words[4];
#if N > 5
            word5 = crc5 ^ words[5];
#endif
#endif
#endif
#endif
#endif
            words += N;

            /* Compute and update the CRC for each word. The loop should get unrolled. */
            crc0 = BRAID_TABLE[0][word0 & 0xff];
#if N > 1
            crc1 = BRAID_TABLE[0][word1 & 0xff];
#if N > 2
            crc2 = BRAID_TABLE[0][word2 & 0xff];
#if N > 3
            crc3 = BRAID_TABLE[0][word3 & 0xff];
#if N > 4
            crc4 = BRAID_TABLE[0][word4 & 0xff];
#if N > 5
            crc5 = BRAID_TABLE[0][word5 & 0xff];
#endif
#endif
#endif
#endif
#endif
            for (k = 1; k < W; k++) {
                crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff];
#if N > 1
                crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff];
#if N > 2
                crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff];
#if N > 3
                crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff];
#if N > 4
                crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff];
#if N > 5
                crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff];
#endif
#endif
#endif
#endif
#endif
            }
        }

        /* Process the last block, combining the CRCs of the N braids at the same time. */
        comb = crc_word(crc0 ^ words[0]);
#if N > 1
        comb = crc_word(crc1 ^ words[1] ^ comb);
#if N > 2
        comb = crc_word(crc2 ^ words[2] ^ comb);
#if N > 3
        comb = crc_word(crc3 ^ words[3] ^ comb);
#if N > 4
        comb = crc_word(crc4 ^ words[4] ^ comb);
#if N > 5
        comb = crc_word(crc5 ^ words[5] ^ comb);
#endif
#endif
#endif
#endif
#endif
        words += N;
        Assert(comb <= UINT32_MAX, "comb should fit in uint32_t");
        c = (uint32_t)ZSWAPWORD(comb);

        /* Update the pointer to the remaining bytes to process. */
        buf = (const unsigned char *)words;
    }
#endif /* W */

    /* Complete the computation of the CRC on any remaining bytes. */
    while (len >= 8) {
        len -= 8;
        DO8;
    }
    while (len) {
        len--;
        DO1;
    }

    /* Return the CRC, post-conditioned. */
    return c ^ 0xffffffff;
}
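For orientation, the braided code above produces the same value as the ordinary reflected CRC-32 (polynomial 0xEDB88320); the braids only let several table lookups per N*W-byte block proceed in parallel in the CPU's execution units. A minimal bit-at-a-time reference sketch (not part of the vendored sources; crc32_ref is an illustrative name, and real implementations use per-byte or per-word tables):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Bytewise, bit-at-a-time reflected CRC-32. For each byte, fold it into the low
 * bits of the register, then apply the polynomial once per bit. */
static uint32_t crc32_ref(uint32_t crc, const uint8_t *buf, size_t len) {
    crc = ~crc;                                   /* pre-condition */
    while (len--) {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));
    }
    return ~crc;                                  /* post-condition */
}

int main(void) {
    const uint8_t msg[] = "hello";
    printf("%08x\n", crc32_ref(0, msg, 5));
    return 0;
}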


@@ -0,0 +1,31 @@
/* crc32_fold.c -- crc32 folding interface
* Copyright (C) 2021 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
#include "functable.h"
#include "crc32.h"
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
    crc->value = CRC32_INITIAL_VALUE;
    return crc->value;
}

Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
    crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len);
    memcpy(dst, src, len);
}

Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
    /* Note: while this is basically the same thing as the vanilla CRC function, we still need
     * a functable entry for it so that we can generically dispatch to this function with the
     * same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The
     * init_crc is an unused argument in this context */
    Z_UNUSED(init_crc);
    crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len);
}

Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc) {
    return crc->value;
}


@@ -0,0 +1,57 @@
/* generic_functions.h -- generic C implementations for arch-specific functions.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef GENERIC_FUNCTIONS_H_
#define GENERIC_FUNCTIONS_H_
#include "zendian.h"
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_c(void);
uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
typedef void (*slide_hash_func)(deflate_state *s);
void slide_hash_c(deflate_state *s);
uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Generic code
# define native_adler32 adler32_c
# define native_adler32_fold_copy adler32_fold_copy_c
# define native_chunkmemset_safe chunkmemset_safe_c
# define native_chunksize chunksize_c
# define native_crc32 PREFIX(crc32_braid)
# define native_crc32_fold crc32_fold_c
# define native_crc32_fold_copy crc32_fold_copy_c
# define native_crc32_fold_final crc32_fold_final_c
# define native_crc32_fold_reset crc32_fold_reset_c
# define native_inflate_fast inflate_fast_c
# define native_slide_hash slide_hash_c
# define native_longest_match longest_match_c
# define native_longest_match_slow longest_match_slow_c
# define native_compare256 compare256_c
#endif
#endif
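The native_* block above is the compile-time end of zlib-ng's dispatch: with DISABLE_RUNTIME_CPU_DETECTION the generic functions are called directly, otherwise calls go through a table of function pointers filled in at startup. A toy sketch of those two shapes, assuming nothing about the real functable layout (functable_s, adler32_generic and CALL_ADLER32 are hypothetical names introduced only for this illustration):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* One pointer per operation; the real table holds every arch-dispatched function. */
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);

struct functable_s {
    adler32_func adler32;
};

static uint32_t adler32_generic(uint32_t adler, const uint8_t *buf, size_t len) {
    (void)buf; (void)len;
    return adler;                          /* placeholder body, not the real checksum */
}

static struct functable_s functable = { adler32_generic };

#ifdef DISABLE_RUNTIME_CPU_DETECTION
#  define CALL_ADLER32 adler32_generic     /* direct call, like the native_* macros */
#else
#  define CALL_ADLER32 functable.adler32   /* indirect call through the table */
#endif

static uint32_t checksum(const uint8_t *buf, size_t len) {
    return CALL_ADLER32(1, buf, len);
}

int main(void) {
    const uint8_t data[4] = {1, 2, 3, 4};
    printf("%u\n", checksum(data, sizeof(data)));
    return 0;
}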


@@ -0,0 +1,52 @@
/* slide_hash.c -- slide hash table C implementation
*
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "deflate.h"
/* ===========================================================================
* Slide the hash table when sliding the window down (could be avoided with 32
* bit values at the expense of memory usage). We slide even when level == 0 to
* keep the hash table consistent if we switch back to level > 0 later.
*/
static inline void slide_hash_c_chain(Pos *table, uint32_t entries, uint16_t wsize) {
#ifdef NOT_TWEAK_COMPILER
    table += entries;
    do {
        unsigned m;
        m = *--table;
        *table = (Pos)(m >= wsize ? m-wsize : 0);
        /* If entries is not on any hash chain, prev[entries] is garbage but
         * its value will never be used.
         */
    } while (--entries);
#else
    {
        /* At the time of this change, gcc (4.8.*) wasn't able to vectorize
         * this hot loop using saturated subtraction on the x86-64 architecture.
         * To avoid this defect, the loop is changed so that
         *  o. the pointer advances forward, and
         *  o. the variable 'm' is local to the loop and declared with type
         *     "Pos" (instead of 'unsigned int') to avoid unnecessary
         *     zero-extension.
         */
        unsigned int i;
        Pos *q = table;
        for (i = 0; i < entries; i++) {
            Pos m = *q;
            Pos t = (Pos)wsize;
            *q++ = (Pos)(m >= t ? m-t: 0);
        }
    }
#endif /* NOT_TWEAK_COMPILER */
}

Z_INTERNAL void slide_hash_c(deflate_state *s) {
    uint16_t wsize = (uint16_t)s->w_size;

    slide_hash_c_chain(s->head, HASH_SIZE, wsize);
    slide_hash_c_chain(s->prev, wsize, wsize);
}