set(GENERIC_ARCHDIR "arch/generic")
set(ZLIB_ARCH_SRCS)
-set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_features.h)
+set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_functions.h)
if(BASEARCH_ARM_FOUND)
set(ARCHDIR "arch/arm")
endif()
endif()
endif()
- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
if(WITH_ACLE)
check_acle_compiler_flag()
add_definitions(-DPOWER_FEATURES)
endif()
if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN)
- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h ${ARCHDIR}/power_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
endif()
# VMX specific options and files
if(HAVE_RVV_INTRIN)
add_definitions(-DRISCV_FEATURES)
add_definitions(-DRISCV_RVV)
- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h)
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h ${ARCHDIR}/riscv_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
# FIXME: we will not set compile flags for riscv_features.c when
# the kernels update hwcap or hwprobe for riscv
check_s390_intrinsics()
if(HAVE_S390_INTRIN)
add_definitions(-DS390_FEATURES)
- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h)
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h ${ARCHDIR}/s390_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
endif()
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
endif()
elseif(BASEARCH_X86_FOUND)
add_definitions(-DX86_FEATURES)
- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h ${ARCHDIR}/x86_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
if(MSVC)
list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h
)
set(ZLIB_PRIVATE_HDRS
- arch/generic/adler32_fold_c.h
- arch/generic/crc32_fold_c.h
adler32_p.h
chunkset_tpl.h
compare256_rle.h
+ cpu_functions.h
cpu_features.h
crc32_braid_p.h
crc32_braid_comb_p.h
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifndef ARM_H_
-#define ARM_H_
+#ifndef ARM_FEATURES_H_
+#define ARM_FEATURES_H_
struct arm_cpu_features {
int has_simd;
void Z_INTERNAL arm_check_features(struct arm_cpu_features *features);
-#ifdef CPU_FEATURES_H_
-
-#ifdef ARM_NEON
-uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t chunksize_neon(void);
-uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-
-# ifdef HAVE_BUILTIN_CTZLL
- uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
-# ifdef DEFLATE_H_
- uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
-# endif
-# endif
-# ifdef DEFLATE_H_
- void slide_hash_neon(deflate_state *s);
-# endif
-# ifdef INFLATE_H_
- void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
-# endif
-#endif
-
-#ifdef ARM_ACLE
-uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
-
-# ifdef DEFLATE_H_
- void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
- Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
- uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
-# endif
-#endif
-
-#ifdef ARM_SIMD
-# ifdef DEFLATE_H_
- void slide_hash_armv6(deflate_state *s);
-# endif
-#endif
-
-#endif
-
-#endif /* ARM_H_ */
+#endif /* ARM_FEATURES_H_ */
--- /dev/null
+/* arm_functions.h -- ARM implementations for arch-specific functions.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Prototypes only. Each group below is declared only when its feature macro
+ * (ARM_NEON, ARM_ACLE, ARM_SIMD) is defined. The declarations use
+ * deflate_state, Pos, PREFIX3() and the fixed-width integer types, but this
+ * header includes nothing itself, so it must be included after the headers
+ * that provide those names. cpu_functions.h includes zbuild.h, zutil.h,
+ * crc32.h, deflate.h and fallback_builtins.h before pulling this file in.
+ */
+
+#ifndef ARM_FUNCTIONS_H_
+#define ARM_FUNCTIONS_H_
+
+
+#ifdef ARM_NEON
+uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t chunksize_neon(void);
+uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+
+# ifdef HAVE_BUILTIN_CTZLL
+uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
+uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
+uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
+# endif /* HAVE_BUILTIN_CTZLL */
+void slide_hash_neon(deflate_state *s);
+void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
+#endif /* ARM_NEON */
+
+#ifdef ARM_ACLE
+uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
+
+void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
+Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
+uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
+#endif /* ARM_ACLE */
+
+#ifdef ARM_SIMD
+void slide_hash_armv6(deflate_state *s);
+#endif /* ARM_SIMD */
+
+#endif /* ARM_FUNCTIONS_H_ */
adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c
-adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
+adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
-adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
+adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c
-crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
+crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
-crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
+crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
insert_string_c.o: $(SRCDIR)/insert_string_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h $(SRCTOP)/insert_string_tpl.h
#include "zbuild.h"
#include "functable.h"
-#include "adler32_fold_c.h"
#include <limits.h>
+++ /dev/null
-/* adler32_fold.h -- adler32 folding interface
- * Copyright (C) 2022 Adam Stylinski
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#ifndef ADLER32_FOLD_C_H_
-#define ADLER32_FOLD_C_H_
-
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-
-#endif
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
+#include "zutil.h"
#include "functable.h"
#include "crc32.h"
-#include "crc32_fold_c.h"
-
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
crc->value = CRC32_INITIAL_VALUE;
return crc->value;
+++ /dev/null
-/* crc32_fold.h -- crc32 folding interface
- * Copyright (C) 2021 Nathan Moinvaziri
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-#ifndef CRC32_FOLD_C_H_
-#define CRC32_FOLD_C_H_
-
-Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
-Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
-Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
-Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
-
-#endif
-/* generic_features.h -- generic C implementations for arch-specific features
+/* generic_functions.h -- generic C implementations for arch-specific functions.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifndef GENERIC_FEATURES_H_
-#define GENERIC_FEATURES_H_
+#ifndef GENERIC_FUNCTIONS_H_
+#define GENERIC_FUNCTIONS_H_
+
+#include "zendian.h"
+
+Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
+Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
+Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
+
+Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
+
uint32_t chunksize_c(void);
uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-#ifdef INFLATE_H_
void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
-#endif
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
# endif
#endif
-#ifdef DEFLATE_H_
-typedef void (*slide_hash_func)(deflate_state *s);
+typedef void (*slide_hash_func)(deflate_state *s);
void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
# endif
#endif
-
-#endif
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifndef POWER_H_
-#define POWER_H_
+#ifndef POWER_FEATURES_H_
+#define POWER_FEATURES_H_
struct power_cpu_features {
int has_altivec;
void Z_INTERNAL power_check_features(struct power_cpu_features *features);
-#ifdef CPU_FEATURES_H_
-
-#ifdef PPC_VMX
-uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
-# ifdef DEFLATE_H_
- void slide_hash_vmx(deflate_state *s);
-# endif
-#endif
-
-#ifdef POWER8_VSX
-uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t chunksize_power8(void);
-uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
-# ifdef DEFLATE_H_
- void slide_hash_power8(deflate_state *s);
-# endif
-# ifdef INFLATE_H_
- void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
-# endif
-#endif
-
-#ifdef POWER9
-uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
-# ifdef DEFLATE_H_
- uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
-# endif
-#endif
-
-#endif
-
-#endif /* POWER_H_ */
+#endif /* POWER_FEATURES_H_ */
--- /dev/null
+/* power_functions.h -- POWER implementations for arch-specific functions.
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * Copyright (C) 2021 Mika T. Lindqvist <postmaster@raasu.org>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Prototypes only, grouped by the feature macro that enables them
+ * (PPC_VMX, POWER8_VSX, POWER9). The declarations use deflate_state, Pos
+ * and PREFIX3(), but this header includes nothing itself, so it must be
+ * included after the headers that provide those names. cpu_functions.h
+ * includes zbuild.h, zutil.h, crc32.h and deflate.h before this file.
+ */
+
+#ifndef POWER_FUNCTIONS_H_
+#define POWER_FUNCTIONS_H_
+
+#ifdef PPC_VMX
+uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
+void slide_hash_vmx(deflate_state *s);
+#endif /* PPC_VMX */
+
+#ifdef POWER8_VSX
+uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t chunksize_power8(void);
+uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
+void slide_hash_power8(deflate_state *s);
+void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
+#endif /* POWER8_VSX */
+
+#ifdef POWER9
+uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
+uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
+uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
+#endif /* POWER9 */
+
+#endif /* POWER_FUNCTIONS_H_ */
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifndef RISCV_H_
-#define RISCV_H_
+#ifndef RISCV_FEATURES_H_
+#define RISCV_FEATURES_H_
struct riscv_cpu_features {
int has_rvv;
void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
-#ifdef CPU_FEATURES_H_
-
-#ifdef RISCV_RVV
-uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_rvv(void);
-uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
-
-# ifdef DEFLATE_H_
- uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
- void slide_hash_rvv(deflate_state *s);
-# endif
-# ifdef INFLATE_H_
- void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
-# endif
-#endif
-
-#endif
-
-#endif /* RISCV_H_ */
+#endif /* RISCV_FEATURES_H_ */
--- /dev/null
+/* riscv_functions.h -- RISCV implementations for arch-specific functions.
+ *
+ * Copyright (C) 2023 SiFive, Inc. All rights reserved.
+ * Contributed by Alex Chiang <alex.chiang@sifive.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Prototypes only; everything is declared only when RISCV_RVV is defined.
+ * The declarations use deflate_state, Pos and PREFIX3(), but this header
+ * includes nothing itself, so it must be included after the headers that
+ * provide those names. cpu_functions.h includes zbuild.h, zutil.h, crc32.h
+ * and deflate.h before this file.
+ */
+
+#ifndef RISCV_FUNCTIONS_H_
+#define RISCV_FUNCTIONS_H_
+
+#ifdef RISCV_RVV
+uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t chunksize_rvv(void);
+uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
+
+uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
+uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
+void slide_hash_rvv(deflate_state *s);
+void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
+#endif /* RISCV_RVV */
+
+#endif /* RISCV_FUNCTIONS_H_ */
*/
#include "zbuild.h"
-#include "crc32_braid_p.h"
+#include "cpu_functions.h"
#include <vecintrin.h>
+/* s390_features.h -- check for s390 features.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
#ifndef S390_FEATURES_H_
#define S390_FEATURES_H_
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features);
-#ifdef CPU_FEATURES_H_
-
-#ifdef S390_CRC32_VX
-uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
-#endif
-
-#endif
-
#endif
--- /dev/null
+/* s390_functions.h -- s390 implementations for arch-specific functions.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Declares the single s390 prototype, available only when S390_CRC32_VX is
+ * defined. This header includes nothing itself; the fixed-width integer
+ * types and size_t must already be visible where it is included.
+ */
+
+#ifndef S390_FUNCTIONS_H_
+#define S390_FUNCTIONS_H_
+
+#ifdef S390_CRC32_VX
+uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
+#endif /* S390_CRC32_VX */
+
+#endif /* S390_FUNCTIONS_H_ */
#include "zbuild.h"
#include "adler32_p.h"
-#include "cpu_features.h"
+#include "cpu_functions.h"
#include <immintrin.h>
#include "x86_intrins.h"
#include "adler32_avx512_p.h"
#include "zbuild.h"
#include "adler32_p.h"
-#include "cpu_features.h"
+#include "cpu_functions.h"
#include <immintrin.h>
#include "x86_intrins.h"
#include "adler32_avx512_p.h"
void Z_INTERNAL x86_check_features(struct x86_cpu_features *features);
-#ifdef CPU_FEATURES_H_
-
-#include "fallback_builtins.h"
-#include "crc32.h"
-
-#ifdef X86_SSE2
-uint32_t chunksize_sse2(void);
-uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-
-# ifdef HAVE_BUILTIN_CTZ
- uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
-# ifdef DEFLATE_H_
- uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
- void slide_hash_sse2(deflate_state *s);
-# endif
-# endif
-# ifdef INFLATE_H_
- void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
-# endif
-#endif
-
-#ifdef X86_SSSE3
-uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
-uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-# ifdef INFLATE_H_
- void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
-# endif
-#endif
-
-#ifdef X86_SSE42
-uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-# ifdef DEFLATE_H_
- void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
- Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
- uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
-# endif
-#endif
-
-#ifdef X86_AVX2
-uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_avx2(void);
-uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-
-# ifdef HAVE_BUILTIN_CTZ
- uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
-# ifdef DEFLATE_H_
- uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
- void slide_hash_avx2(deflate_state *s);
-# endif
-# endif
-# ifdef INFLATE_H_
- void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
-# endif
-#endif
-#ifdef X86_AVX512
-uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-#endif
-#ifdef X86_AVX512VNNI
-uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-#endif
-
-#ifdef X86_PCLMULQDQ_CRC
-uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
-void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
-void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
-uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
-uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
-#endif
-#ifdef X86_VPCLMULQDQ_CRC
-uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
-void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
-void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
-uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
-uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
-#endif
-
-#endif
-
-#endif /* CPU_H_ */
+#endif /* X86_FEATURES_H_ */
--- /dev/null
+/* x86_functions.h -- x86 implementations for arch-specific functions.
+ * Copyright (C) 2013 Intel Corporation Jim Kukunas
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Prototypes only, grouped by the feature macro that enables them
+ * (X86_SSE2, X86_SSSE3, X86_SSE42, X86_AVX2, X86_AVX512, X86_AVX512VNNI,
+ * X86_PCLMULQDQ_CRC, X86_VPCLMULQDQ_CRC). The declarations use
+ * deflate_state, Pos, crc32_fold and PREFIX3(), but this header includes
+ * nothing itself, so it must be included after the headers that provide
+ * those names. cpu_functions.h includes zbuild.h, zutil.h, crc32.h,
+ * deflate.h and fallback_builtins.h before this file.
+ *
+ * NOTE(review): slide_hash_sse2 and slide_hash_avx2 are declared only when
+ * HAVE_BUILTIN_CTZ is defined, because they sit inside the same guard as the
+ * compare256 and longest_match prototypes. arm_functions.h declares
+ * slide_hash_neon outside its HAVE_BUILTIN_CTZLL guard. Confirm whether the
+ * x86 placement is intentional.
+ */
+
+#ifndef X86_FUNCTIONS_H_
+#define X86_FUNCTIONS_H_
+
+#ifdef X86_SSE2
+uint32_t chunksize_sse2(void);
+uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+
+# ifdef HAVE_BUILTIN_CTZ
+ uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
+ uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
+ uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
+ void slide_hash_sse2(deflate_state *s);
+# endif /* HAVE_BUILTIN_CTZ */
+ void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
+#endif /* X86_SSE2 */
+
+#ifdef X86_SSSE3
+uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
+uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
+#endif /* X86_SSSE3 */
+
+#ifdef X86_SSE42
+uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
+Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
+uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
+#endif /* X86_SSE42 */
+
+#ifdef X86_AVX2
+uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t chunksize_avx2(void);
+uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+
+# ifdef HAVE_BUILTIN_CTZ
+ uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
+ uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
+ uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
+ void slide_hash_avx2(deflate_state *s);
+# endif /* HAVE_BUILTIN_CTZ */
+ void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
+#endif /* X86_AVX2 */
+#ifdef X86_AVX512
+uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#endif /* X86_AVX512 */
+#ifdef X86_AVX512VNNI
+uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#endif /* X86_AVX512VNNI */
+
+#ifdef X86_PCLMULQDQ_CRC
+uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
+void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
+void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
+uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
+#endif /* X86_PCLMULQDQ_CRC */
+#ifdef X86_VPCLMULQDQ_CRC
+uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
+void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
+void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
+uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
+uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
+#endif /* X86_VPCLMULQDQ_CRC */
+
+#endif /* X86_FUNCTIONS_H_ */
# include "arch/riscv/riscv_features.h"
#endif
-#include "arch/generic/generic_features.h"
-
struct cpu_features {
#if defined(X86_FEATURES)
struct x86_cpu_features x86;
--- /dev/null
+/* cpu_functions.h -- CPU-specific function prototypes.
+ * Copyright (C) 2017 Hans Kristian Rosbach
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Umbrella header for the function prototypes. It includes the common
+ * headers first, then the generic C prototypes unconditionally, then at
+ * most one architecture header: the first branch of the #if/#elif chain
+ * below whose *_FEATURES macro is defined.
+ */
+
+#ifndef CPU_FUNCTIONS_H_
+#define CPU_FUNCTIONS_H_
+
+#include "zbuild.h"
+#include "zutil.h"
+#include "crc32.h"
+#include "deflate.h"
+#include "fallback_builtins.h"
+
+#include "arch/generic/generic_functions.h"
+
+#if defined(X86_FEATURES)
+# include "arch/x86/x86_functions.h"
+#elif defined(ARM_FEATURES)
+# include "arch/arm/arm_functions.h"
+#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
+# include "arch/power/power_functions.h"
+#elif defined(S390_FEATURES)
+# include "arch/s390/s390_functions.h"
+#elif defined(RISCV_FEATURES)
+# include "arch/riscv/riscv_functions.h"
+#endif
+
+#endif /* CPU_FUNCTIONS_H_ */
/* CRC polynomial. */
#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
-extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
-
#endif /* CRC32_BRAID_P_H_ */
#include "zendian.h"
#include "functable.h"
#include "cpu_features.h"
+#include "cpu_functions.h"
#if defined(_MSC_VER)
# include <intrin.h>
#define FUNCTABLE_H_
#include "deflate.h"
-#include "arch/generic/crc32_fold_c.h"
-#include "arch/generic/adler32_fold_c.h"
-#include "inftrees.h"
-#include "inflate.h"
+#include "crc32.h"
struct functable_s {
void (* force_init) (void);
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
+# include "cpu_functions.h"
# include "../test_cpu_features.h"
}
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
+# include "cpu_functions.h"
# include "../test_cpu_features.h"
}
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
+# include "cpu_functions.h"
# include "../test_cpu_features.h"
}
# include "zbuild.h"
# include "zutil_p.h"
# include "compare256_rle.h"
-# include "cpu_features.h"
}
#define MAX_COMPARE_SIZE (256)
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
+# include "cpu_functions.h"
# include "../test_cpu_features.h"
}
# include "zbuild.h"
# include "zutil_p.h"
# include "deflate.h"
+# include "cpu_functions.h"
# include "../test_cpu_features.h"
}
extern "C" {
# include "zbuild.h"
+# include "cpu_functions.h"
# include "test_cpu_features.h"
}
extern "C" {
# include "zbuild.h"
# include "zutil.h"
+# include "cpu_functions.h"
# include "test_cpu_features.h"
}
extern "C" {
# include "zbuild.h"
+# include "cpu_functions.h"
# include "test_cpu_features.h"
}
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
-adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
+adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
chunkset_c.obj: $(SRCDIR)/arch/generic/chunkset_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/chunkset_tpl.h $(SRCDIR)/inffast_tpl.h
compare256_c.obj: $(SRCDIR)/arch/generic/compare256_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil_p.h $(SRCDIR)/deflate.h $(SRCDIR)/fallback_builtins.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
-crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
+crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
-adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
+adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
-crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
+crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
$(SRCDIR)/arch/x86/adler32_ssse3_p.h
adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h \
$(SRCDIR)/arch/x86/adler32_ssse3_p.h
-adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
+adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
-crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
+crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h
crc32_pclmulqdq.obj: $(SRCDIR)/arch/x86/crc32_pclmulqdq.c $(SRCDIR)/arch/x86/crc32_pclmulqdq_tpl.h $(SRCDIR)/arch/x86/crc32_fold_pclmulqdq_tpl.h \
- $(SRCDIR)/arch/generic/crc32_fold_c.h $(SRCDIR)/zbuild.h
+ $(SRCDIR)/zbuild.h
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h