From: Hans Kristian Rosbach Date: Wed, 3 Jan 2024 14:22:10 +0000 (+0100) Subject: Move adler32 C fallbacks to arch/generic X-Git-Tag: 2.2.0~126 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4e132cc0ecfba74568989e98f7ceb699e9bfcc3c;p=thirdparty%2Fzlib-ng.git Move adler32 C fallbacks to arch/generic --- diff --git a/CMakeLists.txt b/CMakeLists.txt index ce061371..536f30cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -977,6 +977,7 @@ set(ZLIB_PUBLIC_HDRS ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h ) set(ZLIB_PRIVATE_HDRS + arch/generic/adler32_fold_c.h adler32_p.h chunkset_tpl.h compare256_rle.h @@ -1003,8 +1004,9 @@ set(ZLIB_PRIVATE_HDRS zutil.h ) set(ZLIB_SRCS + arch/generic/adler32_c.c + arch/generic/adler32_fold_c.c adler32.c - adler32_fold.c chunkset.c compare256.c compress.c diff --git a/Makefile.in b/Makefile.in index 3859eb53..e4295902 100644 --- a/Makefile.in +++ b/Makefile.in @@ -74,8 +74,9 @@ man3dir = ${mandir}/man3 pkgconfigdir = ${libdir}/pkgconfig OBJZ = \ + arch/generic/adler32_c.o \ + arch/generic/adler32_fold_c.o \ adler32.o \ - adler32_fold.o \ chunkset.o \ compare256.o \ compress.o \ @@ -112,8 +113,9 @@ TESTOBJG = OBJC = $(OBJZ) $(OBJG) PIC_OBJZ = \ + arch/generic/adler32_c.lo \ + arch/generic/adler32_fold_c.lo \ adler32.lo \ - adler32_fold.lo \ chunkset.lo \ compare256.lo \ compress.lo \ @@ -169,6 +171,12 @@ $(ARCHDIR)/%.o: $(SRCDIR)/$(ARCHDIR)/%.c $(ARCHDIR)/%.lo: $(SRCDIR)/$(ARCHDIR)/%.c $(MAKE) -C $(ARCHDIR) $(notdir $@) +arch/generic/%.o: $(SRCDIR)/arch/generic/%.c + $(MAKE) -C arch/generic $(notdir $@) + +arch/generic/%.lo: $(SRCDIR)/arch/generic/%.c + $(MAKE) -C arch/generic $(notdir $@) + %.o: $(ARCHDIR)/%.o -cp $< $@ diff --git a/adler32.c b/adler32.c index 95ac13c3..b6cf5289 100644 --- a/adler32.c +++ b/adler32.c @@ -7,52 +7,6 @@ #include "functable.h" #include "adler32_p.h" -/* ========================================================================= */ -Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { - uint32_t sum2; - unsigned n; - - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - - /* in case user likes doing a byte at a time, keep it fast */ - if (UNLIKELY(len == 1)) - return adler32_len_1(adler, buf, sum2); - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (UNLIKELY(buf == NULL)) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if (UNLIKELY(len < 16)) - return adler32_len_16(adler, buf, len, sum2); - - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; -#ifdef UNROLL_MORE - n = NMAX / 16; /* NMAX is divisible by 16 */ -#else - n = NMAX / 8; /* NMAX is divisible by 8 */ -#endif - do { -#ifdef UNROLL_MORE - DO16(adler, sum2, buf); /* 16 sums unrolled */ - buf += 16; -#else - DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ - buf += 8; -#endif - } while (--n); - adler %= BASE; - sum2 %= BASE; - } - - /* do remaining bytes (less than NMAX, still just one modulo) */ - return adler32_len_64(adler, buf, len, sum2); -} - #ifdef ZLIB_COMPAT unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) { return (unsigned long)functable.adler32((uint32_t)adler, buf, len); diff --git a/arch/generic/Makefile.in b/arch/generic/Makefile.in index c717026f..5dc27a6a 100644 --- a/arch/generic/Makefile.in +++ b/arch/generic/Makefile.in @@ -1,5 +1,6 @@ -# Makefile for zlib +# Makefile for zlib-ng # Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# Copyright (C) 2024 Hans Kristian Rosbach # For conditions of distribution and use, see copyright notice in zlib.h CC= @@ -11,12 +12,27 @@ SRCDIR=. SRCTOP=../.. TOPDIR=$(SRCTOP) -all: +all: \ + adler32_c.o adler32_c.lo \ + adler32_fold_c.o adler32_fold_c.lo + + +adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c + +adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c mostlyclean: clean clean: - rm -f *.o *.lo *~ \ + rm -f *.o *.lo *~ rm -rf objs rm -f *.gcda *.gcno *.gcov diff --git a/arch/generic/adler32_c.c b/arch/generic/adler32_c.c new file mode 100644 index 00000000..64258c89 --- /dev/null +++ b/arch/generic/adler32_c.c @@ -0,0 +1,54 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "functable.h" +#include "adler32_p.h" + +/* ========================================================================= */ +Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (UNLIKELY(len == 1)) + return adler32_len_1(adler, buf, sum2); + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (UNLIKELY(buf == NULL)) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (UNLIKELY(len < 16)) + return adler32_len_16(adler, buf, len, sum2); + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; +#ifdef UNROLL_MORE + n = NMAX / 16; /* NMAX is divisible by 16 */ +#else + n = NMAX / 8; /* NMAX is divisible by 8 */ +#endif + do { +#ifdef UNROLL_MORE + DO16(adler, sum2, buf); /* 16 sums unrolled */ + buf += 16; +#else + DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ + buf += 8; +#endif + } while (--n); + adler %= BASE; + sum2 %= BASE; + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + return adler32_len_64(adler, buf, len, sum2); +} diff --git a/adler32_fold.c b/arch/generic/adler32_fold_c.c similarity index 93% rename from adler32_fold.c rename to arch/generic/adler32_fold_c.c index e2f6f9ac..abefdc5b 100644 --- a/adler32_fold.c +++ b/arch/generic/adler32_fold_c.c @@ -5,7 +5,7 @@ #include "zbuild.h" #include "functable.h" -#include "adler32_fold.h" +#include "adler32_fold_c.h" #include diff --git a/adler32_fold.h b/arch/generic/adler32_fold_c.h similarity index 84% rename from adler32_fold.h rename to arch/generic/adler32_fold_c.h index 20aa1c74..b231b1a9 100644 --- a/adler32_fold.h +++ b/arch/generic/adler32_fold_c.h @@ -3,8 +3,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef ADLER32_FOLD_H_ -#define ADLER32_FOLD_H_ +#ifndef ADLER32_FOLD_C_H_ +#define ADLER32_FOLD_C_H_ Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); diff --git a/arch/x86/adler32_avx2.c b/arch/x86/adler32_avx2.c index e2882cb7..5b0d32d7 100644 --- a/arch/x86/adler32_avx2.c +++ b/arch/x86/adler32_avx2.c @@ -11,7 +11,6 @@ #include "zbuild.h" #include -#include "adler32_fold.h" #include "adler32_p.h" #include "adler32_avx2_p.h" #include "x86_intrins.h" diff --git a/arch/x86/adler32_avx512.c b/arch/x86/adler32_avx512.c index 6f5671b7..83d521ab 100644 --- a/arch/x86/adler32_avx512.c +++ b/arch/x86/adler32_avx512.c @@ -10,7 +10,6 @@ #include "zbuild.h" #include "adler32_p.h" -#include "adler32_fold.h" #include "cpu_features.h" #include #include "x86_intrins.h" diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c index c4faceda..b8ccd9ac 100644 --- a/arch/x86/adler32_avx512_vnni.c +++ b/arch/x86/adler32_avx512_vnni.c @@ -13,7 +13,6 @@ #include "adler32_p.h" #include "cpu_features.h" #include -#include "adler32_fold.h" #include "x86_intrins.h" #include "adler32_avx512_p.h" #include "adler32_avx2_p.h" diff --git a/arch/x86/adler32_sse42.c b/arch/x86/adler32_sse42.c index 0b1addbc..df0739d1 100644 --- a/arch/x86/adler32_sse42.c +++ b/arch/x86/adler32_sse42.c @@ -8,7 +8,6 @@ #include "zbuild.h" #include "adler32_p.h" -#include "adler32_fold.h" #include "adler32_ssse3_p.h" #include diff --git a/configure b/configure index 6de26247..02bac6c7 100755 --- a/configure +++ b/configure @@ -2109,6 +2109,21 @@ for file in $SRCDIR/$ARCHDIR/*.c; do fi done +# Generate Makefile in generic arch dir +mkdir -p arch/generic + +sed < $SRCDIR/arch/generic/Makefile.in " +/^CC *=/s#=.*#=$CC# +/^CFLAGS *=/s#=.*#=$CFLAGS# +/^SFLAGS *=/s#=.*#=$SFLAGS# +/^INCLUDES *=/s#=.*#=$INCLUDES# +/^SRCDIR *=/s#=.*#=$SRCDIR/arch/generic# +/^SRCTOP *=/s#=.*#=$SRCDIR# +/^BUILDDIR *=/s#=.*#=$BUILDDIR# +" > arch/generic/Makefile + +## TODO: Process header dependencies + # Emscripten does not support large amounts of data via stdin/out # https://github.com/emscripten-core/emscripten/issues/16755#issuecomment-1102732849 if test "$CHOST" != "wasm32"; then diff --git a/cpu_features.h b/cpu_features.h index 00fa6c74..25c6b439 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -6,7 +6,6 @@ #ifndef CPU_FEATURES_H_ #define CPU_FEATURES_H_ -#include "adler32_fold.h" #include "crc32_fold.h" #if defined(X86_FEATURES) diff --git a/deflate.h b/deflate.h index 8001b47c..55053b7f 100644 --- a/deflate.h +++ b/deflate.h @@ -12,7 +12,6 @@ #include "zutil.h" #include "zendian.h" -#include "adler32_fold.h" #include "crc32_fold.h" /* define NO_GZIP when compiling if you want to disable gzip header and diff --git a/functable.h b/functable.h index 9f78188e..9f5dad90 100644 --- a/functable.h +++ b/functable.h @@ -8,7 +8,7 @@ #include "deflate.h" #include "crc32_fold.h" -#include "adler32_fold.h" +#include "adler32_fold_c.h" #ifdef ZLIB_COMPAT typedef struct z_stream_s z_stream; diff --git a/inflate.h b/inflate.h index 39cdf5d6..7a90c5ae 100644 --- a/inflate.h +++ b/inflate.h @@ -11,7 +11,6 @@ #ifndef INFLATE_H_ #define INFLATE_H_ -#include "adler32_fold.h" #include "crc32_fold.h" /* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate(). diff --git a/win32/Makefile.a64 b/win32/Makefile.a64 index ec4a1cc1..f099cb06 100644 --- a/win32/Makefile.a64 +++ b/win32/Makefile.a64 @@ -44,7 +44,8 @@ SUFFIX = OBJS = \ adler32.obj \ - adler32_fold.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ arm_features.obj \ chunkset.obj \ compare256.obj \ @@ -169,6 +170,9 @@ gzwrite2.obj: gzwrite.c {$(TOP)/arch/arm}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< @@ -176,8 +180,9 @@ $(TOP)/zconf$(SUFFIX).h: zconf SRCDIR = $(TOP) # Keep the dependences in sync with top-level Makefile.in -adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h +adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h diff --git a/win32/Makefile.arm b/win32/Makefile.arm index 4c7813f5..4cdd8952 100644 --- a/win32/Makefile.arm +++ b/win32/Makefile.arm @@ -49,7 +49,8 @@ SUFFIX = OBJS = \ adler32.obj \ - adler32_fold.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ arm_features.obj \ chunkset.obj \ compare256.obj \ @@ -190,6 +191,9 @@ gzwrite2.obj: gzwrite.c {$(TOP)/arch/arm}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< @@ -197,8 +201,9 @@ $(TOP)/zconf$(SUFFIX).h: zconf SRCDIR = $(TOP) # Keep the dependences in sync with top-level Makefile.in -adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h +adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h diff --git a/win32/Makefile.msc b/win32/Makefile.msc index 044cc596..470a3791 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -47,12 +47,13 @@ SUFFIX = OBJS = \ adler32.obj \ + adler32_c.obj \ adler32_avx2.obj \ adler32_avx512.obj \ adler32_avx512_vnni.obj \ adler32_sse42.obj \ adler32_ssse3.obj \ - adler32_fold.obj \ + adler32_fold_c.obj \ chunkset.obj \ chunkset_avx2.obj \ chunkset_sse2.obj \ @@ -178,6 +179,9 @@ gzwrite2.obj: gzwrite.c {$(TOP)/arch/x86}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + {$(TOP)/test}.c.obj: $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< @@ -185,15 +189,16 @@ $(TOP)/zconf$(SUFFIX).h: zconf SRCDIR = $(TOP) # Keep the dependences in sync with top-level Makefile.in -adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h adler32_avx2.obj: $(SRCDIR)/arch/x86/adler32_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/fallback_builtins.h adler32_avx512.obj: $(SRCDIR)/arch/x86/adler32_avx512.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h adler32_avx512_vnni.obj: $(SRCDIR)/arch/x86/adler32_avx512_vnni.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h -adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ +adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h \ $(SRCDIR)/arch/x86/adler32_ssse3_p.h -adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ +adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h \ $(SRCDIR)/arch/x86/adler32_ssse3_p.h -adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h +adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h