git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Since we long ago made unaligned reads safe (by using memcpy or intrinsics),
author: Hans Kristian Rosbach <hk-git@circlestorm.org>
Tue, 17 Dec 2024 22:02:32 +0000 (23:02 +0100)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 20 Dec 2024 23:46:48 +0000 (00:46 +0100)
it is time to replace the UNALIGNED_OK checks, which have since really only been
used to select the optimal comparison sizes for the arch, with OPTIMAL_CMP checks instead.

17 files changed:
.github/workflows/cmake.yml
CMakeLists.txt
README.md
arch/generic/compare256_c.c
arch/generic/generic_functions.h
chunkset_tpl.h
cmake/detect-sanitizer.cmake
compare256_rle.h
configure
deflate_rle.c
insert_string_tpl.h
match_tpl.h
test/benchmarks/benchmark_compare256.cc
test/benchmarks/benchmark_compare256_rle.cc
test/test_compare256.cc
test/test_compare256_rle.cc
zbuild.h

index a0f3bc57faaf49ec4999217bff35bc78b728c094..02f98272ed9105b51faccc12ac2087e9dd352fc0 100644 (file)
@@ -81,11 +81,11 @@ jobs:
             build-src-dir: ../zlib-ng/test/add-subdirectory-project
             readonly-project-dir: true
 
-          - name: Ubuntu GCC -O1 No Unaligned UBSAN
+          - name: Ubuntu GCC -O1 UBSAN
             os: ubuntu-latest
             compiler: gcc
             cxx-compiler: g++
-            cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
+            cmake-args: -DWITH_SANITIZER=Undefined
             codecov: ubuntu_gcc_o1
             cflags: -O1
 
index 11e9ae5579bd004b0fb669415aa5160b77ef6991..01edc15376f4a19aebc4880d7ae58e428be1da3c 100644 (file)
@@ -93,7 +93,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
 option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
 option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
 option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
-option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)
 
 set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
 Useful when embedding into a larger library.
@@ -147,7 +146,6 @@ mark_as_advanced(FORCE
     WITH_RVV
     WITH_INFLATE_STRICT
     WITH_INFLATE_ALLOW_INVALID_DIST
-    WITH_UNALIGNED
     INSTALL_UTILS
     )
 
@@ -336,12 +334,6 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
     endforeach()
 endif()
 
-# Set architecture alignment requirements
-if(NOT WITH_UNALIGNED)
-    add_definitions(-DNO_UNALIGNED)
-    message(STATUS "Unaligned reads manually disabled")
-endif()
-
 # Apply warning compiler flags
 if(WITH_MAINTAINER_WARNINGS)
     add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
index 411621b52ffbc68deba41977095c6d2df8e3c531..28aad7f1dc4518c9b0edae6a0d0ed57ed3956789 100644 (file)
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Features
   * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
   * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
   * Support for hardware-accelerated deflate using IBM Z DFLTCC
-* Unaligned memory read/writes and large bit buffer improvements
+* Safe unaligned memory read/writes and large bit buffer improvements
 * Includes improvements from Cloudflare and Intel forks
 * Configure, CMake, and NMake build system support
 * Comprehensive set of CMake unit tests
@@ -213,7 +213,6 @@ Advanced Build Options
 | WITH_CRC32_VX                   | --without-crc32-vx    | Build with vectorized CRC32 on IBM Z                                | ON                     |
 | WITH_DFLTCC_DEFLATE             | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z               | OFF                    |
 | WITH_DFLTCC_INFLATE             | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z             | OFF                    |
-| WITH_UNALIGNED                  | --without-unaligned   | Allow optimizations that use unaligned reads if safe on current arch| ON                     |
 | WITH_INFLATE_STRICT             |                       | Build with strict inflate distance checking                         | OFF                    |
 | WITH_INFLATE_ALLOW_INVALID_DIST |                       | Build with zero fill for inflate invalid distances                  | OFF                    |
 | INSTALL_UTILS                   |                       | Copy minigzip and minideflate during install                        | OFF                    |
index 0c12cb3a4ec4be52b447c7f13ee5fcae584cc90a..3704c2f6cc5ca8f85376528a817d58f959c145d1 100644 (file)
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 
 #include "match_tpl.h"
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *
 
 #endif
 
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-/* UNALIGNED64_OK, 64-bit integer comparison */
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+/* 64-bit integer comparison */
 static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
 
index e243f32665d52eb77d6ad87e2c2b9ddda7518f69..eaba70c31d131e7a2188e69b7ee21bdb3ee6a5d5 100644 (file)
@@ -28,13 +28,13 @@ void     inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+    uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 #  ifdef HAVE_BUILTIN_CTZ
-    uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
+        uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
 #  endif
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-    uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+        uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
 #  endif
 #endif
 
@@ -43,29 +43,24 @@ typedef void (*slide_hash_func)(deflate_state *s);
 void     slide_hash_c(deflate_state *s);
 
 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
-#  if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
     uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
-#    ifdef HAVE_BUILTIN_CTZ
+    uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
+#  ifdef HAVE_BUILTIN_CTZ
         uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
-#    endif
-#    if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-        uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
-#    endif
+        uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
 #  endif
-
-uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-#  if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-    uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
-    uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
-#    ifdef UNALIGNED64_OK
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+        uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
         uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
-#    endif
 #  endif
+#endif
 
 
 // Select generic implementation for longest_match, longest_match_slow, compare256 functions.
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define longest_match_generic longest_match_unaligned_64
 #    define longest_match_slow_generic longest_match_slow_unaligned_64
 #    define compare256_generic compare256_unaligned_64
index 5d4cacbd9d42eabf108266d56a2c911e1ba406d1..383b4d8f84b1120c1affbe1738cca5a8ca95594b 100644 (file)
@@ -227,17 +227,15 @@ rem_bytes:
 }
 
 Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
-#if !defined(UNALIGNED64_OK)
-#  if !defined(UNALIGNED_OK)
+#if OPTIMAL_CMP < 32
     static const uint32_t align_mask = 7;
-#  else
+#elif OPTIMAL_CMP == 32
     static const uint32_t align_mask = 3;
-#  endif
 #endif
 
     len = MIN(len, left);
 
-#if !defined(UNALIGNED64_OK)
+#if OPTIMAL_CMP < 64
     while (((uintptr_t)out & align_mask) && (len > 0)) {
         *out++ = *from++;
         --len;
index f9521ec2f541005ce0883678eecf546f93cfdc19..b71c1a37f37d49acc5beef0a6460577b17e95e72 100644 (file)
@@ -111,6 +111,7 @@ endmacro()
 
 macro(add_undefined_sanitizer)
     set(known_checks
+        alignment
         array-bounds
         bool
         bounds
@@ -137,10 +138,6 @@ macro(add_undefined_sanitizer)
         vptr
         )
 
-    # Only check for alignment sanitizer flag if unaligned access is not supported
-    if(NOT WITH_UNALIGNED)
-        list(APPEND known_checks alignment)
-    endif()
     # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
     if(NOT CMAKE_C_FLAGS MATCHES "-O0")
         list(APPEND known_checks object-size)
@@ -153,12 +150,6 @@ macro(add_undefined_sanitizer)
         add_compile_options(-fsanitize=${supported_checks})
         add_link_options(-fsanitize=${supported_checks})
 
-        # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
-        # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
-        if(WITH_UNALIGNED)
-            add_compile_options(-fno-sanitize=alignment)
-        endif()
-
         add_common_sanitizer_flags()
     else()
         message(STATUS "Undefined behavior sanitizer is not supported")
index 0f3998d4a3f3840ac5392bff6abc63467710d0d7..ccfbeba2a635bd9e20253d933b7f4d52d002ca22 100644 (file)
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
     return 256;
 }
 
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
 
 #endif
 
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 /* 64-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
     uint32_t src0_cmp32, len = 0;
index e6270ae035079d82f21befcd1d1802620afe71c2..5cd8163ada074bf2c9fdea892e9c95e83a4f3341 100755 (executable)
--- a/configure
+++ b/configure
@@ -87,7 +87,6 @@ mandir=${mandir-'${prefix}/share/man'}
 shared_ext='.so'
 shared=1
 gzfileops=1
-unalignedok=1
 compat=0
 cover=0
 build32=0
@@ -164,7 +163,6 @@ case "$1" in
       echo '    [--warn]                    Enables extra compiler warnings' | tee -a configure.log
       echo '    [--debug]                   Enables extra debug prints during operation' | tee -a configure.log
       echo '    [--zlib-compat]             Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
-      echo '    [--without-unaligned]       Compiles without fast unaligned access' | tee -a configure.log
       echo '    [--without-gzfileops]       Compiles without the gzfile parts of the API enabled' | tee -a configure.log
       echo '    [--without-optimizations]   Compiles without support for optional instruction sets' | tee -a configure.log
       echo '    [--without-new-strategies]  Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -195,7 +193,6 @@ case "$1" in
     -s* | --shared | --enable-shared) shared=1; shift ;;
     -t | --static) shared=0; shift ;;
     --zlib-compat) compat=1; shift ;;
-    --without-unaligned) unalignedok=0; shift ;;
     --without-gzfileops) gzfileops=0; shift ;;
     --cover) cover=1; shift ;;
     -3* | --32) build32=1; shift ;;
@@ -876,13 +873,6 @@ else
   PIC_TESTOBJG="\$(OBJG)"
 fi
 
-# set architecture alignment requirements
-if test $unalignedok -eq 0; then
-  CFLAGS="${CFLAGS} -DNO_UNALIGNED"
-  SFLAGS="${SFLAGS} -DNO_UNALIGNED"
-  echo "Unaligned reads manually disabled." | tee -a configure.log
-fi
-
 # enable reduced memory configuration
 if test $reducedmem -eq 1; then
   echo "Configuring for reduced memory environment." | tee -a configure.log
index e8e501b1d229f963dd970088bdf6cfa793b99cc3..551fe02a062772a059577c5542ef4f9714d626e0 100644 (file)
@@ -10,8 +10,8 @@
 #include "deflate_p.h"
 #include "functable.h"
 
-#ifdef UNALIGNED_OK
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if OPTIMAL_CMP >= 32
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define compare256_rle compare256_rle_unaligned_64
 #  elif defined(HAVE_BUILTIN_CTZ)
 #    define compare256_rle compare256_rle_unaligned_32
index a5685c4ed7de3da269687d13b57f035011e8fd66..e7037c04e69b9a713d6b9b0ce07a84f3e2da0929 100644 (file)
 #  define HASH_CALC_MASK HASH_MASK
 #endif
 #ifndef HASH_CALC_READ
-#  ifdef UNALIGNED_OK
-#    if BYTE_ORDER == LITTLE_ENDIAN
-#      define HASH_CALC_READ \
-          memcpy(&val, strstart, sizeof(val));
-#    else
-#      define HASH_CALC_READ \
-          memcpy(&val, strstart, sizeof(val)); \
-          val = ZSWAP32(val);
-#    endif
+#  if BYTE_ORDER == LITTLE_ENDIAN
+#    define HASH_CALC_READ \
+        memcpy(&val, strstart, sizeof(val));
 #  else
 #    define HASH_CALC_READ \
-        val  = ((uint32_t)(strstart[0])); \
-        val |= ((uint32_t)(strstart[1]) << 8); \
-        val |= ((uint32_t)(strstart[2]) << 16); \
-        val |= ((uint32_t)(strstart[3]) << 24);
+        memcpy(&val, strstart, sizeof(val)); \
+        val = ZSWAP32(val);
 #  endif
 #endif
 
index 9c258242cd7640bc6c1eabcb6287a622357eb45c..f44da750fb355ccbae1fe0c015943115878f1a10 100644 (file)
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     uint32_t chain_length, nice_match, best_len, offset;
     uint32_t lookahead = s->lookahead;
     Pos match_offset = 0;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     uint8_t scan_start[8];
 #endif
     uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      * to find the next best match length.
      */
     offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     if (best_len >= sizeof(uint32_t)) {
         offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
         if (best_len >= sizeof(uint64_t))
             offset -= 4;
 #endif
     }
 #endif
 
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
     memcpy(scan_start, scan, sizeof(uint64_t));
     memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
     memcpy(scan_start, scan, sizeof(uint32_t));
     memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
@@ -138,7 +138,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
          * that depend on those values. However the length of the match is limited to the
          * lookahead, so the output of deflate is not affected by the uninitialized values.
          */
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
         if (best_len < sizeof(uint32_t)) {
             for (;;) {
                 if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
@@ -146,7 +146,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
                     break;
                 GOTO_NEXT_CHAIN;
             }
-#  ifdef UNALIGNED64_OK
+#  if OPTIMAL_CMP >= 64
         } else if (best_len >= sizeof(uint64_t)) {
             for (;;) {
                 if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
                 return best_len;
 
             offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
             if (best_len >= sizeof(uint32_t)) {
                 offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
                 if (best_len >= sizeof(uint64_t))
                     offset -= 4;
 #endif
             }
 #endif
 
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
             memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
             memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
             scan_end[0] = *(scan+offset);
index efdbbacc9fc1f0ab8f65c27af54610d09b8da4e1..a9aa0fca65e0663d12ef06727089960cba73e17e 100644 (file)
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
 BENCHMARK_COMPARE256(native, native_compare256, 1);
 #else
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
-#endif
+#  endif
 #endif
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
index 3b1eced453a2e8f4ff12429d4ca54c1b548f3d63..9eb299f3b4428b72258f2269627eddfd90cf401e 100644 (file)
@@ -61,12 +61,12 @@ public:
 
 BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);
 
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
-#endif
+#  endif
 #endif
index e1662cdf04953938752f44f37c4cdc4ee8159cd7..97e28470370bfb10529cce2caf0040d5f25a36dc 100644 (file)
@@ -65,15 +65,16 @@ TEST_COMPARE256(c, compare256_c, 1)
 TEST_COMPARE256(native, native_compare256, 1)
 #else
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(unaligned_32, compare256_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
+#  endif
 #endif
-#endif
+
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
 #endif
index 5333ce76894f2dcb7ccdd4143b28a30fe353e03c..6c5d9d4f8fc282aa42fd97d9b07af55adde6fbd6 100644 (file)
@@ -52,12 +52,12 @@ static inline void compare256_rle_match_check(compare256_rle_func compare256_rle
 
 TEST_COMPARE256_RLE(c, compare256_rle_c, 1)
 
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1)
-#endif
+#  endif
 #endif
index 0023a235d2a091ce3a68ddff2703f4b14adb137b..4d3fc5f2e4ec00e40c9ce3543143ddf4ef64f0cf 100644 (file)
--- a/zbuild.h
+++ b/zbuild.h
 #  define Tracecv(c, x)
 #endif
 
-#ifndef NO_UNALIGNED
-#  if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
-#    define UNALIGNED_OK
-#    define UNALIGNED64_OK
-#  elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
-        defined(__i686__) || defined(_X86_) || defined(_M_IX86)
-#    define UNALIGNED_OK
-#  elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
-#    if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-#      define UNALIGNED_OK
-#      define UNALIGNED64_OK
-#    endif
-#  elif defined(__arm__) || (_M_ARM >= 7)
-#    if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-#      define UNALIGNED_OK
-#    endif
-#  elif defined(__powerpc64__) || defined(__ppc64__)
-#    if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#      define UNALIGNED_OK
-#      define UNALIGNED64_OK
-#    endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
+#  define OPTIMAL_CMP 64
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+      defined(__i686__) || defined(_X86_) || defined(_M_IX86)
+#  define OPTIMAL_CMP 32
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+#  if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+#    define OPTIMAL_CMP 64
+#  endif
+#elif defined(__arm__) || (_M_ARM >= 7)
+#  if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+#    define OPTIMAL_CMP 32
 #  endif
+#elif defined(__powerpc64__) || defined(__ppc64__)
+#  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#    define OPTIMAL_CMP 64
+#  endif
+#endif
+#if defined(NO_UNALIGNED)
+#  undef OPTIMAL_CMP
 #endif
+#if !defined(OPTIMAL_CMP)
+#  define OPTIMAL_CMP 8
+#endif
+
 
 #if defined(__has_feature)
 #  if __has_feature(address_sanitizer)