if(BASEARCH_ARM_FOUND)
option(WITH_ACLE "Build with ACLE" ON)
option(WITH_NEON "Build with NEON intrinsics" ON)
+ option(WITH_ARMV6 "Build with ARMv6 SIMD" ON)
elseif(BASEARCH_PPC_FOUND)
option(WITH_ALTIVEC "Build with AltiVec (VMX) optimisations for PowerPC" ON)
option(WITH_POWER8 "Build with optimisations for POWER8" ON)
ZLIB_SYMBOL_PREFIX
WITH_REDUCED_MEM
WITH_ACLE WITH_NEON
+ WITH_ARMV6
WITH_DFLTCC_DEFLATE
WITH_DFLTCC_INFLATE
WITH_CRC32_VX
#
# Check for standard/system includes
#
+check_include_file(arm_acle.h HAVE_ARM_ACLE_H)
+if(HAVE_ARM_ACLE_H)
+ add_definitions(-DHAVE_ARM_ACLE_H)
+endif()
check_include_file(sys/auxv.h HAVE_SYS_AUXV_H)
if(HAVE_SYS_AUXV_H)
add_definitions(-DHAVE_SYS_AUXV_H)
set(WITH_NEON OFF)
endif()
endif()
+ if(WITH_ARMV6)
+ check_armv6_compiler_flag()
+ if(HAVE_ARMV6_INLINE_ASM OR HAVE_ARMV6_INTRIN)
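+      # Either inline asm or ACLE intrinsics suffices for ARM_SIMD; ARM_SIMD_INTRIN below records intrinsics support.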
+ add_definitions(-DARM_SIMD)
+ set(ARMV6_SRCS ${ARCHDIR}/slide_hash_armv6.c)
+ set_property(SOURCE ${ARMV6_SRCS} PROPERTY COMPILE_FLAGS "${ARMV6FLAG} ${NOLTOFLAG}")
+ list(APPEND ZLIB_ARCH_SRCS ${ARMV6_SRCS})
+ add_feature_info(ARMV6 1 "Support ARMv6 SIMD instructions in slide_hash, using \"${ARMV6FLAG}\"")
+ if(HAVE_ARMV6_INTRIN)
+ add_definitions(-DARM_SIMD_INTRIN)
+ endif()
+ else()
+ set(WITH_ARMV6 OFF)
+ endif()
+ else()
+ set(WITH_ARMV6 OFF)
+ endif()
elseif(BASEARCH_PPC_FOUND)
# Common arch detection code
if(WITH_ALTIVEC)
if(BASEARCH_ARM_FOUND)
add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE")
add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics")
+ add_feature_info(WITH_ARMV6 WITH_ARMV6 "Build with ARMv6 SIMD")
elseif(BASEARCH_PPC_FOUND)
add_feature_info(WITH_ALTIVEC WITH_ALTIVEC "Build with AltiVec optimisations")
add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8")
* Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
- * Slide hash implementations using SSE2, AVX2, Neon, VMX & VSX
+ * Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
* Support for hardware-accelerated deflate using IBM Z DFLTCC
| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON |
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
+| WITH_ARMV6 | --without-armv6 | Build with ARMv6 SIMD | ON |
| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON |
| WITH_POWER8 | --without-power8 | Build with POWER8 optimisations | ON |
| WITH_RVV | | Build with RVV intrinsics | ON |
ACLEFLAG=
NEONFLAG=
+ARMV6FLAG=
NOLTOFLAG=
SRCDIR=.
compare256_neon.o compare256_neon.lo \
crc32_acle.o crc32_acle.lo \
slide_hash_neon.o slide_hash_neon.lo \
+ slide_hash_armv6.o slide_hash_armv6.lo \
insert_string_acle.o insert_string_acle.lo
adler32_neon.o:
slide_hash_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c
+slide_hash_armv6.o:
+ $(CC) $(CFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c
+
+slide_hash_armv6.lo:
+ $(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c
+
insert_string_acle.o:
$(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
--- /dev/null
+#ifndef ARM_ACLE_INTRINS_H
+#define ARM_ACLE_INTRINS_H
+
+#include <stdint.h>
+#ifdef _MSC_VER
+# include <intrin.h>
+#elif defined(HAVE_ARM_ACLE_H)
+# include <arm_acle.h>
+#endif
+
+#ifdef ARM_SIMD
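+/* ARMv6 SIMD packs two 16-bit values into one 32-bit GPR; UQSUB16
+ * subtracts each halfword with unsigned saturation, clamping at zero
+ * instead of wrapping. */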
+#ifdef _MSC_VER
+typedef uint32_t uint16x2_t;
+
+#define __uqsub16 _arm_uqsub16
+#elif !defined(ARM_SIMD_INTRIN)
+typedef uint32_t uint16x2_t;
+
+static inline uint16x2_t __uqsub16(uint16x2_t __a, uint16x2_t __b) {
+ uint16x2_t __c;
+ __asm__ __volatile__("uqsub16\t%0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b));
+ return __c;
+}
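+/* A scalar model of the operation above, for reference only (hypothetical
+ * helper, not compiled in):
+ *
+ *   static uint16x2_t uqsub16_ref(uint16x2_t a, uint16x2_t b) {
+ *       uint16_t al = a & 0xFFFF, bl = b & 0xFFFF;
+ *       uint16_t ah = a >> 16,    bh = b >> 16;
+ *       uint16_t lo = al > bl ? al - bl : 0;  // saturate at 0
+ *       uint16_t hi = ah > bh ? ah - bh : 0;
+ *       return (uint32_t)lo | ((uint32_t)hi << 16);
+ *   }
+ */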
+#endif
+#endif
+
+#endif // include guard ARM_ACLE_INTRINS_H
}
#endif
+/* AArch64 does not have ARMv6 SIMD. */
+#if !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC)
+static inline int arm_has_simd(void) {
+#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
+    const char *platform = (const char *)getauxval(AT_PLATFORM);
+    /* getauxval() returns 0 when AT_PLATFORM is absent. */
+    return platform != NULL
+        && (strncmp(platform, "v6l", 3) == 0
+            || strncmp(platform, "v7l", 3) == 0
+            || strncmp(platform, "v8l", 3) == 0);
+#elif defined(ARM_NOCHECK_SIMD)
+ return 1;
+#else
+ return 0;
+#endif
+}
+#endif
+
void Z_INTERNAL arm_check_features(struct arm_cpu_features *features) {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+ features->has_simd = 0; /* never available */
features->has_neon = 1; /* always available */
#else
+ features->has_simd = arm_has_simd();
features->has_neon = arm_has_neon();
#endif
features->has_crc32 = arm_has_crc32();
#define ARM_H_
struct arm_cpu_features {
+ int has_simd;
int has_neon;
int has_crc32;
};
--- /dev/null
+/* slide_hash_armv6.c -- Optimized hash table shifting for ARMv6 with support for SIMD instructions
+ * Copyright (C) 2023 Cameron Cawley
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(ARM_SIMD)
+#include "acle_intrins.h"
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+/* SIMD version of hash_chain rebase */
+static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
+ Z_REGISTER uint16x2_t v;
+ uint16x2_t p0, p1, p2, p3;
+ Z_REGISTER size_t n;
+
+ size_t size = entries*sizeof(table[0]);
+ Assert((size % (sizeof(uint16x2_t) * 4) == 0), "hash table size err");
+
+ Assert(sizeof(Pos) == 2, "Wrong Pos size");
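+    /* Broadcast wsize into both 16-bit lanes; uqsub16 then rebases two entries per operation. */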
+ v = wsize | (wsize << 16);
+
+ n = size / (sizeof(uint16x2_t) * 4);
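+    /* Unrolled 4x: each iteration rebases 4 words = 8 positions. */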
+ do {
+ p0 = *((const uint16x2_t *)(table));
+ p1 = *((const uint16x2_t *)(table+2));
+ p2 = *((const uint16x2_t *)(table+4));
+ p3 = *((const uint16x2_t *)(table+6));
+ p0 = __uqsub16(p0, v);
+ p1 = __uqsub16(p1, v);
+ p2 = __uqsub16(p2, v);
+ p3 = __uqsub16(p3, v);
+ *((uint16x2_t *)(table)) = p0;
+ *((uint16x2_t *)(table+2)) = p1;
+ *((uint16x2_t *)(table+4)) = p2;
+ *((uint16x2_t *)(table+6)) = p3;
+ table += 8;
+ } while (--n);
+}
+
+Z_INTERNAL void slide_hash_armv6(deflate_state *s) {
+ unsigned int wsize = s->w_size;
+
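+    /* Entries older than wsize saturate to 0 (= NIL), matching the scalar slide_hash. */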
+ slide_hash_chain(s->head, HASH_SIZE, wsize);
+ slide_hash_chain(s->prev, wsize, wsize);
+}
+#endif
endif()
endmacro()
+macro(check_armv6_compiler_flag)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ if(NOT NATIVEFLAG)
+ check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
+ if(HAVE_MARCH_ARMV6)
+ set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports ARMv6 inline asm
+ set(CMAKE_REQUIRED_FLAGS "${ARMV6FLAG} ${NATIVEFLAG}")
+ check_c_source_compile_or_run(
+ "unsigned int f(unsigned int a, unsigned int b) {
+ unsigned int c;
+ __asm__ __volatile__ ( \"uqsub16 %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b) );
+          return c;
+ }
+ int main(void) { return 0; }"
+ HAVE_ARMV6_INLINE_ASM
+ )
+ # Check whether compiler supports ARMv6 intrinsics
+ check_c_source_compile_or_run(
+ "#if defined(_MSC_VER)
+ #include <intrin.h>
+ #else
+ #include <arm_acle.h>
+ #endif
+ unsigned int f(unsigned int a, unsigned int b) {
+ #if defined(_MSC_VER)
+ return _arm_uqsub16(a, b);
+ #else
+ return __uqsub16(a, b);
+ #endif
+ }
+ int main(void) { return 0; }"
+ HAVE_ARMV6_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
macro(check_avx512_intrinsics)
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
if(CMAKE_HOST_UNIX OR APPLE)
build64=0
buildvpclmulqdq=1
buildacle=1
+buildarmv6=1
buildaltivec=1
buildpower8=1
buildpower9=1
xsaveflag="-mxsave"
acleflag=
neonflag=
+armv6flag=
noltoflag="-fno-lto"
vgfmaflag="-march=z13"
vmxflag="-maltivec"
echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
echo ' [--without-acle] Compiles without ARM C Language Extensions' | tee -a configure.log
echo ' [--without-neon] Compiles without ARM Neon SIMD instruction set' | tee -a configure.log
+ echo ' [--without-armv6] Compiles without ARMv6 SIMD instruction set' | tee -a configure.log
echo ' [--without-altivec] Compiles without PPC AltiVec support' | tee -a configure.log
echo ' [--without-power8] Compiles without Power8 instruction set' | tee -a configure.log
echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log
--without-vpclmulqdq) buildvpclmulqdq=0; shift ;;
--without-acle) buildacle=0; shift ;;
--without-neon) buildneon=0; shift ;;
+ --without-armv6) buildarmv6=0; shift ;;
--without-altivec) buildaltivec=0 ; shift ;;
--without-power8) buildpower8=0 ; shift ;;
--without-power9) buildpower9=0 ; shift ;;
fi
}
+check_armv6_intrinsics() {
+ # Check whether -march=armv6 works correctly
+ cat > $test.c << EOF
+int main() { return 0; }
+EOF
+ if try $CC -c $CFLAGS -march=armv6 $test.c; then
+ armv6flag=-march=armv6
+ echo "Check whether -march=armv6 works ... Yes." | tee -a configure.log
+ else
+ echo "Check whether -march=armv6 works ... No." | tee -a configure.log
+ fi
+
+ # Check whether compiler supports ARMv6 inline asm
+ cat > $test.c << EOF
+unsigned int f(unsigned int a, unsigned int b) {
+ unsigned int c;
+ __asm__ __volatile__ ( "uqsub16 %0, %1, %2" : "=r" (c) : "r" (a), "r" (b) );
+ return c;
+}
+int main(void) { return 0; }
+EOF
+ if try ${CC} ${CFLAGS} ${armv6flag} $test.c; then
+ echo "Checking for ARMv6 inline assembly ... Yes." | tee -a configure.log
+ HAVE_ARMV6_INLINE_ASM=1
+ else
+ echo "Checking for ARMv6 inline assembly ... No." | tee -a configure.log
+ HAVE_ARMV6_INLINE_ASM=0
+ fi
+
+ # Check whether compiler supports ARMv6 intrinsics
+ cat > $test.c << EOF
+#include <arm_acle.h>
+unsigned int f(unsigned int a, unsigned int b) {
+ return __uqsub16(a, b);
+}
+int main(void) { return 0; }
+EOF
+ if try ${CC} ${CFLAGS} ${armv6flag} $test.c; then
+ echo "Checking for ARMv6 intrinsics ... Yes." | tee -a configure.log
+ HAVE_ARMV6_INTRIN=1
+ else
+ echo "Checking for ARMv6 intrinsics ... No." | tee -a configure.log
+ HAVE_ARMV6_INTRIN=0
+ fi
+}
+
check_pclmulqdq_intrinsics() {
# Check whether compiler supports PCLMULQDQ intrinsics
cat > $test.c << EOF
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} arm_features.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} arm_features.lo"
+ cat > $test.c <<EOF
+#include <arm_acle.h>
+EOF
+ if try $CC -c $CFLAGS $test.c; then
+ echo "Checking for arm_acle.h... Yes." | tee -a configure.log
+ CFLAGS="${CFLAGS} -DHAVE_ARM_ACLE_H"
+ SFLAGS="${SFLAGS} -DHAVE_ARM_ACLE_H"
+ else
+ echo "Checking for arm_acle.h... No." | tee -a configure.log
+ fi
+
if test $LINUX -eq 1; then
if test "$ARCH" = "aarch64"; then
cat > $test.c <<EOF
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
fi
fi
+
+ if test $buildarmv6 -eq 1; then
+ check_armv6_intrinsics
+
+ if test $HAVE_ARMV6_INTRIN -eq 1 || test $HAVE_ARMV6_INLINE_ASM -eq 1; then
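+            # ARM_SIMD selects the ARMv6 code path; ARM_SIMD_INTRIN additionally
+            # prefers <arm_acle.h> intrinsics over the inline-asm fallback.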
+ CFLAGS="${CFLAGS} -DARM_SIMD"
+ SFLAGS="${SFLAGS} -DARM_SIMD"
+
+ if test $HAVE_ARMV6_INTRIN -eq 1; then
+ CFLAGS="${CFLAGS} -DARM_SIMD_INTRIN"
+ SFLAGS="${SFLAGS} -DARM_SIMD_INTRIN"
+ fi
+
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_armv6.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_armv6.lo"
+ fi
+ fi
+
fi
;;
powerpc*)
echo xsaveflag = $xsaveflag >> configure.log
echo acleflag = $acleflag >> configure.log
echo neonflag = $neonflag >> configure.log
+echo armv6flag = $armv6flag >> configure.log
echo ARCHDIR = ${ARCHDIR} >> configure.log
echo ARCH_STATIC_OBJS = ${ARCH_STATIC_OBJS} >> configure.log
echo ARCH_SHARED_OBJS = ${ARCH_SHARED_OBJS} >> configure.log
/^XSAVEFLAG *=/s#=.*#=$xsaveflag#
/^ACLEFLAG *=/s#=.*#=$acleflag#
/^NEONFLAG *=/s#=.*#=$neonflag#
+/^ARMV6FLAG *=/s#=.*#=$armv6flag#
/^NOLTOFLAG *=/s#=.*#=$noltoflag#
/^VGFMAFLAG *=/s#=.*#=$vgfmaflag#
/^PPCFLAGS *=/s#=.*#=$vmxflag#
#ifdef X86_SSE2
extern void slide_hash_sse2(deflate_state *s);
-#elif defined(ARM_NEON)
+#endif
+#if defined(ARM_SIMD)
+extern void slide_hash_armv6(deflate_state *s);
+#endif
+#if defined(ARM_NEON)
extern void slide_hash_neon(deflate_state *s);
#endif
#if defined(PPC_VMX)
#endif
+ // ARM - SIMD
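+    // Registered before the NEON block below, which overrides slide_hash on cores that also have NEON.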
+#ifdef ARM_SIMD
+# ifndef ARM_NOCHECK_SIMD
+ if (cf.arm.has_simd)
+# endif
+ {
+ ft.slide_hash = &slide_hash_armv6;
+ }
+#endif
// ARM - NEON
#ifdef ARM_NEON
# ifndef ARM_NOCHECK_NEON
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
+#ifdef ARM_SIMD
+BENCHMARK_SLIDEHASH(armv6, slide_hash_armv6, test_cpu_features.arm.has_simd);
+#endif
#ifdef ARM_NEON
BENCHMARK_SLIDEHASH(neon, slide_hash_neon, test_cpu_features.arm.has_neon);
#endif
ZLIB_COMPAT =
WITH_ACLE =
WITH_NEON =
+WITH_ARMV6 =
WITH_VFPV3 =
NEON_ARCH = /arch:VFPv4
SUFFIX =
#
OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
!endif
+!if "$(WITH_ARMV6)" != ""
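+# ARM_NOCHECK_SIMD skips the runtime feature check; MSVC ARM builds assume ARMv6 SIMD is available.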
+WFLAGS = $(WFLAGS) \
+ -DARM_SIMD \
+ -DARM_NOCHECK_SIMD \
+ #
+OBJS = $(OBJS) slide_hash_armv6.obj
+!endif
# targets
all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \