# include <nmmintrin.h>
#endif
-/* AVX2 unaligned intrinsic comparison */
-static inline uint32_t compare256_unaligned_avx2_static(const uint8_t *src0, const uint8_t *src1) {
+static inline uint32_t compare256_avx2_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    do {
    return 256;
}
-Z_INTERNAL uint32_t compare256_unaligned_avx2(const uint8_t *src0, const uint8_t *src1) {
-    return compare256_unaligned_avx2_static(src0, src1);
+Z_INTERNAL uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1) {
+    return compare256_avx2_static(src0, src1);
}
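
The body of compare256_avx2_static is elided in this hunk. For orientation only (this is not the file's actual implementation), the standard AVX2 approach to a 256-byte match scan compares 32-byte chunks with _mm256_cmpeq_epi8, collapses each comparison to a bitmask with _mm256_movemask_epi8, and finds the first mismatching byte with a count-trailing-zeros on the inverted mask. A minimal, self-contained sketch with illustrative names:

#include <stdint.h>
#include <immintrin.h>

/* Sketch: length of the common prefix of two 256-byte blocks (0..256). */
static inline uint32_t compare256_avx2_sketch(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    do {
        __m256i a = _mm256_loadu_si256((const __m256i *)(src0 + len));
        __m256i b = _mm256_loadu_si256((const __m256i *)(src1 + len));
        /* Bit i of mask is set iff byte i of the two 32-byte chunks is equal. */
        uint32_t mask = (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b));
        if (mask != 0xFFFFFFFFu)
            return len + (uint32_t)__builtin_ctz(~mask);  /* offset of first mismatch */
        len += 32;
    } while (len < 256);
    return 256;
}

The __builtin_ctz used here matches the HAVE_BUILTIN_CTZ guard these variants are compiled under.
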
-#define LONGEST_MATCH longest_match_unaligned_avx2
-#define COMPARE256 compare256_unaligned_avx2_static
+#define LONGEST_MATCH longest_match_avx2
+#define COMPARE256 compare256_avx2_static
#include "match_tpl.h"
#define LONGEST_MATCH_SLOW
-#define LONGEST_MATCH longest_match_slow_unaligned_avx2
-#define COMPARE256 compare256_unaligned_avx2_static
+#define LONGEST_MATCH longest_match_slow_avx2
+#define COMPARE256 compare256_avx2_static
#include "match_tpl.h"
#include <emmintrin.h>
-static inline uint32_t compare256_unaligned_sse2_static(const uint8_t *src0, const uint8_t *src1) {
+static inline uint32_t compare256_sse2_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    int align_offset = ((uintptr_t)src0) & 15;
    const uint8_t *end0 = src0 + 256;
return 256;
}
-Z_INTERNAL uint32_t compare256_unaligned_sse2(const uint8_t *src0, const uint8_t *src1) {
-    return compare256_unaligned_sse2_static(src0, src1);
+Z_INTERNAL uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1) {
+    return compare256_sse2_static(src0, src1);
}
-#define LONGEST_MATCH longest_match_unaligned_sse2
-#define COMPARE256 compare256_unaligned_sse2_static
+#define LONGEST_MATCH longest_match_sse2
+#define COMPARE256 compare256_sse2_static
#include "match_tpl.h"
#define LONGEST_MATCH_SLOW
-#define LONGEST_MATCH longest_match_slow_unaligned_sse2
-#define COMPARE256 compare256_unaligned_sse2_static
+#define LONGEST_MATCH longest_match_slow_sse2
+#define COMPARE256 compare256_sse2_static
#include "match_tpl.h"
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
#endif
+#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t compare256_unaligned_sse2(const uint8_t *src0, const uint8_t *src1);
+extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t compare256_unaligned_avx2(const uint8_t *src0, const uint8_t *src1);
-#endif
+extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
#endif
#ifdef DEFLATE_H_
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
#endif
+#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t longest_match_unaligned_sse2(deflate_state *const s, Pos cur_match);
+extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match);
-#endif
+extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
#endif
/* longest_match_slow */
#ifdef UNALIGNED64_OK
extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
#endif
+#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t longest_match_slow_unaligned_sse2(deflate_state *const s, Pos cur_match);
+extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t longest_match_slow_unaligned_avx2(deflate_state *const s, Pos cur_match);
-#endif
+extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
#endif
/* quick_insert_string */
# else
    functable.longest_match = &longest_match_unaligned_16;
# endif
-# if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_sse2)
-        functable.longest_match = &longest_match_unaligned_sse2;
-# endif
-# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_avx2)
-        functable.longest_match = &longest_match_unaligned_avx2;
-# endif
#else
    functable.longest_match = &longest_match_c;
#endif
+#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_sse2)
+        functable.longest_match = &longest_match_sse2;
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match = &longest_match_avx2;
+#endif
    return functable.longest_match(s, cur_match);
}
# else
    functable.longest_match_slow = &longest_match_slow_unaligned_16;
# endif
-# if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_sse2)
-        functable.longest_match_slow = &longest_match_slow_unaligned_sse2;
-# endif
-# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_avx2)
-        functable.longest_match_slow = &longest_match_slow_unaligned_avx2;
-# endif
#else
    functable.longest_match_slow = &longest_match_slow_c;
#endif
+#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_sse2)
+        functable.longest_match_slow = &longest_match_slow_sse2;
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match_slow = &longest_match_slow_avx2;
+#endif
    return functable.longest_match_slow(s, cur_match);
}
# else
    functable.compare256 = &compare256_unaligned_16;
# endif
-# if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_sse2)
-        functable.compare256 = &compare256_unaligned_sse2;
-# endif
-# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_avx2)
-        functable.compare256 = &compare256_unaligned_avx2;
-# endif
#else
    functable.compare256 = &compare256_c;
#endif
+#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_sse2)
+        functable.compare256 = &compare256_sse2;
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.compare256 = &compare256_avx2;
+#endif
    return functable.compare256(src0, src1);
}
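
The three stubs above share one self-patching dispatch pattern: on first use they pick the best available implementation, overwrite their slot in functable (the portable/unaligned fallbacks first, then SSE2, then AVX2, so the last passing CPU check wins), and then call through the table; later calls go straight to the installed pointer. A minimal, self-contained sketch of the idea with hypothetical names (not zlib-ng's functable.c):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
    uint32_t (*compare256)(const uint8_t *src0, const uint8_t *src1);
} dispatch_table;

/* Portable baseline: byte-by-byte common-prefix length, capped at 256. */
static uint32_t compare256_portable(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    while (len < 256 && src0[len] == src1[len])
        len++;
    return len;
}

/* Stand-ins: a real build would probe CPU features and point at a vectorized routine. */
static int cpu_has_simd(void) { return 0; }
static uint32_t compare256_simd(const uint8_t *src0, const uint8_t *src1) {
    return compare256_portable(src0, src1);
}

static uint32_t compare256_stub(const uint8_t *src0, const uint8_t *src1);
static dispatch_table table = { compare256_stub };    /* starts out pointing at the stub */

static uint32_t compare256_stub(const uint8_t *src0, const uint8_t *src1) {
    table.compare256 = compare256_portable;           /* baseline first ... */
    if (cpu_has_simd())
        table.compare256 = compare256_simd;           /* ... later checks override it */
    return table.compare256(src0, src1);              /* forward this first call */
}

int main(void) {
    uint8_t a[256], b[256];
    memset(a, 'x', sizeof(a));
    memcpy(b, a, sizeof(b));
    b[100] = 'y';                                      /* mismatch at offset 100 */
    printf("%u\n", table.compare256(a, b));            /* first call patches the slot; prints 100 */
    return 0;
}

Ordering the checks from baseline to most capable means whichever feature test passes last leaves its pointer installed; the diff moves the SSE2/AVX2 assignments after the portable/unaligned selection (and out from under UNALIGNED_OK) so they apply on any build that compiled those variants.
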
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
#endif
+#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(unaligned_sse2, compare256_unaligned_sse2, x86_cpu_has_sse2);
+BENCHMARK_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(unaligned_avx2, compare256_unaligned_avx2, x86_cpu_has_avx2);
-#endif
+BENCHMARK_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2);
#endif
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
#endif
+#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(unaligned_sse2, compare256_unaligned_sse2, x86_cpu_has_sse2)
+TEST_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2)
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(unaligned_avx2, compare256_unaligned_avx2, x86_cpu_has_avx2)
-#endif
+TEST_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2)
#endif