#endif
/* UNALIGNED_OK, AVX2 intrinsic comparison */
-static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
- const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 256;
-
+static inline int32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
+ int32_t len = 0;
+
do {
__m256i ymm_src0, ymm_src1, ymm_cmp;
ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
int mask = _mm256_movemask_epi8(ymm_cmp);
if ((unsigned int)mask != 0xFFFFFFFF) {
int match_byte = __builtin_ctz(~mask); /* Invert bits so identical = 0 */
- return (int32_t)(src0 - src0start + match_byte);
+ return (int32_t)(len + match_byte);
}
- src0 += 32, src1 += 32;
+ src0 += 32, src1 += 32, len += 32;
ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
ymm_src1 = _mm256_loadu_si256((__m256i*)src1);
mask = _mm256_movemask_epi8(ymm_cmp);
if ((unsigned int)mask != 0xFFFFFFFF) {
int match_byte = __builtin_ctz(~mask);
- return (int32_t)(src0 - src0start + match_byte);
+ return (int32_t)(len + match_byte);
}
- src0 += 32, src1 += 32;
- } while (src0 < src0end);
+ src0 += 32, src1 += 32, len += 32;
+ } while (len < 256);
- if (*(uint16_t *)src0 == *(uint16_t *)src1)
- src0 += 2, src1 += 2;
- else if (*src0 == *src1)
- src0 += 1, src1 += 1;
+ return len;
+}
+
+static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ return (*src0 == *src1);
+
+ src0 += 2, src1 += 2;
+ if (*src0 != *src1)
+ return 2;
+ if (src0[1] != src1[1])
+ return 3;
- return (int32_t)(src0 - src0start);
+ return compare256_unaligned_avx2_static(src0, src1) + 2;
}
int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
}
#define LONGEST_MATCH longest_match_unaligned_avx2
+#define COMPARE256 compare256_unaligned_avx2_static
#define COMPARE258 compare258_unaligned_avx2_static
#include "match_p.h"
#endif
/* UNALIGNED_OK, SSE4.2 intrinsic comparison */
-static inline int32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
-#ifdef _MSC_VER
- const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 256;
+static inline int32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
+ int32_t len = 0;
do {
#define mode _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY
xmm_src1 = _mm_loadu_si128((__m128i *)src1);
ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
- return (int32_t)(src0 - src0start + ret);
+ return (int32_t)(len + ret);
}
- src0 += 16, src1 += 16;
+ src0 += 16, src1 += 16, len += 16;
xmm_src0 = _mm_loadu_si128((__m128i *)src0);
xmm_src1 = _mm_loadu_si128((__m128i *)src1);
ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
- return (int32_t)(src0 - src0start + ret);
+ return (int32_t)(len + ret);
}
- src0 += 16, src1 += 16;
- } while (src0 < src0end);
-
- if (*(uint16_t *)src0 == *(uint16_t *)src1)
- src0 += 2, src1 += 2;
- else if (*src0 == *src1)
- src0 += 1, src1 += 1;
-
- return (int32_t)(src0 - src0start);
-#else
- uintptr_t ax, dx, cx;
- __m128i xmm_src0;
-
- ax = 16;
- dx = 16;
- /* Set cx to something, otherwise gcc thinks it's used
- uninitalised */
- cx = 0;
-
- __asm__ __volatile__ (
- "1:"
- "movdqu -16(%[src0], %[ax]), %[xmm_src0]\n\t"
- "pcmpestri $0x18, -16(%[src1], %[ax]), %[xmm_src0]\n\t"
- "jc 2f\n\t"
- "add $16, %[ax]\n\t"
-
- "movdqu -16(%[src0], %[ax]), %[xmm_src0]\n\t"
- "pcmpestri $0x18, -16(%[src1], %[ax]), %[xmm_src0]\n\t"
- "jc 2f\n\t"
- "add $16, %[ax]\n\t"
-
- "cmp $256 + 16, %[ax]\n\t"
- "jb 1b\n\t"
-
-# if !defined(__x86_64__)
- "movzwl -16(%[src0], %[ax]), %[dx]\n\t"
-# else
- "movzwq -16(%[src0], %[ax]), %[dx]\n\t"
-# endif
- "xorw -16(%[src1], %[ax]), %%dx\n\t"
- "jnz 3f\n\t"
-
- "add $2, %[ax]\n\t"
- "jmp 4f\n\t"
- "3:\n\t"
- "rep; bsf %[dx], %[cx]\n\t"
- "shr $3, %[cx]\n\t"
- "2:"
- "add %[cx], %[ax]\n\t"
- "4:"
- : [ax] "+a" (ax),
- [cx] "+c" (cx),
- [dx] "+d" (dx),
- [xmm_src0] "=x" (xmm_src0)
- : [src0] "r" (src0),
- [src1] "r" (src1)
- : "cc"
- );
- return (int32_t)(ax - 16);
-#endif
+ src0 += 16, src1 += 16, len += 16;
+ } while (len < 256);
+
+ return len;
+}
+
+static inline int32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ return (*src0 == *src1);
+
+ src0 += 2, src1 += 2;
+
+ if (*src0 != *src1)
+ return 2;
+ if (src0[1] != src1[1])
+ return 3;
+
+ return compare256_unaligned_sse4_static(src0, src1) + 2;
}
int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
}
#define LONGEST_MATCH longest_match_unaligned_sse4
+#define COMPARE256 compare256_unaligned_sse4_static
#define COMPARE258 compare258_unaligned_sse4_static
#include "match_p.h"
#include "fallback_builtins.h"
/* ALIGNED, byte comparison */
-static inline int32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
+static inline int32_t compare256_c_static(const unsigned char *src0, const unsigned char *src1) {
const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 258;
+ const unsigned char *src0end = src0 + 256;
do {
if (*src0 != *src1)
if (*src0 != *src1)
break;
src0 += 1, src1 += 1;
- if (*src0 != *src1)
- break;
- src0 += 1, src1 += 1;
- if (*src0 != *src1)
- break;
- src0 += 1, src1 += 1;
} while (src0 < src0end);
-
+
return (int32_t)(src0 - src0start);
}
+static inline int32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*src0 != *src1)
+ return 0;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ return 1;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ return 2;
+
+ return compare256_c_static(src0, src1) + 2;
+}
+
int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
return compare258_c_static(src0, src1);
}
#define LONGEST_MATCH longest_match_c
+#define COMPARE256 compare256_c_static
#define COMPARE258 compare258_c_static
#include "match_p.h"
#ifdef UNALIGNED_OK
/* UNALIGNED_OK, 16-bit integer comparison */
-static inline int32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
+static inline int32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 258;
+ const unsigned char *src0end = src0 + 256;
do {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (*(uint16_t *)src0 != *(uint16_t *)src1)
break;
src0 += 2, src1 += 2;
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ break;
+ src0 += 2, src1 += 2;
} while (src0 < src0end);
- if (*src0 == *src1)
- src0 += 1;
+ return (int32_t)(src0 - src0start) + (src0 < src0end && *src0 == *src1);
+}
- return (int32_t)(src0 - src0start);
+static inline int32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ return (*src0 == *src1);
+
+ return compare256_unaligned_16_static(src0+2, src1+2) + 2;
}
int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
}
#define LONGEST_MATCH longest_match_unaligned_16
+#define COMPARE256 compare256_unaligned_16_static
#define COMPARE258 compare258_unaligned_16_static
#include "match_p.h"
#ifdef HAVE_BUILTIN_CTZ
/* UNALIGNED_OK, 32-bit integer comparison */
-static inline int32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
- const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 256;
+static inline int32_t compare256_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
+ int32_t len = 0;
do {
uint32_t sv = *(uint32_t *)src0;
if (xor) {
uint32_t match_byte = __builtin_ctz(xor) / 8;
- return (int32_t)(src0 - src0start + match_byte);
+ return (int32_t)(len + match_byte);
}
- src0 += 4, src1 += 4;
- } while (src0 < src0end);
+ src0 += 4, src1 += 4, len += 4;
+ } while (len < 256);
- if (*(uint16_t *)src0 == *(uint16_t *)src1)
- src0 += 2, src1 += 2;
- else if (*src0 == *src1)
- src0 += 1, src1 += 1;
+ return len;
+}
- return (int32_t)(src0 - src0start);
+static inline int32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ return (*src0 == *src1);
+
+ src0 += 2, src1 += 2;
+ if (*src0 != *src1)
+ return 2;
+ if (src0[1] != src1[1])
+ return 3;
+
+ return compare256_unaligned_32_static(src0, src1) + 2;
}
int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
}
#define LONGEST_MATCH longest_match_unaligned_32
+#define COMPARE256 compare256_unaligned_32_static
#define COMPARE258 compare258_unaligned_32_static
#include "match_p.h"
#ifdef HAVE_BUILTIN_CTZLL
/* UNALIGNED_OK, 64-bit integer comparison */
-static inline int32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
- const unsigned char *src0start = src0;
- const unsigned char *src0end = src0 + 256;
+static inline int32_t compare256_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
+ int32_t len = 0;
do {
uint64_t sv = *(uint64_t *)src0;
if (xor) {
uint64_t match_byte = __builtin_ctzll(xor) / 8;
- return (int32_t)(src0 - src0start + match_byte);
+ return (int32_t)(len + match_byte);
}
- src0 += 8, src1 += 8;
- } while (src0 < src0end);
+ src0 += 8, src1 += 8, len += 8;
+ } while (len < 256);
- if (*(uint16_t *)src0 == *(uint16_t *)src1)
- src0 += 2, src1 += 2;
- else if (*src0 == *src1)
- src0 += 1, src1 += 1;
+ return len;
+}
- return (int32_t)(src0 - src0start);
+static inline int32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ return (*src0 == *src1);
+
+ src0 += 2, src1 += 2;
+ if (*src0 != *src1)
+ return 2;
+ if (src0[1] != src1[1])
+ return 3;
+
+ return compare256_unaligned_64_static(src0, src1) + 2;
}
int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
}
#define LONGEST_MATCH longest_match_unaligned_64
+#define COMPARE256 compare256_unaligned_64_static
#define COMPARE258 compare258_unaligned_64_static
#include "match_p.h"
if (!cont)
break;
+#if MIN_MATCH >= 2 && defined(UNALIGNED_OK)
+ len = COMPARE256(scan+2, match+2) + 2;
+#else
len = COMPARE258(scan, match);
+#endif
+
Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
if (len > best_len) {
return best_len;
return s->lookahead;
}
+
+#undef LONGEST_MATCH
+#undef COMPARE256
+#undef COMPARE258