From: Nathan Moinvaziri
Date: Sat, 23 May 2020 03:06:21 +0000 (-0700)
Subject: Unroll more in compare258_c for performance improvement.
X-Git-Tag: 1.9.9-b1~294
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=71fd2dcc40ed12bca3787b30a1ff867b6d2fe4ee;p=thirdparty%2Fzlib-ng.git

Unroll more in compare258_c for performance improvement.

Unify length count variable across all compare256 variants.
Early return without break for possible performance improvements.
---

diff --git a/arch/x86/compare258_avx.c b/arch/x86/compare258_avx.c
index 74ca87ac9..34e93bbcb 100644
--- a/arch/x86/compare258_avx.c
+++ b/arch/x86/compare258_avx.c
@@ -44,7 +44,7 @@ static inline int32_t compare256_unaligned_avx2_static(const unsigned char *src0
         src0 += 32, src1 += 32, len += 32;
     } while (len < 256);
 
-    return len;
+    return 256;
 }
 
 static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
diff --git a/arch/x86/compare258_sse.c b/arch/x86/compare258_sse.c
index 27bae0510..9695529e2 100644
--- a/arch/x86/compare258_sse.c
+++ b/arch/x86/compare258_sse.c
@@ -51,7 +51,7 @@ static inline int32_t compare256_unaligned_sse4_static(const unsigned char *src0
         src0 += 16, src1 += 16, len += 16;
     } while (len < 256);
 
-    return len;
+    return 256;
 }
 
 static inline int32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
diff --git a/compare258.c b/compare258.c
index 7763bb6a1..76d062779 100644
--- a/compare258.c
+++ b/compare258.c
@@ -10,25 +10,36 @@
 
 /* ALIGNED, byte comparison */
 static inline int32_t compare256_c_static(const unsigned char *src0, const unsigned char *src1) {
-    const unsigned char *src0start = src0;
-    const unsigned char *src0end = src0 + 256;
+    int32_t len = 0;
 
     do {
         if (*src0 != *src1)
-            break;
-        src0 += 1, src1 += 1;
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
+        if (*src0 != *src1)
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
+        if (*src0 != *src1)
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
+        if (*src0 != *src1)
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
         if (*src0 != *src1)
-            break;
-        src0 += 1, src1 += 1;
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
         if (*src0 != *src1)
-            break;
-        src0 += 1, src1 += 1;
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
         if (*src0 != *src1)
-            break;
-        src0 += 1, src1 += 1;
-    } while (src0 < src0end);
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
+        if (*src0 != *src1)
+            return len + (*src0 == *src1);
+        src0 += 1, src1 += 1, len += 1;
+    } while (len < 256);
 
-    return (int32_t)(src0 - src0start);
+    return 256;
 }
 
 static inline int32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
@@ -55,25 +66,24 @@ int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
 #ifdef UNALIGNED_OK
 /* UNALIGNED_OK, 16-bit integer comparison */
 static inline int32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
-    const unsigned char *src0start = src0;
-    const unsigned char *src0end = src0 + 256;
+    int32_t len = 0;
 
     do {
         if (*(uint16_t *)src0 != *(uint16_t *)src1)
-            break;
-        src0 += 2, src1 += 2;
+            return len + (*src0 == *src1);
+        src0 += 2, src1 += 2, len += 2;
         if (*(uint16_t *)src0 != *(uint16_t *)src1)
-            break;
-        src0 += 2, src1 += 2;
+            return len + (*src0 == *src1);
+        src0 += 2, src1 += 2, len += 2;
         if (*(uint16_t *)src0 != *(uint16_t *)src1)
-            break;
-        src0 += 2, src1 += 2;
+            return len + (*src0 == *src1);
+        src0 += 2, src1 += 2, len += 2;
         if (*(uint16_t *)src0 != *(uint16_t *)src1)
-            break;
-        src0 += 2, src1 += 2;
-    } while (src0 < src0end);
+            return len + (*src0 == *src1);
+        src0 += 2, src1 += 2, len += 2;
+    } while (len < 256);
 
-    return (int32_t)(src0 - src0start) + (src0 < src0end && *src0 == *src1);
+    return 256;
 }
 
 static inline int32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
@@ -111,7 +121,7 @@ static inline int32_t compare256_unaligned_32_static(const unsigned char *src0,
         src0 += 4, src1 += 4, len += 4;
     } while (len < 256);
 
-    return len;
+    return 256;
 }
 
 static inline int32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
@@ -151,7 +161,7 @@ static inline int32_t compare256_unaligned_64_static(const unsigned char *src0,
         src0 += 8, src1 += 8, len += 8;
     } while (len < 256);
 
-    return len;
+    return 256;
 }
 
 static inline int32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {