From: Victor Julien Date: Fri, 6 May 2022 15:46:40 +0000 (+0200) Subject: memcmp: work around GCC 12+ 'blend' issues X-Git-Tag: suricata-7.0.0-beta1~627 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=87c5d6943709de225d946f5eab8f1d24cdefa491;p=thirdparty%2Fsuricata.git memcmp: work around GCC 12+ 'blend' issues Since GCC 12 the memcmp code using `_mm_blendv_epi8` failed to work. Inspection of the disassembled objects suggests that it simply omits the instruction on systems that are not AVX512 capable. On AVX512 it does replace it with VPCMPB logic that appears to work. Luckily our use of blend is actually unnecessary. A simple AND is sufficient. Bug: #5312. --- diff --git a/src/util-memcmp.h b/src/util-memcmp.h index 1830e5a358..733622e867 100644 --- a/src/util-memcmp.h +++ b/src/util-memcmp.h @@ -107,7 +107,6 @@ static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t n) size_t m = 0; __m128i ucase = _mm_load_si128((const __m128i *) scmemcmp_uppercase); - __m128i nulls = _mm_setzero_si128(); __m128i uplow = _mm_set1_epi8(0x20); do { @@ -126,7 +125,7 @@ static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t n) mask = _mm_cmpestrm(ucase, 2, b2, len, _SIDD_CMP_RANGES | _SIDD_UNIT_MASK); /* Next we use that mask to create a new: this one has 0x20 for * the uppercase chars, 00 for all other. 
*/ - mask = _mm_blendv_epi8(nulls, uplow, mask); + mask = _mm_and_si128(uplow, mask); /* finally, merge the mask and the buffer converting the * uppercase to lowercase */ b2 = _mm_add_epi8(b2, mask); @@ -190,7 +189,6 @@ static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) /* setup registers for upper to lower conversion */ upper1 = _mm_set1_epi8(UPPER_LOW); upper2 = _mm_set1_epi8(UPPER_HIGH); - nulls = _mm_setzero_si128(); uplow = _mm_set1_epi8(0x20); do { @@ -212,7 +210,7 @@ static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) mask1 = _mm_cmpeq_epi8(mask1, mask2); /* Next we use that mask to create a new: this one has 0x20 for * the uppercase chars, 00 for all other. */ - mask1 = _mm_blendv_epi8(nulls, uplow, mask1); + mask1 = _mm_and_si128(uplow, mask1); /* add to b2, converting uppercase to lowercase */ b2 = _mm_add_epi8(b2, mask1);