x86: Optimize SSE2 memchr overflow calculation

author H.J. Lu <hjl.tools@gmail.com>

Fri, 19 May 2017 17:46:29 +0000 (10:46 -0700)

committer H.J. Lu <hjl.tools@gmail.com>

Fri, 19 May 2017 17:48:45 +0000 (10:48 -0700)
author H.J. Lu <hjl.tools@gmail.com>
Fri, 19 May 2017 17:46:29 +0000 (10:46 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Fri, 19 May 2017 17:48:45 +0000 (10:48 -0700)
diff --git a/ChangeLog b/ChangeLog

index 3c8d9f14d53a123a2948c07599e9cee8b6436cc6..b9fbdb8351839ee6891d1875718df4d2012fda3d 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-05-19  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Use
+       "edx + ecx - 16" to avoid possible addition overflow.
+       * sysdeps/x86_64/memchr.S (memchr): Likewise.
+
  2017-05-19  Adhemerval Zanella  <adhemerval.zanella@linaro.org>
  
         * misc/Makefile (CFLAGS-vmsplice.c): Remove rule.
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S

index e41f324a7723dcba406684675af521c21a047100..172d70de13aff94ace6498dbf371216b41262b4c 100644 (file)
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2.S
@@ -117,14 +117,12 @@ L(crosscache):
  
  # ifndef USE_AS_RAWMEMCHR
         jnz     L(match_case2_prolog1)
-        /* Calculate the last acceptable address and check for possible
-           addition overflow by using satured math:
-           edx = ecx + edx
-           edx |= -(edx < ecx)  */
-       add     %ecx, %edx
-       sbb     %eax, %eax
-       or      %eax, %edx
-       sub     $16, %edx
+        /* "ecx" is less than 16.  Calculate "edx + ecx - 16" by using
+          "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
+          possible addition overflow.  */
+       neg     %ecx
+       add     $16, %ecx
+       sub     %ecx, %edx
         jbe     L(return_null)
         lea     16(%edi), %edi
  # else
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S

index a205a2599825f52982588a0204fb93c3424e438b..f82e1c5bf72e901bfd15593165a8a68e6b4a6ce9 100644 (file)
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -76,14 +76,12 @@ L(crosscache):
  
         .p2align 4
  L(unaligned_no_match):
-        /* Calculate the last acceptable address and check for possible
-           addition overflow by using satured math:
-           rdx = rcx + rdx
-           rdx |= -(rdx < rcx)  */
-       add     %rcx, %rdx
-       sbb     %rax, %rax
-       or      %rax, %rdx
-       sub     $16, %rdx
+        /* "rcx" is less than 16.  Calculate "rdx + rcx - 16" by using
+          "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
+          possible addition overflow.  */
+       neg     %rcx
+       add     $16, %rcx
+       sub     %rcx, %rdx
         jbe     L(return_null)
         add     $16, %rdi
         sub     $64, %rdx
author	H.J. Lu <hjl.tools@gmail.com>
	Fri, 19 May 2017 17:46:29 +0000 (10:46 -0700)
committer	H.J. Lu <hjl.tools@gmail.com>
	Fri, 19 May 2017 17:48:45 +0000 (10:48 -0700)
ChangeLog		patch \| blob \| blame \| history
sysdeps/i386/i686/multiarch/memchr-sse2.S		patch \| blob \| blame \| history
sysdeps/x86_64/memchr.S		patch \| blob \| blame \| history