x86: Only align destination to 1x VEC_SIZE in memset 4x loop

Author:     Noah Goldstein <goldstein.w.n@gmail.com>
AuthorDate: Wed, 1 Nov 2023 20:30:26 +0000 (15:30 -0500)
Commit:     Sunil K Pandey <skpgkp2@gmail.com>
CommitDate: Fri, 10 Jan 2025 16:49:42 +0000 (08:49 -0800)
author Noah Goldstein <goldstein.w.n@gmail.com>
Wed, 1 Nov 2023 20:30:26 +0000 (15:30 -0500)
committer Sunil K Pandey <skpgkp2@gmail.com>
Fri, 10 Jan 2025 16:49:42 +0000 (08:49 -0800)
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S

index 905d0fa4643d57684460bb0c0d8138aa89f02ff7..bc4053d1c508a34f502609dad882ff02b4eb7fae 100644 (file)
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -301,7 +301,7 @@ L(more_2x_vec):
         leaq    (VEC_SIZE * 4)(%rax), %LOOP_REG
  #endif
         /* Align dst for loop.  */
-       andq    $(VEC_SIZE * -2), %LOOP_REG
+       andq    $(VEC_SIZE * -1), %LOOP_REG
         .p2align 4
  L(loop):
         VMOVA   %VEC(0), LOOP_4X_OFFSET(%LOOP_REG)
author	Noah Goldstein <goldstein.w.n@gmail.com>
	Wed, 1 Nov 2023 20:30:26 +0000 (15:30 -0500)
committer	Sunil K Pandey <skpgkp2@gmail.com>
	Fri, 10 Jan 2025 16:49:42 +0000 (08:49 -0800)