Before
1089d083117 Simplify (B * v + C) * D -> BD* v + CD when B,C,D are all INTEGER_CST.
the loop was
.L2:
movl (%rdi,%rdx), %eax
addl $12345, %eax
imull $-
1564285888, %eax, %eax
leal -
333519936(%rax), %eax
movl %eax, (%rsi,%rdx)
addq $4, %rdx
cmpq $1024, %rdx
jne .L2
There were 1 addl and 1 leal. 1 addq was to update the loop counter. The
optimized loop is
.L2:
imull $-
1564285888, (%rdi,%rax), %edx
subl $
1269844480, %edx
movl %edx, (%rsi,%rax)
addq $4, %rax
cmpq $1024, %rax
jne .L2
1 addl is changed to subl and leal is removed. Adjust assembly scan to
check for 1 subl and 1 addl/addq as well as lea removal.
* gcc.target/i386/pr53533-1.c: Adjust assembly scan.
* gcc.target/i386/pr53533-3.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
/* { dg-do compile } */
/* { dg-options "-O1" } */
/* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */
-/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "subl\[ \t\]" "1" } } */
+/* { dg-final { scan-assembler-times "add(?:l|q)\[ \t\]" "1" } } */
+/* { dg-final { scan-assembler-not "leal" } } */
void
__attribute__((noipa))
/* { dg-do compile } */
/* { dg-options "-O1 -fwrapv" } */
/* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */
-/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "subl\[ \t\]" "1" } } */
+/* { dg-final { scan-assembler-times "add(?:l|q)\[ \t\]" "1" } } */
+/* { dg-final { scan-assembler-not "leal" } } */
void
__attribute__((noipa))