After
commit
3f176e1adc6bc9cc2c21222d776b51d9f43cb66b
Author: Tamar Christina <tamar.christina@arm.com>
Date: Thu Nov 9 13:59:39 2023 +0000
middle-end: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]
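fneg (fabs (x)) is expanded to copysign (x, -1). Swap the predicates of
operands[1] and operands[2] in the copysign<mode>3 expander, so that the
magnitude operand must be a register and the sign operand may be a
constant, in order to optimize
y = copysign (x, const_double)
instead of
y = copysign (const_double, x)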
Simplify
y = copysign (x, positive_const_double)
to
y = ~signbit_mask & x
and
y = copysign (x, negative_const_double)
to
y = signbit_mask | x
gcc/
PR target/99930
PR target/122323
* config/i386/i386-expand.cc (ix86_expand_copysign): Swap
operands[1] with operands[2]. Optimize copysign (x, const_double)
instead of copysign (const_double, x).
* config/i386/i386.md (copysign<mode>3): Swap predicates of
operands[1] and operands[2].
gcc/testsuite/
PR target/99930
PR target/122323
* gcc.target/i386/builtin-copysign-2.c: New test.
* gcc.target/i386/builtin-copysign-3.c: Likewise.
* gcc.target/i386/builtin-copysign-4.c: Likewise.
* gcc.target/i386/builtin-copysign-5.c: Likewise.
* gcc.target/i386/builtin-copysign-6.c: Likewise.
* gcc.target/i386/builtin-copysign-7.c: Likewise.
* gcc.target/i386/builtin-copysign-8a.c: Likewise.
* gcc.target/i386/builtin-copysign-8b.c: Likewise.
* gcc.target/i386/builtin-fabs-1.c: Likewise.
* gcc.target/i386/builtin-fabs-2.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
vdest = gen_reg_rtx (vmode);
else
dest = NULL_RTX;
- op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
+ op1 = lowpart_subreg (vmode, force_reg (mode, operands[1]), mode);
mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0);
- if (CONST_DOUBLE_P (operands[1]))
+ if (CONST_DOUBLE_P (operands[2]))
{
- op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
- /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
- if (op0 == CONST0_RTX (mode))
+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (operands[2])))
+ /* Simplify b = copysign (a, negative) to b = mask | a. */
+ op1 = gen_rtx_IOR (vmode, mask, op1);
+ else
{
- emit_move_insn (vdest, gen_rtx_AND (vmode, mask, op1));
- if (dest)
- emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
- return;
+ /* Simplify b = copysign (a, positive) to b = invert_mask & a. */
+ rtx invert_mask
+ = ix86_build_signbit_mask (vmode,
+ TARGET_AVX512F && mode != HFmode,
+ true);
+ op1 = gen_rtx_AND (vmode, invert_mask, op1);
}
-
- if (GET_MODE_SIZE (mode) < 16)
- op0 = ix86_build_const_vector (vmode, false, op0);
- op0 = force_reg (vmode, op0);
+ emit_move_insn (vdest, op1);
+ if (dest)
+ emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
+ return;
}
else
- op0 = lowpart_subreg (vmode, force_reg (mode, operands[1]), mode);
+ op0 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
op2 = gen_reg_rtx (vmode);
op3 = gen_reg_rtx (vmode);
- emit_move_insn (op2, gen_rtx_AND (vmode,
- gen_rtx_NOT (vmode, mask),
- op0));
- emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
+ rtx invert_mask;
+ /* NB: Generate vmovdqa, vpandn, vpand, vpor for AVX and generate pand,
+ pand, por for SSE. */
+ if (TARGET_AVX)
+ invert_mask = gen_rtx_NOT (vmode, mask);
+ else
+ invert_mask = ix86_build_signbit_mask (vmode,
+ TARGET_AVX512F && mode != HFmode,
+ true);
+ emit_move_insn (op2, gen_rtx_AND (vmode, invert_mask, op1));
+ emit_move_insn (op3, gen_rtx_AND (vmode, mask, op0));
emit_move_insn (vdest, gen_rtx_IOR (vmode, op2, op3));
if (dest)
emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
(define_expand "copysign<mode>3"
[(match_operand:SSEMODEF 0 "register_operand")
- (match_operand:SSEMODEF 1 "nonmemory_operand")
- (match_operand:SSEMODEF 2 "register_operand")]
+ (match_operand:SSEMODEF 1 "register_operand")
+ (match_operand:SSEMODEF 2 "nonmemory_operand")]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| (TARGET_SSE && (<MODE>mode == TFmode))
|| (TARGET_AVX512FP16 && (<MODE>mode ==HFmode))"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** andps .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* copysignf with a non-negative constant as the sign operand.  The
+   expected body is a single andps with a constant-pool mask, and the
+   scan for ".long 2147483647" (0x7fffffff) checks that the mask clears
+   only the sign bit of the scalar.  */
+float
+foo (float x)
+{
+ return __builtin_copysignf (x, 0.0);
+}
+
+/* { dg-final { scan-assembler-times ".long 0" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long 2147483647" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** orps .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* copysignf with a negative constant as the sign operand.  The expected
+   body is a single orps with a constant-pool mask, and the scan for
+   ".long -2147483648" (0x80000000) checks that the mask sets only the
+   sign bit of the scalar.  */
+float
+foo (float x)
+{
+ return __builtin_copysignf (x, -3.0);
+}
+
+/* { dg-final { scan-assembler-times ".long 0" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long -2147483648" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** pand .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* __float128 variant of the non-negative constant sign case.  The
+   expected body is a single pand with a constant-pool mask; the scans
+   for three ".long -1" and one ".long 2147483647" describe a 128-bit
+   constant with exactly one bit clear.  */
+__float128
+foo (__float128 x)
+{
+ return __builtin_copysignq (x, 0.0);
+}
+
+/* { dg-final { scan-assembler-times ".long -1" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long 2147483647" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** pand .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* Zero constant as the magnitude and a variable as the sign operand.
+   The expected body is still a single pand; the scans for three
+   ".long 0" and one ".long -2147483648" describe a 128-bit constant
+   with exactly one bit set, so only that bit of x survives.  */
+__float128
+foo (__float128 x)
+{
+ return __builtin_copysignq (0.0, x);
+}
+
+/* { dg-final { scan-assembler-times ".long 0" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long -2147483648" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** pand .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* Positive, non-zero constant as the sign operand.  Only the sign of
+   the constant matters, so the expected body is the same single pand
+   and the same mask words as for a 0.0 sign operand.  */
+__float128
+foo (__float128 x)
+{
+ return __builtin_copysignq (x, 3.4);
+}
+
+/* { dg-final { scan-assembler-times ".long -1" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long 2147483647" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** pand .LC[0-9]+\(%rip\), %xmm0
+** por .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* Non-zero constant as the magnitude and a variable as the sign operand.
+   The expected body is a pand followed by a por on %xmm0, each taking
+   its other operand from the constant pool.  */
+__float128
+foo (__float128 x)
+{
+ return __builtin_copysignq (3.4, x);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** pand .LC[0-9]+\(%rip\), %xmm0
+** pand .LC[0-9]+\(%rip\), %xmm1
+** por %xmm1, %xmm0
+** ret
+**...
+*/
+/* Both operands are variables.  With -mno-avx -msse2 the expected body
+   masks each argument with a pand against a constant-pool operand and
+   merges the two results with por.  */
+__float128
+foo (__float128 x, __float128 y)
+{
+ return __builtin_copysignq (x, y);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx512f -mavx" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** vmovdqa .LC[0-9]+\(%rip\), %xmm2
+** vpandn %xmm0, %xmm2, %xmm0
+** vpand %xmm2, %xmm1, %xmm1
+** vpor %xmm1, %xmm0, %xmm0
+** ret
+**...
+*/
+
+#include "builtin-copysign-8a.c"
--- /dev/null
+/* PR target/122323 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+** .cfi_startproc
+** orps .LC[0-9]+\(%rip\), %xmm0
+** ret
+**...
+*/
+/* Negated fabsf of a scalar.  The expected body is a single orps with a
+   constant-pool mask, and the scan for ".long -2147483648" (0x80000000)
+   checks that the mask sets only the sign bit.  */
+float
+foo (float x)
+{
+ return -__builtin_fabsf (x);
+}
+
+/* { dg-final { scan-assembler-times ".long 0" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times ".long -2147483648" 1 { target { ! ia32 } } } } */
--- /dev/null
+/* PR target/99930 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
+
+/*
+**foo:
+**...
+** ja .L[0-9]+
+** movss 4\(%rdi\), %xmm1
+** orps %xmm1, %xmm0
+** comiss %xmm0, %xmm2
+** seta %al
+** ret
+**...
+*/
+
+/* Negated fabsf inside a two-iteration loop.  The expected body matches
+   the tail of the function: the float loaded from 4(%rdi) is combined
+   with orps between registers and then compared with comiss.
+   NOTE(review): 4(%rdi) is presumably n[1] -- confirm against the
+   generated code.  */
+bool
+foo (float n[2], float m)
+{
+ for (int i = 0; i < 2; i++)
+ if (m > -__builtin_fabsf (n[i]))
+ return true;
+ return false;
+}