From: Tamar Christina Date: Thu, 9 Nov 2023 13:59:39 +0000 (+0000) Subject: middle-end: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154] X-Git-Tag: basepoints/gcc-15~4855 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3f176e1adc6bc9cc2c21222d776b51d9f43cb66b;p=thirdparty%2Fgcc.git middle-end: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154] This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more canonical and allows a target to expand this sequence efficiently. Such sequences are common in scientific code working with gradients. There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x)) which I remove since this is a less efficient form. The testsuite is also updated in light of this. gcc/ChangeLog: PR tree-optimization/109154 * match.pd: Add new neg+abs rule, remove inverse copysign rule. gcc/testsuite/ChangeLog: PR tree-optimization/109154 * gcc.dg/fold-copysign-1.c: Updated. * gcc.dg/pr55152-2.c: Updated. * gcc.dg/tree-ssa/abs-4.c: Updated. * gcc.dg/tree-ssa/backprop-6.c: Updated. * gcc.dg/tree-ssa/copy-sign-2.c: Updated. * gcc.dg/tree-ssa/mult-abs-2.c: Updated. * gcc.target/aarch64/fneg-abs_1.c: New test. * gcc.target/aarch64/fneg-abs_2.c: New test. * gcc.target/aarch64/fneg-abs_3.c: New test. * gcc.target/aarch64/fneg-abs_4.c: New test. * gcc.target/aarch64/sve/fneg-abs_1.c: New test. * gcc.target/aarch64/sve/fneg-abs_2.c: New test. * gcc.target/aarch64/sve/fneg-abs_3.c: New test. * gcc.target/aarch64/sve/fneg-abs_4.c: New test. --- diff --git a/gcc/match.pd b/gcc/match.pd index 68a1587ea246..5928acbb14e2 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1118,14 +1118,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (hypots @0 (copysigns @1 @2)) (hypots @0 @1)))) -/* copysign(x, CST) -> [-]abs (x). */ +/* copysign(x, CST) -> abs (x). 
*/ (for copysigns (COPYSIGN_ALL) (simplify (copysigns @0 REAL_CST@1) - (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1))) - (negate (abs @0)) + (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1))) (abs @0)))) +/* Transform fneg (fabs (X)) -> copysign (X, -1). */ +(simplify + (negate (abs @0)) + (IFN_COPYSIGN @0 { build_minus_one_cst (type); })) + /* copysign(copysign(x, y), z) -> copysign(x, z). */ (for copysigns (COPYSIGN_ALL) (simplify diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c index f17d65c24ee4..f9cafd14ab05 100644 --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c @@ -12,5 +12,5 @@ double bar (double x) return __builtin_copysign (x, minuszero); } -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */ diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c index 54db0f2062da..605f202ed6bc 100644 --- a/gcc/testsuite/gcc.dg/pr55152-2.c +++ b/gcc/testsuite/gcc.dg/pr55152-2.c @@ -10,4 +10,5 @@ int f(int a) return (a<-a)?a:-a; } -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c index 6197519faf7b..e1b825f37f69 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; } /* __builtin_signbit(x) ? x : -x. 
Should be convert into - ABS_EXP */ /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c index 31f05716f149..c3a138642d6f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f) TEST_FUNCTION (double, ) TEST_FUNCTION (long double, l) -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */ -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c index de52c5f7c806..e5d565c4b983 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c @@ -10,4 +10,5 @@ float f1(float x) float t = __builtin_copysignf (1.0f, -x); return x * t; } -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c index a41f1baf2566..a22896b21c8b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c @@ -34,4 +34,5 @@ float 
i1(float x) { return x * (x <= 0.f ? 1.f : -1.f); } -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */ +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */ +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */ diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c new file mode 100644 index 000000000000..f823013c3ddf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c new file mode 100644 index 000000000000..141121176b30 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c new
file mode 100644 index 000000000000..b4652173a95d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.4s, #128, lsl #24 +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c new file mode 100644 index 000000000000..10879dea7446 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c new file mode 100644 index 000000000000..0c7664e6de77 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile }
*/ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c new file mode 100644 index 000000000000..a60cd31b9294 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c new file mode 100644 index 000000000000..1bf34328d884 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ld1w z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\] +** orr z[0-9]+.s, z[0-9]+.s, #0x80000000 +** st1w z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\] +** ...
+*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ld1d z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\] +** orr z[0-9]+.d, z[0-9]+.d, #0x8000000000000000 +** st1d z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\] +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c new file mode 100644 index 000000000000..21f2a8da2a5d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} +