From 2738955004256c2e9753364d78a7be340323b74b Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Wed, 24 May 2023 17:32:20 +0100 Subject: [PATCH] PR middle-end/109840: Preserve popcount/parity type in match.pd. PR middle-end/109840 is a regression introduced by my recent patch to fold popcount(bswap(x)) as popcount(x). When the bswap and the popcount have the same precision, everything works fine, but this optimization also allowed a zero-extension between the two. The oversight is that we need to be strict with type conversions, both to avoid accidentally changing the argument type to popcount, and also to reflect the effects of argument/return-value promotion in the call to bswap, so this zero-extension needs to be preserved/explicit in the optimized form. Interestingly, match.pd should (in theory) be able to narrow calls to popcount and parity, removing a zero-extension from its argument, but that is an independent optimization that needs to check IFN_ support. Many thanks to Andrew Pinski for his help/fixes with these transformations. 2023-05-24 Roger Sayle gcc/ChangeLog PR middle-end/109840 * match.pd: Preserve zero-extension when optimizing popcount((T)bswap(x)) and popcount((T)rotate(x,y)) as popcount((T)x), so the popcount's argument keeps the same type. Likewise preserve extensions when simplifying parity((T)bswap(x)) and parity((T)rotate(x,y)) as parity((T)x), so that the parity's argument type is the same. gcc/testsuite/ChangeLog PR middle-end/109840 * gcc.dg/fold-parity-8.c: New test. * gcc.dg/fold-popcount-11.c: Likewise. 
--- gcc/match.pd | 27 ++++++++++++++++--------- gcc/testsuite/gcc.dg/fold-parity-8.c | 25 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/fold-popcount-11.c | 25 +++++++++++++++++++++++ 3 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/fold-parity-8.c create mode 100644 gcc/testsuite/gcc.dg/fold-popcount-11.c diff --git a/gcc/match.pd b/gcc/match.pd index 1fe0559acfb9..6e32f476e973 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7865,10 +7865,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (popcount (convert?@0 (bswap:s@1 @2))) (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && INTEGRAL_TYPE_P (TREE_TYPE (@1))) - (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0)); - unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); } - (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (TREE_TYPE (@1)))) - (popcount @2))))))) + (with { tree type0 = TREE_TYPE (@0); + tree type1 = TREE_TYPE (@1); + unsigned int prec0 = TYPE_PRECISION (type0); + unsigned int prec1 = TYPE_PRECISION (type1); } + (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (type1))) + (popcount (convert:type0 (convert:type1 @2))))))))) /* popcount(rotate(X Y)) is popcount(X). */ (for popcount (POPCOUNT) @@ -7878,10 +7880,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && INTEGRAL_TYPE_P (TREE_TYPE (@1)) && (GIMPLE || !TREE_SIDE_EFFECTS (@3))) - (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0)); - unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); } - (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (TREE_TYPE (@1)))) - (popcount @2))))))) + (with { tree type0 = TREE_TYPE (@0); + tree type1 = TREE_TYPE (@1); + unsigned int prec0 = TYPE_PRECISION (type0); + unsigned int prec1 = TYPE_PRECISION (type1); } + (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (type1))) + (popcount (convert:type0 @2)))))))) /* Canonicalize POPCOUNT(x)&1 as PARITY(X). 
*/ (simplify @@ -7923,7 +7927,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && INTEGRAL_TYPE_P (TREE_TYPE (@1)) && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1))) - (parity @2))))) + (with { tree type0 = TREE_TYPE (@0); + tree type1 = TREE_TYPE (@1); } + (parity (convert:type0 (convert:type1 @2)))))))) /* parity(rotate(X Y)) is parity(X). */ (for parity (PARITY) @@ -7935,7 +7941,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && (GIMPLE || !TREE_SIDE_EFFECTS (@3)) && TYPE_PRECISION (TREE_TYPE (@0)) >= TYPE_PRECISION (TREE_TYPE (@1))) - (parity @2))))) + (with { tree type0 = TREE_TYPE (@0); } + (parity (convert:type0 @2))))))) /* parity(X)^parity(Y) is parity(X^Y). */ (simplify diff --git a/gcc/testsuite/gcc.dg/fold-parity-8.c b/gcc/testsuite/gcc.dg/fold-parity-8.c new file mode 100644 index 000000000000..48e1f7f2b599 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-8.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned int t2 = t1; + return __builtin_parity (t2); +} + +int fool(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned long t2 = t1; + return __builtin_parityl (t2); +} + +int fooll(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned long long t2 = t1; + return __builtin_parityll (t2); +} + +/* { dg-final { scan-tree-dump-not "bswap" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-popcount-11.c b/gcc/testsuite/gcc.dg/fold-popcount-11.c new file mode 100644 index 000000000000..e59be003abb7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-11.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned int t2 = t1; + return __builtin_popcount (t2); +} + +int fool(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned 
long t2 = t1; + return __builtin_popcountl (t2); +} + +int fooll(unsigned short x) +{ + unsigned short t1 = __builtin_bswap16(x); + unsigned long long t2 = t1; + return __builtin_popcountll (t2); +} + +/* { dg-final { scan-tree-dump-not "bswap" "optimized" } } */ -- 2.47.2