git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/117000 - elide .REDUC_IOR with compare against zero
author Richard Biener <rguenther@suse.de>
Tue, 8 Oct 2024 07:01:01 +0000 (09:01 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 9 Oct 2024 07:52:44 +0000 (09:52 +0200)
The following adds a pattern to elide a .REDUC_IOR operation when
the result is compared against zero with a cbranch.  I've resorted
to using can_compare_p since that's what RTL expansion eventually
checks - while GIMPLE has long allowed whole-vector equality compares,
I'll note that vector lowering won't lower unsupported ones and RTL
expansion doesn't seem to try using [u]cmp<vector-mode> optabs
(and neither x86 nor aarch64 implements those).  There's cstore
but no target implements that for vector modes either.

PR tree-optimization/117000
* match.pd (.REDUC_IOR !=/== 0): New pattern.
* gimple-match-head.cc: Include memmodel.h and optabs.h.
* generic-match-head.cc: Likewise.

* gcc.target/i386/pr117000.c: New testcase.

gcc/generic-match-head.cc
gcc/gimple-match-head.cc
gcc/match.pd
gcc/testsuite/gcc.target/i386/pr117000.c [new file with mode: 0644]

index 42dee6266132dd1b260da2adcbe9b1d1cd2b48ac..7d7e2a9f792daeeda30e6c7ba35d5542a79d1760 100644 (file)
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "dbgcnt.h"
 #include "tm.h"
index 4147a0eb38a7d0fd6958921856d7dfb987567eaf..b9d5f751b7cb5c3c1c37d5d795d33ea6bc3dab90 100644 (file)
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "tree-eh.h"
 #include "dbgcnt.h"
index e73bb7e2109b8238ab39e09c97b9603217240ebe..755ed13e77d11d3a045e6a0a92da14ca03509e55 100644 (file)
@@ -10474,6 +10474,15 @@ and,
   (simplify (reduc (op @0 VECTOR_CST@1))
     (op (reduc:type @0) (reduc:type @1))))
 
+/* Simplify .REDUC_IOR (@0) ==/!= 0 to the whole-vector compare @0 ==/!= 0.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (IFN_REDUC_IOR @0) integer_zerop)
+  (if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
+       && can_compare_p (cmp == EQ_EXPR ? EQ : NE, TYPE_MODE (TREE_TYPE (@0)),
+                        ccp_jump)) /* Target must support this compare in a jump.  */
+   (cmp @0 { build_zero_cst (TREE_TYPE (@0)); })))) /* Compare with a zero of @0's type.  */
+
 /* Simplify vector floating point operations of alternating sub/add pairs
    into using an fneg of a wider element type followed by a normal add.
    under IEEE 754 the fneg of the wider type will negate every even entry
diff --git a/gcc/testsuite/gcc.target/i386/pr117000.c b/gcc/testsuite/gcc.target/i386/pr117000.c
new file mode 100644 (file)
index 0000000..04f9434
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" { target sse4 } } */
+
+int eq(unsigned long *x, unsigned long *y)
+{
+    unsigned long folded = 0;  /* OR of all element-wise differences.  */
+    for (int i = 0; i < 4; ++i)
+      folded |= x[i] ^ y[i];  /* Nonzero bits mark where x[i] != y[i].  */
+    return folded == 0;  /* 1 iff all four element pairs are equal.  */
+}
+
+/* We want to elide the .REDUC_IOR with the compare against zero.  */
+/* { dg-final { scan-assembler "ptest" } } */