middle-end: fix masking for partial vectors and early break [PR119351]

author Tamar Christina <tamar.christina@arm.com>

Thu, 17 Apr 2025 09:25:43 +0000 (10:25 +0100)

committer Tamar Christina <tamar.christina@arm.com>

Thu, 17 Apr 2025 09:41:10 +0000 (10:41 +0100)
author Tamar Christina <tamar.christina@arm.com>
Thu, 17 Apr 2025 09:25:43 +0000 (10:25 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Thu, 17 Apr 2025 09:41:10 +0000 (10:41 +0100)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c

new file mode 100644 (file)

index 0000000..85aab35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -0,0 +1,39 @@
+/* Fix for PR119351 alignment peeling with vectors and VLS.  */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=256 --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" ""} } */
+
+#define N 512
+#define START 1
+#define END 505
+ 
+int x[N] __attribute__((aligned(32)));
+
+/*
+** foo:
+**     ...
+**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
+**     cmple   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+**     ptest   p[0-9]+, p[0-9]+.b
+**     ...
+*/
+
+int __attribute__((noipa))
+foo (void)
+{
+  int z = 0;
+  for (unsigned int i = START; i < END; ++i)
+    {
+      z++;
+      if (x[i] > 0)
+        continue;
+    
+      return z;
+    }
+  return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c

new file mode 100644 (file)

index 0000000..d36ab0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c
@@ -0,0 +1,20 @@
+/* Fix for PR119351 alignment peeling with vectors and VLS.  */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#include "pr119351.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  x[0] = 1;
+  x[1] = 21;
+  x[2] = 39;
+  x[3] = 59;
+  int res = foo ();
+  if (res != 4)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc

index 7f874354e75e8d4d3a7196df4e9b559ac641827c..5af1973734e20b22d81e1624933d8265448c1e1a 100644 (file)
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -13615,29 +13615,23 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
       codegen so we must replace the original insn.  */
    gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
    gcond *cond_stmt = as_a <gcond *>(orig_stmt);
+
+  tree cst = build_zero_cst (vectype);
+  auto bb = gimple_bb (cond_stmt);
+  edge exit_true_edge = EDGE_SUCC (bb, 0);
+  if (exit_true_edge->flags & EDGE_FALSE_VALUE)
+    exit_true_edge = EDGE_SUCC (bb, 1);
+  gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
+
    /* When vectorizing we assume that if the branch edge is taken that we're
       exiting the loop.  This is not however always the case as the compiler will
       rewrite conditions to always be a comparison against 0.  To do this it
       sometimes flips the edges.  This is fine for scalar,  but for vector we
-     then have to flip the test, as we're still assuming that if you take the
-     branch edge that we found the exit condition.  i.e. we need to know whether
-     we are generating a `forall` or an `exist` condition.  */
-  auto new_code = NE_EXPR;
-  auto reduc_optab = ior_optab;
-  auto reduc_op = BIT_IOR_EXPR;
-  tree cst = build_zero_cst (vectype);
-  edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
-  if (exit_true_edge->flags & EDGE_FALSE_VALUE)
-    exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
-  gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
-  if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
-                            exit_true_edge->dest))
-    {
-      new_code = EQ_EXPR;
-      reduc_optab = and_optab;
-      reduc_op = BIT_AND_EXPR;
-      cst = build_minus_one_cst (vectype);
-    }
+     then have to negate the result of the test, as we're still assuming that if
+     you take the branch edge that we found the exit condition.  i.e. we need to
+     know whether we are generating a `forall` or an `exist` condition.  */
+  bool flipped = flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+                                       exit_true_edge->dest);
  
    /* Analyze only.  */
    if (!vec_stmt)
@@ -13653,14 +13647,13 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
         }
  
        if (ncopies > 1
-         && direct_optab_handler (reduc_optab, mode) == CODE_FOR_nothing)
+         && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing)
         {
           if (dump_enabled_p ())
               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                "can't vectorize early exit because the "
-                              "target does not support boolean vector %s "
+                              "target does not support boolean vector IOR "
                                "for type %T.\n",
-                              reduc_optab == ior_optab ? "OR" : "AND",
                                vectype);
           return false;
         }
@@ -13720,6 +13713,29 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
         stmts.quick_push (gimple_assign_lhs (stmt));
      }
  
+  /* If we're comparing against a previous forall we need to negate the resullts
+     before we do the final comparison or reduction.  */
+  if (flipped)
+    {
+      /* Rewrite the if(all(mask)) into if (!all(mask)) which is the same as
+        if (any(~mask)) by negating the masks and flipping the branches.
+
+       1. For unmasked loops we simply reduce the ~mask.
+       2. For masked loops we reduce (~mask & loop_mask) which is the same as
+          doing (mask & loop_mask) ^ loop_mask.  */
+      for (unsigned i = 0; i < stmts.length (); i++)
+       {
+         tree inv_lhs = make_temp_ssa_name (vectype, NULL, "vexit_inv");
+         auto inv_stmt = gimple_build_assign (inv_lhs, BIT_NOT_EXPR, stmts[i]);
+         vect_finish_stmt_generation (loop_vinfo, stmt_info, inv_stmt,
+                                      &cond_gsi);
+         stmts[i] = inv_lhs;
+       }
+
+      EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
+      EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
+    }
+
    /* Determine if we need to reduce the final value.  */
    if (stmts.length () > 1)
      {
@@ -13758,7 +13774,7 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
           new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
           tree arg0 = workset.pop ();
           tree arg1 = workset.pop ();
-         new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
+         new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
           vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
                                        &cond_gsi);
           workset.quick_insert (0, new_temp);
@@ -13781,7 +13797,7 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
  
    gcc_assert (new_temp);
  
-  gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
+  gimple_cond_set_condition (cond_stmt, NE_EXPR, new_temp, cst);
    update_stmt (orig_stmt);
  
    if (slp_node)
author	Tamar Christina <tamar.christina@arm.com>
	Thu, 17 Apr 2025 09:25:43 +0000 (10:25 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Thu, 17 Apr 2025 09:41:10 +0000 (10:41 +0100)
gcc/testsuite/gcc.target/aarch64/sve/pr119351.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-stmts.cc		patch \| blob \| blame \| history