]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
middle-end: maintain LCSSA form when peeled vector iterations have virtual operands
authorTamar Christina <tamar.christina@arm.com>
Fri, 12 Jan 2024 15:25:34 +0000 (15:25 +0000)
committerTamar Christina <tamar.christina@arm.com>
Fri, 12 Jan 2024 15:31:06 +0000 (15:31 +0000)
This patch fixes several interconnected issues.

1. When picking an exit we wanted to check for niter_desc.may_be_zero not true.
   i.e. we want to pick an exit which we know will iterate at least once.
   However niter_desc.may_be_zero is not a boolean.  It is a tree that encodes
   a boolean value.  !niter_desc.may_be_zero is just checking if we have some
   information, not what the information is.  This leads us to pick a more
   difficult to vectorize exit more often than we should.

2. Because we had this bug, we used to pick an alternative exit much more ofthen
   which showed one issue, when the loop accesses memory and we "invert it" we
   would corrupt the VUSE chain.  This is because on an peeled vector iteration
   every exit restarts the loop (i.e. they're all early) BUT since we may have
   performed a store, the vUSE would need to be updated.  This version maintains
   virtual PHIs correctly in these cases.   Note that we can't simply remove all
   of them and recreate them because we need the PHI nodes still in the right
   order for if skip_vector.

3. Since we're moving the stores to a safe location I don't think we actually
   need to analyze whether the store is in range of the memref,  because if we
   ever get there, we know that the loads must be in range, and if the loads are
   in range and we get to the store we know the early breaks were not taken and
   so the scalar loop would have done the VF stores too.

4. Instead of searching for where to move stores to, they should always be in
   exit belonging to the latch.  We can only ever delay stores and even if we
   pick a different exit than the latch one as the main one, effects still
   happen in program order when vectorized.  If we don't move the stores to the
   latch exit but instead to whever we pick as the "main" exit then we can
   perform incorrect memory accesses (luckily these are trapped by verify_ssa).

5. We only used to analyze loads inside the same BB as an early break, and also
   we'd never analyze the ones inside the block where we'd be moving memory
   references to.  This is obviously bogus and to fix it this patch splits apart
   the two constraints.  We first validate that all load memory references are
   in bounds and only after that do we perform the alias checks for the writes.
   This makes the code simpler to understand and more trivially correct.

gcc/ChangeLog:

PR tree-optimization/113137
PR tree-optimization/113136
PR tree-optimization/113172
PR tree-optimization/113178
* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
Maintain PHIs on inverted loops.
(vect_do_peeling): Maintain virtual PHIs on inverted loops.
* tree-vect-loop.cc (vec_init_loop_exit_info): Pick exit closes to
latch.
(vect_create_loop_vinfo): Record all conds instead of only alt ones.

gcc/testsuite/ChangeLog:

PR tree-optimization/113137
PR tree-optimization/113136
PR tree-optimization/113172
PR tree-optimization/113178
* g++.dg/vect/vect-early-break_4-pr113137.cc: New test.
* g++.dg/vect/vect-early-break_5-pr113137.cc: New test.
* gcc.dg/vect/vect-early-break_95-pr113137.c: New test.
* gcc.dg/vect/vect-early-break_96-pr113136.c: New test.
* gcc.dg/vect/vect-early-break_97-pr113172.c: New test.

gcc/testsuite/g++.dg/vect/vect-early-break_4-pr113137.cc [new file with mode: 0644]
gcc/testsuite/g++.dg/vect/vect-early-break_5-pr113137.cc [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break_95-pr113137.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break_96-pr113136.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-early-break_97-pr113172.c [new file with mode: 0644]
gcc/tree-vect-loop-manip.cc
gcc/tree-vect-loop.cc

diff --git a/gcc/testsuite/g++.dg/vect/vect-early-break_4-pr113137.cc b/gcc/testsuite/g++.dg/vect/vect-early-break_4-pr113137.cc
new file mode 100644 (file)
index 0000000..f78db86
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+int b;
+void a() __attribute__((__noreturn__));
+void c() {
+  char *buf;
+  int bufsz = 64;
+  while (b) {
+    !bufsz ? a(), 0 : *buf++ = bufsz--;
+    b -= 4;
+  }
+}
diff --git a/gcc/testsuite/g++.dg/vect/vect-early-break_5-pr113137.cc b/gcc/testsuite/g++.dg/vect/vect-early-break_5-pr113137.cc
new file mode 100644 (file)
index 0000000..dcd19fa
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+char UnpackReadTables_BitLength[20];
+int UnpackReadTables_ZeroCount;
+void UnpackReadTables() {
+  for (unsigned I = 0; I < 20;)
+    while (UnpackReadTables_ZeroCount-- &&
+           I < sizeof(UnpackReadTables_BitLength))
+      UnpackReadTables_BitLength[I++] = 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_95-pr113137.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_95-pr113137.c
new file mode 100644 (file)
index 0000000..e8f5b06
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-w" } */
+
+short gen_to_words_words;
+void gen_to_words() {
+  unsigned short *lp = &gen_to_words_words;
+  long carry;
+  for (carry = 1, lp--; carry; lp--) {
+    carry = *lp + carry;
+    *lp = carry >>= 16;
+    if (lp == &gen_to_words_words)
+      break;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_96-pr113136.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_96-pr113136.c
new file mode 100644 (file)
index 0000000..016e749
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+struct _reent { union { struct { char _l64a_buf[8]; } _reent; } _new; };
+static const char R64_ARRAY[] = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+char *
+_l64a_r (struct _reent *rptr,
+     long value)
+{
+  char *ptr;
+  char *result;
+  int i, index;
+  unsigned long tmp = (unsigned long)value & 0xffffffff;
+  result = 
+          ((
+          rptr
+          )->_new._reent._l64a_buf)
+                               ;
+  ptr = result;
+  for (i = 0; i < 60; ++i)
+    {
+      if (tmp == 0)
+ {
+   *ptr = '\0';
+   break;
+ }
+      *ptr++ = R64_ARRAY[index];
+      tmp >>= 6;
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_97-pr113172.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_97-pr113172.c
new file mode 100644 (file)
index 0000000..625b227
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+int tswchp_2;
+short cpy_buf[8];
+void ts_endcmd() {
+  int i = 0;
+  for (; i < 8 && i < tswchp_2; i++)
+    cpy_buf[i] = i;
+}
index 8aa7575a0e39b51215610d21e5ba81dd271bdd47..da64738c71011ec04868664e76b40cde8bbbbd06 100644 (file)
@@ -1626,11 +1626,12 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
          flush_pending_stmts (e);
        }
 
+      bool peeled_iters = single_pred (loop->latch) != loop_exit->src;
       /* Record the new SSA names in the cache so that we can skip materializing
         them again when we fill in the rest of the LCSSA variables.  */
       for (auto phi : new_phis)
        {
-         tree new_arg = gimple_phi_arg (phi, loop_exit->dest_idx)->def;
+         tree new_arg = gimple_phi_arg_def (phi, loop_exit->dest_idx);
 
          if (!SSA_VAR_P (new_arg))
            continue;
@@ -1653,7 +1654,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
              continue;
            }
 
-         /* If we decide to remove the PHI node we should also not
+         /* If we decided not to remove the PHI node we should also not
             rematerialize it later on.  */
          new_phi_args.put (new_arg, gimple_phi_result (phi));
 
@@ -1667,7 +1668,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
       edge loop_entry = single_succ_edge (new_preheader);
       if (flow_loops)
        {
-         bool peeled_iters = single_pred (loop->latch) != loop_exit->src;
          /* Link through the main exit first.  */
          for (auto gsi_from = gsi_start_phis (loop->header),
               gsi_to = gsi_start_phis (new_loop->header);
@@ -1693,8 +1693,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
                    }
                }
              /* If we have multiple exits and the vector loop is peeled then we
-                need to use the value at start of loop.  */
-             if (peeled_iters)
+                need to use the value at start of loop.  If we're looking at
+                virtual operands we have to keep the original link.   Virtual
+                operands don't all become the same because we'll corrupt the
+                vUSE chains among others.  */
+             if (peeled_iters && !virtual_operand_p (new_arg))
                {
                  tree tmp_arg = gimple_phi_result (from_phi);
                  if (!new_phi_args.get (tmp_arg))
@@ -1726,9 +1729,31 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 
                  tree alt_arg = gimple_phi_result (from_phi);
                  edge main_e = single_succ_edge (alt_loop_exit_block);
-                 for (edge e : loop_exits)
-                   if (e != loop_exit)
-                     SET_PHI_ARG_DEF (to_phi, main_e->dest_idx, alt_arg);
+
+                 /* Now update the virtual PHI nodes with the right value.  */
+                 if (peeled_iters
+                     && virtual_operand_p (alt_arg)
+                     && flow_bb_inside_loop_p (loop,
+                               gimple_bb (SSA_NAME_DEF_STMT (alt_arg))))
+                   {
+                       /* Link the alternative exit one.  */
+                       tree def
+                         = gimple_phi_arg_def (to_phi, loop_exit->dest_idx);
+                       gphi *def_phi = as_a <gphi *> (SSA_NAME_DEF_STMT (def));
+                       SET_PHI_ARG_DEF (def_phi, 0, alt_arg);
+
+                       /* And now the main merge block.  */
+                       gphi *iter_phi
+                         = as_a <gphi *> (SSA_NAME_DEF_STMT (alt_arg));
+                       unsigned latch_idx
+                         = single_succ_edge (loop->latch)->dest_idx;
+                       tree exit_val
+                         = gimple_phi_arg_def (iter_phi, latch_idx);
+                       alt_arg = copy_ssa_name (def);
+                       gphi *l_phi = create_phi_node (alt_arg, main_e->src);
+                       SET_PHI_ARG_DEF (l_phi, 0, exit_val);
+                   }
+                 SET_PHI_ARG_DEF (to_phi, main_e->dest_idx, alt_arg);
                }
 
              set_immediate_dominator (CDI_DOMINATORS, new_preheader,
@@ -3413,6 +3438,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
        vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
                                          update_e);
 
+      /* If we have a peeled vector iteration we will never skip the epilog loop
+        and we can simplify the cfg a lot by not doing the edge split.  */
       if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
index 38bd8267ee1964e820548c3ff754d0124c31a24f..0f4a557fe2339b095f6ae833f8f0ef847fdf7f47 100644 (file)
@@ -989,7 +989,11 @@ vec_init_loop_exit_info (class loop *loop)
       if (number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
          && !chrec_contains_undetermined (niter_desc.niter))
        {
-         if (!niter_desc.may_be_zero || !candidate)
+         tree may_be_zero = niter_desc.may_be_zero;
+         if (integer_zerop (may_be_zero)
+             && (!candidate
+                 || dominated_by_p (CDI_DOMINATORS, exit->src,
+                                    candidate->src)))
            candidate = exit;
        }
     }