]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-scalar-evolution: Fold loop PHIs with known nonzero niter
authorAbhishek Kaushik <abhishek.kaushik@arm.com>
Wed, 27 May 2026 11:09:12 +0000 (11:09 +0000)
committerAlex Coplan <alex.coplan@arm.com>
Wed, 3 Jun 2026 16:34:04 +0000 (17:34 +0100)
SCEV sometimes fails to compute the final value of a loop PHI when the
entry value does not match the value produced by later iterations.  For
example:

  # y = PHI <y_next, 666>
  i_next = i + 1;
  y_next = i_next * 10;

The PHI value is:

  666, 10, 20, 30, ...

This is not a normal affine recurrence.  But if the latch count is
known to be nonzero, the entry value cannot be the value seen after
the loop.  The final value is the latch argument evaluated at iteration
niter - 1.

Teach SCEV final value replacement to handle this case.

This patch was bootstrapped and regression tested on aarch64-linux-gnu.

gcc/:

* tree-scalar-evolution.cc
(compute_final_value_from_loop_phi_latch): New function.
(final_value_replacement_loop): Use it.

gcc/testsuite/:

* gcc.dg/tree-ssa/sccp-loop-phi.c: New test.
* gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c: New test.

gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c [new file with mode: 0644]
gcc/tree-scalar-evolution.cc

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
new file mode 100644 (file)
index 0000000..139d521
--- /dev/null
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp-details" } */
+
+int
+nested_unused_phi1 (void)
+{
+  int acc = 0;
+
+  for (int i = 0; i < 20; i++)
+    {
+      int x = 0;
+      for (int j = 0, y = 123; j != i; j++, y = i + 10)
+       x = y;
+      acc += x;
+    }
+
+  return acc;
+}
+
+int
+nested_unused_phi2 (void)
+{
+  int acc = 0;
+
+  for (int i = 0; i != 20; i++)
+    {
+      int x = 0;
+      for (int j = 0, y = 123; j != i + 1; j++, y = i + 10)
+       x = y;
+      acc += x;
+    }
+
+  return acc;
+}
+
+int
+nested_unused_phi3 (void)
+{
+  int acc = 0;
+
+  for (int i = 0; i != 20; i++)
+    {
+      int x = 0;
+      for (int j = 0, y = 123; j != i + 2; j++, y = i + 10)
+       x = y;
+      acc += x;
+    }
+
+  return acc;
+}
+
+/* Expected:
+   nested_unused_phi1 -> 123 + sum_{i=2}^{19} (i + 10) = 492
+   nested_unused_phi2 -> 123 + sum_{i=1}^{19} (i + 10) = 503
+   nested_unused_phi3 -> sum_{i=0}^{19} (i + 10)       = 390
+
+   These functions could be optimized by sccp, but currently are not because
+   they are not normal affine recurrence.
+   compute_final_value_from_loop_phi_latch tries to optimize these cases but
+   fails to do so for nested_unsed_phi1/2 as the niter in the inner loop might
+   be zero.
+   It should technically be able to optimized nested_unused_phi3 as we know
+   that niter > 0 for the inner loop but tree_expr_nonzero_p (niter) returns
+   false and we don't.
+*/
+
+/* { dg-final { scan-tree-dump-times "not replacing:" 6 "sccp" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
new file mode 100644 (file)
index 0000000..3518a19
--- /dev/null
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp -fdump-tree-optimized" } */
+
+int
+const_init_unused_phi (void)
+{
+  int x = 0;
+  for (int i = 0, y = 666; i != 19; i++, y = i * 10)
+    x = y;
+  return x;
+}
+
+int
+conditional_init_unused_phi (int c)
+{
+  int x = 0;
+  int y;
+
+  if (c != 0)
+    y = 20;
+  else
+    y = 30;
+
+  for (int i = 0; i != 19; i++, y = i * 10)
+    x = y;
+
+  return x;
+}
+
+int
+nested_unused_phi (void)
+{
+  int acc = 0;
+
+  for (int i = 0; i < 20; i++)
+    {
+      int x = 0;
+      for (int j = 0, y = 123; j != 19; j++, y = i + 10)
+       x = y;
+      acc += x;
+    }
+
+  return acc;
+}
+
+/* Expected:
+   const_init_unused_phi         -> 180
+   conditional_init_unused_phi   -> 180
+   nested_unused_phi             -> sum_{i=0}^{19} (i + 10) = 390
+   There should be no loop left in the optimized dump.
+*/
+
+/* { dg-final { scan-tree-dump-times "return 180" 2 "sccp" } } */
+/* { dg-final { scan-tree-dump "return 390" "sccp" } } */
+/* { dg-final { scan-tree-dump-times "goto" 0 "optimized" } } */
index d8315177ff3ac81dec2ebe9607f9c121661d470f..279ff46474e487a166397096d661abfd215b338f 100644 (file)
@@ -3854,6 +3854,69 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
   return fold_build2 (code1, type, inv, match_op[0]);
 }
 
+/* Try to compute the final value of PHIDEF when PHIDEF is the result of a
+   loop-header PHI.
+
+   This handles the nonzero-latch-count delayed-value form:
+
+     y_phi = PHI <latch_arg (latch), init (preheader)>
+
+   If the latch count is known to be nonzero, the final value is:
+
+     latch_arg evaluated at iteration niter - 1
+
+   Return NULL_TREE if the pattern does not apply.  */
+static tree
+compute_final_value_from_loop_phi_latch (class loop *loop,
+       class loop *ex_loop, gphi *header_phi, tree niter, bool* folded_casts)
+{
+  if (gimple_bb (header_phi) != loop->header
+      || gimple_phi_num_args (header_phi) != 2)
+    return NULL_TREE;
+
+  /* If niter is a symbolic value make sure it can never be zero, otherwise we
+     do a bad replacement.  */
+  if (!tree_expr_nonzero_p (niter))
+    return NULL_TREE;
+
+  tree latch_arg = PHI_ARG_DEF_FROM_EDGE (header_phi,
+                                         loop_latch_edge (loop));
+
+  tree ev = analyze_scalar_evolution_in_loop (ex_loop,
+                                             loop,
+                                             latch_arg,
+                                             folded_casts);
+  if (ev == chrec_dont_know)
+    return NULL_TREE;
+
+  bool invariant_p;
+  if (no_evolution_in_loop_p (ev, ex_loop->num, &invariant_p) && invariant_p)
+    return ev;
+  else if (TREE_CODE (ev) == POLYNOMIAL_CHREC
+          && get_chrec_loop (ev) == ex_loop)
+  {
+    tree niter_type = TREE_TYPE (niter);
+    tree prev_iter = fold_build2 (MINUS_EXPR,
+                                 niter_type,
+                                 niter,
+                                 build_one_cst (niter_type));
+
+    tree res = chrec_apply (ex_loop->num, ev, prev_iter);
+    if (res == chrec_dont_know)
+      return NULL_TREE;
+
+    if (chrec_contains_symbols_defined_in_loop (res, ex_loop->num))
+    {
+      res = instantiate_parameters (ex_loop, res);
+      if (res == chrec_dont_know)
+       return NULL_TREE;
+    }
+
+    return res;
+  }
+  return NULL_TREE;
+}
+
 /* Do final value replacement for LOOP, return true if we did anything.  */
 
 bool
@@ -3865,7 +3928,11 @@ final_value_replacement_loop (class loop *loop)
   if (!exit)
     return false;
 
-  tree niter = number_of_latch_executions (loop);
+  class tree_niter_desc niter_desc;
+  if (!number_of_iterations_exit (loop, exit, &niter_desc, false))
+    return false;
+
+  tree niter = niter_desc.niter;
   if (niter == chrec_dont_know)
     return false;
 
@@ -3904,9 +3971,13 @@ final_value_replacement_loop (class loop *loop)
       def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
                                              &folded_casts);
 
-      tree bitinv_def, bit_def;
+      tree bitinv_def, bit_def, phi_latch_final_value;
       unsigned HOST_WIDE_INT niter_num;
 
+      gphi *header_phi = TREE_CODE (phidef) == SSA_NAME
+                        ? dyn_cast<gphi*> (SSA_NAME_DEF_STMT (phidef))
+                        : NULL;
+
       if (def != chrec_dont_know)
        def = compute_overall_effect_of_inner_loop (ex_loop, def);
 
@@ -3941,6 +4012,16 @@ final_value_replacement_loop (class loop *loop)
                                                                   niter_num)))
        def = bit_def;
 
+      else if (header_phi
+              && integer_zerop (niter_desc.may_be_zero)
+              && (phi_latch_final_value
+                  = compute_final_value_from_loop_phi_latch (loop,
+                                                             ex_loop,
+                                                             header_phi,
+                                                             niter,
+                                                             &folded_casts)))
+       def = phi_latch_final_value;
+
       bool cond_overflow_p;
       if (!tree_does_not_contain_chrecs (def)
          || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)