From 2c9ddd6687b0366e2a0b7e0a5d0b205d31dbb77a Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Mon, 27 Oct 2025 13:48:09 +0000 Subject: [PATCH] vect: Fix uncounted PHI handling of `slpeel_tree_duplicate_loop_to_edge_cfg' Given the present requirement for loops, early-break or otherwise, to have a known iteration count, there is currently no need for single-exit loops to reset induction variables and accumulators prior to entering the exit loop. For multiple-exit uncounted loops, there are provisions in the code for resetting IVs and accumulators on exiting the loop via early exits. This is extended to the main exit (though only in multiple-exit loops) if `peeled_iters' is set to `true', where the definition of `peeled_iters' is equivalent to that of LOOP_VINFO_EARLY_BREAKS_VECT_PEELED, but is evaluated independently as the function does not have access to loop_vinfo. Therefore, the first fix is to ensure that, just as for LOOP_VINFO_EARLY_BREAKS_VECT_PEELED, `peeled_iters' also evaluates to true for uncounted loops. The second fix addresses the fact that the relevant logic is currently guarded by `multiple_exits_p': it is now also enabled for uncounted loops via the new function argument `uncounted_p'. gcc/ChangeLog: * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): Reset IVs and accumulators for all exits for uncounted loops. * tree-vectorizer.h (slpeel_tree_duplicate_loop_to_edge_cfg): Add boolean `uncounted_p' argument. 
--- gcc/tree-vect-loop-manip.cc | 20 +++++++++++++------- gcc/tree-vectorizer.h | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 0fa814f4769..f859f2c5711 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1481,7 +1481,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, class loop *scalar_loop, edge scalar_exit, edge e, edge *new_e, bool flow_loops, - vec *updated_doms) + vec *updated_doms, + bool uncounted_p) { class loop *new_loop; basic_block *new_bbs, *bbs, *pbbs; @@ -1652,7 +1653,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, the continuation values into the epilogue header. Do not bother with exit PHIs for the early exits but their live virtual operand. We'll fix up things below. */ - if (multiple_exits_p) + if (multiple_exits_p || uncounted_p) { edge loop_e = single_succ_edge (new_preheader); new_preheader = split_edge (loop_e); @@ -1707,7 +1708,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, if (flow_loops) { edge loop_entry = single_succ_edge (new_preheader); - bool peeled_iters = single_pred (loop->latch) != loop_exit->src; + bool peeled_iters = (uncounted_p + || single_pred (loop->latch) != loop_exit->src); /* Record the new SSA names in the cache so that we can skip materializing them again when we fill in the rest of the LC SSA @@ -1737,7 +1739,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* Create the merge PHI nodes in new_preheader and populate the arguments for the exits. */ - if (multiple_exits_p) + if (multiple_exits_p || uncounted_p) { for (auto gsi_from = gsi_start_phis (loop->header), gsi_to = gsi_start_phis (new_loop->header); @@ -1789,7 +1791,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* And adjust the epilog entry value. 
*/ adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res); } + } + if (multiple_exits_p) + { /* After creating the merge PHIs handle the early exits those should use the values at the start of the loop. */ for (auto gsi_from = gsi_start_phis (loop->header), @@ -1826,7 +1831,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* For the single exit case only create the missing LC PHI nodes for the continuation of the loop IVs that are not also already reductions and thus had LC PHI nodes on the exit already. */ - else + if (!multiple_exits_p && !uncounted_p) { for (auto gsi_from = gsi_start_phis (loop->header), gsi_to = gsi_start_phis (new_loop->header); @@ -1869,7 +1874,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* Finally after wiring the new epilogue we need to update its main exit to the original function exit we recorded. Other exits are already correct. */ - if (multiple_exits_p) + if (multiple_exits_p || uncounted_p) { class loop *update_loop = new_loop; doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); @@ -3499,7 +3504,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, auto_vec doms; epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, - &new_epilog_e, true, &doms); + &new_epilog_e, true, &doms, + uncounted_p); LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e; gcc_assert (epilog); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index b5cb835a82b..2eb022e505d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2467,7 +2467,8 @@ extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge, class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge, class loop *, edge, edge, edge *, bool = true, - vec * = NULL); + vec * = NULL, + bool = false); class loop *vect_loop_versioning (loop_vec_info, gimple *); extern class loop *vect_do_peeling (loop_vec_info, tree, tree, tree *, 
tree *, tree *, int, bool, bool, -- 2.47.3