From: Richard Biener Date: Tue, 12 Dec 2023 13:01:47 +0000 (+0100) Subject: tree-optimization/112961 - include latch in if-conversion CSE X-Git-Tag: basepoints/gcc-15~3682 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=878cb5acf0c499702ffd315e273f55e8bd0970b8;p=thirdparty%2Fgcc.git tree-optimization/112961 - include latch in if-conversion CSE The following makes sure to also process the (empty) latch when performing CSE on the if-converted loop body. That's important to get all uses of copies propagated out on the backedge as well. To avoid CSE on the PHI nodes themselves which is prohibitive (see PR90402) this temporarily adds a fake entry edge to the loop. PR tree-optimization/112961 * tree-if-conv.cc (tree_if_conversion): Instead of excluding the latch block from VN, add a fake entry edge. * g++.dg/vect/pr112961.cc: New testcase. --- diff --git a/gcc/testsuite/g++.dg/vect/pr112961.cc b/gcc/testsuite/g++.dg/vect/pr112961.cc new file mode 100644 index 000000000000..52759e180fbe --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr112961.cc @@ -0,0 +1,17 @@ +// { dg-do compile } +// { dg-require-effective-target vect_int } + +inline const int& maxx (const int& a, const int &b) +{ + return a > b ? a : b; +} + +int foo(int *a) +{ + int max = 0; + for (int i = 0; i < 1024; ++i) + max = maxx(max, a[i]); + return max; +} + +// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail vect_no_int_min_max } } } diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 0bde281c2468..f9fd01499374 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3734,7 +3734,7 @@ tree_if_conversion (class loop *loop, vec *preds) auto_vec reads_to_lower; auto_vec writes_to_lower; bitmap exit_bbs; - edge pe; + edge pe, e; auto_vec refs; bool loop_versioned; @@ -3894,11 +3894,13 @@ tree_if_conversion (class loop *loop, vec *preds) /* Perform local CSE, this esp. helps the vectorizer analysis if loads and stores are involved. 
CSE only the loop body, not the entry PHIs, those are to be kept in sync with the non-if-converted copy. + Do this by adding a fake entry edge - we do want to include the + latch as otherwise copies on a reduction path cannot be propagated out. ??? We'll still keep dead stores though. */ + e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), loop->header, EDGE_FAKE); exit_bbs = BITMAP_ALLOC (NULL); for (edge exit : get_loop_exit_edges (loop)) bitmap_set_bit (exit_bbs, exit->dest->index); - bitmap_set_bit (exit_bbs, loop->latch->index); std::pair *name_pair; unsigned ssa_names_idx; @@ -3908,6 +3910,9 @@ tree_if_conversion (class loop *loop, vec *preds) todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); + /* Remove the fake edge again. */ + remove_edge (e); + /* Delete dead predicate computations. */ ifcvt_local_dce (loop); BITMAP_FREE (exit_bbs);