From: Richard Biener Date: Thu, 6 Jun 2019 11:06:45 +0000 (+0000) Subject: Backport PRs 90328, 90402, 90450, 90474 X-Git-Tag: releases/gcc-9.2.0~268 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6ea3780cabf07978f19aab01ed11d105bf3fdcb4;p=thirdparty%2Fgcc.git Backport PRs 90328, 90402, 90450, 90474 2019-06-06 Richard Biener Backport from mainline 2019-05-22 Richard Biener PR tree-optimization/90450 * tree-ssa-loop-im.c (struct im_mem_ref): Add ref_decomposed. (mem_ref_hasher::equal): Check it. (mem_ref_alloc): Initialize it. (gather_mem_refs_stmt): Set it. 2019-05-15 Richard Biener PR c/90474 * c-common.c (c_common_mark_addressable_vec): Also mark a COMPOUND_LITERAL_EXPR_DECL addressable similar to c_mark_addressable. 2019-05-13 Richard Biener PR tree-optimization/90402 * tree-if-conv.c (tree_if_conversion): Value number only the loop body by making the latch an exit of the region as well. * tree-ssa-sccvn.c (process_bb): Add flag whether to skip processing PHIs. (do_rpo_vn): Deal with multiple edges into the entry block that are not backedges inside the region by skipping PHIs of the entry block. * gcc.dg/torture/pr90402-1.c: New testcase. 2019-05-06 Richard Biener PR tree-optimization/90328 * tree-data-ref.h (dr_may_alias_p): Pass in the actual loop nest. * tree-data-ref.c (dr_may_alias_p): Check whether the clique is valid in the loop nest before using it. (initialize_data_dependence_relation): Adjust. * graphite-scop-detection.c (build_alias_set): Pass the SCOP enclosing loop as loop-nest to dr_may_alias_p. * gcc.dg/torture/pr90328.c: New testcase. From-SVN: r271995 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7dcaa7c4971e..a53ac8176cf5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2019-06-06 Richard Biener + + Backport from mainline + 2019-05-22 Richard Biener + + PR tree-optimization/90450 + * tree-ssa-loop-im.c (struct im_mem_ref): Add ref_decomposed. + (mem_ref_hasher::equal): Check it. 
+ (mem_ref_alloc): Initialize it. + (gather_mem_refs_stmt): Set it. + + 2019-05-13 Richard Biener + + PR tree-optimization/90402 + * tree-if-conv.c (tree_if_conversion): Value number only + the loop body by making the latch an exit of the region + as well. + * tree-ssa-sccvn.c (process_bb): Add flag whether to skip + processing PHIs. + (do_rpo_vn): Deal with multiple edges into the entry block + that are not backedges inside the region by skipping PHIs + of the entry block. + + 2019-05-06 Richard Biener + + PR tree-optimization/90328 + * tree-data-ref.h (dr_may_alias_p): Pass in the actual loop nest. + * tree-data-ref.c (dr_may_alias_p): Check whether the clique + is valid in the loop nest before using it. + (initialize_data_dependence_relation): Adjust. + * graphite-scop-detection.c (build_alias_set): Pass the SCOP enclosing + loop as loop-nest to dr_may_alias_p. + 2019-06-05 Eric Botcazou * fold-const.c (extract_muldiv_1) : Do not distribute a diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 871464b8e555..31a7ca109a55 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,13 @@ +2019-06-06 Richard Biener + + Backport from mainline + 2019-05-15 Richard Biener + + PR c/90474 + * c-common.c (c_common_mark_addressable_vec): Also mark + a COMPOUND_LITERAL_EXPR_DECL addressable similar to + c_mark_addressable. + 2019-05-23 Eric Botcazou * c-ada-spec.c (compare_node): Compare the DECL_UIDs as a last resort. 
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 99ca1ad3727c..d220e813564a 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -6554,6 +6554,8 @@ c_common_mark_addressable_vec (tree t) return; if (!VAR_P (t) || !DECL_HARD_REGISTER (t)) TREE_ADDRESSABLE (t) = 1; + if (TREE_CODE (t) == COMPOUND_LITERAL_EXPR) + TREE_ADDRESSABLE (COMPOUND_LITERAL_EXPR_DECL (t)) = 1; } diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index 45f459a3b783..4534d43721f4 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -1417,9 +1417,13 @@ build_alias_set (scop_p scop) int i, j; int *all_vertices; + struct loop *nest + = find_common_loop (scop->scop_info->region.entry->dest->loop_father, + scop->scop_info->region.exit->src->loop_father); + FOR_EACH_VEC_ELT (scop->drs, i, dr1) for (j = i+1; scop->drs.iterate (j, &dr2); j++) - if (dr_may_alias_p (dr1->dr, dr2->dr, true)) + if (dr_may_alias_p (dr1->dr, dr2->dr, nest)) { /* Dependences in the same alias set need to be handled by just looking at DR_ACCESS_FNs. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 877dbbdb32a9..9b909b3b4456 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2019-06-06 Richard Biener + + Backport from mainline + 2019-05-13 Richard Biener + + PR tree-optimization/90402 + * gcc.dg/torture/pr90402-1.c: New testcase. + + 2019-05-06 Richard Biener + + PR tree-optimization/90328 + * gcc.dg/torture/pr90328.c: New testcase. + 2019-06-05 Eric Botcazou * gnat.dg/specs/discr6.ads: New test. 
diff --git a/gcc/testsuite/gcc.dg/torture/pr90328.c b/gcc/testsuite/gcc.dg/torture/pr90328.c new file mode 100644 index 000000000000..a70f3dd425e5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr90328.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ + +void g(int*__restrict x, int*y) +{ + *x = *y; +} + +void __attribute__((noipa)) f(int* a,int* b) +{ + for(int i=0;i<1024;++i) + g(a+i,b+i); +} + +int main() +{ + int x[1025]; + for (int i = 0; i < 1025; ++i) + x[i] = i+1; + f(x+1, x); + for (int i = 0; i < 1025; ++i) + if (x[i] != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr90402-1.c b/gcc/testsuite/gcc.dg/torture/pr90402-1.c new file mode 100644 index 000000000000..c4bd8945ed5c --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr90402-1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx" { target x86_64-*-* i?86-*-* } } */ + +int kn, ha; + +int +c7 (void) +{ +} + +void +ul (int w3) +{ + kn = c7 (); + + while (w3 < 1) + { + ha += !!kn ? 1 : w3; + + for (kn = 0; kn < 2; ++kn) + { + } + + ++w3; + } +} diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index ccb1cfc53695..ad0a00720d1d 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -2232,7 +2232,7 @@ object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj) bool dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, - bool loop_nest) + struct loop *loop_nest) { tree addr_a = DR_BASE_OBJECT (a); tree addr_b = DR_BASE_OBJECT (b); @@ -2256,6 +2256,11 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF) && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF) + /* For cross-iteration dependences the cliques must be valid for the + whole loop, not just individual iterations. 
*/ + && (!loop_nest + || MR_DEPENDENCE_CLIQUE (addr_a) == 1 + || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique) && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b) && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b)) return false; @@ -2367,7 +2372,7 @@ initialize_data_dependence_relation (struct data_reference *a, } /* If the data references do not alias, then they are independent. */ - if (!dr_may_alias_p (a, b, loop_nest.exists ())) + if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL)) { DDR_ARE_DEPENDENT (res) = chrec_known; return res; diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 2a5082db3981..70cbb03b49c4 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -478,7 +478,7 @@ dr_alignment (data_reference *dr) } extern bool dr_may_alias_p (const struct data_reference *, - const struct data_reference *, bool); + const struct data_reference *, struct loop *); extern bool dr_equal_offsets_p (struct data_reference *, struct data_reference *); diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index ec2db007a616..98566e3fd194 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -3066,10 +3066,12 @@ tree_if_conversion (struct loop *loop, vec<gimple *> *preds) ifcvt_local_dce (loop->header); /* Perform local CSE, this esp. helps the vectorizer analysis if loads - and stores are involved. + and stores are involved. CSE only the loop body, not the entry + PHIs, those are to be kept in sync with the non-if-converted copy. ??? We'll still keep dead stores though.
*/ exit_bbs = BITMAP_ALLOC (NULL); bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); + bitmap_set_bit (exit_bbs, loop->latch->index); todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); BITMAP_FREE (exit_bbs); diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 56d8e8e4330b..2064c2900fb7 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -115,9 +115,10 @@ struct mem_ref_loc struct im_mem_ref { - unsigned id : 31; /* ID assigned to the memory reference + unsigned id : 30; /* ID assigned to the memory reference (its index in memory_accesses.refs_list) */ unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */ + unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */ hashval_t hash; /* Its hash value. */ /* The memory access itself and associated caching of alias-oracle @@ -173,7 +174,8 @@ inline bool mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2) { if (obj2->max_size_known_p ()) - return (operand_equal_p (mem1->mem.base, obj2->base, 0) + return (mem1->ref_decomposed + && operand_equal_p (mem1->mem.base, obj2->base, 0) && known_eq (mem1->mem.offset, obj2->offset) && known_eq (mem1->mem.size, obj2->size) && known_eq (mem1->mem.max_size, obj2->max_size) @@ -1389,6 +1391,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) ao_ref_init (&ref->mem, error_mark_node); ref->id = id; ref->ref_canonical = false; + ref->ref_decomposed = false; ref->hash = hash; ref->stored = NULL; bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); @@ -1476,6 +1479,7 @@ gather_mem_refs_stmt (struct loop *loop, gimple *stmt) HOST_WIDE_INT offset, size, max_size; poly_int64 saved_maxsize = aor.max_size, mem_off; tree mem_base; + bool ref_decomposed; if (aor.max_size_known_p () && aor.offset.is_constant (&offset) && aor.size.is_constant (&size) @@ -1489,12 +1493,14 @@ gather_mem_refs_stmt (struct loop *loop, gimple *stmt) aor.size) && (mem_base = get_addr_base_and_unit_offset 
(aor.ref, &mem_off))) { + ref_decomposed = true; hash = iterative_hash_expr (ao_ref_base (&aor), 0); hash = iterative_hash_host_wide_int (offset, hash); hash = iterative_hash_host_wide_int (size, hash); } else { + ref_decomposed = false; hash = iterative_hash_expr (aor.ref, 0); aor.max_size = -1; } @@ -1543,6 +1549,7 @@ gather_mem_refs_stmt (struct loop *loop, gimple *stmt) { id = memory_accesses.refs_list.length (); ref = mem_ref_alloc (&aor, hash, id); + ref->ref_decomposed = ref_decomposed; memory_accesses.refs_list.safe_push (ref); *slot = ref; diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index c3ca49bd8678..b4f626000dd0 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -5979,7 +5979,7 @@ insert_related_predicates_on_edge (enum tree_code code, tree *ops, edge pred_e) static unsigned process_bb (rpo_elim &avail, basic_block bb, bool bb_visited, bool iterate_phis, bool iterate, bool eliminate, - bool do_region, bitmap exit_bbs) + bool do_region, bitmap exit_bbs, bool skip_phis) { unsigned todo = 0; edge_iterator ei; @@ -5990,7 +5990,8 @@ process_bb (rpo_elim &avail, basic_block bb, /* If we are in loop-closed SSA preserve this state. This is relevant when called on regions from outside of FRE/PRE. */ bool lc_phi_nodes = false; - if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) + if (!skip_phis + && loops_state_satisfies_p (LOOP_CLOSED_SSA)) FOR_EACH_EDGE (e, ei, bb->preds) if (e->src->loop_father != e->dest->loop_father && flow_loop_nested_p (e->dest->loop_father, @@ -6011,67 +6012,68 @@ process_bb (rpo_elim &avail, basic_block bb, } /* Value-number all defs in the basic-block. 
*/ - for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gphi *phi = gsi.phi (); - tree res = PHI_RESULT (phi); - vn_ssa_aux_t res_info = VN_INFO (res); - if (!bb_visited) - { - gcc_assert (!res_info->visited); - res_info->valnum = VN_TOP; - res_info->visited = true; - } + if (!skip_phis) + for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gphi *phi = gsi.phi (); + tree res = PHI_RESULT (phi); + vn_ssa_aux_t res_info = VN_INFO (res); + if (!bb_visited) + { + gcc_assert (!res_info->visited); + res_info->valnum = VN_TOP; + res_info->visited = true; + } - /* When not iterating force backedge values to varying. */ - visit_stmt (phi, !iterate_phis); - if (virtual_operand_p (res)) - continue; + /* When not iterating force backedge values to varying. */ + visit_stmt (phi, !iterate_phis); + if (virtual_operand_p (res)) + continue; - /* Eliminate */ - /* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness - how we handle backedges and availability. - And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization. */ - tree val = res_info->valnum; - if (res != val && !iterate && eliminate) - { - if (tree leader = avail.eliminate_avail (bb, res)) - { - if (leader != res - /* Preserve loop-closed SSA form. */ - && (! lc_phi_nodes - || is_gimple_min_invariant (leader))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced redundant PHI node " - "defining "); - print_generic_expr (dump_file, res); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, leader); - fprintf (dump_file, "\n"); - } - avail.eliminations++; + /* Eliminate */ + /* The interesting case is gcc.dg/tree-ssa/pr22230.c for correctness + how we handle backedges and availability. + And gcc.dg/tree-ssa/ssa-sccvn-2.c for optimization. 
*/ + tree val = res_info->valnum; + if (res != val && !iterate && eliminate) + { + if (tree leader = avail.eliminate_avail (bb, res)) + { + if (leader != res + /* Preserve loop-closed SSA form. */ + && (! lc_phi_nodes + || is_gimple_min_invariant (leader))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced redundant PHI node " + "defining "); + print_generic_expr (dump_file, res); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, leader); + fprintf (dump_file, "\n"); + } + avail.eliminations++; - if (may_propagate_copy (res, leader)) - { - /* Schedule for removal. */ - avail.to_remove.safe_push (phi); - continue; - } - /* ??? Else generate a copy stmt. */ - } - } - } - /* Only make defs available that not already are. But make - sure loop-closed SSA PHI node defs are picked up for - downstream uses. */ - if (lc_phi_nodes - || res == val - || ! avail.eliminate_avail (bb, res)) - avail.eliminate_push_avail (bb, res); - } + if (may_propagate_copy (res, leader)) + { + /* Schedule for removal. */ + avail.to_remove.safe_push (phi); + continue; + } + /* ??? Else generate a copy stmt. */ + } + } + } + /* Only make defs available that not already are. But make + sure loop-closed SSA PHI node defs are picked up for + downstream uses. */ + if (lc_phi_nodes + || res == val + || ! avail.eliminate_avail (bb, res)) + avail.eliminate_push_avail (bb, res); + } /* For empty BBs mark outgoing edges executable. For non-empty BBs we do this when processing the last stmt as we have to do this @@ -6415,6 +6417,13 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, bitmap_set_bit (exit_bbs, EXIT_BLOCK); } + /* Clear EDGE_DFS_BACK on "all" entry edges, RPO order compute will + re-mark those that are contained in the region. 
*/ + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, entry->dest->preds) + e->flags &= ~EDGE_DFS_BACK; + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fn) - NUM_FIXED_BLOCKS); int n = rev_post_order_and_mark_dfs_back_seme (fn, entry, exit_bbs, !loops_state_satisfies_p (LOOPS_NEED_FIXUP), rpo); @@ -6425,6 +6434,18 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, if (!do_region) BITMAP_FREE (exit_bbs); + /* If there are any non-DFS_BACK edges into entry->dest skip + processing PHI nodes for that block. This supports + value-numbering loop bodies w/o the actual loop. */ + FOR_EACH_EDGE (e, ei, entry->dest->preds) + if (e != entry + && !(e->flags & EDGE_DFS_BACK)) + break; + bool skip_entry_phis = e != NULL; + if (skip_entry_phis && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Region does not contain all edges into " + "the entry block, skipping its PHIs.\n"); + int *bb_to_rpo = XNEWVEC (int, last_basic_block_for_fn (fn)); for (int i = 0; i < n; ++i) bb_to_rpo[rpo[i]] = i; @@ -6454,7 +6475,9 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, edge e; edge_iterator ei; FOR_EACH_EDGE (e, ei, bb->preds) - gcc_assert (e == entry || (e->src->flags & bb_in_region)); + gcc_assert (e == entry + || (skip_entry_phis && bb == entry->dest) + || (e->src->flags & bb_in_region)); } for (int i = 0; i < n; ++i) { @@ -6499,7 +6522,7 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, if (e->flags & EDGE_DFS_BACK) has_backedges = true; e->flags &= ~EDGE_EXECUTABLE; - if (iterate || e == entry) + if (iterate || e == entry || (skip_entry_phis && bb == entry->dest)) continue; if (bb_to_rpo[e->src->index] > i) { @@ -6532,7 +6555,7 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, edge_iterator ei; FOR_EACH_EDGE (e, ei, bb->preds) { - if (e == entry) + if (e == entry || (skip_entry_phis && bb == entry->dest)) continue; int max_rpo = MAX (rpo_state[i].max_rpo, rpo_state[bb_to_rpo[e->src->index]].max_rpo); @@ -6621,7 +6644,7 @@ do_rpo_vn 
(function *fn, edge entry, bitmap exit_bbs, todo |= process_bb (avail, bb, rpo_state[idx].visited != 0, rpo_state[idx].iterate, - iterate, eliminate, do_region, exit_bbs); + iterate, eliminate, do_region, exit_bbs, false); rpo_state[idx].visited++; /* Verify if changed values flow over executable outgoing backedges @@ -6719,8 +6742,10 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, edge e; FOR_EACH_EDGE (e, ei, bb->preds) if (!(e->flags & EDGE_EXECUTABLE) - && !rpo_state[bb_to_rpo[e->src->index]].visited - && rpo_state[bb_to_rpo[e->src->index]].max_rpo >= (int)idx) + && (bb == entry->dest + || (!rpo_state[bb_to_rpo[e->src->index]].visited + && (rpo_state[bb_to_rpo[e->src->index]].max_rpo + >= (int)idx)))) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Cannot trust state of predecessor " @@ -6731,7 +6756,8 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, nblk++; todo |= process_bb (avail, bb, false, false, false, eliminate, - do_region, exit_bbs); + do_region, exit_bbs, + skip_entry_phis && bb == entry->dest); rpo_state[idx].visited++; FOR_EACH_EDGE (e, ei, bb->succs) @@ -6813,7 +6839,9 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, } /* Region-based entry for RPO VN. Performs value-numbering and elimination - on the SEME region specified by ENTRY and EXIT_BBS. */ + on the SEME region specified by ENTRY and EXIT_BBS. If ENTRY is not + the only edge into the region at ENTRY->dest PHI nodes in ENTRY->dest + are not considered. */ unsigned do_rpo_vn (function *fn, edge entry, bitmap exit_bbs)