From: Daniel Berlin Date: Tue, 14 Nov 2006 18:12:20 +0000 (+0000) Subject: re PR tree-optimization/27755 (PRE confused by control flow) X-Git-Tag: releases/gcc-4.3.0~8469 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d75dbccd3c515022479883aed7d9e93563132910;p=thirdparty%2Fgcc.git re PR tree-optimization/27755 (PRE confused by control flow) 2006-11-14 Daniel Berlin Fix PR tree-optimization/27755 * tree-ssa-pre.c: Update comments. (bb_bitmap_sets): Add pa_in and deferred member. (BB_DEFERRED): New macro. (maximal_set): New variable. (pre_stats): Add pa_insert member. (bitmap_set_and): Short circuit orig == dest. (bitmap_set_subtract_values): New function. (bitmap_set_contains_expr): Ditto. (translate_vuses_through_block): Add phiblock argument. (dependent_clean): New function. (compute_antic_aux): Update for maximal_set changes. (compute_partial_antic_aux): New function. (compute_antic): Handle partial anticipation. (do_partial_partial_insertion): New function. (insert_aux): Handle partial anticipation. (add_to_sets): Add to maximal set. (compute_avail): Ditto. (init_pre): Initialize maximal_set. (execute_pre): Do partial anticipation if -O3+. From-SVN: r118821 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d1f36dfa6b7f..a9e106a63072 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2006-11-14 Daniel Berlin + + Fix PR tree-optimization/27755 + + * tree-ssa-pre.c: Update comments. + (bb_bitmap_sets): Add pa_in and deferred member. + (BB_DEFERRED): New macro. + (maximal_set): New variable. + (pre_stats): Add pa_insert member. + (bitmap_set_and): Short circuit orig == dest. + (bitmap_set_subtract_values): New function. + (bitmap_set_contains_expr): Ditto. + (translate_vuses_through_block): Add phiblock argument. + (dependent_clean): New function. + (compute_antic_aux): Update for maximal_set changes. + (compute_partial_antic_aux): New function. + (compute_antic): Handle partial anticipation. 
+ (do_partial_partial_insertion): New function. + (insert_aux): Handle partial anticipation. + (add_to_sets): Add to maximal set. + (compute_avail): Ditto. + (init_pre): Initialize maximal_set. + (execute_pre): Do partial anticipation if -O3+. + 2006-11-14 Paolo Bonzini PR rtl-optimization/29798 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-16.c new file mode 100644 index 000000000000..b087dc1b45c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-16.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-pre-stats -std=c99" } */ +int foo(int k, int *x) +{ + int j=0; + int res = 0; + /* We should pull res = *x all the way out of the do-while */ + do { + for (int n=0;n<3;++n); + res = *x; + } while (++jaux))->exp_gen -#define PHI_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->phi_gen -#define TMP_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->tmp_gen -#define AVAIL_OUT(BB) ((bb_bitmap_sets_t) ((BB)->aux))->avail_out -#define ANTIC_IN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->antic_in -#define RVUSE_IN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_in -#define RVUSE_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_gen -#define RVUSE_KILL(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_kill -#define RVUSE_OUT(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_out -#define NEW_SETS(BB) ((bb_bitmap_sets_t) ((BB)->aux))->new_sets -#define ANTIC_SAFE_LOADS(BB) ((bb_bitmap_sets_t) ((BB)->aux))->antic_safe_loads -#define BB_VISITED(BB) ((bb_bitmap_sets_t) ((BB)->aux))->visited + + /* True we have deferred processing this block during ANTIC + calculation until its successor is processed. 
*/ + unsigned int deferred : 1; +} *bb_value_sets_t; + +#define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen +#define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen +#define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen +#define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out +#define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in +#define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in +#define RVUSE_IN(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_in +#define RVUSE_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_gen +#define RVUSE_KILL(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_kill +#define RVUSE_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_out +#define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets +#define ANTIC_SAFE_LOADS(BB) ((bb_value_sets_t) ((BB)->aux))->antic_safe_loads +#define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited +#define BB_DEFERRED(BB) ((bb_value_sets_t) ((BB)->aux))->deferred + +/* Maximal set of values, used to initialize the ANTIC problem, which + is an intersection problem. */ +static bitmap_set_t maximal_set; /* Basic block list in postorder. */ static int *postorder; @@ -336,6 +356,9 @@ static struct /* The number of new expressions/temporaries generated by PRE. */ int insertions; + /* The number of inserts found due to partial anticipation */ + int pa_insert; + /* The number of new PHI nodes added by PRE. 
*/ int phis; @@ -344,6 +367,7 @@ static struct } pre_stats; +static bool do_partial_partial; static tree bitmap_find_leader (bitmap_set_t, tree); static void bitmap_value_insert_into_set (bitmap_set_t, tree); static void bitmap_value_replace_in_set (bitmap_set_t, tree); @@ -632,19 +656,23 @@ bitmap_set_and (bitmap_set_t dest, bitmap_set_t orig) { bitmap_iterator bi; unsigned int i; - bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack); - bitmap_and_into (dest->values, orig->values); - - bitmap_copy (temp, dest->expressions); - EXECUTE_IF_SET_IN_BITMAP (temp, 0, i, bi) + if (dest != orig) { - tree expr = expression_for_id (i); - tree val = get_value_handle (expr); - if (!bitmap_bit_p (dest->values, VALUE_HANDLE_ID (val))) - bitmap_clear_bit (dest->expressions, i); + bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack); + + bitmap_and_into (dest->values, orig->values); + + bitmap_copy (temp, dest->expressions); + EXECUTE_IF_SET_IN_BITMAP (temp, 0, i, bi) + { + tree expr = expression_for_id (i); + tree val = get_value_handle (expr); + if (!bitmap_bit_p (dest->values, VALUE_HANDLE_ID (val))) + bitmap_clear_bit (dest->expressions, i); + } + BITMAP_FREE (temp); } - BITMAP_FREE (temp); } /* Subtract all values and expressions contained in ORIG from DEST. */ @@ -669,6 +697,26 @@ bitmap_set_subtract (bitmap_set_t dest, bitmap_set_t orig) return result; } +/* Subtract all the values in bitmap set B from bitmap set A. */ + +static void +bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b) +{ + unsigned int i; + bitmap_iterator bi; + bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack); + + bitmap_copy (temp, a->expressions); + EXECUTE_IF_SET_IN_BITMAP (temp, 0, i, bi) + { + tree expr = expression_for_id (i); + if (bitmap_set_contains_value (b, get_value_handle (expr))) + bitmap_remove_from_set (a, expr); + } + BITMAP_FREE (temp); +} + + /* Return true if bitmapped set SET contains the value VAL. 
*/ static bool @@ -683,6 +731,12 @@ bitmap_set_contains_value (bitmap_set_t set, tree val) return bitmap_bit_p (set->values, VALUE_HANDLE_ID (val)); } +static inline bool +bitmap_set_contains_expr (bitmap_set_t set, tree expr) +{ + return bitmap_bit_p (set->expressions, get_expression_id (expr)); +} + /* Replace an instance of value LOOKFOR with expression EXPR in SET. */ static void @@ -855,11 +909,14 @@ pool_copy_list (tree list) return head; } -/* Translate the vuses in the VUSES vector backwards through phi - nodes, so that they have the value they would have in BLOCK. */ +/* Translate the vuses in the VUSES vector backwards through phi nodes + in PHIBLOCK, so that they have the value they would have in + BLOCK. */ static VEC(tree, gc) * -translate_vuses_through_block (VEC (tree, gc) *vuses, basic_block block) +translate_vuses_through_block (VEC (tree, gc) *vuses, + basic_block phiblock, + basic_block block) { tree oldvuse; VEC(tree, gc) *result = NULL; @@ -868,7 +925,8 @@ translate_vuses_through_block (VEC (tree, gc) *vuses, basic_block block) for (i = 0; VEC_iterate (tree, vuses, i, oldvuse); i++) { tree phi = SSA_NAME_DEF_STMT (oldvuse); - if (TREE_CODE (phi) == PHI_NODE) + if (TREE_CODE (phi) == PHI_NODE + && bb_for_stmt (phi) == phiblock) { edge e = find_edge (block, bb_for_stmt (phi)); if (e) @@ -1047,7 +1105,7 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, if (listchanged) vn_lookup_or_add (newarglist, NULL); - tvuses = translate_vuses_through_block (vuses, pred); + tvuses = translate_vuses_through_block (vuses, phiblock, pred); if (listchanged || (newop0 != oldop0) || (oldop2 != newop2) || vuses != tvuses) @@ -1073,7 +1131,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr)); if (oldvuses) - newvuses = translate_vuses_through_block (oldvuses, pred); + newvuses = translate_vuses_through_block (oldvuses, phiblock, + pred); if (oldvuses != newvuses) 
vn_lookup_or_add_with_vuses (expr, newvuses); @@ -1137,7 +1196,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr)); if (oldvuses) - newvuses = translate_vuses_through_block (oldvuses, pred); + newvuses = translate_vuses_through_block (oldvuses, phiblock, + pred); if (newop0 != oldop0 || newvuses != oldvuses || newop1 != oldop1 @@ -1258,9 +1318,13 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, { tree phi = NULL; edge e; + tree def_stmt; gcc_assert (TREE_CODE (expr) == SSA_NAME); - if (TREE_CODE (SSA_NAME_DEF_STMT (expr)) == PHI_NODE) - phi = SSA_NAME_DEF_STMT (expr); + + def_stmt = SSA_NAME_DEF_STMT (expr); + if (TREE_CODE (def_stmt) == PHI_NODE + && bb_for_stmt (def_stmt) == phiblock) + phi = def_stmt; else return expr; @@ -1498,7 +1562,10 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr, return false; case tcc_exceptional: - return true; + { + gcc_assert (TREE_CODE (expr) == SSA_NAME); + return bitmap_set_contains_expr (AVAIL_OUT (block), expr); + } case tcc_declaration: return !vuses_dies_in_block_x (VALUE_HANDLE_VUSES (vh), block); @@ -1509,6 +1576,27 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr, } } +/* Clean the set of expressions that are no longer valid in SET1 or + SET2. This means expressions that are made up of values we have no + leaders for in SET1 or SET2. This version is used for partial + anticipation, which means it is not valid in either ANTIC_IN or + PA_IN. */ + +static void +dependent_clean (bitmap_set_t set1, bitmap_set_t set2, basic_block block) +{ + VEC (tree, heap) *exprs = sorted_array_from_bitmap_set (set1); + tree expr; + int i; + + for (i = 0; VEC_iterate (tree, exprs, i, expr); i++) + { + if (!valid_in_sets (set1, set2, expr, block)) + bitmap_remove_from_set (set1, expr); + } + VEC_free (tree, heap, exprs); +} + /* Clean the set of expressions that are no longer valid in SET. 
This means expressions that are made up of values we have no leaders for in SET. */ @@ -1556,6 +1644,7 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) edge_iterator ei; old = ANTIC_OUT = S = NULL; + BB_VISITED (block) = 1; /* If any edges from predecessors are abnormal, antic_in is empty, so do nothing. */ @@ -1564,7 +1653,6 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) old = ANTIC_IN (block); ANTIC_OUT = bitmap_set_new (); - BB_VISITED (block) = 1; /* If the block has no successors, ANTIC_OUT is empty. */ if (EDGE_COUNT (block->succs) == 0) @@ -1574,9 +1662,38 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) else if (single_succ_p (block)) { basic_block succ_bb = single_succ (block); - phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), - block, succ_bb); + + /* We trade iterations of the dataflow equations for having to + phi translate the maximal set, which is incredibly slow + (since the maximal set often has 300+ members, even when you + have a small number of blocks). + Basically, we defer the computation of ANTIC for this block + until we have processed its successor, which will inevitably + have a *much* smaller set of values to phi translate once + clean has been run on it. + The cost of doing this is that we technically perform more + iterations, however, they are lower cost iterations. + + Timings for PRE on tramp3d-v4: + without maximal set fix: 11 seconds + with maximal set fix/without deferring: 26 seconds + with maximal set fix/with deferring: 11 seconds + */ + + if (!BB_VISITED (succ_bb)) + { + changed = true; + SET_BIT (changed_blocks, block->index); + BB_VISITED (block) = 0; + BB_DEFERRED (block) = 1; + goto maybe_dump_sets; + } + else + phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), + block, succ_bb); } + + /* If we have multiple successors, we take the intersection of all of them. 
*/ else @@ -1584,36 +1701,31 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) VEC(basic_block, heap) * worklist; size_t i; basic_block bprime, first; - bool any_visited = false; worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs)); FOR_EACH_EDGE (e, ei, block->succs) - { - any_visited |= BB_VISITED (e->dest); - VEC_quick_push (basic_block, worklist, e->dest); - } - - if (any_visited) - { - first = VEC_index (basic_block, worklist, 0); - - bitmap_set_copy (ANTIC_OUT, ANTIC_IN (first)); + VEC_quick_push (basic_block, worklist, e->dest); + first = VEC_index (basic_block, worklist, 0); - for (i = 1; VEC_iterate (basic_block, worklist, i, bprime); i++) - { - if (!BB_VISITED (bprime)) - continue; + if (!BB_VISITED (first)) + bitmap_set_copy (ANTIC_OUT, maximal_set); + else + bitmap_set_copy (ANTIC_OUT, ANTIC_IN (first)); - bitmap_set_and (ANTIC_OUT, ANTIC_IN (bprime)); - } - VEC_free (basic_block, heap, worklist); + for (i = 1; VEC_iterate (basic_block, worklist, i, bprime); i++) + { + if (!BB_VISITED (bprime)) + bitmap_set_and (ANTIC_OUT, maximal_set); + else + bitmap_set_and (ANTIC_OUT, ANTIC_IN (bprime)); } + VEC_free (basic_block, heap, worklist); } /* Generate ANTIC_OUT - TMP_GEN. */ S = bitmap_set_subtract (ANTIC_OUT, TMP_GEN (block)); - /* Start ANTIC_IN with EXP_GEN - TMP_GEN */ + /* Start ANTIC_IN with EXP_GEN - TMP_GEN. */ ANTIC_IN (block) = bitmap_set_subtract (EXP_GEN (block), TMP_GEN (block)); @@ -1624,7 +1736,9 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) expression_for_id (bii)); clean (ANTIC_IN (block), block); - if (!bitmap_set_equal (old, ANTIC_IN (block))) + + /* !old->expressions can happen when we deferred a block. 
*/ + if (!old->expressions || !bitmap_set_equal (old, ANTIC_IN (block))) { changed = true; SET_BIT (changed_blocks, block->index); @@ -1637,16 +1751,26 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) maybe_dump_sets: if (dump_file && (dump_flags & TDF_DETAILS)) { - if (ANTIC_OUT) - print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index); + if (!BB_DEFERRED (block) || BB_VISITED (block)) + { + if (ANTIC_OUT) + print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index); - if (ANTIC_SAFE_LOADS (block)) - print_bitmap_set (dump_file, ANTIC_SAFE_LOADS (block), - "ANTIC_SAFE_LOADS", block->index); - print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN", block->index); + if (ANTIC_SAFE_LOADS (block)) + print_bitmap_set (dump_file, ANTIC_SAFE_LOADS (block), + "ANTIC_SAFE_LOADS", block->index); + print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN", + block->index); - if (S) - print_bitmap_set (dump_file, S, "S", block->index); + if (S) + print_bitmap_set (dump_file, S, "S", block->index); + } + else + { + fprintf (dump_file, + "Block %d was deferred for a future iteration.\n", + block->index); + } } if (old) bitmap_set_free (old); @@ -1657,6 +1781,126 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) return changed; } +/* Compute PARTIAL_ANTIC for BLOCK. + + If succs(BLOCK) > 1 then + PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not + in ANTIC_OUT for all succ(BLOCK) + else if succs(BLOCK) == 1 then + PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)]) + + PA_IN[BLOCK] = dependent_clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] + - ANTIC_IN[BLOCK]) + +*/ +static bool +compute_partial_antic_aux (basic_block block, + bool block_has_abnormal_pred_edge) +{ + bool changed = false; + bitmap_set_t old_PA_IN; + bitmap_set_t PA_OUT; + edge e; + edge_iterator ei; + + old_PA_IN = PA_OUT = NULL; + + /* If any edges from predecessors are abnormal, antic_in is empty, + so do nothing. 
*/ + if (block_has_abnormal_pred_edge) + goto maybe_dump_sets; + + old_PA_IN = PA_IN (block); + PA_OUT = bitmap_set_new (); + + /* If the block has no successors, PA_OUT is empty. */ + if (EDGE_COUNT (block->succs) == 0) + ; + /* If we have one successor, we could have some phi nodes to + translate through. Note that we can't phi translate across DFS + back edges in partial antic, because it uses a union operation + on the successors. For recurrences like IV's, we will end up generating a + new value in the set on each go around (i + 3 (VH.1) VH.1 + 1 + (VH.2), VH.2 + 1 (VH.3), etc), forever. */ + else if (single_succ_p (block)) + { + basic_block succ = single_succ (block); + if (!(single_succ_edge (block)->flags & EDGE_DFS_BACK)) + phi_translate_set (PA_OUT, PA_IN (succ), block, succ); + } + /* If we have multiple successors, we take the union of all of + them. */ + else + { + VEC(basic_block, heap) * worklist; + size_t i; + basic_block bprime; + + worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs)); + FOR_EACH_EDGE (e, ei, block->succs) + { + if (e->flags & EDGE_DFS_BACK) + continue; + VEC_quick_push (basic_block, worklist, e->dest); + } + if (VEC_length (basic_block, worklist) > 0) + { + for (i = 0; VEC_iterate (basic_block, worklist, i, bprime); i++) + { + unsigned int i; + bitmap_iterator bi; + + FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (bprime), i, bi) + bitmap_value_insert_into_set (PA_OUT, + expression_for_id (i)); + + FOR_EACH_EXPR_ID_IN_SET (PA_IN (bprime), i, bi) + bitmap_value_insert_into_set (PA_OUT, + expression_for_id (i)); + } + } + VEC_free (basic_block, heap, worklist); + } + + /* PA_IN starts with PA_OUT - TMP_GEN. + Then we subtract things from ANTIC_IN. */ + PA_IN (block) = bitmap_set_subtract (PA_OUT, TMP_GEN (block)); + + /* For partial antic, we want to put back in the phi results, since + we will properly avoid making them partially antic over backedges. 
*/ + bitmap_ior_into (PA_IN (block)->values, PHI_GEN (block)->values); + bitmap_ior_into (PA_IN (block)->expressions, PHI_GEN (block)->expressions); + + /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */ + bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block)); + + dependent_clean (PA_IN (block), ANTIC_IN (block), block); + + if (!bitmap_set_equal (old_PA_IN, PA_IN (block))) + { + changed = true; + SET_BIT (changed_blocks, block->index); + FOR_EACH_EDGE (e, ei, block->preds) + SET_BIT (changed_blocks, e->src->index); + } + else + RESET_BIT (changed_blocks, block->index); + + maybe_dump_sets: + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (PA_OUT) + print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index); + + print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index); + } + if (old_PA_IN) + bitmap_set_free (old_PA_IN); + if (PA_OUT) + bitmap_set_free (PA_OUT); + return changed; +} + /* Compute ANTIC and partial ANTIC sets. */ static void @@ -1688,13 +1932,16 @@ compute_antic (void) } BB_VISITED (block) = 0; + BB_DEFERRED (block) = 0; /* While we are here, give empty ANTIC_IN sets to each block. */ ANTIC_IN (block) = bitmap_set_new (); + PA_IN (block) = bitmap_set_new (); } /* At the exit block we anticipate nothing. */ ANTIC_IN (EXIT_BLOCK_PTR) = bitmap_set_new (); BB_VISITED (EXIT_BLOCK_PTR) = 1; + PA_IN (EXIT_BLOCK_PTR) = bitmap_set_new (); changed_blocks = sbitmap_alloc (last_basic_block + 1); sbitmap_ones (changed_blocks); @@ -1714,12 +1961,44 @@ compute_antic (void) block->index)); } } + /* Theoretically possible, but *highly* unlikely. 
+ gcc_assert (num_iterations < 50); } if (dump_file && (dump_flags & TDF_STATS)) fprintf (dump_file, "compute_antic required %d iterations\n", num_iterations); + if (do_partial_partial) + { + sbitmap_ones (changed_blocks); + mark_dfs_back_edges (); + num_iterations = 0; + changed = true; + while (changed) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Starting iteration %d\n", num_iterations); + num_iterations++; + changed = false; + for (i = 0; i < last_basic_block - NUM_FIXED_BLOCKS; i++) + { + if (TEST_BIT (changed_blocks, postorder[i])) + { + basic_block block = BASIC_BLOCK (postorder[i]); + changed + |= compute_partial_antic_aux (block, + TEST_BIT (has_abnormal_preds, + block->index)); + } + } + /* Theoretically possible, but *highly* unlikely. */ + gcc_assert (num_iterations < 50); + } + if (dump_file && (dump_flags & TDF_STATS)) + fprintf (dump_file, "compute_partial_antic required %d iterations\n", + num_iterations); + } sbitmap_free (has_abnormal_preds); sbitmap_free (changed_blocks); } @@ -2558,7 +2837,7 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum, 3. Recursively call ourselves on the dominator children of BLOCK. Steps 1, 2a, and 3 are done by insert_aux. 2b, 2c and 2d are done by - do_regular_insertion. + do_regular_insertion and do_partial_partial_insertion. */ @@ -2689,8 +2968,107 @@ do_regular_insertion (basic_block block, basic_block dom) } -/* Perform insertion of partially redundant expressions for block - BLOCK. */ +/* Perform insertion for partially anticipatable expressions. There + is only one case we will perform insertion for these. This case is + if the expression is partially anticipatable, and fully available. + In this case, we know that putting it earlier will enable us to + remove the later computation. 
*/ + + +static bool +do_partial_partial_insertion (basic_block block, basic_block dom) +{ + bool new_stuff = false; + VEC (tree, heap) *exprs = sorted_array_from_bitmap_set (PA_IN (block)); + tree expr; + int i; + + for (i = 0; VEC_iterate (tree, exprs, i, expr); i++) + { + if (can_PRE_operation (expr) && !AGGREGATE_TYPE_P (TREE_TYPE (expr))) + { + tree *avail; + tree val; + bool by_all = true; + bool cant_insert = false; + edge pred; + basic_block bprime; + tree eprime = NULL_TREE; + edge_iterator ei; + + val = get_value_handle (expr); + if (bitmap_set_contains_value (PHI_GEN (block), val)) + continue; + if (bitmap_set_contains_value (AVAIL_OUT (dom), val)) + continue; + + avail = XCNEWVEC (tree, last_basic_block); + FOR_EACH_EDGE (pred, ei, block->preds) + { + tree vprime; + tree edoubleprime; + + /* This can happen in the very weird case + that our fake infinite loop edges have caused a + critical edge to appear. */ + if (EDGE_CRITICAL_P (pred)) + { + cant_insert = true; + break; + } + bprime = pred->src; + eprime = phi_translate (expr, ANTIC_IN (block), + PA_IN (block), + bprime, block); + + /* eprime will generally only be NULL if the + value of the expression, translated + through the PHI for this predecessor, is + undefined. If that is the case, we can't + make the expression fully redundant, + because its value is undefined along a + predecessor path. We can thus break out + early because it doesn't matter what the + rest of the results are. */ + if (eprime == NULL) + { + cant_insert = true; + break; + } + + eprime = fully_constant_expression (eprime); + vprime = get_value_handle (eprime); + gcc_assert (vprime); + edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), + vprime); + if (edoubleprime == NULL) + { + by_all = false; + break; + } + else + avail[bprime->index] = edoubleprime; + + } + + /* If we can insert it, it's not the same value + already existing along every predecessor, and + it's defined by some predecessor, it is + partially redundant. 
*/ + if (!cant_insert && by_all) + { + pre_stats.pa_insert++; + if (insert_into_preds_of_block (block, get_expression_id (expr), + avail)) + new_stuff = true; + } + free (avail); + } + } + + VEC_free (tree, heap, exprs); + return new_stuff; +} static bool insert_aux (basic_block block) @@ -2723,6 +3101,8 @@ insert_aux (basic_block block) if (!single_pred_p (block)) { new_stuff |= do_regular_insertion (block, dom); + if (do_partial_partial) + new_stuff |= do_partial_partial_insertion (block, dom); } } } @@ -2797,6 +3177,11 @@ add_to_sets (tree var, tree expr, tree stmt, bitmap_set_t s1, if (s1) bitmap_insert_into_set (s1, var); + /* PHI nodes can't go in the maximal sets because they are not in + TMP_GEN, so it is possible to get into non-monotonic situations + during ANTIC calculation, because it will *add* bits. */ + if (!in_fre && TREE_CODE (SSA_NAME_DEF_STMT (var)) != PHI_NODE) + bitmap_value_insert_into_set (maximal_set, var); bitmap_value_insert_into_set (s2, var); } @@ -3281,6 +3666,8 @@ compute_avail (void) vn_lookup_or_add (def, NULL); bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def); + if (!in_fre) + bitmap_value_insert_into_set (maximal_set, def); bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def); } } @@ -3295,6 +3682,8 @@ compute_avail (void) vn_lookup_or_add (def, NULL); bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def); + if (!in_fre) + bitmap_value_insert_into_set (maximal_set, def); bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def); } } @@ -3410,6 +3799,8 @@ compute_avail (void) { tree val = vn_lookup_or_add (newt, stmt); vn_add (lhs, val); + if (!in_fre) + bitmap_value_insert_into_set (maximal_set, newt); bitmap_value_insert_into_set (EXP_GEN (block), newt); } bitmap_insert_into_set (TMP_GEN (block), lhs); @@ -3679,19 +4070,21 @@ init_pre (bool do_fre) connect_infinite_loops_to_exit (); memset (&pre_stats, 0, sizeof (pre_stats)); + postorder = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); 
post_order_compute (postorder, false); FOR_ALL_BB (bb) bb->aux = xcalloc (1, sizeof (struct bb_bitmap_sets)); + calculate_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + bitmap_obstack_initialize (&grand_bitmap_obstack); phi_translate_table = htab_create (5110, expr_pred_trans_hash, expr_pred_trans_eq, free); bitmap_set_pool = create_alloc_pool ("Bitmap sets", sizeof (struct bitmap_set), 30); - calculate_dominance_info (CDI_POST_DOMINATORS); - calculate_dominance_info (CDI_DOMINATORS); binary_node_pool = create_alloc_pool ("Binary tree nodes", tree_code_size (PLUS_EXPR), 30); unary_node_pool = create_alloc_pool ("Unary tree nodes", @@ -3716,6 +4109,8 @@ init_pre (bool do_fre) TMP_GEN (bb) = bitmap_set_new (); AVAIL_OUT (bb) = bitmap_set_new (); } + maximal_set = in_fre ? NULL : bitmap_set_new (); + need_eh_cleanup = BITMAP_ALLOC (NULL); } @@ -3787,6 +4182,7 @@ static void execute_pre (bool do_fre) { + do_partial_partial = optimize > 2; init_pre (do_fre); if (!do_fre) @@ -3829,6 +4225,7 @@ execute_pre (bool do_fre) if (dump_file && (dump_flags & TDF_STATS)) { fprintf (dump_file, "Insertions: %d\n", pre_stats.insertions); + fprintf (dump_file, "PA inserted: %d\n", pre_stats.pa_insert); fprintf (dump_file, "New PHIs: %d\n", pre_stats.phis); fprintf (dump_file, "Eliminated: %d\n", pre_stats.eliminations); fprintf (dump_file, "Constified: %d\n", pre_stats.constified);