* loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_size_p.
* tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
* tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
* postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
* value-prof.c (gimple_divmod_fixed_value_transform,
gimple_mod_pow2_value_transform, gimple_mod_subtract_transform,
gimple_stringops_transform): Use optimize_bb_for_size_p.
* ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
* final.c (compute_alignments): Use optimize_function_for_size_p.
* builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
(fold_builtin_strcpy, fold_builtin_fputs): Use
optimize_function_for_size_p.
* fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
* reorg.c (relax_delay_slots): Likewise.
* tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
(execute_cse_reciprocals): Use optimize_bb_for_size_p.
* ipa-inline.c (cgraph_decide_recursive_inlining): Use
optimize_function_for_size_p.
(cgraph_decide_inlining_of_small_functions): Use
optimize_function_for_size_p.
* global.c (find_reg): Use optimize_function_for_size_p.
* opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions,
flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and
flag_prefetch_loop_arrays. The passes now decide this based on the profile.
* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use
optimize_loop_for_speed_p.
* predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify
argument.
(optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
* tree-parloops.c (parallelize_loops): Use optimize_loop_nest_for_size_p.
* tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
* local-alloc.c (block_alloc): Pass BB pointer.
(find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
* gcse.c (gcse_main): Use optimize_function_for_size_p.
* loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
(decide_peel_completely): Likewise.
* tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use
optimize_loop_nest_for_size_p.
(vect_enhance_data_refs_alignment): Use optimize_loop_nest_for_speed_p.
* tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
(coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
* cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
(try_crossjump_bb): Use optimize_bb_for_size_p.
* tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use
optimize_loop_nest_for_size_p.
* bb-reorder.c (find_traces_1_round): Use optimize_edge_for_speed_p.
(copy_bb_p): Use optimize_bb_for_speed_p.
(duplicate_computed_gotos): Likewise.
* basic-block.h (optimize_loop_nest_for_size_p,
optimize_loop_nest_for_speed_p): New.
* stmt.c (expand_case): Use optimize_insn_for_size_p.
From-SVN: r139760
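For reference, a minimal sketch, not part of the committed patch, of how a pass
is meant to consult the new profile-based predicates instead of the global
optimize_size flag. The helper name and its arguments are invented for
illustration; the predicates themselves are the ones declared in basic-block.h
below, and the snippet assumes the usual GCC internal headers (basic-block.h,
cfgloop.h):

    /* Hypothetical gate for a code-growing transformation.  Ask the
       per-BB, per-loop-nest and per-function predicates rather than the
       global optimize_size flag, so that profile feedback can mark
       individual regions of a function hot or cold.  */
    static bool
    example_transform_profitable_p (basic_block bb, struct loop *loop)
    {
      /* Never grow code in blocks the profile considers cold.  */
      if (optimize_bb_for_size_p (bb))
        return false;
      /* A transformation that duplicates a whole loop body should also
         ask about the loop nest it sits in.  */
      if (loop && optimize_loop_nest_for_size_p (loop))
        return false;
      /* Otherwise fall back to the per-function setting.  */
      return optimize_function_for_speed_p (cfun);
    }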
+2008-08-29 Jan Hubicka <jh@suse.cz>
+
+ * loop-unswitch.c (unswitch_single_loop): Use optimize_loop_for_size_p.
+ * tree-ssa-threadupdate.c (mark_threaded_blocks): Use optimize_function_for_size_p.
+ * tracer.c (ignore_bb_p): Use optimize_bb_for_size_p.
+ * postreload-gcse.c (eliminate_partially_redundant_load): Use optimize_bb_for_size_p.
+ * value-prof.c (gimple_divmod_fixed_value_transform,
+ gimple_mod_pow2_value_transform, gimple_mod_subtract_transform,
+ gimple_stringops_transform): Use optimize_bb_for_size_p.
+ * ipa-cp.c (ipcp_insert_stage): Use optimize_function_for_size_p.
+ * final.c (compute_alignments): Use optimize_function_for_size_p.
+ * builtins.c (fold_builtin_cabs): Use optimize_function_for_speed_p.
+ (fold_builtin_strcpy, fold_builtin_fputs): Use
+ optimize_function_for_size_p.
+ * fold-const.c (tree_swap_operands_p): Use optimize_function_for_size_p.
+ * reorg.c (relax_delay_slots): Likewise.
+ * tree-ssa-math-opts.c (replace_reciprocal): Use optimize_bb_for_speed_p.
+ (execute_cse_reciprocals): Use optimize_bb_for_size_p.
+ * ipa-inline.c (cgraph_decide_recursive_inlining): Use
+ optimize_function_for_size_p.
+ (cgraph_decide_inlining_of_small_functions): Use
+ optimize_function_for_size_p.
+ * global.c (find_reg): Use optimize_function_for_size_p.
+ * opts.c (decode_options): Do not clear flag_tree_ch, flag_inline_functions,
+ flag_unswitch_loops, flag_unroll_loops, flag_unroll_all_loops and
+ flag_prefetch_loop_arrays. The passes now decide this based on the profile.
+ * tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely): Use
+ optimize_loop_for_speed_p.
+ * predict.c (optimize_bb_for_size_p, optimize_bb_for_speed_p): Constify
+ argument.
+ (optimize_loop_nest_for_size_p, optimize_loop_nest_for_speed_p): New.
+ * tree-parloops.c (parallelize_loops): Use optimize_loop_nest_for_size_p.
+ * tree-eh.c (decide_copy_try_finally): Use optimize_function_for_size_p.
+ * local-alloc.c (block_alloc): Pass BB pointer.
+ (find_free_reg): Add BB pointer, use optimize_bb_for_size_p.
+ * gcse.c (gcse_main): Use optimize_function_for_size_p.
+ * loop-unroll.c (decide_unrolling_and_peeling): Use optimize_loop_for_size_p.
+ (decide_peel_completely): Likewise.
+ * tree-vect-analyze.c (vect_mark_for_runtime_alias_test): Use
+ optimize_loop_nest_for_size_p.
+ (vect_enhance_data_refs_alignment): Use optimize_loop_nest_for_speed_p.
+ * tree-ssa-coalesce.c (coalesce_cost): Add optimize_for_size argument.
+ (coalesce_cost_bb, coalesce_cost_edge, create_outofssa_var_map): Update call.
+ * cfgcleanup.c (outgoing_edges_match): Use optimize_bb_for_speed_p.
+ (try_crossjump_bb): Use optimize_bb_for_size_p.
+ * tree-ssa-loop-prefetch.c (loop_prefetch_arrays): Use
+ optimize_loop_nest_for_size_p.
+ * bb-reorder.c (find_traces_1_round): Use optimize_edge_for_speed_p.
+ (copy_bb_p): Use optimize_bb_for_speed_p.
+ (duplicate_computed_gotos): Likewise.
+ * basic-block.h (optimize_loop_nest_for_size_p,
+ optimize_loop_nest_for_speed_p): New.
+ * stmt.c (expand_case): Use optimize_insn_for_size_p.
+
2008-08-29 Tristan Gingold <gingold@adacore.com>
* gcov.c (main): Call expandargv.
extern bool optimize_function_for_speed_p (struct function *);
extern bool optimize_loop_for_size_p (struct loop *);
extern bool optimize_loop_for_speed_p (struct loop *);
+extern bool optimize_loop_nest_for_size_p (struct loop *);
+extern bool optimize_loop_nest_for_speed_p (struct loop *);
extern bool gimple_predicted_by_p (const_basic_block, enum br_predictor);
extern bool rtl_predicted_by_p (const_basic_block, enum br_predictor);
extern void gimple_predict_edge (edge, enum br_predictor, int);
/* The loop has less than 4 iterations. */
if (single_succ_p (bb)
- && copy_bb_p (best_edge->dest, !optimize_size))
+ && copy_bb_p (best_edge->dest,
+ optimize_edge_for_speed_p (best_edge)))
{
bb = copy_bb (best_edge->dest, best_edge, bb,
*n_traces);
edge is traversed frequently enough. */
if (try_copy
&& copy_bb_p (best->dest,
- !optimize_size
+ optimize_edge_for_speed_p (best)
&& EDGE_FREQUENCY (best) >= freq_threshold
&& best->count >= count_threshold))
{
if (EDGE_COUNT (bb->succs) > 8)
return false;
- if (code_may_grow && maybe_hot_bb_p (bb))
+ if (code_may_grow && optimize_bb_for_speed_p (bb))
max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
FOR_BB_INSNS (bb, insn)
{
if (targetm.cannot_modify_jumps_p ())
return false;
- return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
+ return (optimize > 0 && flag_expensive_optimizations);
}
|| single_pred_p (single_succ (bb)))
continue;
+ if (!optimize_bb_for_speed_p (bb))
+ continue;
+
/* The successor block has to be a duplication candidate. */
if (!bitmap_bit_p (candidates, single_succ (bb)->index))
continue;
/* Don't do this when optimizing for size. */
if (flag_unsafe_math_optimizations
- && optimize && !optimize_size)
+ && optimize && optimize_function_for_speed_p (cfun))
{
tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
if (operand_equal_p (src, dest, 0))
return fold_convert (TREE_TYPE (TREE_TYPE (fndecl)), dest);
- if (optimize_size)
+ if (optimize_function_for_size_p (cfun))
return NULL_TREE;
fn = implicit_built_in_decls[BUILT_IN_MEMCPY];
case 1: /* length is greater than 1, call fwrite. */
{
/* If optimizing for size keep fputs. */
- if (optimize_size)
+ if (optimize_function_for_size_p (cfun))
return NULL_TREE;
/* New argument list transforming fputs(string, stream) to
fwrite(string, 1, len, stream). */
we require the existing branches to have probabilities that are
roughly similar. */
if (match
- && !optimize_size
- && maybe_hot_bb_p (bb1)
- && maybe_hot_bb_p (bb2))
+ && optimize_bb_for_speed_p (bb1)
+ && optimize_bb_for_speed_p (bb2))
{
int prob2;
/* Don't crossjump if this block ends in a computed jump,
unless we are optimizing for size. */
- if (!optimize_size
+ if (optimize_bb_for_size_p (bb)
&& bb != EXIT_BLOCK_PTR
&& computed_jump_p (BB_END (bb)))
return false;
label_align = XCNEWVEC (struct label_alignment, max_labelno - min_labelno + 1);
/* If not optimizing or optimizing for size, don't assign any alignments. */
- if (! optimize || optimize_size)
+ if (! optimize || optimize_function_for_size_p (cfun))
return 0;
if (dump_file)
/* In case block is frequent and reached mostly by non-fallthru edge,
align it. It is most likely a first block of loop. */
if (has_fallthru
- && maybe_hot_bb_p (bb)
+ && optimize_bb_for_speed_p (bb)
&& branch_frequency + fallthru_frequency > freq_threshold
&& (branch_frequency
> fallthru_frequency * PARAM_VALUE (PARAM_ALIGN_LOOP_ITERATIONS)))
if (TREE_CONSTANT (arg0))
return 1;
- if (optimize_size)
+ if (cfun && optimize_function_for_size_p (cfun))
return 0;
if (reorder && flag_evaluation_order
}
/* Optimize x*x as pow(x,2.0), which is expanded as x*x. */
- if (! optimize_size
+ if (optimize_function_for_speed_p (cfun)
&& operand_equal_p (arg0, arg1, 0))
{
tree powfn = mathfn_built_in (type, BUILT_IN_POW);
timevar_pop (TV_CPROP1);
}
- if (optimize_size)
- /* Do nothing. */ ;
- else
+ if (optimize_function_for_speed_p (cfun))
{
timevar_push (TV_PRE);
changed |= one_pre_gcse_pass (pass + 1);
for code size -- it rarely makes programs faster, and can make
them bigger if we did partial redundancy elimination (when optimizing
for space, we don't run the partial redundancy algorithms). */
- if (optimize_size)
+ if (optimize_function_for_size_p (cfun))
{
timevar_push (TV_HOIST);
max_gcse_regno = max_reg_num ();
/* We are finished with alias. */
end_alias_analysis ();
- if (!optimize_size && flag_gcse_sm)
+ if (optimize_function_for_speed_p (cfun) && flag_gcse_sm)
{
timevar_push (TV_LSM);
store_motion ();
if (! accept_call_clobbered
&& allocno[num].calls_crossed != 0
&& allocno[num].throwing_calls_crossed == 0
- && CALLER_SAVE_PROFITABLE (optimize_size ? allocno[num].n_refs : allocno[num].freq,
- optimize_size ? allocno[num].calls_crossed
+ && CALLER_SAVE_PROFITABLE (optimize_function_for_size_p (cfun) ? allocno[num].n_refs : allocno[num].freq,
+ optimize_function_for_size_p (cfun) ? allocno[num].calls_crossed
: allocno[num].freq_calls_crossed))
{
HARD_REG_SET new_losers;
if (new_insns + growth > max_new_insns)
break;
if (growth
- && (optimize_size
- || (DECL_STRUCT_FUNCTION (node->decl)
- ->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)))
+ && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
{
if (dump_file)
fprintf (dump_file, "Not versioning, cold code would grow");
int depth = 0;
int n = 0;
- if (optimize_size
+ if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))
|| (!flag_inline_functions && !DECL_DECLARED_INLINE_P (node->decl)))
return false;
if (!flag_inline_functions
&& !DECL_DECLARED_INLINE_P (edge->callee->decl))
not_good = N_("function not declared inline and code size would grow");
- if (optimize_size)
+ if (optimize_function_for_size_p (DECL_STRUCT_FUNCTION (edge->caller->decl)))
not_good = N_("optimizing for size and code size would grow");
if (not_good && growth > 0 && cgraph_estimate_growth (edge->callee) > 0)
{
static int memref_referenced_p (rtx, rtx);
static int memref_used_between_p (rtx, rtx, rtx);
static void no_equiv (rtx, const_rtx, void *);
-static void block_alloc (int);
+static void block_alloc (basic_block);
static int qty_sugg_compare (int, int);
static int qty_sugg_compare_1 (const void *, const void *);
static int qty_compare (int, int);
static void reg_is_born (rtx, int);
static void wipe_dead_reg (rtx, int);
static int find_free_reg (enum reg_class, enum machine_mode, int, int, int,
- int, int);
+ int, int, basic_block);
static void mark_life (int, enum machine_mode, int);
static void post_mark_life (int, enum machine_mode, int, int, int);
static int requires_inout (const char *);
next_qty = 0;
- block_alloc (b->index);
+ block_alloc (b);
}
free (qty);
Only the pseudos that die but once can be handled. */
static void
-block_alloc (int b)
+block_alloc (basic_block b)
{
int i, q;
rtx insn;
/* Count the instructions in the basic block. */
- insn = BB_END (BASIC_BLOCK (b));
+ insn = BB_END (b);
while (1)
{
if (!NOTE_P (insn))
++insn_count;
gcc_assert (insn_count <= max_uid);
}
- if (insn == BB_HEAD (BASIC_BLOCK (b)))
+ if (insn == BB_HEAD (b))
break;
insn = PREV_INSN (insn);
}
/* Initialize table of hardware registers currently live. */
- REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (BASIC_BLOCK (b)));
+ REG_SET_TO_HARD_REG_SET (regs_live, DF_LR_IN (b));
/* This is conservative, as this would include registers that are
artificial-def'ed-but-not-used. However, artificial-defs are
rare, and such uninitialized use is rarer still, and the chance
of this having any performance impact is even less, while the
benefit is not having to compute and keep the TOP set around. */
- for (def_rec = df_get_artificial_defs (b); *def_rec; def_rec++)
+ for (def_rec = df_get_artificial_defs (b->index); *def_rec; def_rec++)
{
int regno = DF_REF_REGNO (*def_rec);
if (regno < FIRST_PSEUDO_REGISTER)
and assigns quantities to registers.
It computes which registers to tie. */
- insn = BB_HEAD (BASIC_BLOCK (b));
+ insn = BB_HEAD (b);
while (1)
{
if (!NOTE_P (insn))
IOR_HARD_REG_SET (regs_live_at[2 * insn_number], regs_live);
IOR_HARD_REG_SET (regs_live_at[2 * insn_number + 1], regs_live);
- if (insn == BB_END (BASIC_BLOCK (b)))
+ if (insn == BB_END (b))
break;
insn = NEXT_INSN (insn);
q = qty_order[i];
if (qty_phys_num_sugg[q] != 0 || qty_phys_num_copy_sugg[q] != 0)
qty[q].phys_reg = find_free_reg (qty[q].min_class, qty[q].mode, q,
- 0, 1, qty[q].birth, qty[q].death);
+ 0, 1, qty[q].birth, qty[q].death, b);
else
qty[q].phys_reg = -1;
}
a scheduling pass after reload and we are not optimizing
for code size. */
if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
- && !optimize_size
+ && optimize_bb_for_speed_p (b)
&& !SMALL_REGISTER_CLASSES)
{
qty[q].phys_reg = find_free_reg (qty[q].min_class,
qty[q].mode, q, 0, 0,
- fake_birth, fake_death);
+ fake_birth, fake_death, b);
if (qty[q].phys_reg >= 0)
continue;
}
#endif
qty[q].phys_reg = find_free_reg (qty[q].min_class,
qty[q].mode, q, 0, 0,
- qty[q].birth, qty[q].death);
+ qty[q].birth, qty[q].death, b);
if (qty[q].phys_reg >= 0)
continue;
}
#ifdef INSN_SCHEDULING
/* Similarly, avoid false dependencies. */
if (flag_schedule_insns_after_reload && dbg_cnt (local_alloc_for_sched)
- && !optimize_size
+ && optimize_bb_for_speed_p (b)
&& !SMALL_REGISTER_CLASSES
&& qty[q].alternate_class != NO_REGS)
qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
qty[q].mode, q, 0, 0,
- fake_birth, fake_death);
+ fake_birth, fake_death, b);
#endif
if (qty[q].alternate_class != NO_REGS)
qty[q].phys_reg = find_free_reg (qty[q].alternate_class,
qty[q].mode, q, 0, 0,
- qty[q].birth, qty[q].death);
+ qty[q].birth, qty[q].death, b);
}
}
static int
find_free_reg (enum reg_class rclass, enum machine_mode mode, int qtyno,
int accept_call_clobbered, int just_try_suggested,
- int born_index, int dead_index)
+ int born_index, int dead_index, basic_block bb)
{
int i, ins;
HARD_REG_SET first_used, used;
/* Don't try the copy-suggested regs again. */
qty_phys_num_copy_sugg[qtyno] = 0;
return find_free_reg (rclass, mode, qtyno, accept_call_clobbered, 1,
- born_index, dead_index);
+ born_index, dead_index, bb);
}
/* We need not check to see if the current function has nonlocal
&& ! just_try_suggested
&& qty[qtyno].n_calls_crossed != 0
&& qty[qtyno].n_throwing_calls_crossed == 0
- && CALLER_SAVE_PROFITABLE (optimize_size ? qty[qtyno].n_refs : qty[qtyno].freq,
- optimize_size ? qty[qtyno].n_calls_crossed
+ && CALLER_SAVE_PROFITABLE (optimize_bb_for_size_p (bb) ? qty[qtyno].n_refs
+ : qty[qtyno].freq,
+ optimize_bb_for_size_p (bb) ? qty[qtyno].n_calls_crossed
: qty[qtyno].freq_calls_crossed))
{
- i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index);
+ i = find_free_reg (rclass, mode, qtyno, 1, 0, born_index, dead_index, bb);
if (i >= 0)
caller_save_needed = 1;
return i;
fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
/* Do not peel cold areas. */
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
}
/* Do not peel cold areas. */
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
}
/* Do not unswitch in cold areas. */
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not unswitching, not hot area\n");
if (optimize_size)
{
- /* Loop header copying usually increases size of the code. This used not to
- be true, since quite often it is possible to verify that the condition is
- satisfied in the first iteration and therefore to eliminate it. Jump
- threading handles these cases now. */
- flag_tree_ch = 0;
-
/* Conditional DCE generates bigger code. */
flag_tree_builtin_call_dce = 0;
/* These options are set with -O3, so reset for -Os */
flag_predictive_commoning = 0;
- flag_inline_functions = 0;
- flag_unswitch_loops = 0;
flag_gcse_after_reload = 0;
flag_tree_vectorize = 0;
align_labels = 1;
align_functions = 1;
- /* Unroll/prefetch switches that may be set on the command line, and tend to
- generate bigger code. */
- flag_unroll_loops = 0;
- flag_unroll_all_loops = 0;
- flag_prefetch_loop_arrays = 0;
-
/* Basic optimization options. */
optimize_size = 1;
if (optimize > 2)
if (/* No load can be replaced by copy. */
npred_ok == 0
/* Prevent exploding the code. */
- || (optimize_size && npred_ok > 1)
+ || (optimize_bb_for_size_p (bb) && npred_ok > 1)
/* If we don't have profile information we cannot tell if splitting
a critical edge is profitable or not so don't do it. */
|| ((! profile_info || ! flag_branch_probabilities
return optimize_bb_for_speed_p (loop->header);
}
+/* Return TRUE when LOOP nest should be optimized for speed. */
+
+bool
+optimize_loop_nest_for_speed_p (struct loop *loop)
+{
+ struct loop *l = loop;
+ if (optimize_loop_for_speed_p (loop))
+ return true;
+ l = loop->inner;
+ while (l && l != loop)
+ {
+ if (optimize_loop_for_speed_p (l))
+ return true;
+ /* Walk the nest depth-first: descend into inner loops, then visit
+ siblings, and finally climb back to an ancestor that still has an
+ unvisited sibling; stop once the walk returns to LOOP itself. */
+ if (l->inner)
+ l = l->inner;
+ else if (l->next)
+ l = l->next;
+ else
+ {
+ while (l != loop && !l->next)
+ l = loop_outer (l);
+ if (l != loop)
+ l = l->next;
+ }
+ }
+ return false;
+}
+
+/* Return TRUE when LOOP nest should be optimized for size. */
+
+bool
+optimize_loop_nest_for_size_p (struct loop *loop)
+{
+ return !optimize_loop_nest_for_speed_p (loop);
+}
+
/* Set RTL expansion for BB profile. */
void
Only do so if optimizing for size since this results in slower, but
smaller code. */
- if (optimize_size
+ if (optimize_function_for_size_p (cfun)
&& GET_CODE (PATTERN (delay_insn)) == RETURN
&& next
&& JUMP_P (next)
else if (count < case_values_threshold ()
|| compare_tree_int (range,
- (optimize_size ? 3 : 10) * count) > 0
+ (optimize_insn_for_size_p () ? 3 : 10) * count) > 0
/* RANGE may be signed, and really large ranges will show up
as negative numbers. */
|| compare_tree_int (range, 0) < 0
/* Index jumptables from zero for suitable values of
minval to avoid a subtraction. */
- if (! optimize_size
+ if (optimize_insn_for_speed_p ()
&& compare_tree_int (minval, 0) > 0
&& compare_tree_int (minval, 3) < 0)
{
{
if (bb->index < NUM_FIXED_BLOCKS)
return true;
- if (!maybe_hot_bb_p (bb))
+ if (optimize_bb_for_size_p (bb))
return true;
return false;
}
sw_estimate = 10 + 2 * ndests;
/* Optimize for size clearly wants our best guess. */
- if (optimize_size)
+ if (optimize_function_for_size_p (cfun))
return f_estimate < sw_estimate;
/* ??? These numbers are completely made up so far. */
{
htab_empty (reduction_list);
if (/* Do not bother with loops in cold areas. */
- !maybe_hot_bb_p (loop->header)
+ optimize_loop_nest_for_size_p (loop)
/* Or loops that roll too little. */
|| expected_loop_iterations (loop) <= n_threads
/* And of course, the loop must be parallelizable. */
possibly on CRITICAL edge and in HOT basic block. */
static inline int
-coalesce_cost (int frequency, bool hot, bool critical)
+coalesce_cost (int frequency, bool optimize_for_size, bool critical)
{
/* Base costs on BB frequencies bounded by 1. */
int cost = frequency;
if (!cost)
cost = 1;
- if (optimize_size)
+ if (optimize_for_size)
cost = 1;
- else
- /* It is more important to coalesce in HOT blocks. */
- if (hot)
- cost *= 2;
/* Inserting copy on critical edge costs more than inserting it elsewhere. */
if (critical)
static inline int
coalesce_cost_bb (basic_block bb)
{
- return coalesce_cost (bb->frequency, maybe_hot_bb_p (bb), false);
+ return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb), false);
}
return MUST_COALESCE_COST;
return coalesce_cost (EDGE_FREQUENCY (e),
- maybe_hot_edge_p (e),
+ optimize_edge_for_size_p (e),
EDGE_CRITICAL_P (e));
}
if (SSA_NAME_VAR (outputs[match]) == SSA_NAME_VAR (input))
{
cost = coalesce_cost (REG_BR_PROB_BASE,
- maybe_hot_bb_p (bb),
+ optimize_bb_for_size_p (bb),
false);
add_coalesce (cl, v1, v2, cost);
bitmap_set_bit (used_in_copy, v1);
FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
{
- if (may_increase_size && maybe_hot_bb_p (loop->header)
+ if (may_increase_size && optimize_loop_for_speed_p (loop)
/* Unroll outermost loops only if asked to do so or they do
not cause code growth. */
&& (unroll_outer
struct tree_niter_desc desc;
bool unrolled = false, no_other_refs;
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_nest_for_size_p (loop))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " ignored (cold area)\n");
basic_block bb = gimple_bb (use_stmt);
struct occurrence *occ = (struct occurrence *) bb->aux;
- if (occ->recip_def && use_stmt != occ->recip_def_stmt)
+ if (optimize_bb_for_speed_p (bb)
+ && occ->recip_def && use_stmt != occ->recip_def_stmt)
{
gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
SET_USE (use_p, occ->recip_def);
static bool
gate_cse_reciprocals (void)
{
- return optimize && !optimize_size && flag_reciprocal_math;
+ return optimize && flag_reciprocal_math;
}
/* Go through all the floating-point SSA_NAMEs, and call
execute_cse_reciprocals_1 (&gsi, def);
}
+ if (optimize_bb_for_size_p (bb))
+ continue;
+
/* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
/* If optimizing for size, only thread through block if we don't have
to duplicate it or it's an otherwise empty redirection block. */
- if (optimize_size)
+ if (optimize_function_for_size_p (cfun))
{
EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
{
print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
}
- if (optimize_size)
+ if (optimize_loop_nest_for_size_p (loop))
{
if (vect_print_dump_info (REPORT_DR_DETAILS))
fprintf (vect_dump, "versioning not supported when optimizing for size.");
/* Try versioning if:
1) flag_tree_vect_loop_version is TRUE
- 2) optimize_size is FALSE
+ 2) optimize loop for speed
3) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
4) all misaligned data refs with a known misalignment are supported, and
do_versioning =
flag_tree_vect_loop_version
- && (!optimize_size)
+ && optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
if (do_versioning)
at least 50% of time (and 75% gives the guarantee of usage). */
if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
|| 2 * count < all
- || !maybe_hot_bb_p (gimple_bb (stmt)))
+ || optimize_bb_for_size_p (gimple_bb (stmt)))
return false;
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
/* We require that we hit a power of 2 at least half of all evaluations. */
if (simple_cst_equal (gimple_assign_rhs2 (stmt), value) != 1
|| count < wrong_values
- || !maybe_hot_bb_p (gimple_bb (stmt)))
+ || optimize_bb_for_size_p (gimple_bb (stmt)))
return false;
if (dump_file)
break;
}
if (i == steps
- || !maybe_hot_bb_p (gimple_bb (stmt)))
+ || optimize_bb_for_size_p (gimple_bb (stmt)))
return false;
gimple_remove_histogram_value (cfun, stmt, histogram);
/* We require that count is at least half of all; this means
that for the transformation to fire the value must be constant
at least 80% of time. */
- if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
+ if ((6 * count / 5) < all || optimize_bb_for_size_p (gimple_bb (stmt)))
return false;
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
return false;