gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "emit-rtl.h"
  28 #include "cfgloop.h"
  29 #include "explow.h"
  30 #include "expr.h"
  31 #include "graphds.h"
  32 #include "params.h"
  33
  34 struct target_cfgloop default_target_cfgloop;
  35 #if SWITCHABLE_TARGET
  36 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  37 #endif
  38
  39 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  40
  41 bool
  42 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  43 {
  44   /* It must be executed at least once each iteration.  */
  45   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  46     return false;
  47
  48   /* And just once.  */
  49   if (bb->loop_father != loop)
  50     return false;
  51
  52   /* But this was not enough.  We might have some irreducible loop here.  */
  53   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  54     return false;
  55
  56   return true;
  57 }
  58
  59 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  60    throw away all latch edges and mark blocks inside any remaining cycle.
  61    Everything is a bit complicated due to fact we do not want to do this
  62    for parts of cycles that only "pass" through some loop -- i.e. for
  63    each cycle, we want to mark blocks that belong directly to innermost
  64    loop containing the whole cycle.
  65
  66    LOOPS is the loop tree.  */
  67
  68 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  69 #define BB_REPR(BB) ((BB)->index + 1)
  70
  71 bool
  72 mark_irreducible_loops (void)
  73 {
  74   basic_block act;
  75   struct graph_edge *ge;
  76   edge e;
  77   edge_iterator ei;
  78   int src, dest;
  79   unsigned depth;
  80   struct graph *g;
  81   int num = number_of_loops (cfun);
  82   struct loop *cloop;
  83   bool irred_loop_found = false;
  84   int i;
  85
  86   gcc_assert (current_loops != NULL);
  87
  88   /* Reset the flags.  */
  89   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  90                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  91     {
  92       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  93       FOR_EACH_EDGE (e, ei, act->succs)
  94         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  95     }
  96
  97   /* Create the edge lists.  */
  98   g = new_graph (last_basic_block_for_fn (cfun) + num);
  99
 100   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 101                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 102     FOR_EACH_EDGE (e, ei, act->succs)
 103       {
 104         /* Ignore edges to exit.  */
 105         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 106           continue;
 107
 108         src = BB_REPR (act);
 109         dest = BB_REPR (e->dest);
 110
 111         /* Ignore latch edges.  */
 112         if (e->dest->loop_father->header == e->dest
 113             && e->dest->loop_father->latch == act)
 114           continue;
 115
 116         /* Edges inside a single loop should be left where they are.  Edges
 117            to subloop headers should lead to representative of the subloop,
 118            but from the same place.
 119
 120            Edges exiting loops should lead from representative
 121            of the son of nearest common ancestor of the loops in that
 122            act lays.  */
 123
 124         if (e->dest->loop_father->header == e->dest)
 125           dest = LOOP_REPR (e->dest->loop_father);
 126
 127         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 128           {
 129             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 130                                                       e->dest->loop_father));
 131             if (depth == loop_depth (act->loop_father))
 132               cloop = act->loop_father;
 133             else
 134               cloop = (*act->loop_father->superloops)[depth];
 135
 136             src = LOOP_REPR (cloop);
 137           }
 138
 139         add_edge (g, src, dest)->data = e;
 140       }
 141
 142   /* Find the strongly connected components.  */
 143   graphds_scc (g, NULL);
 144
 145   /* Mark the irreducible loops.  */
 146   for (i = 0; i < g->n_vertices; i++)
 147     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 148       {
 149         edge real = (edge) ge->data;
 150         /* edge E in graph G is irreducible if it connects two vertices in the
 151            same scc.  */
 152
 153         /* All edges should lead from a component with higher number to the
 154            one with lower one.  */
 155         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 156
 157         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 158           continue;
 159
 160         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 161         irred_loop_found = true;
 162         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 163           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 164       }
 165
 166   free_graph (g);
 167
 168   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 169   return irred_loop_found;
 170 }
 171
 172 /* Counts number of insns inside LOOP.  */
 173 int
 174 num_loop_insns (const struct loop *loop)
 175 {
 176   basic_block *bbs, bb;
 177   unsigned i, ninsns = 0;
 178   rtx_insn *insn;
 179
 180   bbs = get_loop_body (loop);
 181   for (i = 0; i < loop->num_nodes; i++)
 182     {
 183       bb = bbs[i];
 184       FOR_BB_INSNS (bb, insn)
 185         if (NONDEBUG_INSN_P (insn))
 186           ninsns++;
 187     }
 188   free (bbs);
 189
 190   if (!ninsns)
 191     ninsns = 1; /* To avoid division by zero.  */
 192
 193   return ninsns;
 194 }
 195
 196 /* Counts number of insns executed on average per iteration LOOP.  */
 197 int
 198 average_num_loop_insns (const struct loop *loop)
 199 {
 200   basic_block *bbs, bb;
 201   unsigned i, binsns, ninsns, ratio;
 202   rtx_insn *insn;
 203
 204   ninsns = 0;
 205   bbs = get_loop_body (loop);
 206   for (i = 0; i < loop->num_nodes; i++)
 207     {
 208       bb = bbs[i];
 209
 210       binsns = 0;
 211       FOR_BB_INSNS (bb, insn)
 212         if (NONDEBUG_INSN_P (insn))
 213           binsns++;
 214
 215       ratio = loop->header->frequency == 0
 216               ? BB_FREQ_MAX
 217               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 218       ninsns += binsns * ratio;
 219     }
 220   free (bbs);
 221
 222   ninsns /= BB_FREQ_MAX;
 223   if (!ninsns)
 224     ninsns = 1; /* To avoid division by zero.  */
 225
 226   return ninsns;
 227 }
 228
 229 /* Returns expected number of iterations of LOOP, according to
 230    measured or guessed profile.  No bounding is done on the
 231    value.  */
 232
 233 gcov_type
 234 expected_loop_iterations_unbounded (const struct loop *loop,
 235                                     bool *read_profile_p)
 236 {
 237   edge e;
 238   edge_iterator ei;
 239   gcov_type expected;
 240
 241   if (read_profile_p)
 242     *read_profile_p = false;
 243
 244   /* If we have no profile at all, use AVG_LOOP_NITER.  */
 245   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 246     expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 247   else if (loop->latch && (loop->latch->count || loop->header->count))
 248     {
 249       gcov_type count_in, count_latch;
 250
 251       count_in = 0;
 252       count_latch = 0;
 253
 254       FOR_EACH_EDGE (e, ei, loop->header->preds)
 255         if (e->src == loop->latch)
 256           count_latch = e->count;
 257         else
 258           count_in += e->count;
 259
 260       if (count_in == 0)
 261         expected = count_latch * 2;
 262       else
 263         {
 264           expected = (count_latch + count_in - 1) / count_in;
 265           if (read_profile_p)
 266             *read_profile_p = true;
 267         }
 268     }
 269   else
 270     {
 271       int freq_in, freq_latch;
 272
 273       freq_in = 0;
 274       freq_latch = 0;
 275
 276       FOR_EACH_EDGE (e, ei, loop->header->preds)
 277         if (flow_bb_inside_loop_p (loop, e->src))
 278           freq_latch += EDGE_FREQUENCY (e);
 279         else
 280           freq_in += EDGE_FREQUENCY (e);
 281
 282       if (freq_in == 0)
 283         {
 284           /* If we have no profile at all, use AVG_LOOP_NITER iterations.  */
 285           if (!freq_latch)
 286             expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 287           else
 288             expected = freq_latch * 2;
 289         }
 290       else
 291         expected = (freq_latch + freq_in - 1) / freq_in;
 292     }
 293
 294   HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 295   if (max != -1 && max < expected)
 296     return max;
 297   return expected;
 298 }
 299
 300 /* Returns expected number of LOOP iterations.  The returned value is bounded
 301    by REG_BR_PROB_BASE.  */
 302
 303 unsigned
 304 expected_loop_iterations (struct loop *loop)
 305 {
 306   gcov_type expected = expected_loop_iterations_unbounded (loop);
 307   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 308 }
 309
 310 /* Returns the maximum level of nesting of subloops of LOOP.  */
 311
 312 unsigned
 313 get_loop_level (const struct loop *loop)
 314 {
 315   const struct loop *ploop;
 316   unsigned mx = 0, l;
 317
 318   for (ploop = loop->inner; ploop; ploop = ploop->next)
 319     {
 320       l = get_loop_level (ploop);
 321       if (l >= mx)
 322         mx = l + 1;
 323     }
 324   return mx;
 325 }
 326
 327 /* Initialize the constants for computing set costs.  */
 328
 329 void
 330 init_set_costs (void)
 331 {
 332   int speed;
 333   rtx_insn *seq;
 334   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 335   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 336   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 337   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 338   unsigned i;
 339
 340   target_avail_regs = 0;
 341   target_clobbered_regs = 0;
 342   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 343     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 344         && !fixed_regs[i])
 345       {
 346         target_avail_regs++;
 347         if (call_used_regs[i])
 348           target_clobbered_regs++;
 349       }
 350
 351   target_res_regs = 3;
 352
 353   for (speed = 0; speed < 2; speed++)
 354      {
 355       crtl->maybe_hot_insn_p = speed;
 356       /* Set up the costs for using extra registers:
 357
 358          1) If not many free registers remain, we should prefer having an
 359             additional move to decreasing the number of available registers.
 360             (TARGET_REG_COST).
 361          2) If no registers are available, we need to spill, which may require
 362             storing the old value to memory and loading it back
 363             (TARGET_SPILL_COST).  */
 364
 365       start_sequence ();
 366       emit_move_insn (reg1, reg2);
 367       seq = get_insns ();
 368       end_sequence ();
 369       target_reg_cost [speed] = seq_cost (seq, speed);
 370
 371       start_sequence ();
 372       emit_move_insn (mem, reg1);
 373       emit_move_insn (reg2, mem);
 374       seq = get_insns ();
 375       end_sequence ();
 376       target_spill_cost [speed] = seq_cost (seq, speed);
 377     }
 378   default_rtl_profile ();
 379 }
 380
 381 /* Estimates cost of increased register pressure caused by making N_NEW new
 382    registers live around the loop.  N_OLD is the number of registers live
 383    around the loop.  If CALL_P is true, also take into account that
 384    call-used registers may be clobbered in the loop body, reducing the
 385    number of available registers before we spill.  */
 386
 387 unsigned
 388 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 389                             bool call_p)
 390 {
 391   unsigned cost;
 392   unsigned regs_needed = n_new + n_old;
 393   unsigned available_regs = target_avail_regs;
 394
 395   /* If there is a call in the loop body, the call-clobbered registers
 396      are not available for loop invariants.  */
 397   if (call_p)
 398     available_regs = available_regs - target_clobbered_regs;
 399
 400   /* If we have enough registers, we should use them and not restrict
 401      the transformations unnecessarily.  */
 402   if (regs_needed + target_res_regs <= available_regs)
 403     return 0;
 404
 405   if (regs_needed <= available_regs)
 406     /* If we are close to running out of registers, try to preserve
 407        them.  */
 408     cost = target_reg_cost [speed] * n_new;
 409   else
 410     /* If we run out of registers, it is very expensive to add another
 411        one.  */
 412     cost = target_spill_cost [speed] * n_new;
 413
 414   if (optimize && (flag_ira_region == IRA_REGION_ALL
 415                    || flag_ira_region == IRA_REGION_MIXED)
 416       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 417     /* IRA regional allocation deals with high register pressure
 418        better.  So decrease the cost (to do more accurate the cost
 419        calculation for IRA, we need to know how many registers lives
 420        through the loop transparently).  */
 421     cost /= 2;
 422
 423   return cost;
 424 }
 425
 426 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 427
 428 void
 429 mark_loop_exit_edges (void)
 430 {
 431   basic_block bb;
 432   edge e;
 433
 434   if (number_of_loops (cfun) <= 1)
 435     return;
 436
 437   FOR_EACH_BB_FN (bb, cfun)
 438     {
 439       edge_iterator ei;
 440
 441       FOR_EACH_EDGE (e, ei, bb->succs)
 442         {
 443           if (loop_outer (bb->loop_father)
 444               && loop_exit_edge_p (bb->loop_father, e))
 445             e->flags |= EDGE_LOOP_EXIT;
 446           else
 447             e->flags &= ~EDGE_LOOP_EXIT;
 448         }
 449     }
 450 }
 451
 452 /* Return exit edge if loop has only one exit that is likely
 453    to be executed on runtime (i.e. it is not EH or leading
 454    to noreturn call.  */
 455
 456 edge
 457 single_likely_exit (struct loop *loop)
 458 {
 459   edge found = single_exit (loop);
 460   vec<edge> exits;
 461   unsigned i;
 462   edge ex;
 463
 464   if (found)
 465     return found;
 466   exits = get_loop_exit_edges (loop);
 467   FOR_EACH_VEC_ELT (exits, i, ex)
 468     {
 469       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 470         continue;
 471       /* The constant of 5 is set in a way so noreturn calls are
 472          ruled out by this test.  The static branch prediction algorithm
 473          will not assign such a low probability to conditionals for usual
 474          reasons.  */
 475       if (profile_status_for_fn (cfun) != PROFILE_ABSENT
 476           && ex->probability < 5 && !ex->count)
 477         continue;
 478       if (!found)
 479         found = ex;
 480       else
 481         {
 482           exits.release ();
 483           return NULL;
 484         }
 485     }
 486   exits.release ();
 487   return found;
 488 }
 489
 490
 491 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 492    order against direction of edges from latch.  Specially, if
 493    header != latch, latch is the 1-st block.  */
 494
 495 vec<basic_block>
 496 get_loop_hot_path (const struct loop *loop)
 497 {
 498   basic_block bb = loop->header;
 499   vec<basic_block> path = vNULL;
 500   bitmap visited = BITMAP_ALLOC (NULL);
 501
 502   while (true)
 503     {
 504       edge_iterator ei;
 505       edge e;
 506       edge best = NULL;
 507
 508       path.safe_push (bb);
 509       bitmap_set_bit (visited, bb->index);
 510       FOR_EACH_EDGE (e, ei, bb->succs)
 511         if ((!best || e->probability > best->probability)
 512             && !loop_exit_edge_p (loop, e)
 513             && !bitmap_bit_p (visited, e->dest->index))
 514           best = e;
 515       if (!best || best->dest == loop->header)
 516         break;
 517       bb = best->dest;
 518     }
 519   BITMAP_FREE (visited);
 520   return path;
 521 }