gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "memmodel.h"
  28 #include "emit-rtl.h"
  29 #include "cfgloop.h"
  30 #include "explow.h"
  31 #include "expr.h"
  32 #include "graphds.h"
  33 #include "params.h"
  34 #include "sreal.h"
  35 #include "regs.h"
  36 #include "function-abi.h"
  37
  38 struct target_cfgloop default_target_cfgloop;
  39 #if SWITCHABLE_TARGET
  40 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  41 #endif
  42
  43 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  44
  45 bool
  46 just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
  47 {
  48   /* It must be executed at least once each iteration.  */
  49   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  50     return false;
  51
  52   /* And just once.  */
  53   if (bb->loop_father != loop)
  54     return false;
  55
  56   /* But this was not enough.  We might have some irreducible loop here.  */
  57   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  58     return false;
  59
  60   return true;
  61 }
  62
  63 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  64    throw away all latch edges and mark blocks inside any remaining cycle.
  65    Everything is a bit complicated due to fact we do not want to do this
  66    for parts of cycles that only "pass" through some loop -- i.e. for
  67    each cycle, we want to mark blocks that belong directly to innermost
  68    loop containing the whole cycle.
  69
  70    LOOPS is the loop tree.  */
  71
  72 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  73 #define BB_REPR(BB) ((BB)->index + 1)
  74
  75 bool
  76 mark_irreducible_loops (void)
  77 {
  78   basic_block act;
  79   struct graph_edge *ge;
  80   edge e;
  81   edge_iterator ei;
  82   int src, dest;
  83   unsigned depth;
  84   struct graph *g;
  85   int num = number_of_loops (cfun);
  86   class loop *cloop;
  87   bool irred_loop_found = false;
  88   int i;
  89
  90   gcc_assert (current_loops != NULL);
  91
  92   /* Reset the flags.  */
  93   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  94                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  95     {
  96       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  97       FOR_EACH_EDGE (e, ei, act->succs)
  98         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  99     }
 100
 101   /* Create the edge lists.  */
 102   g = new_graph (last_basic_block_for_fn (cfun) + num);
 103
 104   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 105                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 106     FOR_EACH_EDGE (e, ei, act->succs)
 107       {
 108         /* Ignore edges to exit.  */
 109         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 110           continue;
 111
 112         src = BB_REPR (act);
 113         dest = BB_REPR (e->dest);
 114
 115         /* Ignore latch edges.  */
 116         if (e->dest->loop_father->header == e->dest
 117             && e->dest->loop_father->latch == act)
 118           continue;
 119
 120         /* Edges inside a single loop should be left where they are.  Edges
 121            to subloop headers should lead to representative of the subloop,
 122            but from the same place.
 123
 124            Edges exiting loops should lead from representative
 125            of the son of nearest common ancestor of the loops in that
 126            act lays.  */
 127
 128         if (e->dest->loop_father->header == e->dest)
 129           dest = LOOP_REPR (e->dest->loop_father);
 130
 131         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 132           {
 133             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 134                                                       e->dest->loop_father));
 135             if (depth == loop_depth (act->loop_father))
 136               cloop = act->loop_father;
 137             else
 138               cloop = (*act->loop_father->superloops)[depth];
 139
 140             src = LOOP_REPR (cloop);
 141           }
 142
 143         add_edge (g, src, dest)->data = e;
 144       }
 145
 146   /* Find the strongly connected components.  */
 147   graphds_scc (g, NULL);
 148
 149   /* Mark the irreducible loops.  */
 150   for (i = 0; i < g->n_vertices; i++)
 151     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 152       {
 153         edge real = (edge) ge->data;
 154         /* edge E in graph G is irreducible if it connects two vertices in the
 155            same scc.  */
 156
 157         /* All edges should lead from a component with higher number to the
 158            one with lower one.  */
 159         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 160
 161         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 162           continue;
 163
 164         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 165         irred_loop_found = true;
 166         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 167           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 168       }
 169
 170   free_graph (g);
 171
 172   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 173   return irred_loop_found;
 174 }
 175
 176 /* Counts number of insns inside LOOP.  */
 177 int
 178 num_loop_insns (const class loop *loop)
 179 {
 180   basic_block *bbs, bb;
 181   unsigned i, ninsns = 0;
 182   rtx_insn *insn;
 183
 184   bbs = get_loop_body (loop);
 185   for (i = 0; i < loop->num_nodes; i++)
 186     {
 187       bb = bbs[i];
 188       FOR_BB_INSNS (bb, insn)
 189         if (NONDEBUG_INSN_P (insn))
 190           ninsns++;
 191     }
 192   free (bbs);
 193
 194   if (!ninsns)
 195     ninsns = 1; /* To avoid division by zero.  */
 196
 197   return ninsns;
 198 }
 199
 200 /* Counts number of insns executed on average per iteration LOOP.  */
 201 int
 202 average_num_loop_insns (const class loop *loop)
 203 {
 204   basic_block *bbs, bb;
 205   unsigned i, binsns;
 206   sreal ninsns;
 207   rtx_insn *insn;
 208
 209   ninsns = 0;
 210   bbs = get_loop_body (loop);
 211   for (i = 0; i < loop->num_nodes; i++)
 212     {
 213       bb = bbs[i];
 214
 215       binsns = 0;
 216       FOR_BB_INSNS (bb, insn)
 217         if (NONDEBUG_INSN_P (insn))
 218           binsns++;
 219
 220       ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
 221       /* Avoid overflows.   */
 222       if (ninsns > 1000000)
 223         return 100000;
 224     }
 225   free (bbs);
 226
 227   int64_t ret = ninsns.to_int ();
 228   if (!ret)
 229     ret = 1; /* To avoid division by zero.  */
 230
 231   return ret;
 232 }
 233
 234 /* Returns expected number of iterations of LOOP, according to
 235    measured or guessed profile.
 236
 237    This functions attempts to return "sane" value even if profile
 238    information is not good enough to derive osmething.
 239    If BY_PROFILE_ONLY is set, this logic is bypassed and function
 240    return -1 in those scenarios.  */
 241
 242 gcov_type
 243 expected_loop_iterations_unbounded (const class loop *loop,
 244                                     bool *read_profile_p,
 245                                     bool by_profile_only)
 246 {
 247   edge e;
 248   edge_iterator ei;
 249   gcov_type expected = -1;
 250
 251   if (read_profile_p)
 252     *read_profile_p = false;
 253
 254   /* If we have no profile at all, use AVG_LOOP_NITER.  */
 255   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 256     {
 257       if (by_profile_only)
 258         return -1;
 259       expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 260     }
 261   else if (loop->latch && (loop->latch->count.initialized_p ()
 262                            || loop->header->count.initialized_p ()))
 263     {
 264       profile_count count_in = profile_count::zero (),
 265                     count_latch = profile_count::zero ();
 266
 267       FOR_EACH_EDGE (e, ei, loop->header->preds)
 268         if (e->src == loop->latch)
 269           count_latch = e->count ();
 270         else
 271           count_in += e->count ();
 272
 273       if (!count_latch.initialized_p ())
 274         {
 275           if (by_profile_only)
 276             return -1;
 277           expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 278         }
 279       else if (!count_in.nonzero_p ())
 280         {
 281           if (by_profile_only)
 282             return -1;
 283           expected = count_latch.to_gcov_type () * 2;
 284         }
 285       else
 286         {
 287           expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
 288                       - 1) / count_in.to_gcov_type ();
 289           if (read_profile_p
 290               && count_latch.reliable_p () && count_in.reliable_p ())
 291             *read_profile_p = true;
 292         }
 293     }
 294   else
 295     {
 296       if (by_profile_only)
 297         return -1;
 298       expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 299     }
 300
 301   if (!by_profile_only)
 302     {
 303       HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 304       if (max != -1 && max < expected)
 305         return max;
 306     }
 307
 308   return expected;
 309 }
 310
 311 /* Returns expected number of LOOP iterations.  The returned value is bounded
 312    by REG_BR_PROB_BASE.  */
 313
 314 unsigned
 315 expected_loop_iterations (class loop *loop)
 316 {
 317   gcov_type expected = expected_loop_iterations_unbounded (loop);
 318   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 319 }
 320
 321 /* Returns the maximum level of nesting of subloops of LOOP.  */
 322
 323 unsigned
 324 get_loop_level (const class loop *loop)
 325 {
 326   const class loop *ploop;
 327   unsigned mx = 0, l;
 328
 329   for (ploop = loop->inner; ploop; ploop = ploop->next)
 330     {
 331       l = get_loop_level (ploop);
 332       if (l >= mx)
 333         mx = l + 1;
 334     }
 335   return mx;
 336 }
 337
 338 /* Initialize the constants for computing set costs.  */
 339
 340 void
 341 init_set_costs (void)
 342 {
 343   int speed;
 344   rtx_insn *seq;
 345   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 346   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 347   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 348   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 349   unsigned i;
 350
 351   target_avail_regs = 0;
 352   target_clobbered_regs = 0;
 353   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 354     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 355         && !fixed_regs[i])
 356       {
 357         target_avail_regs++;
 358         /* ??? This is only a rough heuristic.  It doesn't cope well
 359            with alternative ABIs, but that's an optimization rather than
 360            correctness issue.  */
 361         if (default_function_abi.clobbers_full_reg_p (i))
 362           target_clobbered_regs++;
 363       }
 364
 365   target_res_regs = 3;
 366
 367   for (speed = 0; speed < 2; speed++)
 368      {
 369       crtl->maybe_hot_insn_p = speed;
 370       /* Set up the costs for using extra registers:
 371
 372          1) If not many free registers remain, we should prefer having an
 373             additional move to decreasing the number of available registers.
 374             (TARGET_REG_COST).
 375          2) If no registers are available, we need to spill, which may require
 376             storing the old value to memory and loading it back
 377             (TARGET_SPILL_COST).  */
 378
 379       start_sequence ();
 380       emit_move_insn (reg1, reg2);
 381       seq = get_insns ();
 382       end_sequence ();
 383       target_reg_cost [speed] = seq_cost (seq, speed);
 384
 385       start_sequence ();
 386       emit_move_insn (mem, reg1);
 387       emit_move_insn (reg2, mem);
 388       seq = get_insns ();
 389       end_sequence ();
 390       target_spill_cost [speed] = seq_cost (seq, speed);
 391     }
 392   default_rtl_profile ();
 393 }
 394
 395 /* Estimates cost of increased register pressure caused by making N_NEW new
 396    registers live around the loop.  N_OLD is the number of registers live
 397    around the loop.  If CALL_P is true, also take into account that
 398    call-used registers may be clobbered in the loop body, reducing the
 399    number of available registers before we spill.  */
 400
 401 unsigned
 402 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 403                             bool call_p)
 404 {
 405   unsigned cost;
 406   unsigned regs_needed = n_new + n_old;
 407   unsigned available_regs = target_avail_regs;
 408
 409   /* If there is a call in the loop body, the call-clobbered registers
 410      are not available for loop invariants.  */
 411   if (call_p)
 412     available_regs = available_regs - target_clobbered_regs;
 413
 414   /* If we have enough registers, we should use them and not restrict
 415      the transformations unnecessarily.  */
 416   if (regs_needed + target_res_regs <= available_regs)
 417     return 0;
 418
 419   if (regs_needed <= available_regs)
 420     /* If we are close to running out of registers, try to preserve
 421        them.  */
 422     cost = target_reg_cost [speed] * n_new;
 423   else
 424     /* If we run out of registers, it is very expensive to add another
 425        one.  */
 426     cost = target_spill_cost [speed] * n_new;
 427
 428   if (optimize && (flag_ira_region == IRA_REGION_ALL
 429                    || flag_ira_region == IRA_REGION_MIXED)
 430       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 431     /* IRA regional allocation deals with high register pressure
 432        better.  So decrease the cost (to do more accurate the cost
 433        calculation for IRA, we need to know how many registers lives
 434        through the loop transparently).  */
 435     cost /= 2;
 436
 437   return cost;
 438 }
 439
 440 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 441
 442 void
 443 mark_loop_exit_edges (void)
 444 {
 445   basic_block bb;
 446   edge e;
 447
 448   if (number_of_loops (cfun) <= 1)
 449     return;
 450
 451   FOR_EACH_BB_FN (bb, cfun)
 452     {
 453       edge_iterator ei;
 454
 455       FOR_EACH_EDGE (e, ei, bb->succs)
 456         {
 457           if (loop_outer (bb->loop_father)
 458               && loop_exit_edge_p (bb->loop_father, e))
 459             e->flags |= EDGE_LOOP_EXIT;
 460           else
 461             e->flags &= ~EDGE_LOOP_EXIT;
 462         }
 463     }
 464 }
 465
 466 /* Return exit edge if loop has only one exit that is likely
 467    to be executed on runtime (i.e. it is not EH or leading
 468    to noreturn call.  */
 469
 470 edge
 471 single_likely_exit (class loop *loop)
 472 {
 473   edge found = single_exit (loop);
 474   vec<edge> exits;
 475   unsigned i;
 476   edge ex;
 477
 478   if (found)
 479     return found;
 480   exits = get_loop_exit_edges (loop);
 481   FOR_EACH_VEC_ELT (exits, i, ex)
 482     {
 483       if (probably_never_executed_edge_p (cfun, ex)
 484           /* We want to rule out paths to noreturns but not low probabilities
 485              resulting from adjustments or combining.
 486              FIXME: once we have better quality tracking, make this more
 487              robust.  */
 488           || ex->probability <= profile_probability::very_unlikely ())
 489         continue;
 490       if (!found)
 491         found = ex;
 492       else
 493         {
 494           exits.release ();
 495           return NULL;
 496         }
 497     }
 498   exits.release ();
 499   return found;
 500 }
 501
 502
 503 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 504    order against direction of edges from latch.  Specially, if
 505    header != latch, latch is the 1-st block.  */
 506
 507 vec<basic_block>
 508 get_loop_hot_path (const class loop *loop)
 509 {
 510   basic_block bb = loop->header;
 511   vec<basic_block> path = vNULL;
 512   bitmap visited = BITMAP_ALLOC (NULL);
 513
 514   while (true)
 515     {
 516       edge_iterator ei;
 517       edge e;
 518       edge best = NULL;
 519
 520       path.safe_push (bb);
 521       bitmap_set_bit (visited, bb->index);
 522       FOR_EACH_EDGE (e, ei, bb->succs)
 523         if ((!best || e->probability > best->probability)
 524             && !loop_exit_edge_p (loop, e)
 525             && !bitmap_bit_p (visited, e->dest->index))
 526           best = e;
 527       if (!best || best->dest == loop->header)
 528         break;
 529       bb = best->dest;
 530     }
 531   BITMAP_FREE (visited);
 532   return path;
 533 }