gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "memmodel.h"
  28 #include "emit-rtl.h"
  29 #include "cfgloop.h"
  30 #include "explow.h"
  31 #include "expr.h"
  32 #include "graphds.h"
  33 #include "sreal.h"
  34 #include "regs.h"
  35 #include "function-abi.h"
  36
  37 struct target_cfgloop default_target_cfgloop;
  38 #if SWITCHABLE_TARGET
  39 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  40 #endif
  41
  42 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  43
  44 bool
  45 just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
  46 {
  47   /* It must be executed at least once each iteration.  */
  48   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  49     return false;
  50
  51   /* And just once.  */
  52   if (bb->loop_father != loop)
  53     return false;
  54
  55   /* But this was not enough.  We might have some irreducible loop here.  */
  56   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  57     return false;
  58
  59   return true;
  60 }
  61
  62 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  63    throw away all latch edges and mark blocks inside any remaining cycle.
  64    Everything is a bit complicated due to fact we do not want to do this
  65    for parts of cycles that only "pass" through some loop -- i.e. for
  66    each cycle, we want to mark blocks that belong directly to innermost
  67    loop containing the whole cycle.
  68
  69    LOOPS is the loop tree.  */
  70
  71 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  72 #define BB_REPR(BB) ((BB)->index + 1)
  73
  74 bool
  75 mark_irreducible_loops (void)
  76 {
  77   basic_block act;
  78   struct graph_edge *ge;
  79   edge e;
  80   edge_iterator ei;
  81   int src, dest;
  82   unsigned depth;
  83   struct graph *g;
  84   int num = number_of_loops (cfun);
  85   class loop *cloop;
  86   bool irred_loop_found = false;
  87   int i;
  88
  89   gcc_assert (current_loops != NULL);
  90
  91   /* Reset the flags.  */
  92   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  93                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  94     {
  95       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  96       FOR_EACH_EDGE (e, ei, act->succs)
  97         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  98     }
  99
 100   /* Create the edge lists.  */
 101   g = new_graph (last_basic_block_for_fn (cfun) + num);
 102
 103   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 104                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 105     FOR_EACH_EDGE (e, ei, act->succs)
 106       {
 107         /* Ignore edges to exit.  */
 108         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 109           continue;
 110
 111         src = BB_REPR (act);
 112         dest = BB_REPR (e->dest);
 113
 114         /* Ignore latch edges.  */
 115         if (e->dest->loop_father->header == e->dest
 116             && e->dest->loop_father->latch == act)
 117           continue;
 118
 119         /* Edges inside a single loop should be left where they are.  Edges
 120            to subloop headers should lead to representative of the subloop,
 121            but from the same place.
 122
 123            Edges exiting loops should lead from representative
 124            of the son of nearest common ancestor of the loops in that
 125            act lays.  */
 126
 127         if (e->dest->loop_father->header == e->dest)
 128           dest = LOOP_REPR (e->dest->loop_father);
 129
 130         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 131           {
 132             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 133                                                       e->dest->loop_father));
 134             if (depth == loop_depth (act->loop_father))
 135               cloop = act->loop_father;
 136             else
 137               cloop = (*act->loop_father->superloops)[depth];
 138
 139             src = LOOP_REPR (cloop);
 140           }
 141
 142         add_edge (g, src, dest)->data = e;
 143       }
 144
 145   /* Find the strongly connected components.  */
 146   graphds_scc (g, NULL);
 147
 148   /* Mark the irreducible loops.  */
 149   for (i = 0; i < g->n_vertices; i++)
 150     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 151       {
 152         edge real = (edge) ge->data;
 153         /* edge E in graph G is irreducible if it connects two vertices in the
 154            same scc.  */
 155
 156         /* All edges should lead from a component with higher number to the
 157            one with lower one.  */
 158         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 159
 160         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 161           continue;
 162
 163         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 164         irred_loop_found = true;
 165         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 166           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 167       }
 168
 169   free_graph (g);
 170
 171   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 172   return irred_loop_found;
 173 }
 174
 175 /* Counts number of insns inside LOOP.  */
 176 int
 177 num_loop_insns (const class loop *loop)
 178 {
 179   basic_block *bbs, bb;
 180   unsigned i, ninsns = 0;
 181   rtx_insn *insn;
 182
 183   bbs = get_loop_body (loop);
 184   for (i = 0; i < loop->num_nodes; i++)
 185     {
 186       bb = bbs[i];
 187       FOR_BB_INSNS (bb, insn)
 188         if (NONDEBUG_INSN_P (insn))
 189           ninsns++;
 190     }
 191   free (bbs);
 192
 193   if (!ninsns)
 194     ninsns = 1; /* To avoid division by zero.  */
 195
 196   return ninsns;
 197 }
 198
 199 /* Counts number of insns executed on average per iteration LOOP.  */
 200 int
 201 average_num_loop_insns (const class loop *loop)
 202 {
 203   basic_block *bbs, bb;
 204   unsigned i, binsns;
 205   sreal ninsns;
 206   rtx_insn *insn;
 207
 208   ninsns = 0;
 209   bbs = get_loop_body (loop);
 210   for (i = 0; i < loop->num_nodes; i++)
 211     {
 212       bb = bbs[i];
 213
 214       binsns = 0;
 215       FOR_BB_INSNS (bb, insn)
 216         if (NONDEBUG_INSN_P (insn))
 217           binsns++;
 218
 219       ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
 220       /* Avoid overflows.   */
 221       if (ninsns > 1000000)
 222         return 100000;
 223     }
 224   free (bbs);
 225
 226   int64_t ret = ninsns.to_int ();
 227   if (!ret)
 228     ret = 1; /* To avoid division by zero.  */
 229
 230   return ret;
 231 }
 232
 233 /* Returns expected number of iterations of LOOP, according to
 234    measured or guessed profile.
 235
 236    This functions attempts to return "sane" value even if profile
 237    information is not good enough to derive osmething.
 238    If BY_PROFILE_ONLY is set, this logic is bypassed and function
 239    return -1 in those scenarios.  */
 240
 241 gcov_type
 242 expected_loop_iterations_unbounded (const class loop *loop,
 243                                     bool *read_profile_p,
 244                                     bool by_profile_only)
 245 {
 246   edge e;
 247   edge_iterator ei;
 248   gcov_type expected = -1;
 249
 250   if (read_profile_p)
 251     *read_profile_p = false;
 252
 253   /* If we have no profile at all, use AVG_LOOP_NITER.  */
 254   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 255     {
 256       if (by_profile_only)
 257         return -1;
 258       expected = param_avg_loop_niter;
 259     }
 260   else if (loop->latch && (loop->latch->count.initialized_p ()
 261                            || loop->header->count.initialized_p ()))
 262     {
 263       profile_count count_in = profile_count::zero (),
 264                     count_latch = profile_count::zero ();
 265
 266       FOR_EACH_EDGE (e, ei, loop->header->preds)
 267         if (e->src == loop->latch)
 268           count_latch = e->count ();
 269         else
 270           count_in += e->count ();
 271
 272       if (!count_latch.initialized_p ())
 273         {
 274           if (by_profile_only)
 275             return -1;
 276           expected = param_avg_loop_niter;
 277         }
 278       else if (!count_in.nonzero_p ())
 279         {
 280           if (by_profile_only)
 281             return -1;
 282           expected = count_latch.to_gcov_type () * 2;
 283         }
 284       else
 285         {
 286           expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
 287                       - 1) / count_in.to_gcov_type ();
 288           if (read_profile_p
 289               && count_latch.reliable_p () && count_in.reliable_p ())
 290             *read_profile_p = true;
 291         }
 292     }
 293   else
 294     {
 295       if (by_profile_only)
 296         return -1;
 297       expected = param_avg_loop_niter;
 298     }
 299
 300   if (!by_profile_only)
 301     {
 302       HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 303       if (max != -1 && max < expected)
 304         return max;
 305     }
 306
 307   return expected;
 308 }
 309
 310 /* Returns expected number of LOOP iterations.  The returned value is bounded
 311    by REG_BR_PROB_BASE.  */
 312
 313 unsigned
 314 expected_loop_iterations (class loop *loop)
 315 {
 316   gcov_type expected = expected_loop_iterations_unbounded (loop);
 317   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 318 }
 319
 320 /* Returns the maximum level of nesting of subloops of LOOP.  */
 321
 322 unsigned
 323 get_loop_level (const class loop *loop)
 324 {
 325   const class loop *ploop;
 326   unsigned mx = 0, l;
 327
 328   for (ploop = loop->inner; ploop; ploop = ploop->next)
 329     {
 330       l = get_loop_level (ploop);
 331       if (l >= mx)
 332         mx = l + 1;
 333     }
 334   return mx;
 335 }
 336
 337 /* Initialize the constants for computing set costs.  */
 338
 339 void
 340 init_set_costs (void)
 341 {
 342   int speed;
 343   rtx_insn *seq;
 344   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 345   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 346   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 347   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 348   unsigned i;
 349
 350   target_avail_regs = 0;
 351   target_clobbered_regs = 0;
 352   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 353     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 354         && !fixed_regs[i])
 355       {
 356         target_avail_regs++;
 357         /* ??? This is only a rough heuristic.  It doesn't cope well
 358            with alternative ABIs, but that's an optimization rather than
 359            correctness issue.  */
 360         if (default_function_abi.clobbers_full_reg_p (i))
 361           target_clobbered_regs++;
 362       }
 363
 364   target_res_regs = 3;
 365
 366   for (speed = 0; speed < 2; speed++)
 367      {
 368       crtl->maybe_hot_insn_p = speed;
 369       /* Set up the costs for using extra registers:
 370
 371          1) If not many free registers remain, we should prefer having an
 372             additional move to decreasing the number of available registers.
 373             (TARGET_REG_COST).
 374          2) If no registers are available, we need to spill, which may require
 375             storing the old value to memory and loading it back
 376             (TARGET_SPILL_COST).  */
 377
 378       start_sequence ();
 379       emit_move_insn (reg1, reg2);
 380       seq = get_insns ();
 381       end_sequence ();
 382       target_reg_cost [speed] = seq_cost (seq, speed);
 383
 384       start_sequence ();
 385       emit_move_insn (mem, reg1);
 386       emit_move_insn (reg2, mem);
 387       seq = get_insns ();
 388       end_sequence ();
 389       target_spill_cost [speed] = seq_cost (seq, speed);
 390     }
 391   default_rtl_profile ();
 392 }
 393
 394 /* Estimates cost of increased register pressure caused by making N_NEW new
 395    registers live around the loop.  N_OLD is the number of registers live
 396    around the loop.  If CALL_P is true, also take into account that
 397    call-used registers may be clobbered in the loop body, reducing the
 398    number of available registers before we spill.  */
 399
 400 unsigned
 401 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 402                             bool call_p)
 403 {
 404   unsigned cost;
 405   unsigned regs_needed = n_new + n_old;
 406   unsigned available_regs = target_avail_regs;
 407
 408   /* If there is a call in the loop body, the call-clobbered registers
 409      are not available for loop invariants.  */
 410   if (call_p)
 411     available_regs = available_regs - target_clobbered_regs;
 412
 413   /* If we have enough registers, we should use them and not restrict
 414      the transformations unnecessarily.  */
 415   if (regs_needed + target_res_regs <= available_regs)
 416     return 0;
 417
 418   if (regs_needed <= available_regs)
 419     /* If we are close to running out of registers, try to preserve
 420        them.  */
 421     cost = target_reg_cost [speed] * n_new;
 422   else
 423     /* If we run out of registers, it is very expensive to add another
 424        one.  */
 425     cost = target_spill_cost [speed] * n_new;
 426
 427   if (optimize && (flag_ira_region == IRA_REGION_ALL
 428                    || flag_ira_region == IRA_REGION_MIXED)
 429       && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
 430     /* IRA regional allocation deals with high register pressure
 431        better.  So decrease the cost (to do more accurate the cost
 432        calculation for IRA, we need to know how many registers lives
 433        through the loop transparently).  */
 434     cost /= 2;
 435
 436   return cost;
 437 }
 438
 439 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 440
 441 void
 442 mark_loop_exit_edges (void)
 443 {
 444   basic_block bb;
 445   edge e;
 446
 447   if (number_of_loops (cfun) <= 1)
 448     return;
 449
 450   FOR_EACH_BB_FN (bb, cfun)
 451     {
 452       edge_iterator ei;
 453
 454       FOR_EACH_EDGE (e, ei, bb->succs)
 455         {
 456           if (loop_outer (bb->loop_father)
 457               && loop_exit_edge_p (bb->loop_father, e))
 458             e->flags |= EDGE_LOOP_EXIT;
 459           else
 460             e->flags &= ~EDGE_LOOP_EXIT;
 461         }
 462     }
 463 }
 464
 465 /* Return exit edge if loop has only one exit that is likely
 466    to be executed on runtime (i.e. it is not EH or leading
 467    to noreturn call.  */
 468
 469 edge
 470 single_likely_exit (class loop *loop, vec<edge> exits)
 471 {
 472   edge found = single_exit (loop);
 473   unsigned i;
 474   edge ex;
 475
 476   if (found)
 477     return found;
 478   FOR_EACH_VEC_ELT (exits, i, ex)
 479     {
 480       if (probably_never_executed_edge_p (cfun, ex)
 481           /* We want to rule out paths to noreturns but not low probabilities
 482              resulting from adjustments or combining.
 483              FIXME: once we have better quality tracking, make this more
 484              robust.  */
 485           || ex->probability <= profile_probability::very_unlikely ())
 486         continue;
 487       if (!found)
 488         found = ex;
 489       else
 490         return NULL;
 491     }
 492   return found;
 493 }
 494
 495
 496 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 497    order against direction of edges from latch.  Specially, if
 498    header != latch, latch is the 1-st block.  */
 499
 500 vec<basic_block>
 501 get_loop_hot_path (const class loop *loop)
 502 {
 503   basic_block bb = loop->header;
 504   vec<basic_block> path = vNULL;
 505   bitmap visited = BITMAP_ALLOC (NULL);
 506
 507   while (true)
 508     {
 509       edge_iterator ei;
 510       edge e;
 511       edge best = NULL;
 512
 513       path.safe_push (bb);
 514       bitmap_set_bit (visited, bb->index);
 515       FOR_EACH_EDGE (e, ei, bb->succs)
 516         if ((!best || e->probability > best->probability)
 517             && !loop_exit_edge_p (loop, e)
 518             && !bitmap_bit_p (visited, e->dest->index))
 519           best = e;
 520       if (!best || best->dest == loop->header)
 521         break;
 522       bb = best->dest;
 523     }
 524   BITMAP_FREE (visited);
 525   return path;
 526 }