gcc/cfgloopanal.cc

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "memmodel.h"
  28 #include "emit-rtl.h"
  29 #include "cfgloop.h"
  30 #include "explow.h"
  31 #include "expr.h"
  32 #include "graphds.h"
  33 #include "sreal.h"
  34 #include "regs.h"
  35 #include "function-abi.h"
  36
  37 struct target_cfgloop default_target_cfgloop;
  38 #if SWITCHABLE_TARGET
  39 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  40 #endif
  41
  42 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  43
  44 bool
  45 just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
  46 {
  47   /* It must be executed at least once each iteration.  */
  48   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  49     return false;
  50
  51   /* And just once.  */
  52   if (bb->loop_father != loop)
  53     return false;
  54
  55   /* But this was not enough.  We might have some irreducible loop here.  */
  56   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  57     return false;
  58
  59   return true;
  60 }
  61
  62 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  63    throw away all latch edges and mark blocks inside any remaining cycle.
  64    Everything is a bit complicated due to fact we do not want to do this
  65    for parts of cycles that only "pass" through some loop -- i.e. for
  66    each cycle, we want to mark blocks that belong directly to innermost
  67    loop containing the whole cycle.
  68
  69    LOOPS is the loop tree.  */
  70
  71 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  72 #define BB_REPR(BB) ((BB)->index + 1)
  73
  74 bool
  75 mark_irreducible_loops (void)
  76 {
  77   basic_block act;
  78   struct graph_edge *ge;
  79   edge e;
  80   edge_iterator ei;
  81   int src, dest;
  82   unsigned depth;
  83   struct graph *g;
  84   int num = number_of_loops (cfun);
  85   class loop *cloop;
  86   bool irred_loop_found = false;
  87   int i;
  88
  89   gcc_assert (current_loops != NULL);
  90
  91   /* Reset the flags.  */
  92   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  93                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  94     {
  95       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  96       FOR_EACH_EDGE (e, ei, act->succs)
  97         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  98     }
  99
 100   /* Create the edge lists.  */
 101   g = new_graph (last_basic_block_for_fn (cfun) + num);
 102
 103   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 104                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 105     FOR_EACH_EDGE (e, ei, act->succs)
 106       {
 107         /* Ignore edges to exit.  */
 108         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 109           continue;
 110
 111         src = BB_REPR (act);
 112         dest = BB_REPR (e->dest);
 113
 114         /* Ignore latch edges.  */
 115         if (e->dest->loop_father->header == e->dest
 116             && dominated_by_p (CDI_DOMINATORS, act, e->dest))
 117           continue;
 118
 119         /* Edges inside a single loop should be left where they are.  Edges
 120            to subloop headers should lead to representative of the subloop,
 121            but from the same place.
 122
 123            Edges exiting loops should lead from representative
 124            of the son of nearest common ancestor of the loops in that
 125            act lays.  */
 126
 127         if (e->dest->loop_father->header == e->dest)
 128           dest = LOOP_REPR (e->dest->loop_father);
 129
 130         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 131           {
 132             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 133                                                       e->dest->loop_father));
 134             if (depth == loop_depth (act->loop_father))
 135               cloop = act->loop_father;
 136             else
 137               cloop = (*act->loop_father->superloops)[depth];
 138
 139             src = LOOP_REPR (cloop);
 140           }
 141
 142         add_edge (g, src, dest)->data = e;
 143       }
 144
 145   /* Find the strongly connected components.  */
 146   graphds_scc (g, NULL);
 147
 148   /* Mark the irreducible loops.  */
 149   for (i = 0; i < g->n_vertices; i++)
 150     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 151       {
 152         edge real = (edge) ge->data;
 153         /* edge E in graph G is irreducible if it connects two vertices in the
 154            same scc.  */
 155
 156         /* All edges should lead from a component with higher number to the
 157            one with lower one.  */
 158         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 159
 160         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 161           continue;
 162
 163         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 164         irred_loop_found = true;
 165         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 166           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 167       }
 168
 169   free_graph (g);
 170
 171   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 172   return irred_loop_found;
 173 }
 174
 175 /* Counts number of insns inside LOOP.  */
 176 int
 177 num_loop_insns (const class loop *loop)
 178 {
 179   basic_block *bbs, bb;
 180   unsigned i, ninsns = 0;
 181   rtx_insn *insn;
 182
 183   bbs = get_loop_body (loop);
 184   for (i = 0; i < loop->num_nodes; i++)
 185     {
 186       bb = bbs[i];
 187       FOR_BB_INSNS (bb, insn)
 188         if (NONDEBUG_INSN_P (insn))
 189           ninsns++;
 190     }
 191   free (bbs);
 192
 193   if (!ninsns)
 194     ninsns = 1; /* To avoid division by zero.  */
 195
 196   return ninsns;
 197 }
 198
 199 /* Counts number of insns executed on average per iteration LOOP.  */
 200 int
 201 average_num_loop_insns (const class loop *loop)
 202 {
 203   basic_block *bbs, bb;
 204   unsigned i, binsns;
 205   sreal ninsns;
 206   rtx_insn *insn;
 207
 208   ninsns = 0;
 209   bbs = get_loop_body (loop);
 210   for (i = 0; i < loop->num_nodes; i++)
 211     {
 212       bb = bbs[i];
 213
 214       binsns = 0;
 215       FOR_BB_INSNS (bb, insn)
 216         if (NONDEBUG_INSN_P (insn))
 217           binsns++;
 218
 219       ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
 220       /* Avoid overflows.   */
 221       if (ninsns > 1000000)
 222         {
 223           free (bbs);
 224           return 1000000;
 225         }
 226     }
 227   free (bbs);
 228
 229   int64_t ret = ninsns.to_int ();
 230   if (!ret)
 231     ret = 1; /* To avoid division by zero.  */
 232
 233   return ret;
 234 }
 235
 236 /* Return true if BB profile can be used to determine the expected number of
 237    iterations (that is number of executions of latch edge(s) for each
 238    entry of the loop.  If this is the case initialize RET with the number
 239    of iterations.
 240
 241    RELIABLE is set if profile indiates that the returned value should be
 242    realistic estimate.  (This is the case if we read profile and did not
 243    messed it up yet and not the case of guessed profiles.)
 244
 245    This function uses only CFG profile.  We track more reliable info in
 246    loop_info structure and for loop optimization heuristics more relevant
 247    is get_estimated_loop_iterations API.  */
 248
 249 bool
 250 expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
 251                                      bool *reliable)
 252 {
 253   profile_count header_count = loop->header->count;
 254   if (reliable)
 255     *reliable = false;
 256
 257   /* TODO: For single exit loops we can use loop exit edge probability.
 258      It also may be reliable while loop itself was adjusted.  */
 259   if (!header_count.initialized_p ()
 260       || !header_count.nonzero_p ())
 261     return false;
 262
 263   profile_count count_in = profile_count::zero ();
 264   edge e;
 265   edge_iterator ei;
 266
 267   /* For single-latch loops avoid querying dominators.  */
 268   if (loop->latch)
 269     {
 270       bool found = false;
 271       FOR_EACH_EDGE (e, ei, loop->header->preds)
 272         if (e->src != loop->latch)
 273           count_in += e->count ();
 274         else
 275           found = true;
 276       /* If latch is not found, loop is inconsistent.  */
 277       gcc_checking_assert (found);
 278     }
 279   else
 280     FOR_EACH_EDGE (e, ei, loop->header->preds)
 281       if (!dominated_by_p (CDI_DOMINATORS, e->src, loop->header))
 282         count_in += e->count ();
 283
 284   bool known;
 285   /* Number of iterations is number of executions of latch edge.  */
 286   *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
 287   if (!known)
 288     return false;
 289   if (reliable)
 290     {
 291       /* Header should have at least count_in many executions.
 292          Give up on clearly inconsistent profile.  */
 293       if (header_count < count_in && header_count.differs_from_p (count_in))
 294         {
 295           if (dump_file && (dump_flags & TDF_DETAILS))
 296             fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
 297                      loop->num);
 298           *reliable = false;
 299         }
 300       else
 301         *reliable = count_in.reliable_p () && header_count.reliable_p ();
 302     }
 303   return true;
 304 }
 305
 306 /* Returns expected number of iterations of LOOP, according to
 307    measured or guessed profile.
 308
 309    This functions attempts to return "sane" value even if profile
 310    information is not good enough to derive osmething.  */
 311
 312 gcov_type
 313 expected_loop_iterations_unbounded (const class loop *loop,
 314                                     bool *read_profile_p)
 315 {
 316   gcov_type expected = -1;
 317
 318   if (read_profile_p)
 319     *read_profile_p = false;
 320
 321   sreal sreal_expected;
 322   if (expected_loop_iterations_by_profile
 323           (loop, &sreal_expected, read_profile_p))
 324     expected = sreal_expected.to_nearest_int ();
 325   else
 326     expected = param_avg_loop_niter;
 327
 328   HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 329   if (max != -1 && max < expected)
 330     return max;
 331
 332   return expected;
 333 }
 334
 335 /* Returns expected number of LOOP iterations.  The returned value is bounded
 336    by REG_BR_PROB_BASE.  */
 337
 338 unsigned
 339 expected_loop_iterations (class loop *loop)
 340 {
 341   gcov_type expected = expected_loop_iterations_unbounded (loop);
 342   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 343 }
 344
 345 /* Returns the maximum level of nesting of subloops of LOOP.  */
 346
 347 unsigned
 348 get_loop_level (const class loop *loop)
 349 {
 350   const class loop *ploop;
 351   unsigned mx = 0, l;
 352
 353   for (ploop = loop->inner; ploop; ploop = ploop->next)
 354     {
 355       l = get_loop_level (ploop);
 356       if (l >= mx)
 357         mx = l + 1;
 358     }
 359   return mx;
 360 }
 361
 362 /* Initialize the constants for computing set costs.  */
 363
 364 void
 365 init_set_costs (void)
 366 {
 367   int speed;
 368   rtx_insn *seq;
 369   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 370   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 371   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 372   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 373   unsigned i;
 374
 375   target_avail_regs = 0;
 376   target_clobbered_regs = 0;
 377   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 378     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 379         && !fixed_regs[i])
 380       {
 381         target_avail_regs++;
 382         /* ??? This is only a rough heuristic.  It doesn't cope well
 383            with alternative ABIs, but that's an optimization rather than
 384            correctness issue.  */
 385         if (default_function_abi.clobbers_full_reg_p (i))
 386           target_clobbered_regs++;
 387       }
 388
 389   target_res_regs = 3;
 390
 391   for (speed = 0; speed < 2; speed++)
 392      {
 393       crtl->maybe_hot_insn_p = speed;
 394       /* Set up the costs for using extra registers:
 395
 396          1) If not many free registers remain, we should prefer having an
 397             additional move to decreasing the number of available registers.
 398             (TARGET_REG_COST).
 399          2) If no registers are available, we need to spill, which may require
 400             storing the old value to memory and loading it back
 401             (TARGET_SPILL_COST).  */
 402
 403       start_sequence ();
 404       emit_move_insn (reg1, reg2);
 405       seq = get_insns ();
 406       end_sequence ();
 407       target_reg_cost [speed] = seq_cost (seq, speed);
 408
 409       start_sequence ();
 410       emit_move_insn (mem, reg1);
 411       emit_move_insn (reg2, mem);
 412       seq = get_insns ();
 413       end_sequence ();
 414       target_spill_cost [speed] = seq_cost (seq, speed);
 415     }
 416   default_rtl_profile ();
 417 }
 418
 419 /* Estimates cost of increased register pressure caused by making N_NEW new
 420    registers live around the loop.  N_OLD is the number of registers live
 421    around the loop.  If CALL_P is true, also take into account that
 422    call-used registers may be clobbered in the loop body, reducing the
 423    number of available registers before we spill.  */
 424
 425 unsigned
 426 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 427                             bool call_p)
 428 {
 429   unsigned cost;
 430   unsigned regs_needed = n_new + n_old;
 431   unsigned available_regs = target_avail_regs;
 432
 433   /* If there is a call in the loop body, the call-clobbered registers
 434      are not available for loop invariants.  */
 435   if (call_p)
 436     available_regs = available_regs - target_clobbered_regs;
 437
 438   /* If we have enough registers, we should use them and not restrict
 439      the transformations unnecessarily.  */
 440   if (regs_needed + target_res_regs <= available_regs)
 441     return 0;
 442
 443   if (regs_needed <= available_regs)
 444     /* If we are close to running out of registers, try to preserve
 445        them.  */
 446     cost = target_reg_cost [speed] * n_new;
 447   else
 448     /* If we run out of registers, it is very expensive to add another
 449        one.  */
 450     cost = target_spill_cost [speed] * n_new;
 451
 452   if (optimize && (flag_ira_region == IRA_REGION_ALL
 453                    || flag_ira_region == IRA_REGION_MIXED)
 454       && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
 455     /* IRA regional allocation deals with high register pressure
 456        better.  So decrease the cost (to do more accurate the cost
 457        calculation for IRA, we need to know how many registers lives
 458        through the loop transparently).  */
 459     cost /= 2;
 460
 461   return cost;
 462 }
 463
 464 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 465
 466 void
 467 mark_loop_exit_edges (void)
 468 {
 469   basic_block bb;
 470   edge e;
 471
 472   if (number_of_loops (cfun) <= 1)
 473     return;
 474
 475   FOR_EACH_BB_FN (bb, cfun)
 476     {
 477       edge_iterator ei;
 478
 479       FOR_EACH_EDGE (e, ei, bb->succs)
 480         {
 481           if (loop_outer (bb->loop_father)
 482               && loop_exit_edge_p (bb->loop_father, e))
 483             e->flags |= EDGE_LOOP_EXIT;
 484           else
 485             e->flags &= ~EDGE_LOOP_EXIT;
 486         }
 487     }
 488 }
 489
 490 /* Return exit edge if loop has only one exit that is likely
 491    to be executed on runtime (i.e. it is not EH or leading
 492    to noreturn call.  */
 493
 494 edge
 495 single_likely_exit (class loop *loop, const vec<edge> &exits)
 496 {
 497   edge found = single_exit (loop);
 498   unsigned i;
 499   edge ex;
 500
 501   if (found)
 502     return found;
 503   FOR_EACH_VEC_ELT (exits, i, ex)
 504     {
 505       if (probably_never_executed_edge_p (cfun, ex)
 506           /* We want to rule out paths to noreturns but not low probabilities
 507              resulting from adjustments or combining.
 508              FIXME: once we have better quality tracking, make this more
 509              robust.  */
 510           || ex->probability <= profile_probability::very_unlikely ())
 511         continue;
 512       if (!found)
 513         found = ex;
 514       else
 515         return NULL;
 516     }
 517   return found;
 518 }
 519
 520
 521 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 522    order against direction of edges from latch.  Specially, if
 523    header != latch, latch is the 1-st block.  */
 524
 525 auto_vec<basic_block>
 526 get_loop_hot_path (const class loop *loop)
 527 {
 528   basic_block bb = loop->header;
 529   auto_vec<basic_block> path;
 530   bitmap visited = BITMAP_ALLOC (NULL);
 531
 532   while (true)
 533     {
 534       edge_iterator ei;
 535       edge e;
 536       edge best = NULL;
 537
 538       path.safe_push (bb);
 539       bitmap_set_bit (visited, bb->index);
 540       FOR_EACH_EDGE (e, ei, bb->succs)
 541         if ((!best || e->probability > best->probability)
 542             && !loop_exit_edge_p (loop, e)
 543             && !bitmap_bit_p (visited, e->dest->index))
 544           best = e;
 545       if (!best || best->dest == loop->header)
 546         break;
 547       bb = best->dest;
 548     }
 549   BITMAP_FREE (visited);
 550   return path;
 551 }