/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
c7131fb2 23#include "backend.h"
3d436d2a 24#include "rtl.h"
c7131fb2 25#include "tree.h"
957060b5 26#include "predict.h"
4d0cdd0c 27#include "memmodel.h"
957060b5
AM
28#include "emit-rtl.h"
29#include "cfgloop.h"
36566b39 30#include "explow.h"
3d436d2a 31#include "expr.h"
66f97d31 32#include "graphds.h"
692dc070 33#include "sreal.h"
43b484fb
RS
34#include "regs.h"
35#include "function-abi.h"
3d436d2a 36
4391924a
RS
37struct target_cfgloop default_target_cfgloop;
38#if SWITCHABLE_TARGET
39struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
40#endif
41
/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

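/* For instance (an illustrative example, not from the sources): a block
   guarded by a conditional inside the loop body does not dominate the
   latch, so the first check above rejects it; such a block may execute
   zero or one times per iteration, but not exactly once.  */
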
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark blocks that belong directly to the
   innermost loop containing the whole cycle.

   Uses the loop tree in CURRENT_LOOPS.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)

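/* An illustrative sketch (not from the sources): in the CFG

     A -> B;  A -> C;  B -> C;  C -> B

   the cycle between B and C has two entry points, so it is not a natural
   loop.  mark_irreducible_loops sets BB_IRREDUCIBLE_LOOP on B and C and
   EDGE_IRREDUCIBLE_LOOP on the edges B->C and C->B.  */
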
bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  class loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
        e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
        /* Ignore edges to exit.  */
        if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
          continue;

        src = BB_REPR (act);
        dest = BB_REPR (e->dest);

        /* Ignore latch edges.  */
        if (e->dest->loop_father->header == e->dest
            && dominated_by_p (CDI_DOMINATORS, act, e->dest))
          continue;

        /* Edges inside a single loop should be left where they are.  Edges
           to subloop headers should lead to the representative of the
           subloop, but from the same place.

           Edges exiting loops should lead from the representative of the
           child of the nearest common ancestor of the loops that ACT
           lies in.  */

        if (e->dest->loop_father->header == e->dest)
          dest = LOOP_REPR (e->dest->loop_father);

        if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
          {
            depth = 1 + loop_depth (find_common_loop (act->loop_father,
                                                      e->dest->loop_father));
            if (depth == loop_depth (act->loop_father))
              cloop = act->loop_father;
            else
              cloop = (*act->loop_father->superloops)[depth];

            src = LOOP_REPR (cloop);
          }

        add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
        edge real = (edge) ge->data;
        /* An edge in graph G is irreducible if it connects two vertices
           in the same SCC.  */

        /* All edges should lead from a component with a higher number to
           one with a lower number.  */
        gcc_assert (g->vertices[ge->src].component
                    >= g->vertices[ge->dest].component);

        if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
          continue;

        real->flags |= EDGE_IRREDUCIBLE_LOOP;
        irred_loop_found = true;
        if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
          real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          binsns++;

      ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
        {
          free (bbs);
          return 1000000;
        }
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}

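/* For example (illustrative counts): a block whose profile count is half
   of the header count executes on average every other iteration, so its
   insns contribute to the sum above with weight 1/2.  */
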
/* Return true if the BB profile can be used to determine the expected
   number of iterations (that is, the number of executions of the latch
   edge(s) for each entry of the loop).  If this is the case, initialize
   RET with the expected number of iterations.

   RELIABLE is set if the profile indicates that the returned value should
   be a realistic estimate.  (This is the case if we read the profile and
   have not messed it up yet, and not the case for guessed profiles.)

   This function uses only the CFG profile.  We track more reliable info
   in the loop_info structure, and for loop optimization heuristics the
   get_estimated_loop_iterations API is more relevant.  */

bool
expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
                                     bool *reliable)
{
  profile_count header_count = loop->header->count;
  if (reliable)
    *reliable = false;

  /* TODO: For single exit loops we can use the loop exit edge probability.
     It may also be reliable while the loop itself was adjusted.  */
  if (!header_count.initialized_p ()
      || !header_count.nonzero_p ())
    return false;

  profile_count count_in = profile_count::zero ();
  edge e;
  edge_iterator ei;

  /* For single-latch loops avoid querying dominators.  */
  if (loop->latch)
    {
      bool found = false;
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src != loop->latch)
          count_in += e->count ();
        else
          found = true;
      /* If the latch is not found, the loop is inconsistent.  */
      gcc_checking_assert (found);
    }
  else
    FOR_EACH_EDGE (e, ei, loop->header->preds)
      if (!dominated_by_p (CDI_DOMINATORS, e->src, loop->header))
        count_in += e->count ();

  bool known;
  /* The number of iterations is the number of executions of the latch
     edge(s).  */
  *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
  if (!known)
    return false;
  if (reliable)
    {
      /* The header should have at least count_in many executions.
         Give up on a clearly inconsistent profile.  */
      if (header_count < count_in && header_count.differs_from_p (count_in))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
                     loop->num);
          *reliable = false;
        }
      else
        *reliable = count_in.reliable_p () && header_count.reliable_p ();
    }
  return true;
}

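/* A worked example with illustrative numbers: if the loop header has
   count 1000 and the entry edges contribute count 100, the latch edge(s)
   account for the remaining 1000 - 100 = 900 executions, so the expected
   number of iterations per entry is 900 / 100 = 9.  */
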
/* Return true if the loop CFG profile may be unrealistically flat.
   This is a common case, since average loops iterate only about 5 times.
   In case we do not have profile feedback or do not know the real number
   of iterations during profile estimation, we are likely going to predict
   it with a similarly low iteration count.  For static loop profiles we
   also artificially cap the profile of loops with known large iteration
   counts so they do not appear significantly more hot than other loops
   with unknown iteration counts.

   For loop optimization heuristics we ignore the CFG profile and instead
   use the get_estimated_loop_iterations API, which returns an estimate
   only when it is realistic.  For unknown counts some optimizations,
   like the vectorizer or unroller, guess that the iteration count will
   be large.  In this case we need to avoid scaling down the profile
   after the loop transform.  */

bool
maybe_flat_loop_profile (const class loop *loop)
{
  bool reliable;
  sreal ret;

  if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
    return true;

  /* Reliable CFG estimates ought never be flat.  Sanity check with
     nb_iterations_estimate.  If those differ, it is a bug in the
     profile updating code.  */
  if (reliable)
    {
      int64_t intret = ret.to_nearest_int ();
      if (loop->any_estimate
          && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
              || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Loop %i has inconsistent iterations estimates: "
                     "reliable CFG based iteration estimate is %f "
                     "while nb_iterations_estimate is %i\n",
                     loop->num,
                     ret.to_double (),
                     (int)loop->nb_iterations_estimate.to_shwi ());
          return true;
        }
      return false;
    }

  /* Allow some margin of error and see if we are close to known bounds.
     sreal (9, -3) is 9/8.  */
  int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
  if (loop->any_upper_bound
      && wi::geu_p (intret, loop->nb_iterations_upper_bound))
    return false;
  if (loop->any_likely_upper_bound
      && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
    return false;
  if (loop->any_estimate
      && wi::geu_p (intret, loop->nb_iterations_estimate))
    return false;
  return true;
}

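/* For instance (illustrative numbers): with a guessed profile giving
   ret = 8 iterations and a known nb_iterations_upper_bound of 9, the
   scaled value 8 * 9/8 = 9 reaches the bound, so the profile is
   consistent with the known bound and is not considered flat.  */
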
/* Returns the expected number of iterations of LOOP, according to the
   measured or guessed profile.

   This function attempts to return a "sane" value even if the profile
   information is not good enough to derive one.  */

gcov_type
expected_loop_iterations_unbounded (const class loop *loop,
                                    bool *read_profile_p)
{
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  sreal sreal_expected;
  if (expected_loop_iterations_by_profile
        (loop, &sreal_expected, read_profile_p))
    expected = sreal_expected.to_nearest_int ();
  else
    expected = param_avg_loop_niter;

  HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
  if (max != -1 && max < expected)
    return max;

  return expected;
}

/* Returns the expected number of LOOP iterations.  The returned value is
   bounded by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (class loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const class loop *loop)
{
  const class loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
        mx = l + 1;
    }
  return mx;
}

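/* For example: an innermost loop has level 0, a loop whose subloops are
   all innermost has level 1, and so on; the level is the height of the
   loop's subtree in the loop hierarchy.  */
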
/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
        && !fixed_regs[i])
      {
        target_avail_regs++;
        /* ??? This is only a rough heuristic.  It doesn't cope well
           with alternative ABIs, but that's an optimization rather than
           correctness issue.  */
        if (default_function_abi.clobbers_full_reg_p (i))
          target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

         1) If not many free registers remain, we should prefer having an
            additional move to decreasing the number of available registers.
            (TARGET_REG_COST).
         2) If no registers are available, we need to spill, which may require
            storing the old value to memory and loading it back
            (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates the cost of increased register pressure caused by making N_NEW
   new registers live around the loop.  N_OLD is the number of registers
   live around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
                            bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
                   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation more
       accurate for IRA, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}

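/* A worked example with illustrative numbers: on a target with
   target_avail_regs = 16 and target_res_regs = 3, a call to this function
   with n_old = 12 and n_new = 2 needs 14 registers; 14 + 3 > 16 but
   14 <= 16, so it returns target_reg_cost[speed] * 2, the cost of two
   extra moves rather than of spilling.  */
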
/* Sets the EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (loop_outer (bb->loop_father)
              && loop_exit_edge_p (bb->loop_father, e))
            e->flags |= EDGE_LOOP_EXIT;
          else
            e->flags &= ~EDGE_LOOP_EXIT;
        }
    }
}

/* Return the exit edge if the loop has only one exit that is likely
   to be executed at runtime (i.e. it is not an EH edge or an edge
   leading to a noreturn call).  */

edge
single_likely_exit (class loop *loop, const vec<edge> &exits)
{
  edge found = single_exit (loop);
  unsigned i;
  edge ex;

  if (found)
    return found;
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
          /* We want to rule out paths to noreturns but not low probabilities
             resulting from adjustments or combining.
             FIXME: once we have better quality tracking, make this more
             robust.  */
          || ex->probability <= profile_probability::very_unlikely ())
        continue;
      if (!found)
        found = ex;
      else
        return NULL;
    }
  return found;
}

/* Returns the most probable path through LOOP: it starts at the loop
   header and repeatedly follows the most probable successor edge that
   neither exits LOOP nor leads to an already visited block, stopping
   before re-entering the header.  */

auto_vec<basic_block>
get_loop_hot_path (const class loop *loop)
{
  basic_block bb = loop->header;
  auto_vec<basic_block> path;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
        if ((!best || e->probability > best->probability)
            && !loop_exit_edge_p (loop, e)
            && !bitmap_bit_p (visited, e->dest->index))
          best = e;
      if (!best || best->dest == loop->header)
        break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}
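
/* For instance (an illustrative CFG): if the loop header H has successors
   A (probability 90%) and B (10%), and both A and B branch back to H,
   the hot path is { H, A }: the walk takes the 90% edge to A and then
   stops because A's only in-loop successor is the header again.  */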