gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks that a particular inlining is allowed
  28       by the limits specified by the user (function body growth, stack
  29       frame growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph.  Functions are converted into
  48          SSA form just before this pass and optimized subsequently.  As a
  49          result, the callees of the function seen by the early inliner were
  50          already optimized, and the results of early inlining add a lot of
  51          optimization opportunities for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the
  54          compilation unit - inlining auto inline functions, inlining for size
  55          and flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) Inlining of small functions.  This is implemented by a greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls.  Recursive
  78          calls can be inlined with results similar to loop unrolling.  To do so,
  79          a special purpose recursive inliner is executed on a function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "ipa-prop.h"
 112 #include "ipa-inline.h"
 113 #include "ipa-utils.h"
 114 #include "sreal.h"
 115 #include "auto-profile.h"
 116 #include "builtins.h"
 117 #include "fibonacci_heap.h"
 118
     /* Fibonacci heap of call graph edges keyed by an sreal value, and the
        type of its nodes.  NOTE(review): presumably the key is the edge
        "badness" the small-function inliner orders edges by -- confirm at
        the uses.  */
 119 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 120 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 121
 122 /* Statistics we collect about the inlining algorithm.  */
 123 static int overall_size;
 124 static gcov_type max_count;
 125 static gcov_type spec_rem;
 126
 127 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100, kept as sreal so
        that scaling by an edge frequency or by a percentage is a
        multiplication (see compute_uninlined_call_time and big_speedup_p).  */
 128 static sreal cgraph_freq_base_rec, percent_rec;
 129
 130 /* Return false when inlining edge E would lead to violating
 131    limits on function unit growth or stack usage growth.
 132
 133    The relative function body growth limit is present generally
 134    to avoid problems with non-linear behavior of the compiler.
 135    To allow inlining huge functions into tiny wrapper, the limit
 136    is always based on the bigger of the two functions considered.
 137
 138    For stack growth limits we always base the growth in stack usage
 139    of the callers.  We want to prevent applications from segfaulting
 140    on stack overflow when functions with huge stack frames gets
 141    inlined. */
 142
 143 static bool
 144 caller_growth_limits (struct cgraph_edge *e)
 145 {
 146   struct cgraph_node *to = e->caller;
 147   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 148   int newsize;
 149   int limit = 0;
 150   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 151   inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
 152
 153   /* Look for function e->caller is inlined to.  While doing
 154      so work out the largest function body on the way.  As
 155      described above, we want to base our function growth
 156      limits based on that.  Not on the self size of the
 157      outer function, not on the self size of inline code
 158      we immediately inline to.  This is the most relaxed
 159      interpretation of the rule "do not grow large functions
 160      too much in order to prevent compiler from exploding".  */
 161   while (true)
 162     {
 163       info = inline_summaries->get (to);
 164       if (limit < info->self_size)
 165         limit = info->self_size;
 166       if (stack_size_limit < info->estimated_self_stack_size)
 167         stack_size_limit = info->estimated_self_stack_size;
 168       if (to->global.inlined_to)
 169         to = to->callers->caller;
 170       else
 171         break;
 172     }
 173
 174   what_info = inline_summaries->get (what);
 175
 176   if (limit < what_info->self_size)
 177     limit = what_info->self_size;
 178
 179   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 180
 181   /* Check the size after inlining against the function limits.  But allow
 182      the function to shrink if it went over the limits by forced inlining.  */
 183   newsize = estimate_size_after_inlining (to, e);
 184   if (newsize >= info->size
 185       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 186       && newsize > limit)
 187     {
 188       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 189       return false;
 190     }
 191
 192   if (!what_info->estimated_stack_size)
 193     return true;
 194
 195   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 196      due to large i/o datastructures used by the Fortran front-end.
 197      We ought to ignore this limit when we know that the edge is executed
 198      on every invocation of the caller (i.e. its call statement dominates
 199      exit block).  We do not track this information, yet.  */
 200   stack_size_limit += ((gcov_type)stack_size_limit
 201                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 202
 203   inlined_stack = (outer_info->stack_frame_offset
 204                    + outer_info->estimated_self_stack_size
 205                    + what_info->estimated_stack_size);
 206   /* Check new stack consumption with stack consumption at the place
 207      stack is used.  */
 208   if (inlined_stack > stack_size_limit
 209       /* If function already has large stack usage from sibling
 210          inline call, we can inline, too.
 211          This bit overoptimistically assume that we are good at stack
 212          packing.  */
 213       && inlined_stack > info->estimated_stack_size
 214       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 215     {
 216       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 217       return false;
 218     }
 219   return true;
 220 }
 221
 222 /* Dump info about why inlining has failed.  */
 223
 224 static void
 225 report_inline_failed_reason (struct cgraph_edge *e)
 226 {
 227   if (dump_file)
 228     {
 229       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 230                xstrdup_for_dump (e->caller->name ()), e->caller->order,
 231                xstrdup_for_dump (e->callee->name ()), e->callee->order,
 232                cgraph_inline_failed_string (e->inline_failed));
 233       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 234            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 235           && e->caller->lto_file_data
 236           && e->callee->ultimate_alias_target ()->lto_file_data)
 237         {
 238           fprintf (dump_file, "  LTO objects: %s, %s\n",
 239                    e->caller->lto_file_data->file_name,
 240                    e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 241         }
 242       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 243         cl_target_option_print_diff
 244          (dump_file, 2, target_opts_for_fn (e->caller->decl),
 245           target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 246       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 247         cl_optimization_print_diff
 248           (dump_file, 2, opts_for_fn (e->caller->decl),
 249            opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 250     }
 251 }
 252
 253  /* Decide whether sanitizer-related attributes allow inlining. */
 254
 255 static bool
 256 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 257 {
 258   /* Don't care if sanitizer is disabled */
 259   if (!(flag_sanitize & SANITIZE_ADDRESS))
 260     return true;
 261
 262   if (!caller || !callee)
 263     return true;
 264
 265   return !!lookup_attribute ("no_sanitize_address",
 266       DECL_ATTRIBUTES (caller)) ==
 267       !!lookup_attribute ("no_sanitize_address",
 268       DECL_ATTRIBUTES (callee));
 269 }
 270
 271 /* Used for flags where it is safe to inline when caller's value is
 272    grater than callee's.  */
 273 #define check_maybe_up(flag) \
 274       (opts_for_fn (caller->decl)->x_##flag             \
 275        != opts_for_fn (callee->decl)->x_##flag          \
 276        && (!always_inline                               \
 277            || opts_for_fn (caller->decl)->x_##flag      \
 278               < opts_for_fn (callee->decl)->x_##flag))
 279 /* Used for flags where it is safe to inline when caller's value is
 280    smaller than callee's.  */
 281 #define check_maybe_down(flag) \
 282       (opts_for_fn (caller->decl)->x_##flag             \
 283        != opts_for_fn (callee->decl)->x_##flag          \
 284        && (!always_inline                               \
 285            || opts_for_fn (caller->decl)->x_##flag      \
 286               > opts_for_fn (callee->decl)->x_##flag))
 287 /* Used for flags where exact match is needed for correctness.  */
 288 #define check_match(flag) \
 289       (opts_for_fn (caller->decl)->x_##flag             \
 290        != opts_for_fn (callee->decl)->x_##flag)
 291
 292  /* Decide if we can inline the edge E and possibly update its
 293    inline_failed reason.  We check whether inlining is possible at all
 294    and whether caller growth limits allow doing so.
 295
 296    If REPORT is true, output the failure reason to the dump file.
 297    If DISREGARD_LIMITS is true, skip the caller growth limits.
 298    If EARLY is true, skip every option mismatch check for always_inline
 299    callees and the flag_devirtualize check for all callees.  */
 300
 301 static bool
 302 can_inline_edge_p (struct cgraph_edge *e, bool report,
 303                    bool disregard_limits = false, bool early = false)
 304 {
 305   gcc_checking_assert (e->inline_failed);
 306
       /* Edges already marked with a final error are rejected right away.  */
 307   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 308     {
 309       if (report)
 310         report_inline_failed_reason (e);
 311       return false;
 312     }
 313
 314   bool inlinable = true;
 315   enum availability avail;
       /* All checks are made against the function the caller's body ends up
          in, i.e. the function it is inlined to when it was inlined itself.  */
 316   cgraph_node *caller = e->caller->global.inlined_to
 317                         ? e->caller->global.inlined_to : e->caller;
 318   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 319   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 320   tree callee_tree
 321     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 322
       /* The chain below stops at the first check that forbids inlining.
          Each branch records its reason in E->inline_failed; for the growth
          limits caller_growth_limits does so itself.  */
 323   if (!callee->definition)
 324     {
 325       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 326       inlinable = false;
 327     }
 328   else if (callee->calls_comdat_local)
 329     {
 330       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 331       inlinable = false;
 332     }
 333   else if (avail <= AVAIL_INTERPOSABLE)
 334     {
 335       e->inline_failed = CIF_OVERWRITABLE;
 336       inlinable = false;
 337     }
 338   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 339      initialized to one of FINAL_ERROR reasons.  */
 340   else if (e->call_stmt_cannot_inline_p)
 341     gcc_unreachable ();
 342   /* Don't inline if the functions have different EH personalities.  */
 343   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 344            && DECL_FUNCTION_PERSONALITY (callee->decl)
 345            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 346                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 347     {
 348       e->inline_failed = CIF_EH_PERSONALITY;
 349       inlinable = false;
 350     }
 351   /* TM pure functions should not be inlined into non-TM_pure
 352      functions.  */
 353   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 354     {
 355       e->inline_failed = CIF_UNSPECIFIED;
 356       inlinable = false;
 357     }
 358   /* Check compatibility of target optimization options.  */
 359   else if (!targetm.target_option.can_inline_p (caller->decl,
 360                                                 callee->decl))
 361     {
 362       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 363       inlinable = false;
 364     }
 365   else if (!inline_summaries->get (callee)->inlinable)
 366     {
 367       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 368       inlinable = false;
 369     }
 370   else if (inline_summaries->get (caller)->contains_cilk_spawn)
 371     {
 372       e->inline_failed = CIF_CILK_SPAWN;
 373       inlinable = false;
 374     }
 375   /* Don't inline a function with mismatched sanitization attributes. */
 376   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 377     {
 378       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 379       inlinable = false;
 380     }
 381   /* Check if caller growth allows the inlining.  */
 382   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 383            && !disregard_limits
 384            && !lookup_attribute ("flatten",
 385                                  DECL_ATTRIBUTES (caller->decl))
 386            && !caller_growth_limits (e))
 387     inlinable = false;
 388   /* Don't inline a function with a higher optimization level than the
 389      caller.  FIXME: this is really just tip of iceberg of handling
 390      optimization attribute.  */
 391   else if (caller_tree != callee_tree)
 392     {
 393       bool always_inline =
 394              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 395               && lookup_attribute ("always_inline",
 396                                    DECL_ATTRIBUTES (callee->decl)));
 397       inline_summary *caller_info = inline_summaries->get (caller);
 398       inline_summary *callee_info = inline_summaries->get (callee);
 399
 400      /* Until GCC 4.9 we did not check the semantics altering flags
 401         below and inlined across optimization boundary.
 402         Enabling checks below breaks several packages by refusing
 403         to inline library always_inline functions. See PR65873.
 404         Disable the check for early inlining for now until better solution
 405         is found.  */
 406      if (always_inline && early)
 407         ;
 408       /* There are some options that change IL semantics which means
 409          we cannot inline in these cases for correctness reason.
 410          Not even for always_inline declared functions.  */
 411       /* Strictly speaking only when the callee contains signed integer
 412          math where overflow is undefined.  */
 413      else if ((check_maybe_up (flag_strict_overflow)
 414                /* this flag is set by optimize.  Allow inlining across
 415                   optimize boundary.  */
 416                && (!opt_for_fn (caller->decl, optimize)
 417                    == !opt_for_fn (callee->decl, optimize) || !always_inline))
 418               || check_match (flag_wrapv)
 419               || check_match (flag_trapv)
 420               /* When caller or callee does FP math, be sure FP codegen flags
 421                  compatible.  */
 422               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 423                   && (check_maybe_up (flag_rounding_math)
 424                       || check_maybe_up (flag_trapping_math)
 425                       || check_maybe_down (flag_unsafe_math_optimizations)
 426                       || check_maybe_down (flag_finite_math_only)
 427                       || check_maybe_up (flag_signaling_nans)
 428                       || check_maybe_down (flag_cx_limited_range)
 429                       || check_maybe_up (flag_signed_zeros)
 430                       || check_maybe_down (flag_associative_math)
 431                       || check_maybe_down (flag_reciprocal_math)
 432                       || check_maybe_down (flag_fp_int_builtin_inexact)
 433                       /* Strictly speaking only when the callee contains function
 434                          calls that may end up setting errno.  */
 435                       || check_maybe_up (flag_errno_math)))
 436               /* We do not want to make code compiled with exceptions to be
 437                  brought into a non-EH function unless we know that the callee
 438                  does not throw.
 439                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 440               || (check_maybe_up (flag_non_call_exceptions)
 441                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 442               || (check_maybe_up (flag_exceptions)
 443                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 444               /* When devirtualization is disabled for callee, it is not safe
 445                  to inline it as we possibly mangled the type info.
 446                  Allow early inlining of always inlines.  */
 447               || (!early && check_maybe_down (flag_devirtualize)))
 448         {
 449           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 450           inlinable = false;
 451         }
 452       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 453       else if (always_inline)
 454         ;
 455       /* When user added an attribute to the callee honor it.  */
 456       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 457                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 458         {
 459           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 460           inlinable = false;
 461         }
 462       /* If explicit optimize attribute are not used, the mismatch is caused
 463          by different command line options used to build different units.
 464          Do not care about COMDAT functions - those are intended to be
 465          optimized with the optimization flags of module they are used in.
 466          Also do not care about mixing up size/speed optimization when
 467          DECL_DISREGARD_INLINE_LIMITS is set.  */
 468       else if ((callee->merged_comdat
 469                 && !lookup_attribute ("optimize",
 470                                       DECL_ATTRIBUTES (caller->decl)))
 471                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 472         ;
 473       /* If mismatch is caused by merging two LTO units with different
 474          optimization flags we want to be bit nicer.  However never inline
 475          if one of functions is not optimized at all.  */
 476       else if (!opt_for_fn (callee->decl, optimize)
 477                || !opt_for_fn (caller->decl, optimize))
 478         {
 479           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 480           inlinable = false;
 481         }
 482       /* If callee is optimized for size and caller is not, allow inlining if
 483          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 484          is inline (and thus likely a unified comdat).  This will allow caller
 485          to run faster.  */
 486       else if (opt_for_fn (callee->decl, optimize_size)
 487                > opt_for_fn (caller->decl, optimize_size))
 488         {
 489           int growth = estimate_edge_growth (e);
 490           if (growth > 0
 491               && (!DECL_DECLARED_INLINE_P (callee->decl)
 492                   && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
 493                                     MAX_INLINE_INSNS_AUTO)))
 494             {
 495               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 496               inlinable = false;
 497             }
 498         }
 499       /* If callee is more aggressively optimized for performance than caller,
 500          we generally want to inline only cheap (runtime wise) functions.  */
 501       else if (opt_for_fn (callee->decl, optimize_size)
 502                < opt_for_fn (caller->decl, optimize_size)
 503                || (opt_for_fn (callee->decl, optimize)
 504                    > opt_for_fn (caller->decl, optimize)))
 505         {
 506           if (estimate_edge_time (e)
 507               >= 20 + inline_edge_summary (e)->call_stmt_time)
 508             {
 509               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 510               inlinable = false;
 511             }
 512         }
 513
 514     }
 515
 516   if (!inlinable && report)
 517     report_inline_failed_reason (e);
 518   return inlinable;
 519 }
 520
 521
 522 /* Return true if the edge E is inlinable during early inlining.  */
 523
 524 static bool
 525 can_early_inline_edge_p (struct cgraph_edge *e)
 526 {
 527   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 528   /* Early inliner might get called at WPA stage when IPA pass adds new
 529      function.  In this case we can not really do any of early inlining
 530      because function bodies are missing.  */
 531   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 532     return false;
 533   if (!gimple_has_body_p (callee->decl))
 534     {
 535       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 536       return false;
 537     }
 538   /* In early inliner some of callees may not be in SSA form yet
 539      (i.e. the callgraph is cyclic and we did not process
 540      the callee by early inliner, yet).  We don't have CIF code for this
 541      case; later we will re-do the decision in the real inliner.  */
 542   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 543       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 544     {
 545       if (dump_file)
 546         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 547       return false;
 548     }
 549   if (!can_inline_edge_p (e, true, false, true))
 550     return false;
 551   return true;
 552 }
 553
 554
 555 /* Return number of calls in N.  Ignore cheap builtins.  */
 556
 557 static int
 558 num_calls (struct cgraph_node *n)
 559 {
 560   struct cgraph_edge *e;
 561   int num = 0;
 562
 563   for (e = n->callees; e; e = e->next_callee)
 564     if (!is_inexpensive_builtin (e->callee->decl))
 565       num++;
 566   return num;
 567 }
 568
 569
 570 /* Return true if we are interested in inlining small function.  */
 571
 572 static bool
 573 want_early_inline_function_p (struct cgraph_edge *e)
 574 {
 575   bool want_inline = true;
 576   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 577
 578   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 579     ;
 580   /* For AutoFDO, we need to make sure that before profile summary, all
 581      hot paths' IR look exactly the same as profiled binary. As a result,
 582      in einliner, we will disregard size limit and inline those callsites
 583      that are:
 584        * inlined in the profiled binary, and
 585        * the cloned callee has enough samples to be considered "hot".  */
 586   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 587     ;
 588   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 589            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 590     {
 591       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 592       report_inline_failed_reason (e);
 593       want_inline = false;
 594     }
 595   else
 596     {
 597       int growth = estimate_edge_growth (e);
 598       int n;
 599
 600       if (growth <= 0)
 601         ;
 602       else if (!e->maybe_hot_p ()
 603                && growth > 0)
 604         {
 605           if (dump_file)
 606             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 607                      "call is cold and code would grow by %i\n",
 608                      xstrdup_for_dump (e->caller->name ()),
 609                      e->caller->order,
 610                      xstrdup_for_dump (callee->name ()), callee->order,
 611                      growth);
 612           want_inline = false;
 613         }
 614       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 615         {
 616           if (dump_file)
 617             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 618                      "growth %i exceeds --param early-inlining-insns\n",
 619                      xstrdup_for_dump (e->caller->name ()),
 620                      e->caller->order,
 621                      xstrdup_for_dump (callee->name ()), callee->order,
 622                      growth);
 623           want_inline = false;
 624         }
 625       else if ((n = num_calls (callee)) != 0
 626                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 627         {
 628           if (dump_file)
 629             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 630                      "growth %i exceeds --param early-inlining-insns "
 631                      "divided by number of calls\n",
 632                      xstrdup_for_dump (e->caller->name ()),
 633                      e->caller->order,
 634                      xstrdup_for_dump (callee->name ()), callee->order,
 635                      growth);
 636           want_inline = false;
 637         }
 638     }
 639   return want_inline;
 640 }
 641
 642 /* Compute time of the edge->caller + edge->callee execution when inlining
 643    does not happen.  */
 644
 645 inline sreal
 646 compute_uninlined_call_time (struct inline_summary *callee_info,
 647                              struct cgraph_edge *edge)
 648 {
 649   sreal uninlined_call_time = (sreal)callee_info->time;
 650   cgraph_node *caller = (edge->caller->global.inlined_to
 651                          ? edge->caller->global.inlined_to
 652                          : edge->caller);
 653
 654   if (edge->count && caller->count)
 655     uninlined_call_time *= (sreal)edge->count / caller->count;
 656   if (edge->frequency)
 657     uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
 658   else
 659     uninlined_call_time = uninlined_call_time >> 11;
 660
 661   int caller_time = inline_summaries->get (caller)->time;
 662   return uninlined_call_time + caller_time;
 663 }
 664
 665 /* Same as compute_uinlined_call_time but compute time when inlining
 666    does happen.  */
 667
 668 inline sreal
 669 compute_inlined_call_time (struct cgraph_edge *edge,
 670                            int edge_time)
 671 {
 672   cgraph_node *caller = (edge->caller->global.inlined_to
 673                          ? edge->caller->global.inlined_to
 674                          : edge->caller);
 675   int caller_time = inline_summaries->get (caller)->time;
 676   sreal time = edge_time;
 677
 678   if (edge->count && caller->count)
 679     time *= (sreal)edge->count / caller->count;
 680   if (edge->frequency)
 681     time *= cgraph_freq_base_rec * edge->frequency;
 682   else
 683     time = time >> 11;
 684
 685   /* This calculation should match one in ipa-inline-analysis.
 686      FIXME: Once ipa-inline-analysis is converted to sreal this can be
 687      simplified.  */
 688   time -= (sreal) ((gcov_type) edge->frequency
 689                    * inline_edge_summary (edge)->call_stmt_time
 690                    * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
 691   time += caller_time;
 692   if (time <= 0)
 693     time = ((sreal) 1) >> 8;
 694   gcc_checking_assert (time >= 0);
 695   return time;
 696 }
 697
 698 /* Return true if the speedup for inlining E is bigger than
 699    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 700
 701 static bool
 702 big_speedup_p (struct cgraph_edge *e)
 703 {
 704   sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
 705                                             e);
 706   sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
 707
 708   if (time - inlined_time
 709       > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
 710          * percent_rec)
 711     return true;
 712   return false;
 713 }
 714
 715 /* Return true if we are interested in inlining small function.
 716    When REPORT is true, report reason to dump file.  */
 717
 718 static bool
 719 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 720 {
 721   bool want_inline = true;
 722   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 723
 724   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 725     ;
 726   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 727            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 728     {
 729       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 730       want_inline = false;
 731     }
 732   /* Do fast and conservative check if the function can be good
 733      inline candidate.  At the moment we allow inline hints to
 734      promote non-inline functions to inline and we increase
 735      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 736   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 737            && (!e->count || !e->maybe_hot_p ()))
 738            && inline_summaries->get (callee)->min_size
 739                 - inline_edge_summary (e)->call_stmt_size
 740               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 741     {
 742       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 743       want_inline = false;
 744     }
 745   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 746            && inline_summaries->get (callee)->min_size
 747                 - inline_edge_summary (e)->call_stmt_size
 748               > 16 * MAX_INLINE_INSNS_SINGLE)
 749     {
 750       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 751                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 752                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 753       want_inline = false;
 754     }
 755   else
 756     {
 757       int growth = estimate_edge_growth (e);
 758       inline_hints hints = estimate_edge_hints (e);
 759       bool big_speedup = big_speedup_p (e);
 760
 761       if (growth <= 0)
 762         ;
 763       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 764          hints suggests that inlining given function is very profitable.  */
 765       else if (DECL_DECLARED_INLINE_P (callee->decl)
 766                && growth >= MAX_INLINE_INSNS_SINGLE
 767                && ((!big_speedup
 768                     && !(hints & (INLINE_HINT_indirect_call
 769                                   | INLINE_HINT_known_hot
 770                                   | INLINE_HINT_loop_iterations
 771                                   | INLINE_HINT_array_index
 772                                   | INLINE_HINT_loop_stride)))
 773                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 774         {
 775           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 776           want_inline = false;
 777         }
 778       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 779                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 780         {
 781           /* growth_likely_positive is expensive, always test it last.  */
 782           if (growth >= MAX_INLINE_INSNS_SINGLE
 783               || growth_likely_positive (callee, growth))
 784             {
 785               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 786               want_inline = false;
 787             }
 788         }
 789       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 790          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 791          inlining given function is very profitable.  */
 792       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 793                && !big_speedup
 794                && !(hints & INLINE_HINT_known_hot)
 795                && growth >= ((hints & (INLINE_HINT_indirect_call
 796                                        | INLINE_HINT_loop_iterations
 797                                        | INLINE_HINT_array_index
 798                                        | INLINE_HINT_loop_stride))
 799                              ? MAX (MAX_INLINE_INSNS_AUTO,
 800                                     MAX_INLINE_INSNS_SINGLE)
 801                              : MAX_INLINE_INSNS_AUTO))
 802         {
 803           /* growth_likely_positive is expensive, always test it last.  */
 804           if (growth >= MAX_INLINE_INSNS_SINGLE
 805               || growth_likely_positive (callee, growth))
 806             {
 807               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 808               want_inline = false;
 809             }
 810         }
 811       /* If call is cold, do not inline when function body would grow. */
 812       else if (!e->maybe_hot_p ()
 813                && (growth >= MAX_INLINE_INSNS_SINGLE
 814                    || growth_likely_positive (callee, growth)))
 815         {
 816           e->inline_failed = CIF_UNLIKELY_CALL;
 817           want_inline = false;
 818         }
 819     }
 820   if (!want_inline && report)
 821     report_inline_failed_reason (e);
 822   return want_inline;
 823 }
 824
 825 /* EDGE is self recursive edge.
 826    We handle two cases - when function A is inlined into itself
 827    or when function A is being inlined into another inline copy of function
 828    A within function B.
 829
 830    In first case OUTER_NODE points to the toplevel copy of A, while
 831    in the second case OUTER_NODE points to the outermost copy of A in B.
 832
 833    In both cases we want to be extra selective since
 834    inlining the call will just introduce new recursive calls to appear.  */
 835
 836 static bool
 837 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 838                                    struct cgraph_node *outer_node,
 839                                    bool peeling,
 840                                    int depth)
 841 {
 842   char const *reason = NULL;
 843   bool want_inline = true;
 844   int caller_freq = CGRAPH_FREQ_BASE;
 845   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 846
 847   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 848     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 849
 850   if (!edge->maybe_hot_p ())
 851     {
 852       reason = "recursive call is cold";
 853       want_inline = false;
 854     }
 855   else if (max_count && !outer_node->count)
 856     {
 857       reason = "not executed in profile";
 858       want_inline = false;
 859     }
 860   else if (depth > max_depth)
 861     {
 862       reason = "--param max-inline-recursive-depth exceeded.";
 863       want_inline = false;
 864     }
 865
 866   if (outer_node->global.inlined_to)
 867     caller_freq = outer_node->callers->frequency;
 868
 869   if (!caller_freq)
 870     {
 871       reason = "function is inlined and unlikely";
 872       want_inline = false;
 873     }
 874
 875   if (!want_inline)
 876     ;
 877   /* Inlining of self recursive function into copy of itself within other function
 878      is transformation similar to loop peeling.
 879
 880      Peeling is profitable if we can inline enough copies to make probability
 881      of actual call to the self recursive function very small.  Be sure that
 882      the probability of recursion is small.
 883
 884      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 885      This way the expected number of recision is at most max_depth.  */
 886   else if (peeling)
 887     {
 888       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 889                                          / max_depth);
 890       int i;
 891       for (i = 1; i < depth; i++)
 892         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 893       if (max_count
 894           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 895               >= max_prob))
 896         {
 897           reason = "profile of recursive call is too large";
 898           want_inline = false;
 899         }
 900       if (!max_count
 901           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 902               >= max_prob))
 903         {
 904           reason = "frequency of recursive call is too large";
 905           want_inline = false;
 906         }
 907     }
 908   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 909      depth is large.  We reduce function call overhead and increase chances that
 910      things fit in hardware return predictor.
 911
 912      Recursive inlining might however increase cost of stack frame setup
 913      actually slowing down functions whose recursion tree is wide rather than
 914      deep.
 915
 916      Deciding reliably on when to do recursive inlining without profile feedback
 917      is tricky.  For now we disable recursive inlining when probability of self
 918      recursion is low.
 919
 920      Recursive inlining of self recursive call within loop also results in large loop
 921      depths that generally optimize badly.  We may want to throttle down inlining
 922      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 923      methods.  */
 924   else
 925     {
 926       if (max_count
 927           && (edge->count * 100 / outer_node->count
 928               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 929         {
 930           reason = "profile of recursive call is too small";
 931           want_inline = false;
 932         }
 933       else if (!max_count
 934                && (edge->frequency * 100 / caller_freq
 935                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 936         {
 937           reason = "frequency of recursive call is too small";
 938           want_inline = false;
 939         }
 940     }
 941   if (!want_inline && dump_file)
 942     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 943   return want_inline;
 944 }
 945
 946 /* Return true when NODE has uninlinable caller;
 947    set HAS_HOT_CALL if it has hot call.
 948    Worker for call_for_symbol_and_aliases.  */
 949
 950 static bool
 951 check_callers (struct cgraph_node *node, void *has_hot_call)
 952 {
 953   struct cgraph_edge *e;
 954    for (e = node->callers; e; e = e->next_caller)
 955      {
 956        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
 957          return true;
 958        if (!can_inline_edge_p (e, true))
 959          return true;
 960        if (e->recursive_p ())
 961          return true;
 962        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 963          *(bool *)has_hot_call = true;
 964      }
 965   return false;
 966 }
 967
 968 /* If NODE has a caller, return true.  */
 969
 970 static bool
 971 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 972 {
 973   if (node->callers)
 974     return true;
 975   return false;
 976 }
 977
 978 /* Decide if inlining NODE would reduce unit size by eliminating
 979    the offline copy of function.
 980    When COLD is true the cold calls are considered, too.  */
 981
 982 static bool
 983 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 984 {
 985   bool has_hot_call = false;
 986
 987   /* Aliases gets inlined along with the function they alias.  */
 988   if (node->alias)
 989     return false;
 990   /* Already inlined?  */
 991   if (node->global.inlined_to)
 992     return false;
 993   /* Does it have callers?  */
 994   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
 995     return false;
 996   /* Inlining into all callers would increase size?  */
 997   if (estimate_growth (node) > 0)
 998     return false;
 999   /* All inlines must be possible.  */
1000   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1001                                          true))
1002     return false;
1003   if (!cold && !has_hot_call)
1004     return false;
1005   return true;
1006 }
1007
1008 /* A cost model driving the inlining heuristics in a way so the edges with
1009    smallest badness are inlined first.  After each inlining is performed
1010    the costs of all caller edges of nodes affected are recomputed so the
1011    metrics may accurately depend on values such as number of inlinable callers
1012    of the function or function body size.  */
1013
1014 static sreal
1015 edge_badness (struct cgraph_edge *edge, bool dump)
1016 {
1017   sreal badness;
1018   int growth, edge_time;
1019   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1020   struct inline_summary *callee_info = inline_summaries->get (callee);
1021   inline_hints hints;
1022   cgraph_node *caller = (edge->caller->global.inlined_to
1023                          ? edge->caller->global.inlined_to
1024                          : edge->caller);
1025
1026   growth = estimate_edge_growth (edge);
1027   edge_time = estimate_edge_time (edge);
1028   hints = estimate_edge_hints (edge);
1029   gcc_checking_assert (edge_time >= 0);
1030   gcc_checking_assert (edge_time <= callee_info->time);
1031   gcc_checking_assert (growth <= callee_info->size);
1032
1033   if (dump)
1034     {
1035       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
1036                xstrdup_for_dump (edge->caller->name ()),
1037                edge->caller->order,
1038                xstrdup_for_dump (callee->name ()),
1039                edge->callee->order);
1040       fprintf (dump_file, "      size growth %i, time %i ",
1041                growth,
1042                edge_time);
1043       dump_inline_hints (dump_file, hints);
1044       if (big_speedup_p (edge))
1045         fprintf (dump_file, " big_speedup");
1046       fprintf (dump_file, "\n");
1047     }
1048
1049   /* Always prefer inlining saving code size.  */
1050   if (growth <= 0)
1051     {
1052       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1053       if (dump)
1054         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1055                  growth);
1056     }
1057    /* Inlining into EXTERNAL functions is not going to change anything unless
1058       they are themselves inlined.  */
1059    else if (DECL_EXTERNAL (caller->decl))
1060     {
1061       if (dump)
1062         fprintf (dump_file, "      max: function is external\n");
1063       return sreal::max ();
1064     }
1065   /* When profile is available. Compute badness as:
1066
1067                  time_saved * caller_count
1068      goodness =  -------------------------------------------------
1069                  growth_of_caller * overall_growth * combined_size
1070
1071      badness = - goodness
1072
1073      Again use negative value to make calls with profile appear hotter
1074      then calls without.
1075   */
1076   else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
1077     {
1078       sreal numerator, denominator;
1079       int overall_growth;
1080
1081       numerator = (compute_uninlined_call_time (callee_info, edge)
1082                    - compute_inlined_call_time (edge, edge_time));
1083       if (numerator == 0)
1084         numerator = ((sreal) 1 >> 8);
1085       if (caller->count)
1086         numerator *= caller->count;
1087       else if (opt_for_fn (caller->decl, flag_branch_probabilities))
1088         numerator = numerator >> 11;
1089       denominator = growth;
1090
1091       overall_growth = callee_info->growth;
1092
1093       /* Look for inliner wrappers of the form:
1094
1095          inline_caller ()
1096            {
1097              do_fast_job...
1098              if (need_more_work)
1099                noninline_callee ();
1100            }
1101          Withhout panilizing this case, we usually inline noninline_callee
1102          into the inline_caller because overall_growth is small preventing
1103          further inlining of inline_caller.
1104
1105          Penalize only callgraph edges to functions with small overall
1106          growth ...
1107         */
1108       if (growth > overall_growth
1109           /* ... and having only one caller which is not inlined ... */
1110           && callee_info->single_caller
1111           && !edge->caller->global.inlined_to
1112           /* ... and edges executed only conditionally ... */
1113           && edge->frequency < CGRAPH_FREQ_BASE
1114           /* ... consider case where callee is not inline but caller is ... */
1115           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1116                && DECL_DECLARED_INLINE_P (caller->decl))
1117               /* ... or when early optimizers decided to split and edge
1118                  frequency still indicates splitting is a win ... */
1119               || (callee->split_part && !caller->split_part
1120                   && edge->frequency
1121                      < CGRAPH_FREQ_BASE
1122                        * PARAM_VALUE
1123                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
1124                   /* ... and do not overwrite user specified hints.   */
1125                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1126                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1127         {
1128           struct inline_summary *caller_info = inline_summaries->get (caller);
1129           int caller_growth = caller_info->growth;
1130
1131           /* Only apply the penalty when caller looks like inline candidate,
1132              and it is not called once and.  */
1133           if (!caller_info->single_caller && overall_growth < caller_growth
1134               && caller_info->inlinable
1135               && caller_info->size
1136                  < (DECL_DECLARED_INLINE_P (caller->decl)
1137                     ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1138             {
1139               if (dump)
1140                 fprintf (dump_file,
1141                          "     Wrapper penalty. Increasing growth %i to %i\n",
1142                          overall_growth, caller_growth);
1143               overall_growth = caller_growth;
1144             }
1145         }
1146       if (overall_growth > 0)
1147         {
1148           /* Strongly preffer functions with few callers that can be inlined
1149              fully.  The square root here leads to smaller binaries at average.
1150              Watch however for extreme cases and return to linear function
1151              when growth is large.  */
1152           if (overall_growth < 256)
1153             overall_growth *= overall_growth;
1154           else
1155             overall_growth += 256 * 256 - 256;
1156           denominator *= overall_growth;
1157         }
1158       denominator *= inline_summaries->get (caller)->self_size + growth;
1159
1160       badness = - numerator / denominator;
1161
1162       if (dump)
1163         {
1164           fprintf (dump_file,
1165                    "      %f: guessed profile. frequency %f, count %" PRId64
1166                    " caller count %" PRId64
1167                    " time w/o inlining %f, time w/ inlining %f"
1168                    " overall growth %i (current) %i (original)"
1169                    " %i (compensated)\n",
1170                    badness.to_double (),
1171                   (double)edge->frequency / CGRAPH_FREQ_BASE,
1172                    edge->count, caller->count,
1173                    compute_uninlined_call_time (callee_info, edge).to_double (),
1174                    compute_inlined_call_time (edge, edge_time).to_double (),
1175                    estimate_growth (callee),
1176                    callee_info->growth, overall_growth);
1177         }
1178     }
1179   /* When function local profile is not available or it does not give
1180      useful information (ie frequency is zero), base the cost on
1181      loop nest and overall size growth, so we optimize for overall number
1182      of functions fully inlined in program.  */
1183   else
1184     {
1185       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1186       badness = growth;
1187
1188       /* Decrease badness if call is nested.  */
1189       if (badness > 0)
1190         badness = badness >> nest;
1191       else
1192         badness = badness << nest;
1193       if (dump)
1194         fprintf (dump_file, "      %f: no profile. nest %i\n",
1195                  badness.to_double (), nest);
1196     }
1197   gcc_checking_assert (badness != 0);
1198
1199   if (edge->recursive_p ())
1200     badness = badness.shift (badness > 0 ? 4 : -4);
1201   if ((hints & (INLINE_HINT_indirect_call
1202                 | INLINE_HINT_loop_iterations
1203                 | INLINE_HINT_array_index
1204                 | INLINE_HINT_loop_stride))
1205       || callee_info->growth <= 0)
1206     badness = badness.shift (badness > 0 ? -2 : 2);
1207   if (hints & (INLINE_HINT_same_scc))
1208     badness = badness.shift (badness > 0 ? 3 : -3);
1209   else if (hints & (INLINE_HINT_in_scc))
1210     badness = badness.shift (badness > 0 ? 2 : -2);
1211   else if (hints & (INLINE_HINT_cross_module))
1212     badness = badness.shift (badness > 0 ? 1 : -1);
1213   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1214     badness = badness.shift (badness > 0 ? -4 : 4);
1215   else if ((hints & INLINE_HINT_declared_inline))
1216     badness = badness.shift (badness > 0 ? -3 : 3);
1217   if (dump)
1218     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1219   return badness;
1220 }
1221
1222 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1223 static inline void
1224 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1225 {
1226   sreal badness = edge_badness (edge, false);
1227   if (edge->aux)
1228     {
1229       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1230       gcc_checking_assert (n->get_data () == edge);
1231
1232       /* fibonacci_heap::replace_key does busy updating of the
1233          heap that is unnecesarily expensive.
1234          We do lazy increases: after extracting minimum if the key
1235          turns out to be out of date, it is re-inserted into heap
1236          with correct value.  */
1237       if (badness < n->get_key ())
1238         {
1239           if (dump_file && (dump_flags & TDF_DETAILS))
1240             {
1241               fprintf (dump_file,
1242                        "  decreasing badness %s/%i -> %s/%i, %f"
1243                        " to %f\n",
1244                        xstrdup_for_dump (edge->caller->name ()),
1245                        edge->caller->order,
1246                        xstrdup_for_dump (edge->callee->name ()),
1247                        edge->callee->order,
1248                        n->get_key ().to_double (),
1249                        badness.to_double ());
1250             }
1251           heap->decrease_key (n, badness);
1252         }
1253     }
1254   else
1255     {
1256        if (dump_file && (dump_flags & TDF_DETAILS))
1257          {
1258            fprintf (dump_file,
1259                     "  enqueuing call %s/%i -> %s/%i, badness %f\n",
1260                     xstrdup_for_dump (edge->caller->name ()),
1261                     edge->caller->order,
1262                     xstrdup_for_dump (edge->callee->name ()),
1263                     edge->callee->order,
1264                     badness.to_double ());
1265          }
1266       edge->aux = heap->insert (badness, edge);
1267     }
1268 }
1269
1270
1271 /* NODE was inlined.
1272    All caller edges need to be reset because
1273    size estimates change.  Similarly callees need to be reset
1274    because better context may be known.  */
1275
1276 static void
1277 reset_edge_caches (struct cgraph_node *node)
1278 {
1279   struct cgraph_edge *edge;
1280   struct cgraph_edge *e = node->callees;
1281   struct cgraph_node *where = node;
1282   struct ipa_ref *ref;
1283
1284   if (where->global.inlined_to)
1285     where = where->global.inlined_to;
1286
1287   for (edge = where->callers; edge; edge = edge->next_caller)
1288     if (edge->inline_failed)
1289       reset_edge_growth_cache (edge);
1290
1291   FOR_EACH_ALIAS (where, ref)
1292     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1293
1294   if (!e)
1295     return;
1296
1297   while (true)
1298     if (!e->inline_failed && e->callee->callees)
1299       e = e->callee->callees;
1300     else
1301       {
1302         if (e->inline_failed)
1303           reset_edge_growth_cache (e);
1304         if (e->next_callee)
1305           e = e->next_callee;
1306         else
1307           {
1308             do
1309               {
1310                 if (e->caller == node)
1311                   return;
1312                 e = e->caller->callers;
1313               }
1314             while (!e->next_callee);
1315             e = e->next_callee;
1316           }
1317       }
1318 }
1319
1320 /* Recompute HEAP nodes for each caller of NODE.
1321    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1322    When CHECK_INLINABLITY_FOR is set, re-check only that edge for
1323    inlinability.  Otherwise check all edges.  */
1324
1325 static void
1326 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1327                     bitmap updated_nodes,
1328                     struct cgraph_edge *check_inlinablity_for)
1329 {
1330   struct cgraph_edge *edge;
1331   struct ipa_ref *ref;
1332
1333   if ((!node->alias && !inline_summaries->get (node)->inlinable)
1334       || node->global.inlined_to)
1335     return;
1336   if (!bitmap_set_bit (updated_nodes, node->uid))
1337     return;
1338
1339   FOR_EACH_ALIAS (node, ref)
1340     {
1341       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1342       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1343     }
1344
1345   for (edge = node->callers; edge; edge = edge->next_caller)
1346     if (edge->inline_failed)
1347       {
1348         if (!check_inlinablity_for
1349             || check_inlinablity_for == edge)
1350           {
1351             if (can_inline_edge_p (edge, false)
1352                 && want_inline_small_function_p (edge, false))
1353               update_edge_key (heap, edge);
1354             else if (edge->aux)
1355               {
1356                 report_inline_failed_reason (edge);
1357                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1358                 edge->aux = NULL;
1359               }
1360           }
1361         else if (edge->aux)
1362           update_edge_key (heap, edge);
1363       }
1364 }
1365
1366 /* Recompute HEAP nodes for each uninlined call in NODE.
1367    This is used when we know that edge badnesses are going only to increase
1368    (we introduced new call site) and thus all we need is to insert newly
1369    created edges into heap.  */
1370
1371 static void
1372 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1373                     bitmap updated_nodes)
1374 {
1375   struct cgraph_edge *e = node->callees;
1376
1377   if (!e)
1378     return;
1379   while (true)
1380     if (!e->inline_failed && e->callee->callees)
1381       e = e->callee->callees;
1382     else
1383       {
1384         enum availability avail;
1385         struct cgraph_node *callee;
1386         /* We do not reset callee growth cache here.  Since we added a new call,
1387            growth chould have just increased and consequentely badness metric
1388            don't need updating.  */
1389         if (e->inline_failed
1390             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1391             && inline_summaries->get (callee)->inlinable
1392             && avail >= AVAIL_AVAILABLE
1393             && !bitmap_bit_p (updated_nodes, callee->uid))
1394           {
1395             if (can_inline_edge_p (e, false)
1396                 && want_inline_small_function_p (e, false))
1397               update_edge_key (heap, e);
1398             else if (e->aux)
1399               {
1400                 report_inline_failed_reason (e);
1401                 heap->delete_node ((edge_heap_node_t *) e->aux);
1402                 e->aux = NULL;
1403               }
1404           }
1405         if (e->next_callee)
1406           e = e->next_callee;
1407         else
1408           {
1409             do
1410               {
1411                 if (e->caller == node)
1412                   return;
1413                 e = e->caller->callers;
1414               }
1415             while (!e->next_callee);
1416             e = e->next_callee;
1417           }
1418       }
1419 }
1420
1421 /* Enqueue all recursive calls from NODE into priority queue depending on
1422    how likely we want to recursively inline the call.  */
1423
1424 static void
1425 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1426                         edge_heap_t *heap)
1427 {
1428   struct cgraph_edge *e;
1429   enum availability avail;
1430
1431   for (e = where->callees; e; e = e->next_callee)
1432     if (e->callee == node
1433         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1434             && avail > AVAIL_INTERPOSABLE))
1435       {
1436         /* When profile feedback is available, prioritize by expected number
1437            of calls.  */
1438         heap->insert (!max_count ? -e->frequency
1439                       : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1440                       e);
1441       }
1442   for (e = where->callees; e; e = e->next_callee)
1443     if (!e->inline_failed)
1444       lookup_recursive_calls (node, e->callee, heap);
1445 }
1446
1447 /* Decide on recursive inlining: in the case function has recursive calls,
1448    inline until body size reaches given argument.  If any new indirect edges
1449    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1450    is NULL.  */
1451
1452 static bool
1453 recursive_inlining (struct cgraph_edge *edge,
1454                     vec<cgraph_edge *> *new_edges)
1455 {
1456   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1457   edge_heap_t heap (sreal::min ());
1458   struct cgraph_node *node;
1459   struct cgraph_edge *e;
1460   struct cgraph_node *master_clone = NULL, *next;
1461   int depth = 0;
1462   int n = 0;
1463
1464   node = edge->caller;
1465   if (node->global.inlined_to)
1466     node = node->global.inlined_to;
1467
1468   if (DECL_DECLARED_INLINE_P (node->decl))
1469     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1470
1471   /* Make sure that function is small enough to be considered for inlining.  */
1472   if (estimate_size_after_inlining (node, edge)  >= limit)
1473     return false;
1474   lookup_recursive_calls (node, node, &heap);
1475   if (heap.empty ())
1476     return false;
1477
1478   if (dump_file)
1479     fprintf (dump_file,
1480              "  Performing recursive inlining on %s\n",
1481              node->name ());
1482
1483   /* Do the inlining and update list of recursive call during process.  */
1484   while (!heap.empty ())
1485     {
1486       struct cgraph_edge *curr = heap.extract_min ();
1487       struct cgraph_node *cnode, *dest = curr->callee;
1488
1489       if (!can_inline_edge_p (curr, true))
1490         continue;
1491
1492       /* MASTER_CLONE is produced in the case we already started modified
1493          the function. Be sure to redirect edge to the original body before
1494          estimating growths otherwise we will be seeing growths after inlining
1495          the already modified body.  */
1496       if (master_clone)
1497         {
1498           curr->redirect_callee (master_clone);
1499           reset_edge_growth_cache (curr);
1500         }
1501
1502       if (estimate_size_after_inlining (node, curr) > limit)
1503         {
1504           curr->redirect_callee (dest);
1505           reset_edge_growth_cache (curr);
1506           break;
1507         }
1508
1509       depth = 1;
1510       for (cnode = curr->caller;
1511            cnode->global.inlined_to; cnode = cnode->callers->caller)
1512         if (node->decl
1513             == curr->callee->ultimate_alias_target ()->decl)
1514           depth++;
1515
1516       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1517         {
1518           curr->redirect_callee (dest);
1519           reset_edge_growth_cache (curr);
1520           continue;
1521         }
1522
1523       if (dump_file)
1524         {
1525           fprintf (dump_file,
1526                    "   Inlining call of depth %i", depth);
1527           if (node->count)
1528             {
1529               fprintf (dump_file, " called approx. %.2f times per call",
1530                        (double)curr->count / node->count);
1531             }
1532           fprintf (dump_file, "\n");
1533         }
1534       if (!master_clone)
1535         {
1536           /* We need original clone to copy around.  */
1537           master_clone = node->create_clone (node->decl, node->count,
1538             CGRAPH_FREQ_BASE, false, vNULL,
1539             true, NULL, NULL);
1540           for (e = master_clone->callees; e; e = e->next_callee)
1541             if (!e->inline_failed)
1542               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1543           curr->redirect_callee (master_clone);
1544           reset_edge_growth_cache (curr);
1545         }
1546
1547       inline_call (curr, false, new_edges, &overall_size, true);
1548       lookup_recursive_calls (node, curr->callee, &heap);
1549       n++;
1550     }
1551
1552   if (!heap.empty () && dump_file)
1553     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1554
1555   if (!master_clone)
1556     return false;
1557
1558   if (dump_file)
1559     fprintf (dump_file,
1560              "\n   Inlined %i times, "
1561              "body grown from size %i to %i, time %i to %i\n", n,
1562              inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1563              inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1564
1565   /* Remove master clone we used for inlining.  We rely that clones inlined
1566      into master clone gets queued just before master clone so we don't
1567      need recursion.  */
1568   for (node = symtab->first_function (); node != master_clone;
1569        node = next)
1570     {
1571       next = symtab->next_function (node);
1572       if (node->global.inlined_to == master_clone)
1573         node->remove ();
1574     }
1575   master_clone->remove ();
1576   return true;
1577 }
1578
1579
1580 /* Return the largest size the unit may reach, given INSNS, the estimated
1581    size of the whole compilation unit before inlining.  */
1582
1583 static int
1584 compute_max_insns (int insns)
1585 {
1586   const int large_unit = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1587   const int base = insns < large_unit ? large_unit : insns;
1588   const int growth_pct = PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH);
1589
1590   /* Scale in 64 bits; BASE times the percentage need not fit in int.  */
1591   return (int64_t) base * (100 + growth_pct) / 100;
1592 }
1593
1594
1595 /* Pop every edge off NEW_EDGES; insert the inlinable ones into HEAP by badness.  */
1596
1597 static void
1598 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1599 {
1600   while (new_edges.length () != 0)
1601     {
1602       cgraph_edge *cand = new_edges.pop ();
1603       gcc_assert (cand->aux == NULL);
1604       if (!cand->inline_failed
1605           || !can_inline_edge_p (cand, true)
1606           || !want_inline_small_function_p (cand, true))
1607         continue;
1608       cand->aux = heap->insert (edge_badness (cand, false), cand);
1609     }
1610 }
1611
1612 /* Edge removal hook: drop E's node, if it has one, from the heap DATA points to.  */
1613
1614 static void
1615 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1616 {
1617   if (!e->aux)
1618     return;
1619   edge_heap_t *heap = (edge_heap_t *) data;
1620   heap->delete_node ((edge_heap_node_t *) e->aux);
1621   e->aux = NULL;
1622 }
1623
1624 /* Return true if speculation of edge E seems useful.
1625    If ANTICIPATE_INLINING is true, be conservative and hope that E
1626    may get inlined.
        E must be speculative and must not be an indirect call with unknown
        callee; this is asserted.  Edges that are not maybe-hot are never
        considered useful.  */
1627
1628 bool
1629 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1630 {
1631   enum availability avail;
1632   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1633                                                                  e->caller);
1634   struct cgraph_edge *direct, *indirect;
1635   struct ipa_ref *ref;
1636
1637   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1638   /* Speculation on an edge that is not hot is rejected right away.  */
1639   if (!e->maybe_hot_p ())
1640     return false;
1641
1642   /* See if IP optimizations found something potentially useful about the
1643      function.  For now we look only for CONST/PURE flags.  Almost everything
1644      else we propagate is useless.  The speculation is useful when the
             target carries the flag but the indirect edge of the same
             speculative call does not.  */
1645   if (avail >= AVAIL_AVAILABLE)
1646     {
1647       int ecf_flags = flags_from_decl_or_type (target->decl);
1648       if (ecf_flags & ECF_CONST)
1649         {
1650           e->speculative_call_info (direct, indirect, ref);
1651           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1652             return true;
1653         }
1654       else if (ecf_flags & ECF_PURE)
1655         {
1656           e->speculative_call_info (direct, indirect, ref);
1657           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1658             return true;
1659         }
1660     }
1661   /* If we did not manage to inline the function nor redirect
1662      to an ipa-cp clone (those are seen by having local flag set),
1663      it is probably pointless to keep the speculation unless hardware is
1664      missing indirect call predictor.  */
1665   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1666     return false;
1667   /* For overwritable targets there is not much to do.  */
1668   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1669     return false;
1670   /* OK, speculation seems interesting.  */
1671   return true;
1672 }
1673
1674 /* EDGE is known not to get inlined.  If it is speculative and the speculation
1675    is not useful, resolve it and refresh the affected keys in EDGE_HEAP.  */
1676
1677 static void
1678 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1679 {
1680   if (!edge->speculative || speculation_useful_p (edge, false))
1681     return;
1682
1683   /* Look the caller up before the speculation is resolved.  */
1684   struct cgraph_node *root = edge->caller;
1685   if (root->global.inlined_to)
1686     root = root->global.inlined_to;
1687
1688   spec_rem += edge->count;
1689   edge->resolve_speculation ();
1690   reset_edge_caches (root);
1691   inline_update_overall_summary (root);
1692
1693   bitmap visited = BITMAP_ALLOC (NULL);
1694   update_caller_keys (edge_heap, root, visited, NULL);
1695   update_callee_keys (edge_heap, root, visited);
1696   BITMAP_FREE (visited);
1697 }
1698
1699 /* Return true if NODE should be accounted for overall size estimate.
1700    External functions, functions optimized for size and functions unlikely
1701    to be executed are left out, so only the hot part of program is measured.  */
1702
1703 bool
1704 inline_account_function_p (struct cgraph_node *node)
1705 {
1706   if (DECL_EXTERNAL (node->decl) || opt_for_fn (node->decl, optimize_size))
1707     return false;
1708   return node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED;
1709 }
1710
1711 /* Add the number of caller edges of NODE to the int that DATA points to.
1712    Worker for call_for_symbol_and_aliases; always returns false.  */
1713
1714 static bool
1715 sum_callers (struct cgraph_node *node, void *data)
1716 {
1717   int *total = (int *) data;
1718   struct cgraph_edge *caller_edge = node->callers;
1719
1720   for (; caller_edge != NULL; caller_edge = caller_edge->next_caller)
1721     ++*total;
1722   return false;
1723 }
1724
1725 /* We use greedy algorithm for inlining of small functions:
1726    All inline candidates are put into prioritized heap ordered in
1727    increasing badness.
1728
1729    The inlining of small functions is bounded by unit growth parameters.

        The work is done in three steps.  First the initial unit size, the
        growth and single-caller flag of every function, the SCC numbers and
        MAX_COUNT are computed.  Then every call edge accepted by
        can_inline_edge_p and want_inline_small_function_p is put into the
        heap, and speculative edges that are not useful are resolved.  Finally
        the edge with minimal badness is extracted repeatedly, re-checked and
        either inlined (recursive edges are handed to recursive_inlining) or
        marked with the reason of failure; after each inlining the keys of
        the affected edges are updated.  */
1730
1731 static void
1732 inline_small_functions (void)
1733 {
1734   struct cgraph_node *node;
1735   struct cgraph_edge *edge;
1736   edge_heap_t edge_heap (sreal::min ());
1737   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1738   int min_size, max_size;
1739   auto_vec<cgraph_edge *> new_indirect_edges;
1740   int initial_size = 0;
1741   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1742   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1743   new_indirect_edges.create (8);
1744
1745   edge_removal_hook_holder
1746     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1747
1748   /* Compute overall unit size and other global parameters used by badness
1749      metrics.  */
1750
1751   max_count = 0;
1752   ipa_reduced_postorder (order, true, true, NULL);
1753   free (order);
1754
1755   FOR_EACH_DEFINED_FUNCTION (node)
1756     if (!node->global.inlined_to)
1757       {
1758         if (!node->alias && node->analyzed
1759             && (node->has_gimple_body_p () || node->thunk.thunk_p))
1760           {
1761             struct inline_summary *info = inline_summaries->get (node);
1762             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1763
1764             /* Do not account external functions, they will be optimized out
1765                if not inlined.  Also only count the non-cold portion of program.  */
1766             if (inline_account_function_p (node))
1767               initial_size += info->size;
1768             info->growth = estimate_growth (node);
1769
1770             int num_calls = 0;
1771             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1772                                                true);
1773             if (num_calls == 1)
1774               info->single_caller = true;
                 /* Number every member of the cycle NODE belongs to.  The walk
                    must advance through N2's own DFS info; stepping through
                    NODE's info never gets past NODE's immediate successor.  */
1775             if (dfs && dfs->next_cycle)
1776               {
1777                 struct cgraph_node *n2;
1778                 int id = dfs->scc_no + 1;
1779                 for (n2 = node; n2;
1780                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1781                   {
1782                     struct inline_summary *info2 = inline_summaries->get (n2);
1783                     if (info2->scc_no)
1784                       break;
1785                     info2->scc_no = id;
1786                   }
1787               }
1788           }
1789
1790         for (edge = node->callers; edge; edge = edge->next_caller)
1791           if (max_count < edge->count)
1792             max_count = edge->count;
1793       }
1794   ipa_free_postorder_info ();
1795   initialize_growth_caches ();
1796
1797   if (dump_file)
1798     fprintf (dump_file,
1799              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1800              initial_size);
1801
1802   overall_size = initial_size;
1803   max_size = compute_max_insns (overall_size);
1804   min_size = overall_size;
1805
1806   /* Populate the heap with all edges we might inline.  */
1807
1808   FOR_EACH_DEFINED_FUNCTION (node)
1809     {
1810       bool update = false;
1811       struct cgraph_edge *next = NULL;
1812       bool has_speculative = false;
1813
1814       if (dump_file)
1815         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1816                  node->name (), node->order);
1817
1818       for (edge = node->callees; edge; edge = next)
1819         {
1820           next = edge->next_callee;
1821           if (edge->inline_failed
1822               && !edge->aux
1823               && can_inline_edge_p (edge, true)
1824               && want_inline_small_function_p (edge, true)
1825               && edge->inline_failed)
1826             {
1827               gcc_assert (!edge->aux);
1828               update_edge_key (&edge_heap, edge);
1829             }
1830           if (edge->speculative)
1831             has_speculative = true;
1832         }
           /* Resolve speculations that are not worth keeping.  NEXT is fetched
              before EDGE is processed, as in the loop above; that loop leaves
              NEXT == NULL, so without reloading it here only the first callee
              would be examined.  */
1833       if (has_speculative)
1834         for (edge = node->callees; edge; edge = next)
1835           {
1836             next = edge->next_callee;
1837             if (edge->speculative
1838                 && !speculation_useful_p (edge, edge->aux != NULL))
1839               edge->resolve_speculation (), update = true;
1840           }
1841       if (update)
1842         {
1843           struct cgraph_node *where = node->global.inlined_to
1844                                       ? node->global.inlined_to : node;
1845           inline_update_overall_summary (where);
1846           reset_edge_caches (where);
1847           update_caller_keys (&edge_heap, where,
1848                               updated_nodes, NULL);
1849           update_callee_keys (&edge_heap, where,
1850                               updated_nodes);
1851           bitmap_clear (updated_nodes);
1852         }
1853     }
1854
1855   gcc_assert (in_lto_p
1856               || !max_count
1857               || (profile_info && flag_branch_probabilities));
1858
1859   while (!edge_heap.empty ())
1860     {
1861       int old_size = overall_size;
1862       struct cgraph_node *where, *callee;
1863       sreal badness = edge_heap.min_key ();
1864       sreal current_badness;
1865       int growth;
1866
1867       edge = edge_heap.extract_min ();
1868       gcc_assert (edge->aux);
1869       edge->aux = NULL;
1870       if (!edge->inline_failed || !edge->callee->analyzed)
1871         continue;
1872
1873 #if CHECKING_P
1874       /* Be sure that caches are maintained consistent.  */
1875       sreal cached_badness = edge_badness (edge, false);
1876
1877       int old_size_est = estimate_edge_size (edge);
1878       int old_time_est = estimate_edge_time (edge);
1879       int old_hints_est = estimate_edge_hints (edge);
1880
1881       reset_edge_growth_cache (edge);
1882       gcc_assert (old_size_est == estimate_edge_size (edge));
1883       gcc_assert (old_time_est == estimate_edge_time (edge));
1884       /* FIXME:
1885
1886          gcc_assert (old_hints_est == estimate_edge_hints (edge));
1887
1888          fails with profile feedback because some hints depends on
1889          maybe_hot_edge_p predicate and because callee gets inlined to other
1890          calls, the edge may become cold.
1891          This ought to be fixed by computing relative probabilities
1892          for given invocation but that will be better done once whole
1893          code is converted to sreals.  Disable for now and revert to "wrong"
1894          value so enable/disable checking paths agree.  */
1895       edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1896
1897       /* When updating the edge costs, we only decrease badness in the keys.
1898          Increases of badness are handled lazily; when we see key with out
1899          of date value on it, we re-insert it now.  */
1900       current_badness = edge_badness (edge, false);
1901       /* Disable checking for profile because roundoff errors may cause slight
1902          deviations in the order.  */
1903       gcc_assert (max_count || cached_badness == current_badness);
1904       gcc_assert (current_badness >= badness);
1905 #else
1906       current_badness = edge_badness (edge, false);
1907 #endif
1908       if (current_badness != badness)
1909         {
1910           if (edge_heap.min () && current_badness > edge_heap.min_key ())
1911             {
1912               edge->aux = edge_heap.insert (current_badness, edge);
1913               continue;
1914             }
1915           else
1916             badness = current_badness;
1917         }
1918
1919       if (!can_inline_edge_p (edge, true))
1920         {
1921           resolve_noninline_speculation (&edge_heap, edge);
1922           continue;
1923         }
1924
1925       callee = edge->callee->ultimate_alias_target ();
1926       growth = estimate_edge_growth (edge);
1927       if (dump_file)
1928         {
1929           fprintf (dump_file,
1930                    "\nConsidering %s/%i with %i size\n",
1931                    callee->name (), callee->order,
1932                    inline_summaries->get (callee)->size);
1933           fprintf (dump_file,
1934                    " to be inlined into %s/%i in %s:%i\n"
1935                    " Estimated badness is %f, frequency %.2f.\n",
1936                    edge->caller->name (), edge->caller->order,
1937                    edge->call_stmt
1938                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
1939                                                         edge->call_stmt))
1940                        > BUILTINS_LOCATION)
1941                    ? gimple_filename ((const gimple *) edge->call_stmt)
1942                    : "unknown",
1943                    edge->call_stmt
1944                    ? gimple_lineno ((const gimple *) edge->call_stmt)
1945                    : -1,
1946                    badness.to_double (),
1947                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1948           if (edge->count)
1949             fprintf (dump_file," Called %" PRId64"x\n",
1950                      edge->count);
1951           if (dump_flags & TDF_DETAILS)
1952             edge_badness (edge, true);
1953         }
1954
1955       if (overall_size + growth > max_size
1956           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1957         {
1958           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1959           report_inline_failed_reason (edge);
1960           resolve_noninline_speculation (&edge_heap, edge);
1961           continue;
1962         }
1963
1964       if (!want_inline_small_function_p (edge, true))
1965         {
1966           resolve_noninline_speculation (&edge_heap, edge);
1967           continue;
1968         }
1969
1970       /* Heuristics for inlining small functions work poorly for
1971          recursive calls where we do effects similar to loop unrolling.
1972          When inlining such edge seems profitable, leave decision on
1973          specific inliner.  */
1974       if (edge->recursive_p ())
1975         {
1976           where = edge->caller;
1977           if (where->global.inlined_to)
1978             where = where->global.inlined_to;
1979           if (!recursive_inlining (edge,
1980                                    opt_for_fn (edge->caller->decl,
1981                                                flag_indirect_inlining)
1982                                    ? &new_indirect_edges : NULL))
1983             {
1984               edge->inline_failed = CIF_RECURSIVE_INLINING;
1985               resolve_noninline_speculation (&edge_heap, edge);
1986               continue;
1987             }
1988           reset_edge_caches (where);
1989           /* Recursive inliner inlines all recursive calls of the function
1990              at once. Consequently we need to update all callee keys.  */
1991           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1992             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1993           update_callee_keys (&edge_heap, where, updated_nodes);
1994           bitmap_clear (updated_nodes);
1995         }
1996       else
1997         {
1998           struct cgraph_node *outer_node = NULL;
1999           int depth = 0;
2000
2001           /* Consider the case where self recursive function A is inlined
2002              into B.  This is desired optimization in some cases, since it
2003              leads to effect similar of loop peeling and we might completely
2004              optimize out the recursive call.  However we must be extra
2005              selective.  */
2006
2007           where = edge->caller;
2008           while (where->global.inlined_to)
2009             {
2010               if (where->decl == callee->decl)
2011                 outer_node = where, depth++;
2012               where = where->callers->caller;
2013             }
2014           if (outer_node
2015               && !want_inline_self_recursive_call_p (edge, outer_node,
2016                                                      true, depth))
2017             {
2018               edge->inline_failed
2019                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2020                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2021               resolve_noninline_speculation (&edge_heap, edge);
2022               continue;
2023             }
2024           else if (depth && dump_file)
2025             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2026
2027           gcc_checking_assert (!callee->global.inlined_to);
2028           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2029           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2030
2031           reset_edge_caches (edge->callee);
2032
2033           update_callee_keys (&edge_heap, where, updated_nodes);
2034         }
2035       where = edge->caller;
2036       if (where->global.inlined_to)
2037         where = where->global.inlined_to;
2038
2039       /* Our profitability metric can depend on local properties
2040          such as number of inlinable calls and size of the function body.
2041          After inlining these properties might change for the function we
2042          inlined into (since its body size changed) and for the functions
2043          called by function we inlined (since number of its inlinable callers
2044          might change).  */
2045       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2046       /* Offline copy count has possibly changed, recompute if profile is
2047          available.  */
2048       if (max_count)
2049         {
2050           struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2051           if (n != edge->callee && n->analyzed)
2052             update_callee_keys (&edge_heap, n, updated_nodes);
2053         }
2054       bitmap_clear (updated_nodes);
2055
2056       if (dump_file)
2057         {
2058           fprintf (dump_file,
2059                    " Inlined into %s which now has time %i and size %i,"
2060                    "net change of %+i.\n",
2061                    edge->caller->name (),
2062                    inline_summaries->get (edge->caller)->time,
2063                    inline_summaries->get (edge->caller)->size,
2064                    overall_size - old_size);
2065         }
           /* The growth limit is derived from the smallest unit size seen so
              far, so it is recomputed whenever the unit shrinks below it.  */
2066       if (min_size > overall_size)
2067         {
2068           min_size = overall_size;
2069           max_size = compute_max_insns (min_size);
2070
2071           if (dump_file)
2072             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2073         }
2074     }
2075
2076   free_growth_caches ();
2077   if (dump_file)
2078     fprintf (dump_file,
2079              "Unit growth for small function inlining: %i->%i (%i%%)\n",
2080              initial_size, overall_size,
2081              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2082   BITMAP_FREE (updated_nodes);
2083   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2084 }
2085
2086 /* Flatten NODE.  Performed both during early inlining and
2087    at IPA inlining time.
        EARLY selects the inlinability test: can_early_inline_edge_p when
        true, can_inline_edge_p otherwise.  NODE->aux serves as the "being
        flattened" marker used to detect cycles; it is NULL on entry
        (asserted) and NULL again on return.  */
2088
2089 static void
2090 flatten_function (struct cgraph_node *node, bool early)
2091 {
2092   struct cgraph_edge *e;
2093
2094   /* We shouldn't be called recursively when we are being processed.  */
2095   gcc_assert (node->aux == NULL);
2096   /* Mark NODE as being processed; a callee with AUX set means a cycle.  */
2097   node->aux = (void *) node;
2098
2099   for (e = node->callees; e; e = e->next_callee)
2100     {
2101       struct cgraph_node *orig_callee;
2102       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2103
2104       /* We've hit cycle?  It is time to give up.  */
2105       if (callee->aux)
2106         {
2107           if (dump_file)
2108             fprintf (dump_file,
2109                      "Not inlining %s into %s to avoid cycle.\n",
2110                      xstrdup_for_dump (callee->name ()),
2111                      xstrdup_for_dump (e->caller->name ()));
2112           e->inline_failed = CIF_RECURSIVE_INLINING;
2113           continue;
2114         }
2115
2116       /* When the edge is already inlined, we just need to recurse into
2117          it in order to fully flatten the leaves.  */
2118       if (!e->inline_failed)
2119         {
2120           flatten_function (callee, early);
2121           continue;
2122         }
2123
2124       /* Flatten attribute needs to be processed during late inlining. For
2125          extra code quality we however do flattening during early optimization,
2126          too.  */
2127       if (!early
2128           ? !can_inline_edge_p (e, true)
2129           : !can_early_inline_edge_p (e))
2130         continue;
2131
2132       if (e->recursive_p ())
2133         {
2134           if (dump_file)
2135             fprintf (dump_file, "Not inlining: recursive call.\n");
2136           continue;
2137         }
2138       /* Caller and callee must agree on whether they are in SSA form.  */
2139       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2140           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2141         {
2142           if (dump_file)
2143             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2144           continue;
2145         }
2146
2147       /* Inline the edge and flatten the inline clone.  Avoid
2148          recursing through the original node if the node was cloned.
              When E's callee differs from ORIG_CALLEE after inlining,
              ORIG_CALLEE->aux is set for the duration of the recursive call
              so that the cycle check above also fires for it.  */
2149       if (dump_file)
2150         fprintf (dump_file, " Inlining %s into %s.\n",
2151                  xstrdup_for_dump (callee->name ()),
2152                  xstrdup_for_dump (e->caller->name ()));
2153       orig_callee = callee;
2154       inline_call (e, true, NULL, NULL, false);
2155       if (e->callee != orig_callee)
2156         orig_callee->aux = (void *) node;
2157       flatten_function (e->callee, early);
2158       if (e->callee != orig_callee)
2159         orig_callee->aux = NULL;
2160     }
2161   /* Clear the marker; refresh the summary unless NODE is itself inlined.  */
2162   node->aux = NULL;
2163   if (!node->global.inlined_to)
2164     inline_update_overall_summary (node);
2165 }
2166
2167 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2168    DATA points to number of calls originally found so we avoid infinite
2169    recursion.
        Every caller inlined into is added to CALLERS; the overall summaries
        of those callers are not updated here.  Returns true when inline_call
        reported that the callee was removed, false otherwise.  When a caller
        edge that cannot be inlined or is recursive is found, the count DATA
        points to is set to zero and false is returned.  */
2170
2171 static bool
2172 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2173                          hash_set<cgraph_node *> *callers)
2174 {
2175   int *num_calls = (int *)data;
2176   bool callee_removed = false;
2177   /* Each iteration handles the first edge of NODE's caller list.  */
2178   while (node->callers && !node->global.inlined_to)
2179     {
2180       struct cgraph_node *caller = node->callers->caller;
2181
2182       if (!can_inline_edge_p (node->callers, true)
2183           || node->callers->recursive_p ())
2184         {
2185           if (dump_file)
2186             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2187           *num_calls = 0;
2188           return false;
2189         }
2190
2191       if (dump_file)
2192         {
2193           fprintf (dump_file,
2194                    "\nInlining %s size %i.\n",
2195                    node->name (),
2196                    inline_summaries->get (node)->size);
2197           fprintf (dump_file,
2198                    " Called once from %s %i insns.\n",
2199                    node->callers->caller->name (),
2200                    inline_summaries->get (node->callers->caller)->size);
2201         }
2202
2203       /* Remember which callers we inlined to, delaying updating the
2204          overall summary.  */
2205       callers->add (node->callers->caller);
2206       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2207       if (dump_file)
2208         fprintf (dump_file,
2209                  " Inlined into %s which now has %i size\n",
2210                  caller->name (),
2211                  inline_summaries->get (caller)->size);
           /* *NUM_CALLS is the remaining budget.  It is decremented after
              every inlined edge; finding it already at zero ends the walk.  */
2212       if (!(*num_calls)--)
2213         {
2214           if (dump_file)
2215             fprintf (dump_file, "New calls found; giving up.\n");
2216           return callee_removed;
2217         }
2218       if (callee_removed)
2219         return true;
2220     }
2221   return false;
2222 }
2223
2224 /* Run inline_to_all_callers_1 on NODE and DATA, then bring the overall
2225    summary of every caller it inlined into up to date.  */
2226
2227 static bool
2228 inline_to_all_callers (struct cgraph_node *node, void *data)
2229 {
2230   typedef hash_set<cgraph_node *> node_set;
2231   node_set touched;
2232   const bool result = inline_to_all_callers_1 (node, data, &touched);
2233
2234   /* The summaries were left stale on purpose.  Refreshing each caller once
2235      avoids quadratic behavior when there are many calls to one function.  */
2236   for (node_set::iterator it = touched.begin (); it != touched.end (); ++it)
2237     inline_update_overall_summary (*it);
2238   return result;
2239 }
2240
2241 /* Dump the summed time estimate of all functions, plain and profile weighted.  */
2242 static void
2243 dump_overall_stats (void)
2244 {
2245   struct cgraph_node *fn;
2246   int64_t total = 0;
2247   int64_t total_weighted = 0;
2248
2249   FOR_EACH_DEFINED_FUNCTION (fn)
2250     {
2251       if (fn->global.inlined_to || fn->alias)
2252         continue;
2253       const int fn_time = inline_summaries->get (fn)->time;
2254       total += fn_time;
2255       total_weighted += fn_time * fn->count;
2256     }
2257   fprintf (dump_file, "Overall time estimate: %" PRId64
2258            " weighted by profile: %" PRId64 "\n", total, total_weighted);
2259 }
2260
2261 /* Output some useful stats about inlining.
        Walks the callee and indirect-call edges of every defined function
        and sums their counts by category.  The count totals are printed only
        when MAX_COUNT is nonzero; the overall time estimate and the table of
        inline failure reasons are always printed.  Writes to DUMP_FILE
        without checking it.  */
2262
2263 static void
2264 dump_inline_stats (void)
2265 {
2266   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2267   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2268   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2269   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2270   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2271   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2272   int64_t reason[CIF_N_REASONS][3];  /* [0] count, [1] frequency, [2] edges.  */
2273   int i;
2274   struct cgraph_node *node;
2275
2276   memset (reason, 0, sizeof (reason));
2277   FOR_EACH_DEFINED_FUNCTION (node)
2278   {
2279     struct cgraph_edge *e;
2280     for (e = node->callees; e; e = e->next_callee)
2281       {
               /* Edges that were not inlined are also tallied per reason.  */
2282         if (e->inline_failed)
2283           {
2284             reason[(int) e->inline_failed][0] += e->count;
2285             reason[(int) e->inline_failed][1] += e->frequency;
2286             reason[(int) e->inline_failed][2] ++;
2287             if (DECL_VIRTUAL_P (e->callee->decl))
2288               {
2289                 if (e->indirect_inlining_edge)
2290                   noninlined_virt_indir_cnt += e->count;
2291                 else
2292                   noninlined_virt_cnt += e->count;
2293               }
2294             else
2295               {
2296                 if (e->indirect_inlining_edge)
2297                   noninlined_indir_cnt += e->count;
2298                 else
2299                   noninlined_cnt += e->count;
2300               }
2301           }
2302         else
2303           {
                   /* Inlined speculative edges are counted on their own,
                      split only by whether the callee is virtual.  */
2304             if (e->speculative)
2305               {
2306                 if (DECL_VIRTUAL_P (e->callee->decl))
2307                   inlined_speculative_ply += e->count;
2308                 else
2309                   inlined_speculative += e->count;
2310               }
2311             else if (DECL_VIRTUAL_P (e->callee->decl))
2312               {
2313                 if (e->indirect_inlining_edge)
2314                   inlined_virt_indir_cnt += e->count;
2315                 else
2316                   inlined_virt_cnt += e->count;
2317               }
2318             else
2319               {
2320                 if (e->indirect_inlining_edge)
2321                   inlined_indir_cnt += e->count;
2322                 else
2323                   inlined_cnt += e->count;
2324               }
2325           }
2326       }
2327     for (e = node->indirect_calls; e; e = e->next_callee)
2328       if (e->indirect_info->polymorphic)
2329         indirect_poly_cnt += e->count;
2330       else
2331         indirect_cnt += e->count;
2332   }
2333   if (max_count)
2334     {
2335       fprintf (dump_file,
2336                "Inlined %" PRId64 " + speculative "
2337                "%" PRId64 " + speculative polymorphic "
2338                "%" PRId64 " + previously indirect "
2339                "%" PRId64 " + virtual "
2340                "%" PRId64 " + virtual and previously indirect "
2341                "%" PRId64 "\n" "Not inlined "
2342                "%" PRId64 " + previously indirect "
2343                "%" PRId64 " + virtual "
2344                "%" PRId64 " + virtual and previously indirect "
2345                "%" PRId64 " + stil indirect "
2346                "%" PRId64 " + still indirect polymorphic "
2347                "%" PRId64 "\n", inlined_cnt,
2348                inlined_speculative, inlined_speculative_ply,
2349                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2350                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2351                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2352       fprintf (dump_file,
2353                "Removed speculations %" PRId64 "\n",
2354                spec_rem);
2355     }
2356   dump_overall_stats ();
2357   fprintf (dump_file, "\nWhy inlining failed?\n");
         /* One line per reason that occurred at least once.  The edge and
            frequency totals are narrowed to int for printing.  */
2358   for (i = 0; i < CIF_N_REASONS; i++)
2359     if (reason[i][2])
2360       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %" PRId64" count\n",
2361                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2362                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2363 }
2364
2365 /* Decide on the inlining.  We do so in the topological order to avoid
2366    expenses on updating data structures.  */
2367
2368 static unsigned int
2369 ipa_inline (void)
2370 {
2371   struct cgraph_node *node;
2372   int nnodes;
2373   struct cgraph_node **order;
2374   int i;
2375   int cold;
2376   bool remove_functions = false;
2377
2378   if (!optimize)
2379     return 0;
2380
2381   cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2382   percent_rec = (sreal) 1 / (sreal) 100;
2383
2384   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2385
2386   if (in_lto_p && optimize)
2387     ipa_update_after_lto_read ();
2388
2389   if (dump_file)
2390     dump_inline_summaries (dump_file);
2391
2392   nnodes = ipa_reverse_postorder (order);
2393
2394   FOR_EACH_FUNCTION (node)
2395     {
2396       node->aux = 0;
2397
2398       /* Recompute the default reasons for inlining because they may have
2399          changed during merging.  */
2400       if (in_lto_p)
2401         {
2402           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2403             {
2404               gcc_assert (e->inline_failed);
2405               initialize_inline_failed (e);
2406             }
2407           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2408             initialize_inline_failed (e);
2409         }
2410     }
2411
2412   if (dump_file)
2413     fprintf (dump_file, "\nFlattening functions:\n");
2414
2415   /* In the first pass handle functions to be flattened.  Do this with
2416      a priority so none of our later choices will make this impossible.  */
2417   for (i = nnodes - 1; i >= 0; i--)
2418     {
2419       node = order[i];
2420
2421       /* Handle nodes to be flattened.
2422          Ideally when processing callees we stop inlining at the
2423          entry of cycles, possibly cloning that entry point and
2424          try to flatten itself turning it into a self-recursive
2425          function.  */
2426       if (lookup_attribute ("flatten",
2427                             DECL_ATTRIBUTES (node->decl)) != NULL)
2428         {
2429           if (dump_file)
2430             fprintf (dump_file,
2431                      "Flattening %s\n", node->name ());
2432           flatten_function (node, false);
2433         }
2434     }
2435   if (dump_file)
2436     dump_overall_stats ();
2437
2438   inline_small_functions ();
2439
2440   gcc_assert (symtab->state == IPA_SSA);
2441   symtab->state = IPA_SSA_AFTER_INLINING;
2442   /* Do first after-inlining removal.  We want to remove all "stale" extern
2443      inline functions and virtual functions so we really know what is called
2444      once.  */
2445   symtab->remove_unreachable_nodes (dump_file);
2446   free (order);
2447
2448   /* Inline functions with a property that after inlining into all callers the
2449      code size will shrink because the out-of-line copy is eliminated.
2450      We do this regardless on the callee size as long as function growth limits
2451      are met.  */
2452   if (dump_file)
2453     fprintf (dump_file,
2454              "\nDeciding on functions to be inlined into all callers and "
2455              "removing useless speculations:\n");
2456
2457   /* Inlining one function called once has good chance of preventing
2458      inlining other function into the same callee.  Ideally we should
2459      work in priority order, but probably inlining hot functions first
2460      is good cut without the extra pain of maintaining the queue.
2461
2462      ??? this is not really fitting the bill perfectly: inlining function
2463      into callee often leads to better optimization of callee due to
2464      increased context for optimization.
2465      For example if main() function calls a function that outputs help
2466      and then function that does the main optmization, we should inline
2467      the second with priority even if both calls are cold by themselves.
2468
2469      We probably want to implement new predicate replacing our use of
2470      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2471      to be hot.  */
2472   for (cold = 0; cold <= 1; cold ++)
2473     {
2474       FOR_EACH_DEFINED_FUNCTION (node)
2475         {
2476           struct cgraph_edge *edge, *next;
2477           bool update=false;
2478
2479           for (edge = node->callees; edge; edge = next)
2480             {
2481               next = edge->next_callee;
2482               if (edge->speculative && !speculation_useful_p (edge, false))
2483                 {
2484                   edge->resolve_speculation ();
2485                   spec_rem += edge->count;
2486                   update = true;
2487                   remove_functions = true;
2488                 }
2489             }
2490           if (update)
2491             {
2492               struct cgraph_node *where = node->global.inlined_to
2493                                           ? node->global.inlined_to : node;
2494               reset_edge_caches (where);
2495               inline_update_overall_summary (where);
2496             }
2497           if (want_inline_function_to_all_callers_p (node, cold))
2498             {
2499               int num_calls = 0;
2500               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2501                                                  true);
2502               while (node->call_for_symbol_and_aliases
2503                        (inline_to_all_callers, &num_calls, true))
2504                 ;
2505               remove_functions = true;
2506             }
2507         }
2508     }
2509
2510   /* Free ipa-prop structures if they are no longer needed.  */
2511   if (optimize)
2512     ipa_free_all_structures_after_iinln ();
2513
2514   if (dump_file)
2515     {
2516       fprintf (dump_file,
2517                "\nInlined %i calls, eliminated %i functions\n\n",
2518                ncalls_inlined, nfunctions_inlined);
2519       dump_inline_stats ();
2520     }
2521
2522   if (dump_file)
2523     dump_inline_summaries (dump_file);
2524   /* In WPA we use inline summaries for partitioning process.  */
2525   if (!flag_wpa)
2526     inline_free_summary ();
2527   return remove_functions ? TODO_remove_functions : 0;
2528 }
2529
2530 /* Inline always-inline function calls in NODE.  */
2531
2532 static bool
2533 inline_always_inline_functions (struct cgraph_node *node)
2534 {
2535   struct cgraph_edge *e;
2536   bool inlined = false;
2537
2538   for (e = node->callees; e; e = e->next_callee)
2539     {
2540       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2541       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2542         continue;
2543
2544       if (e->recursive_p ())
2545         {
2546           if (dump_file)
2547             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2548                      e->callee->name ());
2549           e->inline_failed = CIF_RECURSIVE_INLINING;
2550           continue;
2551         }
2552
2553       if (!can_early_inline_edge_p (e))
2554         {
2555           /* Set inlined to true if the callee is marked "always_inline" but
2556              is not inlinable.  This will allow flagging an error later in
2557              expand_call_inline in tree-inline.c.  */
2558           if (lookup_attribute ("always_inline",
2559                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2560             inlined = true;
2561           continue;
2562         }
2563
2564       if (dump_file)
2565         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2566                  xstrdup_for_dump (e->callee->name ()),
2567                  xstrdup_for_dump (e->caller->name ()));
2568       inline_call (e, true, NULL, NULL, false);
2569       inlined = true;
2570     }
2571   if (inlined)
2572     inline_update_overall_summary (node);
2573
2574   return inlined;
2575 }
2576
2577 /* Decide on the inlining.  We do so in the topological order to avoid
2578    expenses on updating data structures.  */
2579
2580 static bool
2581 early_inline_small_functions (struct cgraph_node *node)
2582 {
2583   struct cgraph_edge *e;
2584   bool inlined = false;
2585
2586   for (e = node->callees; e; e = e->next_callee)
2587     {
2588       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2589       if (!inline_summaries->get (callee)->inlinable
2590           || !e->inline_failed)
2591         continue;
2592
2593       /* Do not consider functions not declared inline.  */
2594       if (!DECL_DECLARED_INLINE_P (callee->decl)
2595           && !opt_for_fn (node->decl, flag_inline_small_functions)
2596           && !opt_for_fn (node->decl, flag_inline_functions))
2597         continue;
2598
2599       if (dump_file)
2600         fprintf (dump_file, "Considering inline candidate %s.\n",
2601                  callee->name ());
2602
2603       if (!can_early_inline_edge_p (e))
2604         continue;
2605
2606       if (e->recursive_p ())
2607         {
2608           if (dump_file)
2609             fprintf (dump_file, "  Not inlining: recursive call.\n");
2610           continue;
2611         }
2612
2613       if (!want_early_inline_function_p (e))
2614         continue;
2615
2616       if (dump_file)
2617         fprintf (dump_file, " Inlining %s into %s.\n",
2618                  xstrdup_for_dump (callee->name ()),
2619                  xstrdup_for_dump (e->caller->name ()));
2620       inline_call (e, true, NULL, NULL, false);
2621       inlined = true;
2622     }
2623
2624   if (inlined)
2625     inline_update_overall_summary (node);
2626
2627   return inlined;
2628 }
2629
2630 unsigned int
2631 early_inliner (function *fun)
2632 {
2633   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2634   struct cgraph_edge *edge;
2635   unsigned int todo = 0;
2636   int iterations = 0;
2637   bool inlined = false;
2638
2639   if (seen_error ())
2640     return 0;
2641
2642   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2643      happens when some pass decides to construct new function and
2644      cgraph_add_new_function calls lowering passes and early optimization on
2645      it.  This may confuse ourself when early inliner decide to inline call to
2646      function clone, because function clones don't have parameter list in
2647      ipa-prop matching their signature.  */
2648   if (ipa_node_params_sum)
2649     return 0;
2650
2651   if (flag_checking)
2652     node->verify ();
2653   node->remove_all_references ();
2654
2655   /* Rebuild this reference because it dosn't depend on
2656      function's body and it's required to pass cgraph_node
2657      verification.  */
2658   if (node->instrumented_version
2659       && !node->instrumentation_clone)
2660     node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2661
2662   /* Even when not optimizing or not inlining inline always-inline
2663      functions.  */
2664   inlined = inline_always_inline_functions (node);
2665
2666   if (!optimize
2667       || flag_no_inline
2668       || !flag_early_inlining
2669       /* Never inline regular functions into always-inline functions
2670          during incremental inlining.  This sucks as functions calling
2671          always inline functions will get less optimized, but at the
2672          same time inlining of functions calling always inline
2673          function into an always inline function might introduce
2674          cycles of edges to be always inlined in the callgraph.
2675
2676          We might want to be smarter and just avoid this type of inlining.  */
2677       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2678           && lookup_attribute ("always_inline",
2679                                DECL_ATTRIBUTES (node->decl))))
2680     ;
2681   else if (lookup_attribute ("flatten",
2682                              DECL_ATTRIBUTES (node->decl)) != NULL)
2683     {
2684       /* When the function is marked to be flattened, recursively inline
2685          all calls in it.  */
2686       if (dump_file)
2687         fprintf (dump_file,
2688                  "Flattening %s\n", node->name ());
2689       flatten_function (node, true);
2690       inlined = true;
2691     }
2692   else
2693     {
2694       /* If some always_inline functions was inlined, apply the changes.
2695          This way we will not account always inline into growth limits and
2696          moreover we will inline calls from always inlines that we skipped
2697          previously because of conditional above.  */
2698       if (inlined)
2699         {
2700           timevar_push (TV_INTEGRATION);
2701           todo |= optimize_inline_calls (current_function_decl);
2702           /* optimize_inline_calls call above might have introduced new
2703              statements that don't have inline parameters computed.  */
2704           for (edge = node->callees; edge; edge = edge->next_callee)
2705             {
2706               if (inline_edge_summary_vec.length () > (unsigned) edge->uid)
2707                 {
2708                   struct inline_edge_summary *es = inline_edge_summary (edge);
2709                   es->call_stmt_size
2710                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2711                   es->call_stmt_time
2712                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2713                 }
2714             }
2715           inline_update_overall_summary (node);
2716           inlined = false;
2717           timevar_pop (TV_INTEGRATION);
2718         }
2719       /* We iterate incremental inlining to get trivial cases of indirect
2720          inlining.  */
2721       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2722              && early_inline_small_functions (node))
2723         {
2724           timevar_push (TV_INTEGRATION);
2725           todo |= optimize_inline_calls (current_function_decl);
2726
2727           /* Technically we ought to recompute inline parameters so the new
2728              iteration of early inliner works as expected.  We however have
2729              values approximately right and thus we only need to update edge
2730              info that might be cleared out for newly discovered edges.  */
2731           for (edge = node->callees; edge; edge = edge->next_callee)
2732             {
2733               /* We have no summary for new bound store calls yet.  */
2734               if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2735                 {
2736                   struct inline_edge_summary *es = inline_edge_summary (edge);
2737                   es->call_stmt_size
2738                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2739                   es->call_stmt_time
2740                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2741                 }
2742               if (edge->callee->decl
2743                   && !gimple_check_call_matching_types (
2744                       edge->call_stmt, edge->callee->decl, false))
2745                 {
2746                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2747                   edge->call_stmt_cannot_inline_p = true;
2748                 }
2749             }
2750           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2751             inline_update_overall_summary (node);
2752           timevar_pop (TV_INTEGRATION);
2753           iterations++;
2754           inlined = false;
2755         }
2756       if (dump_file)
2757         fprintf (dump_file, "Iterations: %i\n", iterations);
2758     }
2759
2760   if (inlined)
2761     {
2762       timevar_push (TV_INTEGRATION);
2763       todo |= optimize_inline_calls (current_function_decl);
2764       timevar_pop (TV_INTEGRATION);
2765     }
2766
2767   fun->always_inline_functions_inlined = true;
2768
2769   return todo;
2770 }
2771
2772 /* Do inlining of small functions.  Doing so early helps profiling and other
2773    passes to be somewhat more effective and avoids some code duplication in
2774    later real inlining pass for testcases with very many function calls.  */
2775
2776 namespace {
2777
2778 const pass_data pass_data_early_inline =
2779 {
2780   GIMPLE_PASS, /* type */
2781   "einline", /* name */
2782   OPTGROUP_INLINE, /* optinfo_flags */
2783   TV_EARLY_INLINING, /* tv_id */
2784   PROP_ssa, /* properties_required */
2785   0, /* properties_provided */
2786   0, /* properties_destroyed */
2787   0, /* todo_flags_start */
2788   0, /* todo_flags_finish */
2789 };
2790
2791 class pass_early_inline : public gimple_opt_pass
2792 {
2793 public:
2794   pass_early_inline (gcc::context *ctxt)
2795     : gimple_opt_pass (pass_data_early_inline, ctxt)
2796   {}
2797
2798   /* opt_pass methods: */
2799   virtual unsigned int execute (function *);
2800
2801 }; // class pass_early_inline
2802
2803 unsigned int
2804 pass_early_inline::execute (function *fun)
2805 {
2806   return early_inliner (fun);
2807 }
2808
2809 } // anon namespace
2810
2811 gimple_opt_pass *
2812 make_pass_early_inline (gcc::context *ctxt)
2813 {
2814   return new pass_early_inline (ctxt);
2815 }
2816
2817 namespace {
2818
2819 const pass_data pass_data_ipa_inline =
2820 {
2821   IPA_PASS, /* type */
2822   "inline", /* name */
2823   OPTGROUP_INLINE, /* optinfo_flags */
2824   TV_IPA_INLINING, /* tv_id */
2825   0, /* properties_required */
2826   0, /* properties_provided */
2827   0, /* properties_destroyed */
2828   0, /* todo_flags_start */
2829   ( TODO_dump_symtab ), /* todo_flags_finish */
2830 };
2831
2832 class pass_ipa_inline : public ipa_opt_pass_d
2833 {
2834 public:
2835   pass_ipa_inline (gcc::context *ctxt)
2836     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2837                       inline_generate_summary, /* generate_summary */
2838                       inline_write_summary, /* write_summary */
2839                       inline_read_summary, /* read_summary */
2840                       NULL, /* write_optimization_summary */
2841                       NULL, /* read_optimization_summary */
2842                       NULL, /* stmt_fixup */
2843                       0, /* function_transform_todo_flags_start */
2844                       inline_transform, /* function_transform */
2845                       NULL) /* variable_transform */
2846   {}
2847
2848   /* opt_pass methods: */
2849   virtual unsigned int execute (function *) { return ipa_inline (); }
2850
2851 }; // class pass_ipa_inline
2852
2853 } // anon namespace
2854
2855 ipa_opt_pass_d *
2856 make_pass_ipa_inline (gcc::context *ctxt)
2857 {
2858   return new pass_ipa_inline (ctxt);
2859 }