gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
   27       can_inline_edge_p allows checking that a particular inlining is allowed
   28       by the limits specified by the user (allowed function growth, stack frame
   29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
   46          The strength of the pass is that it is run in topological order
   47          (reverse postorder) on the callgraph. Functions are converted into SSA
   48          form just before this pass and optimized subsequently. As a result, the
   49          callees of the function seen by the early inliner were already optimized
   50          and results of early inlining add a lot of optimization opportunities
   51          for the local optimization.
  52
   53          The pass handles the obvious inlining decisions within the compilation
   54          unit - inlining auto inline functions, inlining for size and
   55          flattening.
  56
   57          The main strength of the pass is the ability to eliminate
   58          abstraction penalty in C++ code (via combination of inlining and
   59          early optimization) and thus improve quality of analysis done by
   60          real IPA optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass cannot really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
   75          inlining them in this order as long as inline limits allow doing so.
  76
   77          This heuristic is not very good at inlining recursive calls. Recursive
   78          calls can be inlined with results similar to loop unrolling. To do so,
   79          a special purpose recursive inliner is executed on the function when
   80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "tree-vrp.h"
 112 #include "ipa-prop.h"
 113 #include "ipa-fnsummary.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117 #include "auto-profile.h"
 118 #include "builtins.h"
 119 #include "fibonacci_heap.h"
 120 #include "stringpool.h"
 121 #include "attribs.h"
 122 #include "asan.h"
 123
 124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 126
 127 /* Statistics we collect about inlining algorithm.  */
 128 static int overall_size;
 129 static profile_count max_count;
 130 static profile_count spec_rem;
 131
 132 /* Return false when inlining edge E would lead to violating
 133    limits on function unit growth or stack usage growth.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth in stack usage
 141    of the callers.  We want to prevent applications from segfaulting
 142    on stack overflow when functions with huge stack frames gets
 143    inlined. */
 144
 145 static bool
 146 caller_growth_limits (struct cgraph_edge *e)
 147 {
 148   struct cgraph_node *to = e->caller;
 149   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 150   int newsize;
 151   int limit = 0;
 152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 153   ipa_fn_summary *info, *what_info;
 154   ipa_fn_summary *outer_info = ipa_fn_summaries->get (to);
 155
 156   /* Look for function e->caller is inlined to.  While doing
 157      so work out the largest function body on the way.  As
 158      described above, we want to base our function growth
 159      limits based on that.  Not on the self size of the
 160      outer function, not on the self size of inline code
 161      we immediately inline to.  This is the most relaxed
 162      interpretation of the rule "do not grow large functions
 163      too much in order to prevent compiler from exploding".  */
 164   while (true)
 165     {
 166       info = ipa_fn_summaries->get (to);
 167       if (limit < info->self_size)
 168         limit = info->self_size;
 169       if (stack_size_limit < info->estimated_self_stack_size)
 170         stack_size_limit = info->estimated_self_stack_size;
 171       if (to->global.inlined_to)
 172         to = to->callers->caller;
 173       else
 174         break;
 175     }
 176
 177   what_info = ipa_fn_summaries->get (what);
 178
 179   if (limit < what_info->self_size)
 180     limit = what_info->self_size;
 181
 182   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 183
 184   /* Check the size after inlining against the function limits.  But allow
 185      the function to shrink if it went over the limits by forced inlining.  */
 186   newsize = estimate_size_after_inlining (to, e);
 187   if (newsize >= info->size
 188       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 189       && newsize > limit)
 190     {
 191       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 192       return false;
 193     }
 194
 195   if (!what_info->estimated_stack_size)
 196     return true;
 197
 198   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 199      due to large i/o datastructures used by the Fortran front-end.
 200      We ought to ignore this limit when we know that the edge is executed
 201      on every invocation of the caller (i.e. its call statement dominates
 202      exit block).  We do not track this information, yet.  */
 203   stack_size_limit += ((gcov_type)stack_size_limit
 204                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 205
 206   inlined_stack = (outer_info->stack_frame_offset
 207                    + outer_info->estimated_self_stack_size
 208                    + what_info->estimated_stack_size);
 209   /* Check new stack consumption with stack consumption at the place
 210      stack is used.  */
 211   if (inlined_stack > stack_size_limit
 212       /* If function already has large stack usage from sibling
 213          inline call, we can inline, too.
 214          This bit overoptimistically assume that we are good at stack
 215          packing.  */
 216       && inlined_stack > info->estimated_stack_size
 217       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 218     {
 219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 220       return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Dump info about why inlining has failed.  */
 226
 227 static void
 228 report_inline_failed_reason (struct cgraph_edge *e)
 229 {
 230   if (dump_enabled_p ())
 231     {
 232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 233                        "  not inlinable: %C -> %C, %s\n",
 234                        e->caller, e->callee,
 235                        cgraph_inline_failed_string (e->inline_failed));
 236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 237            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 238           && e->caller->lto_file_data
 239           && e->callee->ultimate_alias_target ()->lto_file_data)
 240         {
 241           dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 242                            "  LTO objects: %s, %s\n",
 243                            e->caller->lto_file_data->file_name,
 244                            e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 245         }
 246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 247         if (dump_file)
 248           cl_target_option_print_diff
 249             (dump_file, 2, target_opts_for_fn (e->caller->decl),
 250              target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 252         if (dump_file)
 253           cl_optimization_print_diff
 254             (dump_file, 2, opts_for_fn (e->caller->decl),
 255              opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 256     }
 257 }
 258
 259  /* Decide whether sanitizer-related attributes allow inlining. */
 260
 261 static bool
 262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 263 {
 264   if (!caller || !callee)
 265     return true;
 266
 267   /* Allow inlining always_inline functions into no_sanitize_address
 268      functions.  */
 269   if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
 270       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
 271     return true;
 272
 273   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
 274            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
 275           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 276               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 277           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 278               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 279 }
 280
 281 /* Used for flags where it is safe to inline when caller's value is
 282    grater than callee's.  */
 283 #define check_maybe_up(flag) \
 284       (opts_for_fn (caller->decl)->x_##flag             \
 285        != opts_for_fn (callee->decl)->x_##flag          \
 286        && (!always_inline                               \
 287            || opts_for_fn (caller->decl)->x_##flag      \
 288               < opts_for_fn (callee->decl)->x_##flag))
 289 /* Used for flags where it is safe to inline when caller's value is
 290    smaller than callee's.  */
 291 #define check_maybe_down(flag) \
 292       (opts_for_fn (caller->decl)->x_##flag             \
 293        != opts_for_fn (callee->decl)->x_##flag          \
 294        && (!always_inline                               \
 295            || opts_for_fn (caller->decl)->x_##flag      \
 296               > opts_for_fn (callee->decl)->x_##flag))
 297 /* Used for flags where exact match is needed for correctness.  */
 298 #define check_match(flag) \
 299       (opts_for_fn (caller->decl)->x_##flag             \
 300        != opts_for_fn (callee->decl)->x_##flag)
 301
 302 /* Decide if we can inline the edge and possibly update
 303    inline_failed reason.
 304    We check whether inlining is possible at all and whether
 305    caller growth limits allow doing so.
 306
 307    if REPORT is true, output reason to the dump file. */
 308
 309 static bool
 310 can_inline_edge_p (struct cgraph_edge *e, bool report,
 311                    bool early = false)
 312 {
 313   gcc_checking_assert (e->inline_failed);
 314
 315   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 316     {
 317       if (report)
 318         report_inline_failed_reason (e);
 319       return false;
 320     }
 321
 322   bool inlinable = true;
 323   enum availability avail;
 324   cgraph_node *caller = e->caller->global.inlined_to
 325                         ? e->caller->global.inlined_to : e->caller;
 326   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 327
 328   if (!callee->definition)
 329     {
 330       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 331       inlinable = false;
 332     }
 333   if (!early && (!opt_for_fn (callee->decl, optimize)
 334                  || !opt_for_fn (caller->decl, optimize)))
 335     {
 336       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 337       inlinable = false;
 338     }
 339   else if (callee->calls_comdat_local)
 340     {
 341       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 342       inlinable = false;
 343     }
 344   else if (avail <= AVAIL_INTERPOSABLE)
 345     {
 346       e->inline_failed = CIF_OVERWRITABLE;
 347       inlinable = false;
 348     }
 349   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 350      initialized to one of FINAL_ERROR reasons.  */
 351   else if (e->call_stmt_cannot_inline_p)
 352     gcc_unreachable ();
 353   /* Don't inline if the functions have different EH personalities.  */
 354   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 355            && DECL_FUNCTION_PERSONALITY (callee->decl)
 356            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 357                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 358     {
 359       e->inline_failed = CIF_EH_PERSONALITY;
 360       inlinable = false;
 361     }
 362   /* TM pure functions should not be inlined into non-TM_pure
 363      functions.  */
 364   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 365     {
 366       e->inline_failed = CIF_UNSPECIFIED;
 367       inlinable = false;
 368     }
 369   /* Check compatibility of target optimization options.  */
 370   else if (!targetm.target_option.can_inline_p (caller->decl,
 371                                                 callee->decl))
 372     {
 373       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 374       inlinable = false;
 375     }
 376   else if (ipa_fn_summaries->get (callee) == NULL
 377            || !ipa_fn_summaries->get (callee)->inlinable)
 378     {
 379       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 380       inlinable = false;
 381     }
 382   /* Don't inline a function with mismatched sanitization attributes. */
 383   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 384     {
 385       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 386       inlinable = false;
 387     }
 388   if (!inlinable && report)
 389     report_inline_failed_reason (e);
 390   return inlinable;
 391 }
 392
 393 /* Return inlining_insns_single limit for function N */
 394
 395 static int
 396 inline_insns_single (cgraph_node *n)
 397 {
 398   if (opt_for_fn (n->decl, optimize >= 3))
 399     return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE);
 400   else
 401     return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE_O2);
 402 }
 403
 404 /* Return inlining_insns_auto limit for function N */
 405
 406 static int
 407 inline_insns_auto (cgraph_node *n)
 408 {
 409   if (opt_for_fn (n->decl, optimize >= 3))
 410     return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO);
 411   else
 412     return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO_O2);
 413 }
 414
 415 /* Decide if we can inline the edge and possibly update
 416    inline_failed reason.
 417    We check whether inlining is possible at all and whether
 418    caller growth limits allow doing so.
 419
 420    if REPORT is true, output reason to the dump file.
 421
 422    if DISREGARD_LIMITS is true, ignore size limits.  */
 423
 424 static bool
 425 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
 426                              bool disregard_limits = false, bool early = false)
 427 {
 428   gcc_checking_assert (e->inline_failed);
 429
 430   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 431     {
 432       if (report)
 433         report_inline_failed_reason (e);
 434       return false;
 435     }
 436
 437   bool inlinable = true;
 438   enum availability avail;
 439   cgraph_node *caller = e->caller->global.inlined_to
 440                         ? e->caller->global.inlined_to : e->caller;
 441   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 442   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 443   tree callee_tree
 444     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 445   /* Check if caller growth allows the inlining.  */
 446   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 447       && !disregard_limits
 448       && !lookup_attribute ("flatten",
 449                  DECL_ATTRIBUTES (caller->decl))
 450       && !caller_growth_limits (e))
 451     inlinable = false;
 452   else if (callee->externally_visible
 453            && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 454            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
 455     {
 456       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
 457       inlinable = false;
 458     }
 459   /* Don't inline a function with a higher optimization level than the
 460      caller.  FIXME: this is really just tip of iceberg of handling
 461      optimization attribute.  */
 462   else if (caller_tree != callee_tree)
 463     {
 464       bool always_inline =
 465              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 466               && lookup_attribute ("always_inline",
 467                                    DECL_ATTRIBUTES (callee->decl)));
 468       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 469       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 470
 471      /* Until GCC 4.9 we did not check the semantics-altering flags
 472         below and inlined across optimization boundaries.
 473         Enabling checks below breaks several packages by refusing
 474         to inline library always_inline functions. See PR65873.
 475         Disable the check for early inlining for now until better solution
 476         is found.  */
 477      if (always_inline && early)
 478         ;
 479       /* There are some options that change IL semantics which means
 480          we cannot inline in these cases for correctness reason.
 481          Not even for always_inline declared functions.  */
 482      else if (check_match (flag_wrapv)
 483               || check_match (flag_trapv)
 484               || check_match (flag_pcc_struct_return)
 485               /* When caller or callee does FP math, be sure FP codegen flags
 486                  compatible.  */
 487               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 488                   && (check_maybe_up (flag_rounding_math)
 489                       || check_maybe_up (flag_trapping_math)
 490                       || check_maybe_down (flag_unsafe_math_optimizations)
 491                       || check_maybe_down (flag_finite_math_only)
 492                       || check_maybe_up (flag_signaling_nans)
 493                       || check_maybe_down (flag_cx_limited_range)
 494                       || check_maybe_up (flag_signed_zeros)
 495                       || check_maybe_down (flag_associative_math)
 496                       || check_maybe_down (flag_reciprocal_math)
 497                       || check_maybe_down (flag_fp_int_builtin_inexact)
 498                       /* Strictly speaking only when the callee contains function
 499                          calls that may end up setting errno.  */
 500                       || check_maybe_up (flag_errno_math)))
 501               /* We do not want to make code compiled with exceptions to be
 502                  brought into a non-EH function unless we know that the callee
 503                  does not throw.
 504                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 505               || (check_maybe_up (flag_non_call_exceptions)
 506                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 507               || (check_maybe_up (flag_exceptions)
 508                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 509               /* When devirtualization is diabled for callee, it is not safe
 510                  to inline it as we possibly mangled the type info.
 511                  Allow early inlining of always inlines.  */
 512               || (!early && check_maybe_down (flag_devirtualize)))
 513         {
 514           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 515           inlinable = false;
 516         }
 517       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 518       else if (always_inline)
 519         ;
 520       /* When user added an attribute to the callee honor it.  */
 521       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 522                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 523         {
 524           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 525           inlinable = false;
 526         }
 527       /* If explicit optimize attribute are not used, the mismatch is caused
 528          by different command line options used to build different units.
 529          Do not care about COMDAT functions - those are intended to be
 530          optimized with the optimization flags of module they are used in.
 531          Also do not care about mixing up size/speed optimization when
 532          DECL_DISREGARD_INLINE_LIMITS is set.  */
 533       else if ((callee->merged_comdat
 534                 && !lookup_attribute ("optimize",
 535                                       DECL_ATTRIBUTES (caller->decl)))
 536                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 537         ;
 538       /* If mismatch is caused by merging two LTO units with different
 539          optimizationflags we want to be bit nicer.  However never inline
 540          if one of functions is not optimized at all.  */
 541       else if (!opt_for_fn (callee->decl, optimize)
 542                || !opt_for_fn (caller->decl, optimize))
 543         {
 544           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 545           inlinable = false;
 546         }
 547       /* If callee is optimized for size and caller is not, allow inlining if
 548          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 549          is inline (and thus likely an unified comdat).  This will allow caller
 550          to run faster.  */
 551       else if (opt_for_fn (callee->decl, optimize_size)
 552                > opt_for_fn (caller->decl, optimize_size))
 553         {
 554           int growth = estimate_edge_growth (e);
 555           if (growth > PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE)
 556               && (!DECL_DECLARED_INLINE_P (callee->decl)
 557                   && growth >= MAX (inline_insns_single (caller),
 558                                     inline_insns_auto (caller))))
 559             {
 560               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 561               inlinable = false;
 562             }
 563         }
 564       /* If callee is more aggressively optimized for performance than caller,
 565          we generally want to inline only cheap (runtime wise) functions.  */
 566       else if (opt_for_fn (callee->decl, optimize_size)
 567                < opt_for_fn (caller->decl, optimize_size)
 568                || (opt_for_fn (callee->decl, optimize)
 569                    > opt_for_fn (caller->decl, optimize)))
 570         {
 571           if (estimate_edge_time (e)
 572               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 573             {
 574               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 575               inlinable = false;
 576             }
 577         }
 578
 579     }
 580
 581   if (!inlinable && report)
 582     report_inline_failed_reason (e);
 583   return inlinable;
 584 }
 585
 586
 587 /* Return true if the edge E is inlinable during early inlining.  */
 588
 589 static bool
 590 can_early_inline_edge_p (struct cgraph_edge *e)
 591 {
 592   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 593   /* Early inliner might get called at WPA stage when IPA pass adds new
 594      function.  In this case we cannot really do any of early inlining
 595      because function bodies are missing.  */
 596   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 597     return false;
 598   if (!gimple_has_body_p (callee->decl))
 599     {
 600       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 601       return false;
 602     }
 603   /* In early inliner some of callees may not be in SSA form yet
 604      (i.e. the callgraph is cyclic and we did not process
 605      the callee by early inliner, yet).  We don't have CIF code for this
 606      case; later we will re-do the decision in the real inliner.  */
 607   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 608       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 609     {
 610       if (dump_enabled_p ())
 611         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 612                          "  edge not inlinable: not in SSA form\n");
 613       return false;
 614     }
 615   if (!can_inline_edge_p (e, true, true)
 616       || !can_inline_edge_by_limits_p (e, true, false, true))
 617     return false;
 618   return true;
 619 }
 620
 621
 622 /* Return number of calls in N.  Ignore cheap builtins.  */
 623
 624 static int
 625 num_calls (struct cgraph_node *n)
 626 {
 627   struct cgraph_edge *e;
 628   int num = 0;
 629
 630   for (e = n->callees; e; e = e->next_callee)
 631     if (!is_inexpensive_builtin (e->callee->decl))
 632       num++;
 633   return num;
 634 }
 635
 636
 637 /* Return true if we are interested in inlining small function.  */
 638
 639 static bool
 640 want_early_inline_function_p (struct cgraph_edge *e)
 641 {
 642   bool want_inline = true;
 643   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 644
 645   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 646     ;
 647   /* For AutoFDO, we need to make sure that before profile summary, all
 648      hot paths' IR look exactly the same as profiled binary. As a result,
 649      in einliner, we will disregard size limit and inline those callsites
 650      that are:
 651        * inlined in the profiled binary, and
 652        * the cloned callee has enough samples to be considered "hot".  */
 653   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 654     ;
 655   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 656            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 657     {
 658       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 659       report_inline_failed_reason (e);
 660       want_inline = false;
 661     }
 662   else
 663     {
 664       int growth = estimate_edge_growth (e);
 665       int n;
 666       int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3
 667                                  ? PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)
 668                                  : PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_O2);
 669
 670
 671       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
 672         ;
 673       else if (!e->maybe_hot_p ())
 674         {
 675           if (dump_enabled_p ())
 676             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 677                              "  will not early inline: %C->%C, "
 678                              "call is cold and code would grow by %i\n",
 679                              e->caller, callee,
 680                              growth);
 681           want_inline = false;
 682         }
 683       else if (growth > early_inlining_insns)
 684         {
 685           if (dump_enabled_p ())
 686             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 687                              "  will not early inline: %C->%C, "
 688                              "growth %i exceeds --param early-inlining-insns%s\n",
 689                              e->caller, callee, growth,
 690                              opt_for_fn (e->caller->decl, optimize) >= 3
 691                              ? "" : "-O2");
 692           want_inline = false;
 693         }
 694       else if ((n = num_calls (callee)) != 0
 695                && growth * (n + 1) > early_inlining_insns)
 696         {
 697           if (dump_enabled_p ())
 698             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 699                              "  will not early inline: %C->%C, "
 700                              "growth %i exceeds --param early-inlining-insns%s "
 701                              "divided by number of calls\n",
 702                              e->caller, callee, growth,
 703                              opt_for_fn (e->caller->decl, optimize) >= 3
 704                              ? "" : "-O2");
 705           want_inline = false;
 706         }
 707     }
 708   return want_inline;
 709 }
 710
 711 /* Compute time of the edge->caller + edge->callee execution when inlining
 712    does not happen.  */
 713
 714 inline sreal
 715 compute_uninlined_call_time (struct cgraph_edge *edge,
 716                              sreal uninlined_call_time)
 717 {
 718   cgraph_node *caller = (edge->caller->global.inlined_to
 719                          ? edge->caller->global.inlined_to
 720                          : edge->caller);
 721
 722   sreal freq = edge->sreal_frequency ();
 723   if (freq > 0)
 724     uninlined_call_time *= freq;
 725   else
 726     uninlined_call_time = uninlined_call_time >> 11;
 727
 728   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 729   return uninlined_call_time + caller_time;
 730 }
 731
 732 /* Same as compute_uinlined_call_time but compute time when inlining
 733    does happen.  */
 734
 735 inline sreal
 736 compute_inlined_call_time (struct cgraph_edge *edge,
 737                            sreal time)
 738 {
 739   cgraph_node *caller = (edge->caller->global.inlined_to
 740                          ? edge->caller->global.inlined_to
 741                          : edge->caller);
 742   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 743
 744   sreal freq = edge->sreal_frequency ();
 745   if (freq > 0)
 746     time *= freq;
 747   else
 748     time = time >> 11;
 749
 750   /* This calculation should match one in ipa-inline-analysis.c
 751      (estimate_edge_size_and_time).  */
 752   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 753   time += caller_time;
 754   if (time <= 0)
 755     time = ((sreal) 1) >> 8;
 756   gcc_checking_assert (time >= 0);
 757   return time;
 758 }
 759
 760 /* Return true if the speedup for inlining E is bigger than
 761    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 762
 763 static bool
 764 big_speedup_p (struct cgraph_edge *e)
 765 {
 766   sreal unspec_time;
 767   sreal spec_time = estimate_edge_time (e, &unspec_time);
 768   sreal time = compute_uninlined_call_time (e, unspec_time);
 769   sreal inlined_time = compute_inlined_call_time (e, spec_time);
 770   cgraph_node *caller = e->caller->global.inlined_to
 771                         ? e->caller->global.inlined_to
 772                         : e->caller;
 773   int limit = opt_for_fn (caller->decl, optimize) >= 3
 774               ? PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
 775               : PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP_O2);
 776
 777   if ((time - inlined_time) * 100 > time * limit)
 778     return true;
 779   return false;
 780 }
 781
 782 /* Return true if the small function inliner should consider inlining
 783    the call E.  When REPORT is true and the answer is negative, the reason
        is passed to report_inline_failed_reason.

        Every rejecting branch below stores its reason in E->inline_failed,
        except the CIF_FINAL_ERROR one, which leaves the recorded failure
        untouched.  */
 784
 785 static bool
 786 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 787 {
 788   bool want_inline = true;
 789   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 790
 791   /* Allow this function to be called before can_inline_edge_p,
 792      since it's usually cheaper.  */
 793   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 794     want_inline = false;
       /* Callees that disregard inline limits skip every test below.  */
 795   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 796     ;
       /* Callees not declared inline need flag_inline_small_functions to be
          enabled for the caller.  */
 797   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 798            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 799     {
 800       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 801       want_inline = false;
 802     }
 803   /* Do fast and conservative check if the function can be good
 804      inline candidate.  At the moment we allow inline hints to
 805      promote non-inline functions to inline and we increase
 806      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.
          This first test covers callees not declared inline whose edge has no
          initialized IPA count or is not hot; it compares the callee's minimal
          size, less the size of the call statement, with the larger of the two
          caller limits.  */
 807   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 808            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 809            && ipa_fn_summaries->get (callee)->min_size
 810                 - ipa_call_summaries->get (e)->call_stmt_size
 811               > MAX (inline_insns_single (e->caller),
 812                      inline_insns_auto (e->caller)))
 813     {
 814       if (opt_for_fn (e->caller->decl, optimize) >= 3)
 815         e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 816       else
 817         e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT;
 818       want_inline = false;
 819     }
       /* Callees declared inline, or reached through an edge with nonzero IPA
          count, get the same minimal size test against 16 times the
          single-function limit.  */
 820   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 821             || e->count.ipa ().nonzero_p ())
 822            && ipa_fn_summaries->get (callee)->min_size
 823                 - ipa_call_summaries->get (e)->call_stmt_size
 824               > 16 * inline_insns_single (e->caller))
 825     {
 826       if (opt_for_fn (e->caller->decl, optimize) >= 3)
 827         e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 828                             ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 829                             : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 830       else
 831         e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 832                             ? CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT
 833                             : CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT);
 834       want_inline = false;
 835     }
       /* The cheap tests passed; use the full growth and hint estimates.
          Only the first matching branch of the chain below is evaluated, so a
          branch whose inner test passes accepts the edge without consulting
          the later branches.  */
 836   else
 837     {
 838       int growth = estimate_edge_growth (e);
 839       ipa_hints hints = estimate_edge_hints (e);
           /* -1 means big_speedup_p has not been evaluated yet; otherwise this
              holds its result.  */
 840       int big_speedup = -1; /* compute this lazily */
 841
           /* Growth up to PARAM_MAX_INLINE_INSNS_SIZE is always accepted.  */
 842       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
 843         ;
 844       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 845          hints suggests that inlining given function is very profitable.
              Growth of 16 times the limit or more is rejected regardless of
              hints.  The call to big_speedup_p here caches its result in
              BIG_SPEEDUP.  */
 846       else if (DECL_DECLARED_INLINE_P (callee->decl)
 847                && growth >= inline_insns_single (e->caller)
 848                && (growth >= inline_insns_single (e->caller) * 16
 849                    || (!(hints & (INLINE_HINT_indirect_call
 850                                   | INLINE_HINT_known_hot
 851                                   | INLINE_HINT_loop_iterations
 852                                   | INLINE_HINT_loop_stride))
 853                        && !(big_speedup = big_speedup_p (e)))))
 854         {
 855           if (opt_for_fn (e->caller->decl, optimize) >= 3)
 856             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 857           else
 858             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT;
 859           want_inline = false;
 860         }
           /* Callee is not declared inline, flag_inline_functions is off for the
              caller and growth reaches PARAM_MAX_INLINE_INSNS_SMALL.  */
 861       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 862                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 863                && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL))
 864         {
 865           /* growth_likely_positive is expensive, always test it last.  */
 866           if (growth >= inline_insns_single (e->caller)
 867               || growth_likely_positive (callee, growth))
 868             {
 869               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 870               want_inline = false;
 871             }
 872         }
 873       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
 874          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 875          inlining given function is very profitable.  A big speedup, computed
              here unless an earlier branch already cached it, lifts the limit
              as well.  */
 876       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 877                && !(hints & INLINE_HINT_known_hot)
 878                && growth >= ((hints & (INLINE_HINT_indirect_call
 879                                        | INLINE_HINT_loop_iterations
 880                                        | INLINE_HINT_loop_stride))
 881                              ? MAX (inline_insns_auto (e->caller),
 882                                     inline_insns_single (e->caller))
 883                              : inline_insns_auto (e->caller))
 884                && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup))
 885         {
 886           /* growth_likely_positive is expensive, always test it last.  */
 887           if (growth >= inline_insns_single (e->caller)
 888               || growth_likely_positive (callee, growth))
 889             {
 890               if (opt_for_fn (e->caller->decl, optimize) >= 3)
 891                 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 892               else
 893                 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT;
 894               want_inline = false;
 895             }
 896         }
 897       /* If call is cold, do not inline when function body would grow. */
 898       else if (!e->maybe_hot_p ()
 899                && (growth >= inline_insns_single (e->caller)
 900                    || growth_likely_positive (callee, growth)))
 901         {
 902           e->inline_failed = CIF_UNLIKELY_CALL;
 903           want_inline = false;
 904         }
 905     }
 906   if (!want_inline && report)
 907     report_inline_failed_reason (e);
 908   return want_inline;
 909 }
 910
 911 /* EDGE is self recursive edge.
 912    We hand two cases - when function A is inlining into itself
 913    or when function A is being inlined into another inliner copy of function
 914    A within function B.
 915
 916    In first case OUTER_NODE points to the toplevel copy of A, while
 917    in the second case OUTER_NODE points to the outermost copy of A in B.
 918
 919    In both cases we want to be extra selective since
 920    inlining the call will just introduce new recursive calls to appear.  */
 921
 922 static bool
 923 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 924                                    struct cgraph_node *outer_node,
 925                                    bool peeling,
 926                                    int depth)
 927 {
 928   char const *reason = NULL;
 929   bool want_inline = true;
 930   sreal caller_freq = 1;
 931   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 932
 933   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 934     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 935
 936   if (!edge->maybe_hot_p ())
 937     {
 938       reason = "recursive call is cold";
 939       want_inline = false;
 940     }
 941   else if (depth > max_depth)
 942     {
 943       reason = "--param max-inline-recursive-depth exceeded.";
 944       want_inline = false;
 945     }
 946   else if (outer_node->global.inlined_to
 947            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 948     {
 949       reason = "caller frequency is 0";
 950       want_inline = false;
 951     }
 952
 953   if (!want_inline)
 954     ;
 955   /* Inlining of self recursive function into copy of itself within other
 956      function is transformation similar to loop peeling.
 957
 958      Peeling is profitable if we can inline enough copies to make probability
 959      of actual call to the self recursive function very small.  Be sure that
 960      the probability of recursion is small.
 961
 962      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 963      This way the expected number of recursion is at most max_depth.  */
 964   else if (peeling)
 965     {
 966       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 967       int i;
 968       for (i = 1; i < depth; i++)
 969         max_prob = max_prob * max_prob;
 970       if (edge->sreal_frequency () >= max_prob * caller_freq)
 971         {
 972           reason = "frequency of recursive call is too large";
 973           want_inline = false;
 974         }
 975     }
 976   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
 977      recursion depth is large.  We reduce function call overhead and increase
 978      chances that things fit in hardware return predictor.
 979
 980      Recursive inlining might however increase cost of stack frame setup
 981      actually slowing down functions whose recursion tree is wide rather than
 982      deep.
 983
 984      Deciding reliably on when to do recursive inlining without profile feedback
 985      is tricky.  For now we disable recursive inlining when probability of self
 986      recursion is low.
 987
 988      Recursive inlining of self recursive call within loop also results in
 989      large loop depths that generally optimize badly.  We may want to throttle
 990      down inlining in those cases.  In particular this seems to happen in one
 991      of libstdc++ rb tree methods.  */
 992   else
 993     {
 994       if (edge->sreal_frequency () * 100
 995           <= caller_freq
 996              * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
 997         {
 998           reason = "frequency of recursive call is too small";
 999           want_inline = false;
1000         }
1001     }
1002   if (!want_inline && dump_enabled_p ())
1003     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1004                      "   not inlining recursively: %s\n", reason);
1005   return want_inline;
1006 }
1007
1008 /* Return true when NODE has uninlinable caller;
1009    set HAS_HOT_CALL if it has hot call.
1010    Worker for cgraph_for_node_and_aliases.  */
1011
1012 static bool
1013 check_callers (struct cgraph_node *node, void *has_hot_call)
1014 {
1015   struct cgraph_edge *e;
1016    for (e = node->callers; e; e = e->next_caller)
1017      {
1018        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1019            || !opt_for_fn (e->caller->decl, optimize))
1020          return true;
1021        if (!can_inline_edge_p (e, true))
1022          return true;
1023        if (e->recursive_p ())
1024          return true;
1025        if (!can_inline_edge_by_limits_p (e, true))
1026          return true;
1027        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1028          *(bool *)has_hot_call = true;
1029      }
1030   return false;
1031 }
1032
1033 /* If NODE has a caller, return true.  */
1034
1035 static bool
1036 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1037 {
1038   if (node->callers)
1039     return true;
1040   return false;
1041 }
1042
1043 /* Decide if inlining NODE would reduce unit size by eliminating
1044    the offline copy of function.
1045    When COLD is true the cold calls are considered, too.  */
1046
1047 static bool
1048 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1049 {
1050   bool has_hot_call = false;
1051
1052   /* Aliases gets inlined along with the function they alias.  */
1053   if (node->alias)
1054     return false;
1055   /* Already inlined?  */
1056   if (node->global.inlined_to)
1057     return false;
1058   /* Does it have callers?  */
1059   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1060     return false;
1061   /* Inlining into all callers would increase size?  */
1062   if (estimate_growth (node) > 0)
1063     return false;
1064   /* All inlines must be possible.  */
1065   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1066                                          true))
1067     return false;
1068   if (!cold && !has_hot_call)
1069     return false;
1070   return true;
1071 }
1072
1073 /* A cost model driving the inlining heuristics in a way so the edges with
1074    smallest badness are inlined first.  After each inlining is performed
1075    the costs of all caller edges of nodes affected are recomputed so the
1076    metrics may accurately depend on values such as number of inlinable callers
1077    of the function or function body size.

        EDGE is the call being rated.  When DUMP is true the intermediate
        values are printed to dump_file.  The return value is the badness;
        sreal::max () is returned for an edge with positive growth whose
        caller (or the function the caller is inlined into) is
        DECL_EXTERNAL.  */
1078
1079 static sreal
1080 edge_badness (struct cgraph_edge *edge, bool dump)
1081 {
1082   sreal badness;
1083   int growth;
1084   sreal edge_time, unspec_edge_time;
1085   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1086   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1087   ipa_hints hints;
       /* CALLER is the function the call ends up in: the node the caller is
          inlined into, or the caller itself.  */
1088   cgraph_node *caller = (edge->caller->global.inlined_to
1089                          ? edge->caller->global.inlined_to
1090                          : edge->caller);
1091
       /* Gather the estimates the badness is computed from.  */
1092   growth = estimate_edge_growth (edge);
1093   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1094   hints = estimate_edge_hints (edge);
1095   gcc_checking_assert (edge_time >= 0);
1096   /* Check that inlined time is better, but tolerate some roundoff issues.
1097      FIXME: When callee profile drops to 0 we account calls more.  This
1098      should be fixed by never doing that.  */
1099   gcc_checking_assert ((edge_time * 100
1100                         - callee_info->time * 101).to_int () <= 0
1101                         || callee->count.ipa ().initialized_p ());
1102   gcc_checking_assert (growth <= callee_info->size);
1103
1104   if (dump)
1105     {
1106       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1107                edge->caller->dump_name (),
1108                edge->callee->dump_name ());
1109       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1110                growth,
1111                edge_time.to_double (),
1112                unspec_edge_time.to_double ());
1113       ipa_dump_hints (dump_file, hints);
1114       if (big_speedup_p (edge))
1115         fprintf (dump_file, " big_speedup");
1116       fprintf (dump_file, "\n");
1117     }
1118
1119   /* Always prefer inlining saving code size.  */
1120   if (growth <= 0)
1121     {
1122       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1123       if (dump)
1124         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1125                  growth);
1126     }
1127    /* Inlining into EXTERNAL functions is not going to change anything unless
1128       they are themselves inlined.  */
1129    else if (DECL_EXTERNAL (caller->decl))
1130     {
1131       if (dump)
1132         fprintf (dump_file, "      max: function is external\n");
1133       return sreal::max ();
1134     }
1135   /* When profile is available. Compute badness as:
1136
1137                  time_saved * caller_count
1138      goodness =  -------------------------------------------------
1139                  growth_of_caller * overall_growth * combined_size
1140
1141      badness = - goodness
1142
1143      Again use negative value to make calls with profile appear hotter
1144      than calls without.

          This branch is taken when flag_guess_branch_prob is set for the
          caller or the caller has a nonzero IPA count.
1145   */
1146   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1147            || caller->count.ipa ().nonzero_p ())
1148     {
1149       sreal numerator, denominator;
1150       int overall_growth;
1151       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1152
           /* Time saved by inlining, forced to 1/256 when it is not
              positive.  */
1153       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1154                    - inlined_time);
1155       if (numerator <= 0)
1156         numerator = ((sreal) 1 >> 8);
           /* Multiply by the caller's IPA count when it is nonzero; when the
              count is initialized but zero, shift the numerator right by 11
              instead.  */
1157       if (caller->count.ipa ().nonzero_p ())
1158         numerator *= caller->count.ipa ().to_gcov_type ();
1159       else if (caller->count.ipa ().initialized_p ())
1160         numerator = numerator >> 11;
1161       denominator = growth;
1162
1163       overall_growth = callee_info->growth;
1164
1165       /* Look for inliner wrappers of the form:
1166
1167          inline_caller ()
1168            {
1169              do_fast_job...
1170              if (need_more_work)
1171                noninline_callee ();
1172            }
1173          Without penalizing this case, we usually inline noninline_callee
1174          into the inline_caller because overall_growth is small preventing
1175          further inlining of inline_caller.
1176
1177          Penalize only callgraph edges to functions with small overall
1178          growth ...
1179         */
1180       if (growth > overall_growth
1181           /* ... and having only one caller which is not inlined ... */
1182           && callee_info->single_caller
1183           && !edge->caller->global.inlined_to
1184           /* ... and edges executed only conditionally ... */
1185           && edge->sreal_frequency () < 1
1186           /* ... consider case where callee is not inline but caller is ... */
1187           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1188                && DECL_DECLARED_INLINE_P (caller->decl))
1189               /* ... or when early optimizers decided to split and edge
1190                  frequency still indicates splitting is a win ... */
1191               || (callee->split_part && !caller->split_part
1192                   && edge->sreal_frequency () * 100
1193                      < PARAM_VALUE
1194                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1195                   /* ... and do not overwrite user specified hints.   */
1196                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1197                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1198         {
1199           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1200           int caller_growth = caller_info->growth;
1201
1202           /* Only apply the penalty when caller looks like inline candidate,
1203              and it is not called once.  */
1204           if (!caller_info->single_caller && overall_growth < caller_growth
1205               && caller_info->inlinable
1206               && caller_info->size
1207                  < (DECL_DECLARED_INLINE_P (caller->decl)
1208                     ? inline_insns_single (caller) : inline_insns_auto (caller)))
1209             {
1210               if (dump)
1211                 fprintf (dump_file,
1212                          "     Wrapper penalty. Increasing growth %i to %i\n",
1213                          overall_growth, caller_growth);
1214               overall_growth = caller_growth;
1215             }
1216         }
1217       if (overall_growth > 0)
1218         {
1219           /* Strongly prefer functions with few callers that can be inlined
1220              fully.  The square root here leads to smaller binaries on average.
1221              Watch however for extreme cases and return to linear function
1222              when growth is large.  */
1223           if (overall_growth < 256)
1224             overall_growth *= overall_growth;
1225           else
1226             overall_growth += 256 * 256 - 256;
1227           denominator *= overall_growth;
1228         }
           /* The last factor is the combined size: self size of the caller
              plus the growth caused by this edge.  */
1229       denominator *= ipa_fn_summaries->get (caller)->self_size + growth;
1230
1231       badness = - numerator / denominator;
1232
1233       if (dump)
1234         {
1235           fprintf (dump_file,
1236                    "      %f: guessed profile. frequency %f, count %" PRId64
1237                    " caller count %" PRId64
1238                    " time w/o inlining %f, time with inlining %f"
1239                    " overall growth %i (current) %i (original)"
1240                    " %i (compensated)\n",
1241                    badness.to_double (),
1242                    edge->sreal_frequency ().to_double (),
1243                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1244                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1245                    compute_uninlined_call_time (edge,
1246                                                 unspec_edge_time).to_double (),
1247                    inlined_time.to_double (),
1248                    estimate_growth (callee),
1249                    callee_info->growth, overall_growth);
1250         }
1251     }
1252   /* When function local profile is not available or it does not give
1253      useful information (ie frequency is zero), base the cost on
1254      loop nest and overall size growth, so we optimize for overall number
1255      of functions fully inlined in program.  */
1256   else
1257     {
           /* The loop depth of the call is capped at 8.  */
1258       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1259       badness = growth;
1260
1261       /* Decrease badness if call is nested.  */
1262       if (badness > 0)
1263         badness = badness >> nest;
1264       else
1265         badness = badness << nest;
1266       if (dump)
1267         fprintf (dump_file, "      %f: no profile. nest %i\n",
1268                  badness.to_double (), nest);
1269     }
1270   gcc_checking_assert (badness != 0);
1271
       /* Adjust the badness according to properties of the edge and the
          hints.  Each adjustment picks the sign of the shift count from the
          sign of BADNESS.  NOTE(review): presumably shift (N) scales by 2^N,
          so recursion and the SCC/cross-module hints make the edge less
          attractive while the remaining ones make it more attractive;
          confirm against sreal.h.  */
1272   if (edge->recursive_p ())
1273     badness = badness.shift (badness > 0 ? 4 : -4);
1274   if ((hints & (INLINE_HINT_indirect_call
1275                 | INLINE_HINT_loop_iterations
1276                 | INLINE_HINT_loop_stride))
1277       || callee_info->growth <= 0)
1278     badness = badness.shift (badness > 0 ? -2 : 2);
       /* At most one of the following three adjustments is applied.  */
1279   if (hints & (INLINE_HINT_same_scc))
1280     badness = badness.shift (badness > 0 ? 3 : -3);
1281   else if (hints & (INLINE_HINT_in_scc))
1282     badness = badness.shift (badness > 0 ? 2 : -2);
1283   else if (hints & (INLINE_HINT_cross_module))
1284     badness = badness.shift (badness > 0 ? 1 : -1);
1285   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1286     badness = badness.shift (badness > 0 ? -4 : 4);
1287   else if ((hints & INLINE_HINT_declared_inline))
1288     badness = badness.shift (badness > 0 ? -3 : 3);
1289   if (dump)
1290     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1291   return badness;
1292 }
1293
1294 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1295 static inline void
1296 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1297 {
1298   sreal badness = edge_badness (edge, false);
1299   if (edge->aux)
1300     {
1301       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1302       gcc_checking_assert (n->get_data () == edge);
1303
1304       /* fibonacci_heap::replace_key does busy updating of the
1305          heap that is unnecesarily expensive.
1306          We do lazy increases: after extracting minimum if the key
1307          turns out to be out of date, it is re-inserted into heap
1308          with correct value.  */
1309       if (badness < n->get_key ())
1310         {
1311           if (dump_file && (dump_flags & TDF_DETAILS))
1312             {
1313               fprintf (dump_file,
1314                        "  decreasing badness %s -> %s, %f to %f\n",
1315                        edge->caller->dump_name (),
1316                        edge->callee->dump_name (),
1317                        n->get_key ().to_double (),
1318                        badness.to_double ());
1319             }
1320           heap->decrease_key (n, badness);
1321         }
1322     }
1323   else
1324     {
1325        if (dump_file && (dump_flags & TDF_DETAILS))
1326          {
1327            fprintf (dump_file,
1328                     "  enqueuing call %s -> %s, badness %f\n",
1329                     edge->caller->dump_name (),
1330                     edge->callee->dump_name (),
1331                     badness.to_double ());
1332          }
1333       edge->aux = heap->insert (badness, edge);
1334     }
1335 }
1336
1337
1338 /* NODE was inlined.
1339    All caller edges need to be reset because
1340    size estimates change.  Similarly callees need to be reset
1341    because better context may be known.

        Entries are removed from edge_growth_cache only when that cache
        exists, and only for edges that are not inlined.  */
1342
1343 static void
1344 reset_edge_caches (struct cgraph_node *node)
1345 {
1346   struct cgraph_edge *edge;
1347   struct cgraph_edge *e = node->callees;
1348   struct cgraph_node *where = node;
1349   struct ipa_ref *ref;
1350
       /* Caller edges are taken from the function NODE is inlined into.  */
1351   if (where->global.inlined_to)
1352     where = where->global.inlined_to;
1353
1354   if (edge_growth_cache != NULL)
1355     for (edge = where->callers; edge; edge = edge->next_caller)
1356       if (edge->inline_failed)
1357         edge_growth_cache->remove (edge);
1358
       /* Do the same for every alias of WHERE.  */
1359   FOR_EACH_ALIAS (where, ref)
1360     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1361
1362   if (!e)
1363     return;
1364
       /* Walk the callee edges of NODE without recursion.  An inlined edge
          whose callee has callees of its own is descended into; any other
          edge is dropped from the cache if it is not inlined.  When a callee
          list is exhausted, climb back through the caller edges until an edge
          with a next sibling is found, and stop once the edge being left
          belongs to NODE itself.  */
1365   while (true)
1366     if (!e->inline_failed && e->callee->callees)
1367       e = e->callee->callees;
1368     else
1369       {
1370         if (edge_growth_cache != NULL && e->inline_failed)
1371           edge_growth_cache->remove (e);
1372         if (e->next_callee)
1373           e = e->next_callee;
1374         else
1375           {
1376             do
1377               {
1378                 if (e->caller == node)
1379                   return;
                     /* NOTE(review): presumably an inlined body has exactly
                        one caller edge, the one descended through.  */
1380                 e = e->caller->callers;
1381               }
1382             while (!e->next_callee);
1383             e = e->next_callee;
1384           }
1385       }
1386 }
1387
1388 /* Recompute HEAP nodes for each of caller of NODE.
1389    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1390    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1391    it is inlinable. Otherwise check all edges.  */
1392
1393 static void
1394 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1395                     bitmap updated_nodes,
1396                     struct cgraph_edge *check_inlinablity_for)
1397 {
1398   struct cgraph_edge *edge;
1399   struct ipa_ref *ref;
1400
1401   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1402       || node->global.inlined_to)
1403     return;
1404   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1405     return;
1406
1407   FOR_EACH_ALIAS (node, ref)
1408     {
1409       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1410       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1411     }
1412
1413   for (edge = node->callers; edge; edge = edge->next_caller)
1414     if (edge->inline_failed)
1415       {
1416         if (!check_inlinablity_for
1417             || check_inlinablity_for == edge)
1418           {
1419             if (can_inline_edge_p (edge, false)
1420                 && want_inline_small_function_p (edge, false)
1421                 && can_inline_edge_by_limits_p (edge, false))
1422               update_edge_key (heap, edge);
1423             else if (edge->aux)
1424               {
1425                 report_inline_failed_reason (edge);
1426                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1427                 edge->aux = NULL;
1428               }
1429           }
1430         else if (edge->aux)
1431           update_edge_key (heap, edge);
1432       }
1433 }
1434
1435 /* Recompute HEAP nodes for each uninlined call in NODE.
1436    This is used when we know that edge badnesses are going only to increase
1437    (we introduced new call site) and thus all we need is to insert newly
1438    created edges into heap.  Callees whose uid is already set in
        UPDATED_NODES are skipped.  */
1439
1440 static void
1441 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1442                     bitmap updated_nodes)
1443 {
1444   struct cgraph_edge *e = node->callees;
1445
1446   if (!e)
1447     return;
       /* Walk the callee edges of NODE without recursion, descending into
          inlined edges whose callee has callees of its own.  */
1448   while (true)
1449     if (!e->inline_failed && e->callee->callees)
1450       e = e->callee->callees;
1451     else
1452       {
1453         enum availability avail;
1454         struct cgraph_node *callee;
1455         /* We do not reset callee growth cache here.  Since we added a new call,
1456            growth could have just increased and consequently the badness metric
1457            does not need updating.  */
1458         if (e->inline_failed
1459             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1460             && ipa_fn_summaries->get (callee) != NULL
1461             && ipa_fn_summaries->get (callee)->inlinable
1462             && avail >= AVAIL_AVAILABLE
1463             && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1464           {
                 /* Edges that pass all three checks get a fresh key; edges
                    that fail are removed from the heap if they were in it.  */
1465             if (can_inline_edge_p (e, false)
1466                 && want_inline_small_function_p (e, false)
1467                 && can_inline_edge_by_limits_p (e, false))
1468               update_edge_key (heap, e);
1469             else if (e->aux)
1470               {
1471                 report_inline_failed_reason (e);
1472                 heap->delete_node ((edge_heap_node_t *) e->aux);
1473                 e->aux = NULL;
1474               }
1475           }
1476         if (e->next_callee)
1477           e = e->next_callee;
1478         else
1479           {
                 /* End of a callee list: climb back through the caller edges
                    until one has a next sibling; finish when the edge being
                    left belongs to NODE itself.  */
1480             do
1481               {
1482                 if (e->caller == node)
1483                   return;
1484                 e = e->caller->callers;
1485               }
1486             while (!e->next_callee);
1487             e = e->next_callee;
1488           }
1489       }
1490 }
1491
1492 /* Enqueue all recursive calls from NODE into priority queue depending on
1493    how likely we want to recursively inline the call.  */
1494
1495 static void
1496 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1497                         edge_heap_t *heap)
1498 {
1499   struct cgraph_edge *e;
1500   enum availability avail;
1501
1502   for (e = where->callees; e; e = e->next_callee)
1503     if (e->callee == node
1504         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1505             && avail > AVAIL_INTERPOSABLE))
1506       heap->insert (-e->sreal_frequency (), e);
1507   for (e = where->callees; e; e = e->next_callee)
1508     if (!e->inline_failed)
1509       lookup_recursive_calls (node, e->callee, heap);
1510 }
1511
/* Decide on recursive inlining: in the case function has recursive calls,
   inline until body size reaches given argument.  If any new indirect edges
   are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
   is NULL.

   EDGE is the recursive call that triggered the decision; the function
   being grown is EDGE's caller, or the function that caller was inlined
   into.  Return true if at least one recursive call was inlined, false if
   the function is already at the size limit, has no recursive calls, or no
   candidate call was accepted.  */

static bool
recursive_inlining (struct cgraph_edge *edge,
                    vec<cgraph_edge *> *new_edges)
{
  /* Size limit; the _AUTO parameter is used unless the function was
     declared inline (see below).  */
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  /* Recursive call edges still to be considered, keyed by negated
     frequency (see lookup_recursive_calls).  */
  edge_heap_t heap (sreal::min ());
  struct cgraph_node *node;
  struct cgraph_edge *e;
  /* Copy of the unmodified body, created lazily on the first inlining.  */
  struct cgraph_node *master_clone = NULL, *next;
  int depth = 0;
  /* Number of calls inlined so far; used only for the dump.  */
  int n = 0;

  /* Work on the outermost function the caller has been inlined into.  */
  node = edge->caller;
  if (node->global.inlined_to)
    node = node->global.inlined_to;

  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);

  /* Make sure that function is small enough to be considered for inlining.  */
  if (estimate_size_after_inlining (node, edge)  >= limit)
    return false;
  lookup_recursive_calls (node, node, &heap);
  if (heap.empty ())
    return false;

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             node->name ());

  /* Do the inlining and update list of recursive call during process.  */
  while (!heap.empty ())
    {
      struct cgraph_edge *curr = heap.extract_min ();
      /* Remember the current callee so the edge can be redirected back if
         we decide not to inline it.  */
      struct cgraph_node *cnode, *dest = curr->callee;

      if (!can_inline_edge_p (curr, true)
          || !can_inline_edge_by_limits_p (curr, true))
        continue;

      /* MASTER_CLONE exists once we have started modifying the function.
         Redirect the edge to that original body before estimating growth;
         otherwise the estimate would be based on the already modified
         body.  */
      if (master_clone)
        {
          curr->redirect_callee (master_clone);
          /* The callee changed, so any cached growth estimate is stale.  */
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
        }

      /* Size limit reached: undo the redirection and stop altogether.  */
      if (estimate_size_after_inlining (node, curr) > limit)
        {
          curr->redirect_callee (dest);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
          break;
        }

      /* Compute the recursion depth passed to the profitability check by
         walking the chain of inline clones from CURR's caller outwards.
         NOTE(review): the comparison below does not depend on CNODE, so it
         yields the same result on every iteration -- confirm whether it was
         meant to test CNODE.  */
      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl
            == curr->callee->ultimate_alias_target ()->decl)
          depth++;

      /* Not profitable: undo the redirection and try the next candidate.  */
      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
        {
          curr->redirect_callee (dest);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
          continue;
        }

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count.nonzero_p ())
            {
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count.to_gcov_type ()
                       / node->count.to_gcov_type ());
            }
          fprintf (dump_file, "\n");
        }
      if (!master_clone)
        {
          /* We need original clone to copy around.  */
          master_clone = node->create_clone (node->decl, node->count,
            false, vNULL, true, NULL, NULL);
          /* Duplicate the bodies already inlined into NODE for the clone.  */
          for (e = master_clone->callees; e; e = e->next_callee)
            if (!e->inline_failed)
              clone_inlined_nodes (e, true, false, NULL);
          curr->redirect_callee (master_clone);
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (curr);
        }

      inline_call (curr, false, new_edges, &overall_size, true);
      /* The freshly inlined body may contain further recursive calls.  */
      lookup_recursive_calls (node, curr->callee, &heap);
      n++;
    }

  /* Edges are left in the heap only when the loop stopped on the limit.  */
  if (!heap.empty () && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  /* No master clone means nothing was inlined.  */
  if (!master_clone)
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, edge->call_stmt,
                     "\n   Inlined %i times, "
                     "body grown from size %i to %i, time %f to %f\n", n,
                     ipa_fn_summaries->get (master_clone)->size,
                     ipa_fn_summaries->get (node)->size,
                     ipa_fn_summaries->get (master_clone)->time.to_double (),
                     ipa_fn_summaries->get (node)->time.to_double ());

  /* Remove the master clone we used for inlining.  We rely on the clones
     inlined into the master clone being queued just before it, so no
     recursion is needed.  */
  for (node = symtab->first_function (); node != master_clone;
       node = next)
    {
      /* Fetch the successor first; NODE may be removed below.  */
      next = symtab->next_function (node);
      if (node->global.inlined_to == master_clone)
        node->remove ();
    }
  master_clone->remove ();
  return true;
}
1650
1651
1652 /* Given whole compilation unit estimate of INSNS, compute how large we can
1653    allow the unit to grow.  */
1654
1655 static int
1656 compute_max_insns (int insns)
1657 {
1658   int max_insns = insns;
1659   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1660     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1661
1662   return ((int64_t) max_insns
1663           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1664 }
1665
1666
1667 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1668
1669 static void
1670 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1671 {
1672   while (new_edges.length () > 0)
1673     {
1674       struct cgraph_edge *edge = new_edges.pop ();
1675
1676       gcc_assert (!edge->aux);
1677       gcc_assert (edge->callee);
1678       if (edge->inline_failed
1679           && can_inline_edge_p (edge, true)
1680           && want_inline_small_function_p (edge, true)
1681           && can_inline_edge_by_limits_p (edge, true))
1682         edge->aux = heap->insert (edge_badness (edge, false), edge);
1683     }
1684 }
1685
1686 /* Remove EDGE from the fibheap.  */
1687
1688 static void
1689 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1690 {
1691   if (e->aux)
1692     {
1693       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1694       e->aux = NULL;
1695     }
1696 }
1697
1698 /* Return true if speculation of edge E seems useful.
1699    If ANTICIPATE_INLINING is true, be conservative and hope that E
1700    may get inlined.  */
1701
1702 bool
1703 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1704 {
1705   /* If we have already decided to inline the edge, it seems useful.  */
1706   if (!e->inline_failed)
1707     return true;
1708
1709   enum availability avail;
1710   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1711                                                                  e->caller);
1712   struct cgraph_edge *direct, *indirect;
1713   struct ipa_ref *ref;
1714
1715   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1716
1717   if (!e->maybe_hot_p ())
1718     return false;
1719
1720   /* See if IP optimizations found something potentially useful about the
1721      function.  For now we look only for CONST/PURE flags.  Almost everything
1722      else we propagate is useless.  */
1723   if (avail >= AVAIL_AVAILABLE)
1724     {
1725       int ecf_flags = flags_from_decl_or_type (target->decl);
1726       if (ecf_flags & ECF_CONST)
1727         {
1728           e->speculative_call_info (direct, indirect, ref);
1729           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1730             return true;
1731         }
1732       else if (ecf_flags & ECF_PURE)
1733         {
1734           e->speculative_call_info (direct, indirect, ref);
1735           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1736             return true;
1737         }
1738     }
1739   /* If we did not managed to inline the function nor redirect
1740      to an ipa-cp clone (that are seen by having local flag set),
1741      it is probably pointless to inline it unless hardware is missing
1742      indirect call predictor.  */
1743   if (!anticipate_inlining && !target->local.local)
1744     return false;
1745   /* For overwritable targets there is not much to do.  */
1746   if (!can_inline_edge_p (e, false)
1747       || !can_inline_edge_by_limits_p (e, false, true))
1748     return false;
1749   /* OK, speculation seems interesting.  */
1750   return true;
1751 }
1752
1753 /* We know that EDGE is not going to be inlined.
1754    See if we can remove speculation.  */
1755
1756 static void
1757 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1758 {
1759   if (edge->speculative && !speculation_useful_p (edge, false))
1760     {
1761       struct cgraph_node *node = edge->caller;
1762       struct cgraph_node *where = node->global.inlined_to
1763                                   ? node->global.inlined_to : node;
1764       auto_bitmap updated_nodes;
1765
1766       if (edge->count.ipa ().initialized_p ())
1767         spec_rem += edge->count.ipa ();
1768       edge->resolve_speculation ();
1769       reset_edge_caches (where);
1770       ipa_update_overall_fn_summary (where);
1771       update_caller_keys (edge_heap, where,
1772                           updated_nodes, NULL);
1773       update_callee_keys (edge_heap, where,
1774                           updated_nodes);
1775     }
1776 }
1777
1778 /* Return true if NODE should be accounted for overall size estimate.
1779    Skip all nodes optimized for size so we can measure the growth of hot
1780    part of program no matter of the padding.  */
1781
1782 bool
1783 inline_account_function_p (struct cgraph_node *node)
1784 {
1785    return (!DECL_EXTERNAL (node->decl)
1786            && !opt_for_fn (node->decl, optimize_size)
1787            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1788 }
1789
1790 /* Count number of callers of NODE and store it into DATA (that
1791    points to int.  Worker for cgraph_for_node_and_aliases.  */
1792
1793 static bool
1794 sum_callers (struct cgraph_node *node, void *data)
1795 {
1796   struct cgraph_edge *e;
1797   int *num_calls = (int *)data;
1798
1799   for (e = node->callers; e; e = e->next_caller)
1800     (*num_calls)++;
1801   return false;
1802 }
1803
1804 /* We only propagate across edges with non-interposable callee.  */
1805
1806 inline bool
1807 ignore_edge_p (struct cgraph_edge *e)
1808 {
1809   enum availability avail;
1810   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1811   return (avail <= AVAIL_INTERPOSABLE);
1812 }
1813
1814 /* We use greedy algorithm for inlining of small functions:
1815    All inline candidates are put into prioritized heap ordered in
1816    increasing badness.
1817
1818    The inlining of small functions is bounded by unit growth parameters.  */
1819
1820 static void
1821 inline_small_functions (void)
1822 {
1823   struct cgraph_node *node;
1824   struct cgraph_edge *edge;
1825   edge_heap_t edge_heap (sreal::min ());
1826   auto_bitmap updated_nodes;
1827   int min_size, max_size;
1828   auto_vec<cgraph_edge *> new_indirect_edges;
1829   int initial_size = 0;
1830   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1831   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1832   new_indirect_edges.create (8);
1833
1834   edge_removal_hook_holder
1835     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1836
1837   /* Compute overall unit size and other global parameters used by badness
1838      metrics.  */
1839
1840   max_count = profile_count::uninitialized ();
1841   ipa_reduced_postorder (order, true, ignore_edge_p);
1842   free (order);
1843
1844   FOR_EACH_DEFINED_FUNCTION (node)
1845     if (!node->global.inlined_to)
1846       {
1847         if (!node->alias && node->analyzed
1848             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1849             && opt_for_fn (node->decl, optimize))
1850           {
1851             class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1852             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1853
1854             /* Do not account external functions, they will be optimized out
1855                if not inlined.  Also only count the non-cold portion of program.  */
1856             if (inline_account_function_p (node))
1857               initial_size += info->size;
1858             info->growth = estimate_growth (node);
1859
1860             int num_calls = 0;
1861             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1862                                                true);
1863             if (num_calls == 1)
1864               info->single_caller = true;
1865             if (dfs && dfs->next_cycle)
1866               {
1867                 struct cgraph_node *n2;
1868                 int id = dfs->scc_no + 1;
1869                 for (n2 = node; n2;
1870                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1871                   if (opt_for_fn (n2->decl, optimize))
1872                     {
1873                       ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1874                       if (info2->scc_no)
1875                         break;
1876                       info2->scc_no = id;
1877                     }
1878               }
1879           }
1880
1881         for (edge = node->callers; edge; edge = edge->next_caller)
1882           max_count = max_count.max (edge->count.ipa ());
1883       }
1884   ipa_free_postorder_info ();
1885   edge_growth_cache
1886     = new call_summary<edge_growth_cache_entry *> (symtab, false);
1887
1888   if (dump_file)
1889     fprintf (dump_file,
1890              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1891              initial_size);
1892
1893   overall_size = initial_size;
1894   max_size = compute_max_insns (overall_size);
1895   min_size = overall_size;
1896
1897   /* Populate the heap with all edges we might inline.  */
1898
1899   FOR_EACH_DEFINED_FUNCTION (node)
1900     {
1901       bool update = false;
1902       struct cgraph_edge *next = NULL;
1903       bool has_speculative = false;
1904
1905       if (!opt_for_fn (node->decl, optimize))
1906         continue;
1907
1908       if (dump_file)
1909         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1910
1911       for (edge = node->callees; edge; edge = next)
1912         {
1913           next = edge->next_callee;
1914           if (edge->inline_failed
1915               && !edge->aux
1916               && can_inline_edge_p (edge, true)
1917               && want_inline_small_function_p (edge, true)
1918               && can_inline_edge_by_limits_p (edge, true)
1919               && edge->inline_failed)
1920             {
1921               gcc_assert (!edge->aux);
1922               update_edge_key (&edge_heap, edge);
1923             }
1924           if (edge->speculative)
1925             has_speculative = true;
1926         }
1927       if (has_speculative)
1928         for (edge = node->callees; edge; edge = next)
1929           if (edge->speculative && !speculation_useful_p (edge,
1930                                                           edge->aux != NULL))
1931             {
1932               edge->resolve_speculation ();
1933               update = true;
1934             }
1935       if (update)
1936         {
1937           struct cgraph_node *where = node->global.inlined_to
1938                                       ? node->global.inlined_to : node;
1939           ipa_update_overall_fn_summary (where);
1940           reset_edge_caches (where);
1941           update_caller_keys (&edge_heap, where,
1942                               updated_nodes, NULL);
1943           update_callee_keys (&edge_heap, where,
1944                               updated_nodes);
1945           bitmap_clear (updated_nodes);
1946         }
1947     }
1948
1949   gcc_assert (in_lto_p
1950               || !(max_count > 0)
1951               || (profile_info && flag_branch_probabilities));
1952
1953   while (!edge_heap.empty ())
1954     {
1955       int old_size = overall_size;
1956       struct cgraph_node *where, *callee;
1957       sreal badness = edge_heap.min_key ();
1958       sreal current_badness;
1959       int growth;
1960
1961       edge = edge_heap.extract_min ();
1962       gcc_assert (edge->aux);
1963       edge->aux = NULL;
1964       if (!edge->inline_failed || !edge->callee->analyzed)
1965         continue;
1966
1967 #if CHECKING_P
1968       /* Be sure that caches are maintained consistent.
1969          This check is affected by scaling roundoff errors when compiling for
1970          IPA this we skip it in that case.  */
1971       if (!edge->callee->count.ipa_p ()
1972           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1973         {
1974           sreal cached_badness = edge_badness (edge, false);
1975
1976           int old_size_est = estimate_edge_size (edge);
1977           sreal old_time_est = estimate_edge_time (edge);
1978           int old_hints_est = estimate_edge_hints (edge);
1979
1980           if (edge_growth_cache != NULL)
1981             edge_growth_cache->remove (edge);
1982           gcc_assert (old_size_est == estimate_edge_size (edge));
1983           gcc_assert (old_time_est == estimate_edge_time (edge));
1984           /* FIXME:
1985
1986              gcc_assert (old_hints_est == estimate_edge_hints (edge));
1987
1988              fails with profile feedback because some hints depends on
1989              maybe_hot_edge_p predicate and because callee gets inlined to other
1990              calls, the edge may become cold.
1991              This ought to be fixed by computing relative probabilities
1992              for given invocation but that will be better done once whole
1993              code is converted to sreals.  Disable for now and revert to "wrong"
1994              value so enable/disable checking paths agree.  */
1995           edge_growth_cache->get (edge)->hints = old_hints_est + 1;
1996
1997           /* When updating the edge costs, we only decrease badness in the keys.
1998              Increases of badness are handled lazilly; when we see key with out
1999              of date value on it, we re-insert it now.  */
2000           current_badness = edge_badness (edge, false);
2001           gcc_assert (cached_badness == current_badness);
2002           gcc_assert (current_badness >= badness);
2003         }
2004       else
2005         current_badness = edge_badness (edge, false);
2006 #else
2007       current_badness = edge_badness (edge, false);
2008 #endif
2009       if (current_badness != badness)
2010         {
2011           if (edge_heap.min () && current_badness > edge_heap.min_key ())
2012             {
2013               edge->aux = edge_heap.insert (current_badness, edge);
2014               continue;
2015             }
2016           else
2017             badness = current_badness;
2018         }
2019
2020       if (!can_inline_edge_p (edge, true)
2021           || !can_inline_edge_by_limits_p (edge, true))
2022         {
2023           resolve_noninline_speculation (&edge_heap, edge);
2024           continue;
2025         }
2026
2027       callee = edge->callee->ultimate_alias_target ();
2028       growth = estimate_edge_growth (edge);
2029       if (dump_file)
2030         {
2031           fprintf (dump_file,
2032                    "\nConsidering %s with %i size\n",
2033                    callee->dump_name (),
2034                    ipa_fn_summaries->get (callee)->size);
2035           fprintf (dump_file,
2036                    " to be inlined into %s in %s:%i\n"
2037                    " Estimated badness is %f, frequency %.2f.\n",
2038                    edge->caller->dump_name (),
2039                    edge->call_stmt
2040                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
2041                                                         edge->call_stmt))
2042                        > BUILTINS_LOCATION)
2043                    ? gimple_filename ((const gimple *) edge->call_stmt)
2044                    : "unknown",
2045                    edge->call_stmt
2046                    ? gimple_lineno ((const gimple *) edge->call_stmt)
2047                    : -1,
2048                    badness.to_double (),
2049                    edge->sreal_frequency ().to_double ());
2050           if (edge->count.ipa ().initialized_p ())
2051             {
2052               fprintf (dump_file, " Called ");
2053               edge->count.ipa ().dump (dump_file);
2054               fprintf (dump_file, " times\n");
2055             }
2056           if (dump_flags & TDF_DETAILS)
2057             edge_badness (edge, true);
2058         }
2059
2060       if (overall_size + growth > max_size
2061           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2062         {
2063           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2064           report_inline_failed_reason (edge);
2065           resolve_noninline_speculation (&edge_heap, edge);
2066           continue;
2067         }
2068
2069       if (!want_inline_small_function_p (edge, true))
2070         {
2071           resolve_noninline_speculation (&edge_heap, edge);
2072           continue;
2073         }
2074
2075       /* Heuristics for inlining small functions work poorly for
2076          recursive calls where we do effects similar to loop unrolling.
2077          When inlining such edge seems profitable, leave decision on
2078          specific inliner.  */
2079       if (edge->recursive_p ())
2080         {
2081           where = edge->caller;
2082           if (where->global.inlined_to)
2083             where = where->global.inlined_to;
2084           if (!recursive_inlining (edge,
2085                                    opt_for_fn (edge->caller->decl,
2086                                                flag_indirect_inlining)
2087                                    ? &new_indirect_edges : NULL))
2088             {
2089               edge->inline_failed = CIF_RECURSIVE_INLINING;
2090               resolve_noninline_speculation (&edge_heap, edge);
2091               continue;
2092             }
2093           reset_edge_caches (where);
2094           /* Recursive inliner inlines all recursive calls of the function
2095              at once. Consequently we need to update all callee keys.  */
2096           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2097             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2098           update_callee_keys (&edge_heap, where, updated_nodes);
2099           bitmap_clear (updated_nodes);
2100         }
2101       else
2102         {
2103           struct cgraph_node *outer_node = NULL;
2104           int depth = 0;
2105
2106           /* Consider the case where self recursive function A is inlined
2107              into B.  This is desired optimization in some cases, since it
2108              leads to effect similar of loop peeling and we might completely
2109              optimize out the recursive call.  However we must be extra
2110              selective.  */
2111
2112           where = edge->caller;
2113           while (where->global.inlined_to)
2114             {
2115               if (where->decl == callee->decl)
2116                 outer_node = where, depth++;
2117               where = where->callers->caller;
2118             }
2119           if (outer_node
2120               && !want_inline_self_recursive_call_p (edge, outer_node,
2121                                                      true, depth))
2122             {
2123               edge->inline_failed
2124                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2125                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2126               resolve_noninline_speculation (&edge_heap, edge);
2127               continue;
2128             }
2129           else if (depth && dump_file)
2130             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2131
2132           gcc_checking_assert (!callee->global.inlined_to);
2133           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2134           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2135
2136           reset_edge_caches (edge->callee);
2137
2138           update_callee_keys (&edge_heap, where, updated_nodes);
2139         }
2140       where = edge->caller;
2141       if (where->global.inlined_to)
2142         where = where->global.inlined_to;
2143
2144       /* Our profitability metric can depend on local properties
2145          such as number of inlinable calls and size of the function body.
2146          After inlining these properties might change for the function we
2147          inlined into (since it's body size changed) and for the functions
2148          called by function we inlined (since number of it inlinable callers
2149          might change).  */
2150       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2151       /* Offline copy count has possibly changed, recompute if profile is
2152          available.  */
2153       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2154       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2155         update_callee_keys (&edge_heap, n, updated_nodes);
2156       bitmap_clear (updated_nodes);
2157
2158       if (dump_enabled_p ())
2159         {
2160           ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller);
2161
2162           /* dump_printf can't handle %+i.  */
2163           char buf_net_change[100];
2164           snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2165                     overall_size - old_size);
2166
2167           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2168                            " Inlined %C into %C which now has time %f and "
2169                            "size %i, net change of %s.\n",
2170                            edge->callee, edge->caller,
2171                            s->time.to_double (), s->size, buf_net_change);
2172         }
2173       if (min_size > overall_size)
2174         {
2175           min_size = overall_size;
2176           max_size = compute_max_insns (min_size);
2177
2178           if (dump_file)
2179             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2180         }
2181     }
2182
2183   free_growth_caches ();
2184   if (dump_enabled_p ())
2185     dump_printf (MSG_NOTE,
2186                  "Unit growth for small function inlining: %i->%i (%i%%)\n",
2187                  initial_size, overall_size,
2188                  initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2189   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2190 }
2191
/* Flatten NODE: inline the calls made by NODE and, recursively, the calls
   made by the bodies inlined into it.  Performed both during early inlining
   (EARLY true) and at IPA inlining time.  When UPDATE is true the overall
   function summary of NODE, or of the function NODE is inlined into, is
   recomputed at the end.  The AUX field of the nodes being processed is
   used as a cycle marker.  */

static void
flatten_function (struct cgraph_node *node, bool early, bool update)
{
  struct cgraph_edge *e;

  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);

  /* Mark NODE as being processed so cycles back to it are detected.  */
  node->aux = (void *) node;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = e->callee->ultimate_alias_target ();

      /* We've hit cycle?  It is time to give up.  */
      if (callee->aux)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining %C into %C to avoid cycle.\n",
                             callee, e->caller);
          /* Do not overwrite a failure reason classified as final.  */
          if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
            e->inline_failed = CIF_RECURSIVE_INLINING;
          continue;
        }

      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
        {
          flatten_function (callee, early, false);
          continue;
        }

      /* Flatten attribute needs to be processed during late inlining. For
         extra code quality we however do flattening during early optimization,
         too.
         NOTE(review): in the late case the edge is skipped only when both
         can_inline_edge_p and can_inline_edge_by_limits_p fail -- confirm
         that "&&" rather than "||" is intended.  */
      if (!early
          ? !can_inline_edge_p (e, true)
            && !can_inline_edge_by_limits_p (e, true)
          : !can_early_inline_edge_p (e))
        continue;

      if (e->recursive_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: recursive call.\n");
          continue;
        }

      /* Caller and callee must agree on whether they are in SSA form.  */
      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "Not inlining: SSA form does not match.\n");
          continue;
        }

      /* Inline the edge and flatten the inline clone.  Avoid
         recursing through the original node if the node was cloned.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
                         " Inlining %C into %C.\n",
                         callee, e->caller);
      orig_callee = callee;
      inline_call (e, true, NULL, NULL, false);
      /* If E now points to a different node than before inlining, keep the
         original marked while the new callee is flattened so calls back to
         the original count as cycles.  */
      if (e->callee != orig_callee)
        orig_callee->aux = (void *) node;
      flatten_function (e->callee, early, false);
      if (e->callee != orig_callee)
        orig_callee->aux = NULL;
    }

  /* Done with NODE; clear the cycle marker.  */
  node->aux = NULL;
  if (update)
    ipa_update_overall_fn_summary (node->global.inlined_to
                                   ? node->global.inlined_to : node);
}
2276
2277 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2278    DATA points to number of calls originally found so we avoid infinite
2279    recursion.  */
2280
2281 static bool
2282 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2283                          hash_set<cgraph_node *> *callers)
2284 {
2285   int *num_calls = (int *)data;
2286   bool callee_removed = false;
2287
2288   while (node->callers && !node->global.inlined_to)
2289     {
2290       struct cgraph_node *caller = node->callers->caller;
2291
2292       if (!can_inline_edge_p (node->callers, true)
2293           || !can_inline_edge_by_limits_p (node->callers, true)
2294           || node->callers->recursive_p ())
2295         {
2296           if (dump_file)
2297             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2298           *num_calls = 0;
2299           return false;
2300         }
2301
2302       if (dump_file)
2303         {
2304           cgraph_node *ultimate = node->ultimate_alias_target ();
2305           fprintf (dump_file,
2306                    "\nInlining %s size %i.\n",
2307                    ultimate->name (),
2308                    ipa_fn_summaries->get (ultimate)->size);
2309           fprintf (dump_file,
2310                    " Called once from %s %i insns.\n",
2311                    node->callers->caller->name (),
2312                    ipa_fn_summaries->get (node->callers->caller)->size);
2313         }
2314
2315       /* Remember which callers we inlined to, delaying updating the
2316          overall summary.  */
2317       callers->add (node->callers->caller);
2318       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2319       if (dump_file)
2320         fprintf (dump_file,
2321                  " Inlined into %s which now has %i size\n",
2322                  caller->name (),
2323                  ipa_fn_summaries->get (caller)->size);
2324       if (!(*num_calls)--)
2325         {
2326           if (dump_file)
2327             fprintf (dump_file, "New calls found; giving up.\n");
2328           return callee_removed;
2329         }
2330       if (callee_removed)
2331         return true;
2332     }
2333   return false;
2334 }
2335
2336 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2337    update.  */
2338
2339 static bool
2340 inline_to_all_callers (struct cgraph_node *node, void *data)
2341 {
2342   hash_set<cgraph_node *> callers;
2343   bool res = inline_to_all_callers_1 (node, data, &callers);
2344   /* Perform the delayed update of the overall summary of all callers
2345      processed.  This avoids quadratic behavior in the cases where
2346      we have a lot of calls to the same function.  */
2347   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2348        i != callers.end (); ++i)
2349     ipa_update_overall_fn_summary (*i);
2350   return res;
2351 }
2352
2353 /* Output overall time estimate.  */
2354 static void
2355 dump_overall_stats (void)
2356 {
2357   sreal sum_weighted = 0, sum = 0;
2358   struct cgraph_node *node;
2359
2360   FOR_EACH_DEFINED_FUNCTION (node)
2361     if (!node->global.inlined_to
2362         && !node->alias)
2363       {
2364         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2365         if (s != NULL)
2366           {
2367           sum += s->time;
2368           if (node->count.ipa ().initialized_p ())
2369             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2370           }
2371       }
2372   fprintf (dump_file, "Overall time estimate: "
2373            "%f weighted by profile: "
2374            "%f\n", sum.to_double (), sum_weighted.to_double ());
2375 }
2376
2377 /* Output some useful stats about inlining.  */
2378
2379 static void
2380 dump_inline_stats (void)
2381 {
2382   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2383   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2384   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2385   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2386   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2387   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2388   int64_t reason[CIF_N_REASONS][2];
2389   sreal reason_freq[CIF_N_REASONS];
2390   int i;
2391   struct cgraph_node *node;
2392
2393   memset (reason, 0, sizeof (reason));
2394   for (i=0; i < CIF_N_REASONS; i++)
2395     reason_freq[i] = 0;
2396   FOR_EACH_DEFINED_FUNCTION (node)
2397   {
2398     struct cgraph_edge *e;
2399     for (e = node->callees; e; e = e->next_callee)
2400       {
2401         if (e->inline_failed)
2402           {
2403             if (e->count.ipa ().initialized_p ())
2404               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2405             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2406             reason[(int) e->inline_failed][1] ++;
2407             if (DECL_VIRTUAL_P (e->callee->decl)
2408                 && e->count.ipa ().initialized_p ())
2409               {
2410                 if (e->indirect_inlining_edge)
2411                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2412                 else
2413                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2414               }
2415             else if (e->count.ipa ().initialized_p ())
2416               {
2417                 if (e->indirect_inlining_edge)
2418                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2419                 else
2420                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2421               }
2422           }
2423         else if (e->count.ipa ().initialized_p ())
2424           {
2425             if (e->speculative)
2426               {
2427                 if (DECL_VIRTUAL_P (e->callee->decl))
2428                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2429                 else
2430                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2431               }
2432             else if (DECL_VIRTUAL_P (e->callee->decl))
2433               {
2434                 if (e->indirect_inlining_edge)
2435                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2436                 else
2437                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2438               }
2439             else
2440               {
2441                 if (e->indirect_inlining_edge)
2442                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2443                 else
2444                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2445               }
2446           }
2447       }
2448     for (e = node->indirect_calls; e; e = e->next_callee)
2449       if (e->indirect_info->polymorphic
2450           & e->count.ipa ().initialized_p ())
2451         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2452       else if (e->count.ipa ().initialized_p ())
2453         indirect_cnt += e->count.ipa ().to_gcov_type ();
2454   }
2455   if (max_count.initialized_p ())
2456     {
2457       fprintf (dump_file,
2458                "Inlined %" PRId64 " + speculative "
2459                "%" PRId64 " + speculative polymorphic "
2460                "%" PRId64 " + previously indirect "
2461                "%" PRId64 " + virtual "
2462                "%" PRId64 " + virtual and previously indirect "
2463                "%" PRId64 "\n" "Not inlined "
2464                "%" PRId64 " + previously indirect "
2465                "%" PRId64 " + virtual "
2466                "%" PRId64 " + virtual and previously indirect "
2467                "%" PRId64 " + stil indirect "
2468                "%" PRId64 " + still indirect polymorphic "
2469                "%" PRId64 "\n", inlined_cnt,
2470                inlined_speculative, inlined_speculative_ply,
2471                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2472                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2473                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2474       fprintf (dump_file, "Removed speculations ");
2475       spec_rem.dump (dump_file);
2476       fprintf (dump_file, "\n");
2477     }
2478   dump_overall_stats ();
2479   fprintf (dump_file, "\nWhy inlining failed?\n");
2480   for (i = 0; i < CIF_N_REASONS; i++)
2481     if (reason[i][1])
2482       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2483                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2484                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2485 }
2486
2487 /* Called when node is removed.  */
2488
2489 static void
2490 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2491 {
2492   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2493     return;
2494
2495   hash_set<struct cgraph_node *> *removed
2496     = (hash_set<struct cgraph_node *> *) data;
2497   removed->add (node);
2498 }
2499
2500 /* Decide on the inlining.  We do so in the topological order to avoid
2501    expenses on updating data structures.  */
2502
2503 static unsigned int
2504 ipa_inline (void)
2505 {
2506   struct cgraph_node *node;
2507   int nnodes;
2508   struct cgraph_node **order;
2509   int i, j;
2510   int cold;
2511   bool remove_functions = false;
2512
2513   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2514
2515   if (dump_file)
2516     ipa_dump_fn_summaries (dump_file);
2517
2518   nnodes = ipa_reverse_postorder (order);
2519   spec_rem = profile_count::zero ();
2520
2521   FOR_EACH_FUNCTION (node)
2522     {
2523       node->aux = 0;
2524
2525       /* Recompute the default reasons for inlining because they may have
2526          changed during merging.  */
2527       if (in_lto_p)
2528         {
2529           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2530             {
2531               gcc_assert (e->inline_failed);
2532               initialize_inline_failed (e);
2533             }
2534           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2535             initialize_inline_failed (e);
2536         }
2537     }
2538
2539   if (dump_file)
2540     fprintf (dump_file, "\nFlattening functions:\n");
2541
2542   /* First shrink order array, so that it only contains nodes with
2543      flatten attribute.  */
2544   for (i = nnodes - 1, j = i; i >= 0; i--)
2545     {
2546       node = order[i];
2547       if (lookup_attribute ("flatten",
2548                             DECL_ATTRIBUTES (node->decl)) != NULL)
2549         order[j--] = order[i];
2550     }
2551
2552   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2553      nodes with flatten attribute.  If there is more than one such
2554      node, we need to register a node removal hook, as flatten_function
2555      could remove other nodes with flatten attribute.  See PR82801.  */
2556   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2557   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2558   if (j < nnodes - 2)
2559     {
2560       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2561       node_removal_hook_holder
2562         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2563                                            flatten_removed_nodes);
2564     }
2565
2566   /* In the first pass handle functions to be flattened.  Do this with
2567      a priority so none of our later choices will make this impossible.  */
2568   for (i = nnodes - 1; i > j; i--)
2569     {
2570       node = order[i];
2571       if (flatten_removed_nodes
2572           && flatten_removed_nodes->contains (node))
2573         continue;
2574
2575       /* Handle nodes to be flattened.
2576          Ideally when processing callees we stop inlining at the
2577          entry of cycles, possibly cloning that entry point and
2578          try to flatten itself turning it into a self-recursive
2579          function.  */
2580       if (dump_file)
2581         fprintf (dump_file, "Flattening %s\n", node->name ());
2582       flatten_function (node, false, true);
2583     }
2584
2585   if (j < nnodes - 2)
2586     {
2587       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2588       delete flatten_removed_nodes;
2589     }
2590   free (order);
2591
2592   if (dump_file)
2593     dump_overall_stats ();
2594
2595   inline_small_functions ();
2596
2597   gcc_assert (symtab->state == IPA_SSA);
2598   symtab->state = IPA_SSA_AFTER_INLINING;
2599   /* Do first after-inlining removal.  We want to remove all "stale" extern
2600      inline functions and virtual functions so we really know what is called
2601      once.  */
2602   symtab->remove_unreachable_nodes (dump_file);
2603
2604   /* Inline functions with a property that after inlining into all callers the
2605      code size will shrink because the out-of-line copy is eliminated.
2606      We do this regardless on the callee size as long as function growth limits
2607      are met.  */
2608   if (dump_file)
2609     fprintf (dump_file,
2610              "\nDeciding on functions to be inlined into all callers and "
2611              "removing useless speculations:\n");
2612
2613   /* Inlining one function called once has good chance of preventing
2614      inlining other function into the same callee.  Ideally we should
2615      work in priority order, but probably inlining hot functions first
2616      is good cut without the extra pain of maintaining the queue.
2617
2618      ??? this is not really fitting the bill perfectly: inlining function
2619      into callee often leads to better optimization of callee due to
2620      increased context for optimization.
2621      For example if main() function calls a function that outputs help
2622      and then function that does the main optmization, we should inline
2623      the second with priority even if both calls are cold by themselves.
2624
2625      We probably want to implement new predicate replacing our use of
2626      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2627      to be hot.  */
2628   for (cold = 0; cold <= 1; cold ++)
2629     {
2630       FOR_EACH_DEFINED_FUNCTION (node)
2631         {
2632           struct cgraph_edge *edge, *next;
2633           bool update=false;
2634
2635           if (!opt_for_fn (node->decl, optimize)
2636               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2637             continue;
2638
2639           for (edge = node->callees; edge; edge = next)
2640             {
2641               next = edge->next_callee;
2642               if (edge->speculative && !speculation_useful_p (edge, false))
2643                 {
2644                   if (edge->count.ipa ().initialized_p ())
2645                     spec_rem += edge->count.ipa ();
2646                   edge->resolve_speculation ();
2647                   update = true;
2648                   remove_functions = true;
2649                 }
2650             }
2651           if (update)
2652             {
2653               struct cgraph_node *where = node->global.inlined_to
2654                                           ? node->global.inlined_to : node;
2655               reset_edge_caches (where);
2656               ipa_update_overall_fn_summary (where);
2657             }
2658           if (want_inline_function_to_all_callers_p (node, cold))
2659             {
2660               int num_calls = 0;
2661               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2662                                                  true);
2663               while (node->call_for_symbol_and_aliases
2664                        (inline_to_all_callers, &num_calls, true))
2665                 ;
2666               remove_functions = true;
2667             }
2668         }
2669     }
2670
2671   /* Free ipa-prop structures if they are no longer needed.  */
2672   ipa_free_all_structures_after_iinln ();
2673
2674   if (dump_enabled_p ())
2675     dump_printf (MSG_NOTE,
2676                  "\nInlined %i calls, eliminated %i functions\n\n",
2677                  ncalls_inlined, nfunctions_inlined);
2678   if (dump_file)
2679     dump_inline_stats ();
2680
2681   if (dump_file)
2682     ipa_dump_fn_summaries (dump_file);
2683   return remove_functions ? TODO_remove_functions : 0;
2684 }
2685
2686 /* Inline always-inline function calls in NODE.  */
2687
2688 static bool
2689 inline_always_inline_functions (struct cgraph_node *node)
2690 {
2691   struct cgraph_edge *e;
2692   bool inlined = false;
2693
2694   for (e = node->callees; e; e = e->next_callee)
2695     {
2696       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2697       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2698         continue;
2699
2700       if (e->recursive_p ())
2701         {
2702           if (dump_enabled_p ())
2703             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2704                              "  Not inlining recursive call to %C.\n",
2705                              e->callee);
2706           e->inline_failed = CIF_RECURSIVE_INLINING;
2707           continue;
2708         }
2709
2710       if (!can_early_inline_edge_p (e))
2711         {
2712           /* Set inlined to true if the callee is marked "always_inline" but
2713              is not inlinable.  This will allow flagging an error later in
2714              expand_call_inline in tree-inline.c.  */
2715           if (lookup_attribute ("always_inline",
2716                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2717             inlined = true;
2718           continue;
2719         }
2720
2721       if (dump_enabled_p ())
2722         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2723                          "  Inlining %C into %C (always_inline).\n",
2724                          e->callee, e->caller);
2725       inline_call (e, true, NULL, NULL, false);
2726       inlined = true;
2727     }
2728   if (inlined)
2729     ipa_update_overall_fn_summary (node);
2730
2731   return inlined;
2732 }
2733
2734 /* Decide on the inlining.  We do so in the topological order to avoid
2735    expenses on updating data structures.  */
2736
2737 static bool
2738 early_inline_small_functions (struct cgraph_node *node)
2739 {
2740   struct cgraph_edge *e;
2741   bool inlined = false;
2742
2743   for (e = node->callees; e; e = e->next_callee)
2744     {
2745       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2746
2747       /* We can enounter not-yet-analyzed function during
2748          early inlining on callgraphs with strongly
2749          connected components.  */
2750       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2751       if (s == NULL || !s->inlinable || !e->inline_failed)
2752         continue;
2753
2754       /* Do not consider functions not declared inline.  */
2755       if (!DECL_DECLARED_INLINE_P (callee->decl)
2756           && !opt_for_fn (node->decl, flag_inline_small_functions)
2757           && !opt_for_fn (node->decl, flag_inline_functions))
2758         continue;
2759
2760       if (dump_enabled_p ())
2761         dump_printf_loc (MSG_NOTE, e->call_stmt,
2762                          "Considering inline candidate %C.\n",
2763                          callee);
2764
2765       if (!can_early_inline_edge_p (e))
2766         continue;
2767
2768       if (e->recursive_p ())
2769         {
2770           if (dump_enabled_p ())
2771             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2772                              "  Not inlining: recursive call.\n");
2773           continue;
2774         }
2775
2776       if (!want_early_inline_function_p (e))
2777         continue;
2778
2779       if (dump_enabled_p ())
2780         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2781                          " Inlining %C into %C.\n",
2782                          callee, e->caller);
2783       inline_call (e, true, NULL, NULL, false);
2784       inlined = true;
2785     }
2786
2787   if (inlined)
2788     ipa_update_overall_fn_summary (node);
2789
2790   return inlined;
2791 }
2792
2793 unsigned int
2794 early_inliner (function *fun)
2795 {
2796   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2797   struct cgraph_edge *edge;
2798   unsigned int todo = 0;
2799   int iterations = 0;
2800   bool inlined = false;
2801
2802   if (seen_error ())
2803     return 0;
2804
2805   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2806      happens when some pass decides to construct new function and
2807      cgraph_add_new_function calls lowering passes and early optimization on
2808      it.  This may confuse ourself when early inliner decide to inline call to
2809      function clone, because function clones don't have parameter list in
2810      ipa-prop matching their signature.  */
2811   if (ipa_node_params_sum)
2812     return 0;
2813
2814   if (flag_checking)
2815     node->verify ();
2816   node->remove_all_references ();
2817
2818   /* Even when not optimizing or not inlining inline always-inline
2819      functions.  */
2820   inlined = inline_always_inline_functions (node);
2821
2822   if (!optimize
2823       || flag_no_inline
2824       || !flag_early_inlining
2825       /* Never inline regular functions into always-inline functions
2826          during incremental inlining.  This sucks as functions calling
2827          always inline functions will get less optimized, but at the
2828          same time inlining of functions calling always inline
2829          function into an always inline function might introduce
2830          cycles of edges to be always inlined in the callgraph.
2831
2832          We might want to be smarter and just avoid this type of inlining.  */
2833       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2834           && lookup_attribute ("always_inline",
2835                                DECL_ATTRIBUTES (node->decl))))
2836     ;
2837   else if (lookup_attribute ("flatten",
2838                              DECL_ATTRIBUTES (node->decl)) != NULL)
2839     {
2840       /* When the function is marked to be flattened, recursively inline
2841          all calls in it.  */
2842       if (dump_enabled_p ())
2843         dump_printf (MSG_OPTIMIZED_LOCATIONS,
2844                      "Flattening %C\n", node);
2845       flatten_function (node, true, true);
2846       inlined = true;
2847     }
2848   else
2849     {
2850       /* If some always_inline functions was inlined, apply the changes.
2851          This way we will not account always inline into growth limits and
2852          moreover we will inline calls from always inlines that we skipped
2853          previously because of conditional above.  */
2854       if (inlined)
2855         {
2856           timevar_push (TV_INTEGRATION);
2857           todo |= optimize_inline_calls (current_function_decl);
2858           /* optimize_inline_calls call above might have introduced new
2859              statements that don't have inline parameters computed.  */
2860           for (edge = node->callees; edge; edge = edge->next_callee)
2861             {
2862               /* We can enounter not-yet-analyzed function during
2863                  early inlining on callgraphs with strongly
2864                  connected components.  */
2865               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2866               es->call_stmt_size
2867                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2868               es->call_stmt_time
2869                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2870             }
2871           ipa_update_overall_fn_summary (node);
2872           inlined = false;
2873           timevar_pop (TV_INTEGRATION);
2874         }
2875       /* We iterate incremental inlining to get trivial cases of indirect
2876          inlining.  */
2877       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2878              && early_inline_small_functions (node))
2879         {
2880           timevar_push (TV_INTEGRATION);
2881           todo |= optimize_inline_calls (current_function_decl);
2882
2883           /* Technically we ought to recompute inline parameters so the new
2884              iteration of early inliner works as expected.  We however have
2885              values approximately right and thus we only need to update edge
2886              info that might be cleared out for newly discovered edges.  */
2887           for (edge = node->callees; edge; edge = edge->next_callee)
2888             {
2889               /* We have no summary for new bound store calls yet.  */
2890               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2891               es->call_stmt_size
2892                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2893               es->call_stmt_time
2894                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2895
2896               if (edge->callee->decl
2897                   && !gimple_check_call_matching_types (
2898                       edge->call_stmt, edge->callee->decl, false))
2899                 {
2900                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2901                   edge->call_stmt_cannot_inline_p = true;
2902                 }
2903             }
2904           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2905             ipa_update_overall_fn_summary (node);
2906           timevar_pop (TV_INTEGRATION);
2907           iterations++;
2908           inlined = false;
2909         }
2910       if (dump_file)
2911         fprintf (dump_file, "Iterations: %i\n", iterations);
2912     }
2913
2914   if (inlined)
2915     {
2916       timevar_push (TV_INTEGRATION);
2917       todo |= optimize_inline_calls (current_function_decl);
2918       timevar_pop (TV_INTEGRATION);
2919     }
2920
2921   fun->always_inline_functions_inlined = true;
2922
2923   return todo;
2924 }
2925
2926 /* Do inlining of small functions.  Doing so early helps profiling and other
2927    passes to be somewhat more effective and avoids some code duplication in
2928    later real inlining pass for testcases with very many function calls.  */
2929
2930 namespace {
2931
2932 const pass_data pass_data_early_inline =
2933 {
2934   GIMPLE_PASS, /* type */
2935   "einline", /* name */
2936   OPTGROUP_INLINE, /* optinfo_flags */
2937   TV_EARLY_INLINING, /* tv_id */
2938   PROP_ssa, /* properties_required */
2939   0, /* properties_provided */
2940   0, /* properties_destroyed */
2941   0, /* todo_flags_start */
2942   0, /* todo_flags_finish */
2943 };
2944
2945 class pass_early_inline : public gimple_opt_pass
2946 {
2947 public:
2948   pass_early_inline (gcc::context *ctxt)
2949     : gimple_opt_pass (pass_data_early_inline, ctxt)
2950   {}
2951
2952   /* opt_pass methods: */
2953   virtual unsigned int execute (function *);
2954
2955 }; // class pass_early_inline
2956
2957 unsigned int
2958 pass_early_inline::execute (function *fun)
2959 {
2960   return early_inliner (fun);
2961 }
2962
2963 } // anon namespace
2964
2965 gimple_opt_pass *
2966 make_pass_early_inline (gcc::context *ctxt)
2967 {
2968   return new pass_early_inline (ctxt);
2969 }
2970
2971 namespace {
2972
2973 const pass_data pass_data_ipa_inline =
2974 {
2975   IPA_PASS, /* type */
2976   "inline", /* name */
2977   OPTGROUP_INLINE, /* optinfo_flags */
2978   TV_IPA_INLINING, /* tv_id */
2979   0, /* properties_required */
2980   0, /* properties_provided */
2981   0, /* properties_destroyed */
2982   0, /* todo_flags_start */
2983   ( TODO_dump_symtab ), /* todo_flags_finish */
2984 };
2985
2986 class pass_ipa_inline : public ipa_opt_pass_d
2987 {
2988 public:
2989   pass_ipa_inline (gcc::context *ctxt)
2990     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2991                       NULL, /* generate_summary */
2992                       NULL, /* write_summary */
2993                       NULL, /* read_summary */
2994                       NULL, /* write_optimization_summary */
2995                       NULL, /* read_optimization_summary */
2996                       NULL, /* stmt_fixup */
2997                       0, /* function_transform_todo_flags_start */
2998                       inline_transform, /* function_transform */
2999                       NULL) /* variable_transform */
3000   {}
3001
3002   /* opt_pass methods: */
3003   virtual unsigned int execute (function *) { return ipa_inline (); }
3004
3005 }; // class pass_ipa_inline
3006
3007 } // anon namespace
3008
3009 ipa_opt_pass_d *
3010 make_pass_ipa_inline (gcc::context *ctxt)
3011 {
3012   return new pass_ipa_inline (ctxt);
3013 }