gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23     The implementation of the inliner is organized as follows:
24
25 inlining heuristics limits
26
27     can_inline_edge_p allows checking that a particular inlining is
28     allowed by the limits specified by the user (allowed function growth
29     and so on).
30
31     Functions are inlined when it is obvious the result is profitable (such
32     as functions called once or when inlining reduces code size).
33     In addition to that, we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42     A simple local inlining pass that inlines callees into the current
43     function. This pass makes no use of whole-unit analysis and thus it can
44     make only very simple decisions based on local properties.
45
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49     callees of the function seen by the early inliner have already been
50     optimized, and the results of early inlining add a lot of optimization
51     opportunities for local optimization.
52
53     The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
56
57     The main strength of the pass is its ability to eliminate the
58     abstraction penalty in C++ code (via a combination of inlining and early
59     optimization) and thus improve the quality of analysis done by the real
60     IPA optimizers.
61
62     Because of the lack of whole-unit knowledge, the pass cannot really make
63     good code size/performance tradeoffs. It does, however, do very simple
64     speculative inlining, allowing code size to grow by
65     EARLY_INLINING_INSNS when the callee is a leaf function. In this case
66     the optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70     This is the real inliner, able to handle inlining with whole-program
71     knowledge. It performs the following steps:
72
73     1) inlining of small functions. This is implemented by a greedy
74     algorithm ordering all inlinable cgraph edges by their badness and
75     inlining them in this order as long as the inline limits allow doing so.
76
77     This heuristic is not very good at inlining recursive calls. Recursive
78     calls can be inlined with results similar to loop unrolling. To do so,
79     a special-purpose recursive inliner is executed on the function when a
80     recursive edge is met as a viable candidate.
81
82     2) Unreachable functions are removed from the callgraph. Inlining leads
83     to devirtualization and other modifications of the callgraph, so
84     functions may become unreachable during the process. Also functions
85     declared as extern inline or virtual functions are removed, since after
86     inlining we no longer need the offline bodies.
87
88     3) Functions called once and not exported from the unit are inlined.
89     This should almost always lead to a reduction of code size by
90     eliminating the need for an offline copy of the function. */
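/* Editorial illustration (not part of the original sources): point 3)
   above covers cases such as

     static int helper (int x) { return x * 2; }    // not exported
     int user (int x) { return helper (x) + 1; }    // the only call site

   where inlining helper into its single caller lets the offline copy of
   helper be removed, so the unit usually shrinks.  */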
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
133
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136    To allow inlining huge functions into a tiny wrapper, the limit
137 is always based on the bigger of the two functions considered.
138
139    For stack growth limits we always base the growth on the stack usage
140    of the caller. We want to prevent applications from segfaulting
141    on stack overflow when functions with huge stack frames get
142    inlined. */
143
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153
154   /* Look for the function e->caller is inlined into. While doing
155      so, work out the largest function body on the way. As
156      described above, we want to base our function growth
157      limits on that, not on the self size of the
158      outer function, nor on the self size of the inline code
159      we immediately inline into. This is the most relaxed
160      interpretation of the rule "do not grow large functions
161      too much in order to prevent the compiler from exploding". */
162 while (true)
163 {
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
173 }
174
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
180
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
189 {
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
192 }
193
194 if (!what_info->estimated_stack_size)
195 return true;
196
197 /* FIXME: Stack size limit often prevents inlining in Fortran programs
198      due to large I/O data structures used by the Fortran front-end.
199 We ought to ignore this limit when we know that the edge is executed
200 on every invocation of the caller (i.e. its call statement dominates
201 exit block). We do not track this information, yet. */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
205
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213          inline call, we can inline, too.
214          This bit overoptimistically assumes that we are good at stack
215          packing. */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218 {
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
221 }
222 return true;
223 }
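/* Editorial note (illustration only, not part of the original sources):
   as a worked example of the limits computed above, assume the largest
   self_size found on the inline chain is 3000 and --param
   large-function-growth is 100 (a plausible setting; the real default may
   differ).  LIMIT then becomes 3000 + 3000 * 100 / 100 = 6000, and the
   edge is rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT only when the
   estimated size after inlining is at least the callee's size and exceeds
   both --param large-function-insns and this limit.  */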
224
225 /* Dump info about why inlining has failed. */
226
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230 if (dump_enabled_p ())
231 {
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
240 {
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 }
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256 }
257 }
258
259 /* Decide whether sanitizer-related attributes allow inlining. */
260
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264 if (!caller || !callee)
265 return true;
266
267 /* Allow inlining always_inline functions into no_sanitize_address
268 functions. */
269 if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
270 && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
271 return true;
272
273 return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
274 == sanitize_flags_p (SANITIZE_ADDRESS, callee))
275 && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
276 == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
277 && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
278 == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
279 }
280
281 /* Used for flags where it is safe to inline when caller's value is
282    greater than callee's. */
283 #define check_maybe_up(flag) \
284 (opts_for_fn (caller->decl)->x_##flag \
285 != opts_for_fn (callee->decl)->x_##flag \
286 && (!always_inline \
287 || opts_for_fn (caller->decl)->x_##flag \
288 < opts_for_fn (callee->decl)->x_##flag))
289 /* Used for flags where it is safe to inline when caller's value is
290 smaller than callee's. */
291 #define check_maybe_down(flag) \
292 (opts_for_fn (caller->decl)->x_##flag \
293 != opts_for_fn (callee->decl)->x_##flag \
294 && (!always_inline \
295 || opts_for_fn (caller->decl)->x_##flag \
296 > opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where exact match is needed for correctness. */
298 #define check_match(flag) \
299 (opts_for_fn (caller->decl)->x_##flag \
300 != opts_for_fn (callee->decl)->x_##flag)
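/* Editorial illustration (not part of the original sources): for one
   concrete flag used below, check_maybe_up (flag_trapping_math) expands
   via token pasting to

     (opts_for_fn (caller->decl)->x_flag_trapping_math
      != opts_for_fn (callee->decl)->x_flag_trapping_math
      && (!always_inline
          || opts_for_fn (caller->decl)->x_flag_trapping_math
             < opts_for_fn (callee->decl)->x_flag_trapping_math))

   i.e. a mismatch blocks inlining of an always_inline callee only when
   the caller's value is smaller than the callee's.  */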
301
302 /* Decide if we can inline the edge and possibly update
303 inline_failed reason.
304 We check whether inlining is possible at all and whether
305 caller growth limits allow doing so.
306
307    If REPORT is true, output the reason to the dump file. */
308
309 static bool
310 can_inline_edge_p (struct cgraph_edge *e, bool report,
311 bool early = false)
312 {
313 gcc_checking_assert (e->inline_failed);
314
315 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
316 {
317 if (report)
318 report_inline_failed_reason (e);
319 return false;
320 }
321
322 bool inlinable = true;
323 enum availability avail;
324 cgraph_node *caller = (e->caller->inlined_to
325 ? e->caller->inlined_to : e->caller);
326 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
327
328 if (!callee->definition)
329 {
330 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
331 inlinable = false;
332 }
333 if (!early && (!opt_for_fn (callee->decl, optimize)
334 || !opt_for_fn (caller->decl, optimize)))
335 {
336 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
337 inlinable = false;
338 }
339 else if (callee->calls_comdat_local)
340 {
341 e->inline_failed = CIF_USES_COMDAT_LOCAL;
342 inlinable = false;
343 }
344 else if (avail <= AVAIL_INTERPOSABLE)
345 {
346 e->inline_failed = CIF_OVERWRITABLE;
347 inlinable = false;
348 }
349 /* All edges with call_stmt_cannot_inline_p should have inline_failed
350 initialized to one of FINAL_ERROR reasons. */
351 else if (e->call_stmt_cannot_inline_p)
352 gcc_unreachable ();
353 /* Don't inline if the functions have different EH personalities. */
354 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
355 && DECL_FUNCTION_PERSONALITY (callee->decl)
356 && (DECL_FUNCTION_PERSONALITY (caller->decl)
357 != DECL_FUNCTION_PERSONALITY (callee->decl)))
358 {
359 e->inline_failed = CIF_EH_PERSONALITY;
360 inlinable = false;
361 }
362 /* TM pure functions should not be inlined into non-TM_pure
363 functions. */
364 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
365 {
366 e->inline_failed = CIF_UNSPECIFIED;
367 inlinable = false;
368 }
369 /* Check compatibility of target optimization options. */
370 else if (!targetm.target_option.can_inline_p (caller->decl,
371 callee->decl))
372 {
373 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
374 inlinable = false;
375 }
376 else if (ipa_fn_summaries->get (callee) == NULL
377 || !ipa_fn_summaries->get (callee)->inlinable)
378 {
379 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
380 inlinable = false;
381 }
382 /* Don't inline a function with mismatched sanitization attributes. */
383 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
384 {
385 e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
386 inlinable = false;
387 }
388 if (!inlinable && report)
389 report_inline_failed_reason (e);
390 return inlinable;
391 }
392
393 /* Return inlining_insns_single limit for function N. If HINT is true
394 scale up the bound. */
395
396 static int
397 inline_insns_single (cgraph_node *n, bool hint)
398 {
399 if (hint)
400 return opt_for_fn (n->decl, param_max_inline_insns_single)
401 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
402 return opt_for_fn (n->decl, param_max_inline_insns_single);
403 }
404
405 /* Return inlining_insns_auto limit for function N. If HINT is true
406 scale up the bound. */
407
408 static int
409 inline_insns_auto (cgraph_node *n, bool hint)
410 {
411 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
412 if (hint)
413 return max_inline_insns_auto
414 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
415 return max_inline_insns_auto;
416 }
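/* Editorial illustration (not part of the original sources): with the
   hypothetical settings --param max-inline-insns-auto=30 and --param
   inline-heuristics-hint-percent=200 (the real defaults may differ),
   inline_insns_auto returns 30 when HINT is false and 30 * 200 / 100 = 60
   when HINT is true; inline_insns_single scales its limit the same way.  */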
417
418 /* Decide if we can inline the edge and possibly update
419 inline_failed reason.
420 We check whether inlining is possible at all and whether
421 caller growth limits allow doing so.
422
423    If REPORT is true, output the reason to the dump file.
424
425    If DISREGARD_LIMITS is true, ignore size limits. */
426
427 static bool
428 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
429 bool disregard_limits = false, bool early = false)
430 {
431 gcc_checking_assert (e->inline_failed);
432
433 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
434 {
435 if (report)
436 report_inline_failed_reason (e);
437 return false;
438 }
439
440 bool inlinable = true;
441 enum availability avail;
442 cgraph_node *caller = (e->caller->inlined_to
443 ? e->caller->inlined_to : e->caller);
444 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
445 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
446 tree callee_tree
447 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
448 /* Check if caller growth allows the inlining. */
449 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
450 && !disregard_limits
451 && !lookup_attribute ("flatten",
452 DECL_ATTRIBUTES (caller->decl))
453 && !caller_growth_limits (e))
454 inlinable = false;
455 else if (callee->externally_visible
456 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
457 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
458 {
459 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
460 inlinable = false;
461 }
462 /* Don't inline a function with a higher optimization level than the
463      caller. FIXME: this is really just the tip of the iceberg of handling
464      the optimization attribute. */
465 else if (caller_tree != callee_tree)
466 {
467 bool always_inline =
468 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
469 && lookup_attribute ("always_inline",
470 DECL_ATTRIBUTES (callee->decl)));
471 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
472 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
473
474 /* Until GCC 4.9 we did not check the semantics-altering flags
475 below and inlined across optimization boundaries.
476 Enabling checks below breaks several packages by refusing
477 to inline library always_inline functions. See PR65873.
478          Disable the check for early inlining for now until a better solution
479          is found. */
480 if (always_inline && early)
481 ;
482       /* There are some options that change IL semantics, which means
483          we cannot inline in these cases for correctness reasons.
484          Not even for always_inline declared functions. */
485 else if (check_match (flag_wrapv)
486 || check_match (flag_trapv)
487 || check_match (flag_pcc_struct_return)
488 || check_maybe_down (optimize_debug)
489                /* When caller or callee does FP math, be sure FP codegen flags are
490                   compatible. */
491 || ((caller_info->fp_expressions && callee_info->fp_expressions)
492 && (check_maybe_up (flag_rounding_math)
493 || check_maybe_up (flag_trapping_math)
494 || check_maybe_down (flag_unsafe_math_optimizations)
495 || check_maybe_down (flag_finite_math_only)
496 || check_maybe_up (flag_signaling_nans)
497 || check_maybe_down (flag_cx_limited_range)
498 || check_maybe_up (flag_signed_zeros)
499 || check_maybe_down (flag_associative_math)
500 || check_maybe_down (flag_reciprocal_math)
501 || check_maybe_down (flag_fp_int_builtin_inexact)
502 /* Strictly speaking only when the callee contains function
503 calls that may end up setting errno. */
504 || check_maybe_up (flag_errno_math)))
505                /* We do not want code compiled with exceptions to be
506 brought into a non-EH function unless we know that the callee
507 does not throw.
508 This is tracked by DECL_FUNCTION_PERSONALITY. */
509 || (check_maybe_up (flag_non_call_exceptions)
510 && DECL_FUNCTION_PERSONALITY (callee->decl))
511 || (check_maybe_up (flag_exceptions)
512 && DECL_FUNCTION_PERSONALITY (callee->decl))
513                /* When devirtualization is disabled for the callee, it is not safe
514                   to inline it, as we may have mangled the type info.
515 Allow early inlining of always inlines. */
516 || (!early && check_maybe_down (flag_devirtualize)))
517 {
518 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
519 inlinable = false;
520 }
521 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
522 else if (always_inline)
523 ;
524 /* When user added an attribute to the callee honor it. */
525 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
526 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
527 {
528 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
529 inlinable = false;
530 }
531       /* If an explicit optimize attribute is not used, the mismatch is caused
532          by different command line options used to build different units.
533          Do not care about COMDAT functions - those are intended to be
534          optimized with the optimization flags of the module they are used in.
535 Also do not care about mixing up size/speed optimization when
536 DECL_DISREGARD_INLINE_LIMITS is set. */
537 else if ((callee->merged_comdat
538 && !lookup_attribute ("optimize",
539 DECL_ATTRIBUTES (caller->decl)))
540 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
541 ;
542       /* If the mismatch is caused by merging two LTO units with different
543          optimization flags we want to be a bit nicer. However, never inline
544          if one of the functions is not optimized at all. */
545 else if (!opt_for_fn (callee->decl, optimize)
546 || !opt_for_fn (caller->decl, optimize))
547 {
548 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
549 inlinable = false;
550 }
551 /* If callee is optimized for size and caller is not, allow inlining if
552 code shrinks or we are in param_max_inline_insns_single limit and
553          callee is inline (and thus likely a unified comdat).
554 This will allow caller to run faster. */
555 else if (opt_for_fn (callee->decl, optimize_size)
556 > opt_for_fn (caller->decl, optimize_size))
557 {
558 int growth = estimate_edge_growth (e);
559 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
560 && (!DECL_DECLARED_INLINE_P (callee->decl)
561 && growth >= MAX (inline_insns_single (caller, false),
562 inline_insns_auto (caller, false))))
563 {
564 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
565 inlinable = false;
566 }
567 }
568 /* If callee is more aggressively optimized for performance than caller,
569 we generally want to inline only cheap (runtime wise) functions. */
570 else if (opt_for_fn (callee->decl, optimize_size)
571 < opt_for_fn (caller->decl, optimize_size)
572 || (opt_for_fn (callee->decl, optimize)
573 > opt_for_fn (caller->decl, optimize)))
574 {
575 if (estimate_edge_time (e)
576 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
577 {
578 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
579 inlinable = false;
580 }
581 }
582
583 }
584
585 if (!inlinable && report)
586 report_inline_failed_reason (e);
587 return inlinable;
588 }
589
590
591 /* Return true if the edge E is inlinable during early inlining. */
592
593 static bool
594 can_early_inline_edge_p (struct cgraph_edge *e)
595 {
596 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
597   /* The early inliner might get called at WPA stage when an IPA pass adds a
598      new function. In this case we cannot really do any early inlining
599      because function bodies are missing. */
600 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
601 return false;
602 if (!gimple_has_body_p (callee->decl))
603 {
604 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
605 return false;
606 }
607   /* In the early inliner some of the callees may not be in SSA form yet
608      (i.e. the callgraph is cyclic and we did not process
609      the callee in the early inliner yet). We don't have a CIF code for this
610      case; later we will re-do the decision in the real inliner. */
611 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
612 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
613 {
614 if (dump_enabled_p ())
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
616 " edge not inlinable: not in SSA form\n");
617 return false;
618 }
619 if (!can_inline_edge_p (e, true, true)
620 || !can_inline_edge_by_limits_p (e, true, false, true))
621 return false;
622 return true;
623 }
624
625
626 /* Return number of calls in N. Ignore cheap builtins. */
627
628 static int
629 num_calls (struct cgraph_node *n)
630 {
631 struct cgraph_edge *e;
632 int num = 0;
633
634 for (e = n->callees; e; e = e->next_callee)
635 if (!is_inexpensive_builtin (e->callee->decl))
636 num++;
637 return num;
638 }
639
640
641 /* Return true if we are interested in inlining the small function. */
642
643 static bool
644 want_early_inline_function_p (struct cgraph_edge *e)
645 {
646 bool want_inline = true;
647 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
648
649 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
650 ;
651   /* For AutoFDO, we need to make sure that before the profile summary, all
652      hot paths' IR looks exactly the same as in the profiled binary. As a
653      result, in the early inliner, we will disregard the size limit and
654      inline those callsites that are:
655 * inlined in the profiled binary, and
656 * the cloned callee has enough samples to be considered "hot". */
657 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
658 ;
659 else if (!DECL_DECLARED_INLINE_P (callee->decl)
660 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
661 {
662 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
663 report_inline_failed_reason (e);
664 want_inline = false;
665 }
666 else
667 {
668 /* First take care of very large functions. */
669 int min_growth = estimate_min_edge_growth (e), growth = 0;
670 int n;
671 int early_inlining_insns = param_early_inlining_insns;
672
673 if (min_growth > early_inlining_insns)
674 {
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
677 " will not early inline: %C->%C, "
678 "call is cold and code would grow "
679 "at least by %i\n",
680 e->caller, callee,
681 min_growth);
682 want_inline = false;
683 }
684 else
685 growth = estimate_edge_growth (e);
686
687
688 if (!want_inline || growth <= param_max_inline_insns_size)
689 ;
690 else if (!e->maybe_hot_p ())
691 {
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
694 " will not early inline: %C->%C, "
695 "call is cold and code would grow by %i\n",
696 e->caller, callee,
697 growth);
698 want_inline = false;
699 }
700 else if (growth > early_inlining_insns)
701 {
702 if (dump_enabled_p ())
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
704 " will not early inline: %C->%C, "
705 "growth %i exceeds --param early-inlining-insns\n",
706 e->caller, callee, growth);
707 want_inline = false;
708 }
709 else if ((n = num_calls (callee)) != 0
710 && growth * (n + 1) > early_inlining_insns)
711 {
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
714 " will not early inline: %C->%C, "
715 "growth %i exceeds --param early-inlining-insns "
716 "divided by number of calls\n",
717 e->caller, callee, growth);
718 want_inline = false;
719 }
720 }
721 return want_inline;
722 }
723
724 /* Compute time of the edge->caller + edge->callee execution when inlining
725 does not happen. */
726
727 inline sreal
728 compute_uninlined_call_time (struct cgraph_edge *edge,
729 sreal uninlined_call_time,
730 sreal freq)
731 {
732 cgraph_node *caller = (edge->caller->inlined_to
733 ? edge->caller->inlined_to
734 : edge->caller);
735
736 if (freq > 0)
737 uninlined_call_time *= freq;
738 else
739 uninlined_call_time = uninlined_call_time >> 11;
740
741 sreal caller_time = ipa_fn_summaries->get (caller)->time;
742 return uninlined_call_time + caller_time;
743 }
744
745 /* Same as compute_uninlined_call_time but compute the time when inlining
746 does happen. */
747
748 inline sreal
749 compute_inlined_call_time (struct cgraph_edge *edge,
750 sreal time,
751 sreal freq)
752 {
753 cgraph_node *caller = (edge->caller->inlined_to
754 ? edge->caller->inlined_to
755 : edge->caller);
756 sreal caller_time = ipa_fn_summaries->get (caller)->time;
757
758 if (freq > 0)
759 time *= freq;
760 else
761 time = time >> 11;
762
763 /* This calculation should match one in ipa-inline-analysis.c
764 (estimate_edge_size_and_time). */
765 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
766 time += caller_time;
767 if (time <= 0)
768 time = ((sreal) 1) >> 8;
769 gcc_checking_assert (time >= 0);
770 return time;
771 }
772
773 /* Determine the time saved by inlining EDGE of frequency FREQ,
774    where the callee's runtime without inlining is UNINLINED_TIME
775    and with inlining is INLINED_TIME. */
776
777 inline sreal
778 inlining_speedup (struct cgraph_edge *edge,
779 sreal freq,
780 sreal uninlined_time,
781 sreal inlined_time)
782 {
783 sreal speedup = uninlined_time - inlined_time;
784 /* Handling of call_time should match one in ipa-inline-fnsummary.c
785 (estimate_edge_size_and_time). */
786 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
787
788 if (freq > 0)
789 {
790 speedup = (speedup + call_time);
791 if (freq != 1)
792 speedup = speedup * freq;
793 }
794 else if (freq == 0)
795 speedup = speedup >> 11;
796 gcc_checking_assert (speedup >= 0);
797 return speedup;
798 }
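/* Editorial illustration (not part of the original sources): assuming a
   call executed with frequency 2 whose callee takes 50 time units when
   not inlined and 40 when inlined, and whose call statement itself costs
   4, inlining_speedup above returns (50 - 40 + 4) * 2 = 28.  */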
799
800 /* Return true if the speedup for inlining E is bigger than
801    param_inline_min_speedup. */
802
803 static bool
804 big_speedup_p (struct cgraph_edge *e)
805 {
806 sreal unspec_time;
807 sreal spec_time = estimate_edge_time (e, &unspec_time);
808 sreal freq = e->sreal_frequency ();
809 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
810 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
811 cgraph_node *caller = (e->caller->inlined_to
812 ? e->caller->inlined_to
813 : e->caller);
814 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
815
816 if ((time - inlined_time) * 100 > time * limit)
817 return true;
818 return false;
819 }
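/* Editorial illustration (not part of the original sources): the test
   above is relative.  With an uninlined call time of 200, an inlined time
   of 150 and a hypothetical --param inline-min-speedup of 15, we get
   (200 - 150) * 100 = 5000 > 200 * 15 = 3000, so the 25% saving counts as
   a big speedup.  */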
820
821 /* Return true if we are interested in inlining the small function.
822    When REPORT is true, report the reason to the dump file. */
823
824 static bool
825 want_inline_small_function_p (struct cgraph_edge *e, bool report)
826 {
827 bool want_inline = true;
828 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
829 cgraph_node *to = (e->caller->inlined_to
830 ? e->caller->inlined_to : e->caller);
831
832 /* Allow this function to be called before can_inline_edge_p,
833 since it's usually cheaper. */
834 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
835 want_inline = false;
836 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
837 ;
838 else if (!DECL_DECLARED_INLINE_P (callee->decl)
839 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
840 {
841 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
842 want_inline = false;
843 }
844   /* Do a fast and conservative check whether the function can be a good
845      inline candidate. */
846 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
847 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
848 && ipa_fn_summaries->get (callee)->min_size
849 - ipa_call_summaries->get (e)->call_stmt_size
850 > inline_insns_auto (e->caller, true))
851 {
852 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
853 want_inline = false;
854 }
855 else if ((DECL_DECLARED_INLINE_P (callee->decl)
856 || e->count.ipa ().nonzero_p ())
857 && ipa_fn_summaries->get (callee)->min_size
858 - ipa_call_summaries->get (e)->call_stmt_size
859 > inline_insns_single (e->caller, true))
860 {
861 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
862 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
863 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
864 want_inline = false;
865 }
866 else
867 {
868 int growth = estimate_edge_growth (e);
869 ipa_hints hints = estimate_edge_hints (e);
870 bool apply_hints = (hints & (INLINE_HINT_indirect_call
871 | INLINE_HINT_known_hot
872 | INLINE_HINT_loop_iterations
873 | INLINE_HINT_loop_stride));
874
875 if (growth <= opt_for_fn (to->decl,
876 param_max_inline_insns_size))
877 ;
878       /* Apply param_max_inline_insns_single limit. Do not do so when
879          hints suggest that inlining the given function is very profitable.
880          Avoid computing big_speedup_p when it is not necessary to change the
881          outcome of the decision. */
882 else if (DECL_DECLARED_INLINE_P (callee->decl)
883 && growth >= inline_insns_single (e->caller, apply_hints)
884 && (apply_hints
885 || growth >= inline_insns_single (e->caller, true)
886 || !big_speedup_p (e)))
887 {
888 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
889 want_inline = false;
890 }
891 else if (!DECL_DECLARED_INLINE_P (callee->decl)
892 && !opt_for_fn (e->caller->decl, flag_inline_functions)
893 && growth >= opt_for_fn (to->decl,
894 param_max_inline_insns_small))
895 {
896 /* growth_positive_p is expensive, always test it last. */
897 if (growth >= inline_insns_single (e->caller, false)
898 || growth_positive_p (callee, e, growth))
899 {
900 e->inline_failed = CIF_NOT_DECLARED_INLINED;
901 want_inline = false;
902 }
903 }
904 /* Apply param_max_inline_insns_auto limit for functions not declared
905 inline. Bypass the limit when speedup seems big. */
906 else if (!DECL_DECLARED_INLINE_P (callee->decl)
907 && growth >= inline_insns_auto (e->caller, apply_hints)
908 && (apply_hints
909 || growth >= inline_insns_auto (e->caller, true)
910 || !big_speedup_p (e)))
911 {
912 /* growth_positive_p is expensive, always test it last. */
913 if (growth >= inline_insns_single (e->caller, false)
914 || growth_positive_p (callee, e, growth))
915 {
916 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
917 want_inline = false;
918 }
919 }
920       /* If the call is cold, do not inline when the function body would grow. */
921 else if (!e->maybe_hot_p ()
922 && (growth >= inline_insns_single (e->caller, false)
923 || growth_positive_p (callee, e, growth)))
924 {
925 e->inline_failed = CIF_UNLIKELY_CALL;
926 want_inline = false;
927 }
928 }
929 if (!want_inline && report)
930 report_inline_failed_reason (e);
931 return want_inline;
932 }
933
934 /* EDGE is a self-recursive edge.
935    We handle two cases - when function A is inlined into itself
936    or when function A is being inlined into another inlined copy of function
937    A within function B.
938
939    In the first case OUTER_NODE points to the toplevel copy of A, while
940    in the second case OUTER_NODE points to the outermost copy of A in B.
941
942    In both cases we want to be extra selective since
943    inlining the call will just cause new recursive calls to appear. */
944
945 static bool
946 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
947 struct cgraph_node *outer_node,
948 bool peeling,
949 int depth)
950 {
951 char const *reason = NULL;
952 bool want_inline = true;
953 sreal caller_freq = 1;
954 int max_depth = opt_for_fn (outer_node->decl,
955 param_max_inline_recursive_depth_auto);
956
957 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
958 max_depth = opt_for_fn (outer_node->decl,
959 param_max_inline_recursive_depth);
960
961 if (!edge->maybe_hot_p ())
962 {
963 reason = "recursive call is cold";
964 want_inline = false;
965 }
966 else if (depth > max_depth)
967 {
968 reason = "--param max-inline-recursive-depth exceeded.";
969 want_inline = false;
970 }
971 else if (outer_node->inlined_to
972 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
973 {
974 reason = "caller frequency is 0";
975 want_inline = false;
976 }
977
978 if (!want_inline)
979 ;
980   /* Inlining of a self-recursive function into a copy of itself within another
981      function is a transformation similar to loop peeling.
982
983      Peeling is profitable if we can inline enough copies to make the probability
984      of an actual call to the self-recursive function very small. Be sure that
985      the probability of recursion is small.
986
987      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
988      This way the expected number of recursions is at most max_depth. */
989 else if (peeling)
990 {
991 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
992 int i;
993 for (i = 1; i < depth; i++)
994 max_prob = max_prob * max_prob;
995 if (edge->sreal_frequency () >= max_prob * caller_freq)
996 {
997 reason = "frequency of recursive call is too large";
998 want_inline = false;
999 }
1000 }
1001   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
1002      recursion depth is large. We reduce function call overhead and increase the
1003      chances that things fit in the hardware return predictor.
1004
1005      Recursive inlining might however increase the cost of stack frame setup,
1006      actually slowing down functions whose recursion tree is wide rather than
1007      deep.
1008
1009      Deciding reliably on when to do recursive inlining without profile feedback
1010      is tricky. For now we disable recursive inlining when the probability of self
1011      recursion is low.
1012
1013      Recursive inlining of a self-recursive call within a loop also results in
1014      large loop depths that generally optimize badly. We may want to throttle
1015      down inlining in those cases. In particular this seems to happen in one
1016      of the libstdc++ rb tree methods. */
1017 else
1018 {
1019 if (edge->sreal_frequency () * 100
1020 <= caller_freq
1021 * opt_for_fn (outer_node->decl,
1022 param_min_inline_recursive_probability))
1023 {
1024 reason = "frequency of recursive call is too small";
1025 want_inline = false;
1026 }
1027 }
1028 if (!want_inline && dump_enabled_p ())
1029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1030 " not inlining recursively: %s\n", reason);
1031 return want_inline;
1032 }
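/* Editorial illustration (not part of the original sources): in the
   peeling case above, with a hypothetical max_depth of 8 the base
   probability is 1 - 1/8 = 0.875; at DEPTH 3 the loop squares it twice,
   giving a threshold of 0.875^4, roughly 0.59.  The recursive edge keeps
   being peeled only while its frequency stays below that fraction of the
   caller frequency.  */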
1033
1034 /* Return true when NODE has an uninlinable caller;
1035    set HAS_HOT_CALL if it has a hot call.
1036    Worker for cgraph_for_node_and_aliases. */
1037
1038 static bool
1039 check_callers (struct cgraph_node *node, void *has_hot_call)
1040 {
1041 struct cgraph_edge *e;
1042 for (e = node->callers; e; e = e->next_caller)
1043 {
1044 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1045 || !opt_for_fn (e->caller->decl, optimize))
1046 return true;
1047 if (!can_inline_edge_p (e, true))
1048 return true;
1049 if (e->recursive_p ())
1050 return true;
1051 if (!can_inline_edge_by_limits_p (e, true))
1052 return true;
1053 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1054 *(bool *)has_hot_call = true;
1055 }
1056 return false;
1057 }
1058
1059 /* If NODE has a caller, return true. */
1060
1061 static bool
1062 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1063 {
1064 if (node->callers)
1065 return true;
1066 return false;
1067 }
1068
1069 /* Decide if inlining NODE would reduce unit size by eliminating
1070    the offline copy of the function.
1071    When COLD is true, cold calls are considered, too. */
1072
1073 static bool
1074 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1075 {
1076 bool has_hot_call = false;
1077
1078   /* Aliases get inlined along with the function they alias. */
1079 if (node->alias)
1080 return false;
1081 /* Already inlined? */
1082 if (node->inlined_to)
1083 return false;
1084 /* Does it have callers? */
1085 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1086 return false;
1087 /* Inlining into all callers would increase size? */
1088 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1089 return false;
1090 /* All inlines must be possible. */
1091 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1092 true))
1093 return false;
1094 if (!cold && !has_hot_call)
1095 return false;
1096 return true;
1097 }
1098
1099 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1100 in estimate_edge_badness. */
1101
1102 static bool
1103 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1104 {
1105 return size < (DECL_DECLARED_INLINE_P (where->decl)
1106 ? inline_insns_single (where, false)
1107 : inline_insns_auto (where, false));
1108 }
1109
1110 /* A cost model driving the inlining heuristics in a way so that the edges with
1111    the smallest badness are inlined first. After each inlining is performed
1112    the costs of all caller edges of the affected nodes are recomputed so that the
1113    metrics may accurately depend on values such as the number of inlinable callers
1114    of the function or the function body size. */
1115
1116 static sreal
1117 edge_badness (struct cgraph_edge *edge, bool dump)
1118 {
1119 sreal badness;
1120 int growth;
1121 sreal edge_time, unspec_edge_time;
1122 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1123 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1124 ipa_hints hints;
1125 cgraph_node *caller = (edge->caller->inlined_to
1126 ? edge->caller->inlined_to
1127 : edge->caller);
1128
1129 growth = estimate_edge_growth (edge);
1130 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1131 hints = estimate_edge_hints (edge);
1132 gcc_checking_assert (edge_time >= 0);
1133 /* Check that inlined time is better, but tolerate some roundoff issues.
1134 FIXME: When callee profile drops to 0 we account calls more. This
1135 should be fixed by never doing that. */
1136 gcc_checking_assert ((edge_time * 100
1137 - callee_info->time * 101).to_int () <= 0
1138 || callee->count.ipa ().initialized_p ());
1139 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1140
1141 if (dump)
1142 {
1143 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1144 edge->caller->dump_name (),
1145 edge->callee->dump_name ());
1146 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1147 growth,
1148 edge_time.to_double (),
1149 unspec_edge_time.to_double ());
1150 ipa_dump_hints (dump_file, hints);
1151 if (big_speedup_p (edge))
1152 fprintf (dump_file, " big_speedup");
1153 fprintf (dump_file, "\n");
1154 }
1155
1156   /* Always prefer inlining that saves code size. */
1157 if (growth <= 0)
1158 {
1159 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1160 if (dump)
1161 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1162 growth);
1163 }
1164 /* Inlining into EXTERNAL functions is not going to change anything unless
1165 they are themselves inlined. */
1166 else if (DECL_EXTERNAL (caller->decl))
1167 {
1168 if (dump)
1169 fprintf (dump_file, " max: function is external\n");
1170 return sreal::max ();
1171 }
1172   /* When the profile is available, compute badness as:
1173
1174 time_saved * caller_count
1175 goodness = -------------------------------------------------
1176 growth_of_caller * overall_growth * combined_size
1177
1178 badness = - goodness
1179
1180      Again use a negative value to make calls with a profile appear hotter
1181      than calls without.
1182 */
1183 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1184 || caller->count.ipa ().nonzero_p ())
1185 {
1186 sreal numerator, denominator;
1187 int overall_growth;
1188 sreal freq = edge->sreal_frequency ();
1189
1190 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1191 if (numerator <= 0)
1192 numerator = ((sreal) 1 >> 8);
1193 if (caller->count.ipa ().nonzero_p ())
1194 numerator *= caller->count.ipa ().to_gcov_type ();
1195 else if (caller->count.ipa ().initialized_p ())
1196 numerator = numerator >> 11;
1197 denominator = growth;
1198
1199 overall_growth = callee_info->growth;
1200
1201 /* Look for inliner wrappers of the form:
1202
1203 inline_caller ()
1204 {
1205 do_fast_job...
1206 if (need_more_work)
1207 noninline_callee ();
1208 }
1209 Without penalizing this case, we usually inline noninline_callee
1210 into the inline_caller because overall_growth is small preventing
1211 further inlining of inline_caller.
1212
1213 Penalize only callgraph edges to functions with small overall
1214 growth ...
1215 */
1216 if (growth > overall_growth
1217 /* ... and having only one caller which is not inlined ... */
1218 && callee_info->single_caller
1219 && !edge->caller->inlined_to
1220 /* ... and edges executed only conditionally ... */
1221 && freq < 1
1222 /* ... consider case where callee is not inline but caller is ... */
1223 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1224 && DECL_DECLARED_INLINE_P (caller->decl))
1225 /* ... or when early optimizers decided to split and edge
1226 frequency still indicates splitting is a win ... */
1227 || (callee->split_part && !caller->split_part
1228 && freq * 100
1229 < opt_for_fn (caller->decl,
1230 param_partial_inlining_entry_probability)
1231 /* ... and do not overwrite user specified hints. */
1232 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1233 || DECL_DECLARED_INLINE_P (caller->decl)))))
1234 {
1235 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1236 int caller_growth = caller_info->growth;
1237
1238           /* Only apply the penalty when the caller looks like an inline candidate,
1239              and it is not called once. */
1240 if (!caller_info->single_caller && overall_growth < caller_growth
1241 && caller_info->inlinable
1242 && wrapper_heuristics_may_apply
1243 (caller, ipa_size_summaries->get (caller)->size))
1244 {
1245 if (dump)
1246 fprintf (dump_file,
1247 " Wrapper penalty. Increasing growth %i to %i\n",
1248 overall_growth, caller_growth);
1249 overall_growth = caller_growth;
1250 }
1251 }
1252 if (overall_growth > 0)
1253 {
1254           /* Strongly prefer functions with few callers that can be inlined
1255              fully. The square root here leads to smaller binaries on average.
1256              Watch however for extreme cases and return to a linear function
1257              when growth is large. */
1258 if (overall_growth < 256)
1259 overall_growth *= overall_growth;
1260 else
1261 overall_growth += 256 * 256 - 256;
1262 denominator *= overall_growth;
1263 }
1264 denominator *= ipa_size_summaries->get (caller)->size + growth;
1265
1266 badness = - numerator / denominator;
1267
1268 if (dump)
1269 {
1270 fprintf (dump_file,
1271 " %f: guessed profile. frequency %f, count %" PRId64
1272 " caller count %" PRId64
1273 " time saved %f"
1274 " overall growth %i (current) %i (original)"
1275 " %i (compensated)\n",
1276 badness.to_double (),
1277 freq.to_double (),
1278 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1279 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1280 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1281 estimate_growth (callee),
1282 callee_info->growth, overall_growth);
1283 }
1284 }
1285   /* When a function-local profile is not available or it does not give
1286      useful information (i.e. the frequency is zero), base the cost on the
1287      loop nest and overall size growth, so we optimize for the overall number
1288      of functions fully inlined in the program. */
1289 else
1290 {
1291 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1292 badness = growth;
1293
1294 /* Decrease badness if call is nested. */
1295 if (badness > 0)
1296 badness = badness >> nest;
1297 else
1298 badness = badness << nest;
1299 if (dump)
1300 fprintf (dump_file, " %f: no profile. nest %i\n",
1301 badness.to_double (), nest);
1302 }
1303 gcc_checking_assert (badness != 0);
1304
1305 if (edge->recursive_p ())
1306 badness = badness.shift (badness > 0 ? 4 : -4);
1307 if ((hints & (INLINE_HINT_indirect_call
1308 | INLINE_HINT_loop_iterations
1309 | INLINE_HINT_loop_stride))
1310 || callee_info->growth <= 0)
1311 badness = badness.shift (badness > 0 ? -2 : 2);
1312 if (hints & (INLINE_HINT_same_scc))
1313 badness = badness.shift (badness > 0 ? 3 : -3);
1314 else if (hints & (INLINE_HINT_in_scc))
1315 badness = badness.shift (badness > 0 ? 2 : -2);
1316 else if (hints & (INLINE_HINT_cross_module))
1317 badness = badness.shift (badness > 0 ? 1 : -1);
1318 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1319 badness = badness.shift (badness > 0 ? -4 : 4);
1320 else if ((hints & INLINE_HINT_declared_inline))
1321 badness = badness.shift (badness > 0 ? -3 : 3);
1322 if (dump)
1323 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1324 return badness;
1325 }
1326
1327 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1328 static inline void
1329 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1330 {
1331 sreal badness = edge_badness (edge, false);
1332 if (edge->aux)
1333 {
1334 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1335 gcc_checking_assert (n->get_data () == edge);
1336
1337       /* fibonacci_heap::replace_key does busy updating of the
1338          heap that is unnecessarily expensive.
1339          We do lazy increases: after extracting the minimum, if the key
1340          turns out to be out of date, it is re-inserted into the heap
1341          with the correct value. */
1342 if (badness < n->get_key ())
1343 {
1344 if (dump_file && (dump_flags & TDF_DETAILS))
1345 {
1346 fprintf (dump_file,
1347 " decreasing badness %s -> %s, %f to %f\n",
1348 edge->caller->dump_name (),
1349 edge->callee->dump_name (),
1350 n->get_key ().to_double (),
1351 badness.to_double ());
1352 }
1353 heap->decrease_key (n, badness);
1354 }
1355 }
1356 else
1357 {
1358 if (dump_file && (dump_flags & TDF_DETAILS))
1359 {
1360 fprintf (dump_file,
1361 " enqueuing call %s -> %s, badness %f\n",
1362 edge->caller->dump_name (),
1363 edge->callee->dump_name (),
1364 badness.to_double ());
1365 }
1366 edge->aux = heap->insert (badness, edge);
1367 }
1368 }
1369
1370
1371 /* NODE was inlined.
1372    All caller edges need to be reset because
1373    size estimates change. Similarly, callees need to be reset
1374    because better context may be known. */
1375
1376 static void
1377 reset_edge_caches (struct cgraph_node *node)
1378 {
1379 struct cgraph_edge *edge;
1380 struct cgraph_edge *e = node->callees;
1381 struct cgraph_node *where = node;
1382 struct ipa_ref *ref;
1383
1384 if (where->inlined_to)
1385 where = where->inlined_to;
1386
1387 reset_node_cache (where);
1388
1389 if (edge_growth_cache != NULL)
1390 for (edge = where->callers; edge; edge = edge->next_caller)
1391 if (edge->inline_failed)
1392 edge_growth_cache->remove (edge);
1393
1394 FOR_EACH_ALIAS (where, ref)
1395 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1396
1397 if (!e)
1398 return;
1399
1400 while (true)
1401 if (!e->inline_failed && e->callee->callees)
1402 e = e->callee->callees;
1403 else
1404 {
1405 if (edge_growth_cache != NULL && e->inline_failed)
1406 edge_growth_cache->remove (e);
1407 if (e->next_callee)
1408 e = e->next_callee;
1409 else
1410 {
1411 do
1412 {
1413 if (e->caller == node)
1414 return;
1415 e = e->caller->callers;
1416 }
1417 while (!e->next_callee);
1418 e = e->next_callee;
1419 }
1420 }
1421 }
1422
1423 /* Recompute HEAP nodes for each caller of NODE.
1424    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1425    When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1426    it is inlinable. Otherwise check all edges. */
1427
1428 static void
1429 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1430 bitmap updated_nodes,
1431 struct cgraph_edge *check_inlinablity_for)
1432 {
1433 struct cgraph_edge *edge;
1434 struct ipa_ref *ref;
1435
1436 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1437 || node->inlined_to)
1438 return;
1439 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1440 return;
1441
1442 FOR_EACH_ALIAS (node, ref)
1443 {
1444 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1445 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1446 }
1447
1448 for (edge = node->callers; edge; edge = edge->next_caller)
1449 if (edge->inline_failed)
1450 {
1451 if (!check_inlinablity_for
1452 || check_inlinablity_for == edge)
1453 {
1454 if (can_inline_edge_p (edge, false)
1455 && want_inline_small_function_p (edge, false)
1456 && can_inline_edge_by_limits_p (edge, false))
1457 update_edge_key (heap, edge);
1458 else if (edge->aux)
1459 {
1460 report_inline_failed_reason (edge);
1461 heap->delete_node ((edge_heap_node_t *) edge->aux);
1462 edge->aux = NULL;
1463 }
1464 }
1465 else if (edge->aux)
1466 update_edge_key (heap, edge);
1467 }
1468 }
1469
1470 /* Recompute HEAP nodes for each uninlined call in NODE.
1471    If UPDATE_SINCE is non-NULL, check if edges called within that function
1472    are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1473    where all edges have a new context).
1474
1475    This is used when we know that edge badnesses are only going to increase
1476    (we introduced a new call site) and thus all we need is to insert newly
1477    created edges into the heap. */
1478
1479 static void
1480 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1481 struct cgraph_node *update_since,
1482 bitmap updated_nodes)
1483 {
1484 struct cgraph_edge *e = node->callees;
1485 bool check_inlinability = update_since == node;
1486
1487 if (!e)
1488 return;
1489 while (true)
1490 if (!e->inline_failed && e->callee->callees)
1491 {
1492 if (e->callee == update_since)
1493 check_inlinability = true;
1494 e = e->callee->callees;
1495 }
1496 else
1497 {
1498 enum availability avail;
1499 struct cgraph_node *callee;
1500 if (!check_inlinability)
1501 {
1502 if (e->aux
1503 && !bitmap_bit_p (updated_nodes,
1504 e->callee->ultimate_alias_target
1505 (&avail, e->caller)->get_uid ()))
1506 update_edge_key (heap, e);
1507 }
1508       /* We do not reset the callee growth cache here. Since we added a new call,
1509          growth should have just increased and consequently the badness metric
1510          doesn't need updating. */
1511 else if (e->inline_failed
1512 && (callee = e->callee->ultimate_alias_target (&avail,
1513 e->caller))
1514 && avail >= AVAIL_AVAILABLE
1515 && ipa_fn_summaries->get (callee) != NULL
1516 && ipa_fn_summaries->get (callee)->inlinable
1517 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1518 {
1519 if (can_inline_edge_p (e, false)
1520 && want_inline_small_function_p (e, false)
1521 && can_inline_edge_by_limits_p (e, false))
1522 {
1523 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1524 gcc_checking_assert (check_inlinability || e->aux);
1525 update_edge_key (heap, e);
1526 }
1527 else if (e->aux)
1528 {
1529 report_inline_failed_reason (e);
1530 heap->delete_node ((edge_heap_node_t *) e->aux);
1531 e->aux = NULL;
1532 }
1533 }
1534       /* In case we redirected to an unreachable node, we only need to remove the
1535          fibheap entry. */
1536 else if (e->aux)
1537 {
1538 heap->delete_node ((edge_heap_node_t *) e->aux);
1539 e->aux = NULL;
1540 }
1541 if (e->next_callee)
1542 e = e->next_callee;
1543 else
1544 {
1545 do
1546 {
1547 if (e->caller == node)
1548 return;
1549 if (e->caller == update_since)
1550 check_inlinability = false;
1551 e = e->caller->callers;
1552 }
1553 while (!e->next_callee);
1554 e = e->next_callee;
1555 }
1556 }
1557 }
1558
1559 /* Enqueue all recursive calls from NODE into the priority queue, ordered by
1560    how much we want to recursively inline the call. */
1561
1562 static void
1563 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1564 edge_heap_t *heap)
1565 {
1566 struct cgraph_edge *e;
1567 enum availability avail;
1568
1569 for (e = where->callees; e; e = e->next_callee)
1570 if (e->callee == node
1571 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1572 && avail > AVAIL_INTERPOSABLE))
1573 heap->insert (-e->sreal_frequency (), e);
1574 for (e = where->callees; e; e = e->next_callee)
1575 if (!e->inline_failed)
1576 lookup_recursive_calls (node, e->callee, heap);
1577 }
1578
1579 /* Decide on recursive inlining: in case the function has recursive calls,
1580    inline until the body size reaches the given limit. If any new indirect edges
1581    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1582    is NULL. */
1583
1584 static bool
1585 recursive_inlining (struct cgraph_edge *edge,
1586 vec<cgraph_edge *> *new_edges)
1587 {
1588 cgraph_node *to = (edge->caller->inlined_to
1589 ? edge->caller->inlined_to : edge->caller);
1590 int limit = opt_for_fn (to->decl,
1591 param_max_inline_insns_recursive_auto);
1592 edge_heap_t heap (sreal::min ());
1593 struct cgraph_node *node;
1594 struct cgraph_edge *e;
1595 struct cgraph_node *master_clone = NULL, *next;
1596 int depth = 0;
1597 int n = 0;
1598
1599 node = edge->caller;
1600 if (node->inlined_to)
1601 node = node->inlined_to;
1602
1603 if (DECL_DECLARED_INLINE_P (node->decl))
1604 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1605
1606   /* Make sure that the function is small enough to be considered for inlining. */
1607 if (estimate_size_after_inlining (node, edge) >= limit)
1608 return false;
1609 lookup_recursive_calls (node, node, &heap);
1610 if (heap.empty ())
1611 return false;
1612
1613 if (dump_file)
1614 fprintf (dump_file,
1615 " Performing recursive inlining on %s\n", node->dump_name ());
1616
1617   /* Do the inlining and update the list of recursive calls during the process. */
1618 while (!heap.empty ())
1619 {
1620 struct cgraph_edge *curr = heap.extract_min ();
1621 struct cgraph_node *cnode, *dest = curr->callee;
1622
1623 if (!can_inline_edge_p (curr, true)
1624 || !can_inline_edge_by_limits_p (curr, true))
1625 continue;
1626
1627       /* MASTER_CLONE is produced in the case we already started modifying
1628          the function. Be sure to redirect the edge to the original body before
1629          estimating growth, otherwise we would be seeing growth after inlining
1630          the already modified body. */
1631 if (master_clone)
1632 {
1633 curr->redirect_callee (master_clone);
1634 if (edge_growth_cache != NULL)
1635 edge_growth_cache->remove (curr);
1636 }
1637
1638 if (estimate_size_after_inlining (node, curr) > limit)
1639 {
1640 curr->redirect_callee (dest);
1641 if (edge_growth_cache != NULL)
1642 edge_growth_cache->remove (curr);
1643 break;
1644 }
1645
1646 depth = 1;
1647 for (cnode = curr->caller;
1648 cnode->inlined_to; cnode = cnode->callers->caller)
1649 if (node->decl
1650 == curr->callee->ultimate_alias_target ()->decl)
1651 depth++;
1652
1653 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1654 {
1655 curr->redirect_callee (dest);
1656 if (edge_growth_cache != NULL)
1657 edge_growth_cache->remove (curr);
1658 continue;
1659 }
1660
1661 if (dump_file)
1662 {
1663 fprintf (dump_file,
1664 " Inlining call of depth %i", depth);
1665 if (node->count.nonzero_p () && curr->count.initialized_p ())
1666 {
1667 fprintf (dump_file, " called approx. %.2f times per call",
1668 (double)curr->count.to_gcov_type ()
1669 / node->count.to_gcov_type ());
1670 }
1671 fprintf (dump_file, "\n");
1672 }
1673 if (!master_clone)
1674 {
1675 /* We need original clone to copy around. */
1676 master_clone = node->create_clone (node->decl, node->count,
1677 false, vNULL, true, NULL, NULL);
1678 for (e = master_clone->callees; e; e = e->next_callee)
1679 if (!e->inline_failed)
1680 clone_inlined_nodes (e, true, false, NULL);
1681 curr->redirect_callee (master_clone);
1682 if (edge_growth_cache != NULL)
1683 edge_growth_cache->remove (curr);
1684 }
1685
1686 inline_call (curr, false, new_edges, &overall_size, true);
1687 reset_node_cache (node);
1688 lookup_recursive_calls (node, curr->callee, &heap);
1689 n++;
1690 }
1691
1692 if (!heap.empty () && dump_file)
1693 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1694
1695 if (!master_clone)
1696 return false;
1697
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1700 "\n Inlined %i times, "
1701 "body grown from size %i to %i, time %f to %f\n", n,
1702 ipa_size_summaries->get (master_clone)->size,
1703 ipa_size_summaries->get (node)->size,
1704 ipa_fn_summaries->get (master_clone)->time.to_double (),
1705 ipa_fn_summaries->get (node)->time.to_double ());
1706
1707 /* Remove the master clone we used for inlining. We rely on the fact that
1708 clones inlined into the master clone get queued just before the master
1709 clone, so we don't need recursion. */
1710 for (node = symtab->first_function (); node != master_clone;
1711 node = next)
1712 {
1713 next = symtab->next_function (node);
1714 if (node->inlined_to == master_clone)
1715 node->remove ();
1716 }
1717 master_clone->remove ();
1718 return true;
1719 }
1720
1721
1722 /* Given the whole-compilation-unit size estimate INSNS, compute how large
1723 we can allow the unit to grow. */
1724
1725 static int64_t
1726 compute_max_insns (cgraph_node *node, int insns)
1727 {
1728 int max_insns = insns;
1729 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1730 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1731
1732 return ((int64_t) max_insns
1733 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1734 }
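/* For illustration (hypothetical numbers): with INSNS = 10000,
   param_large_unit_insns <= 10000 and param_inline_unit_growth = 30,
   the returned cap is 10000 * (100 + 30) / 100 = 13000 instructions.  */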
1735
1736
1737 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1738
1739 static void
1740 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1741 {
1742 while (new_edges.length () > 0)
1743 {
1744 struct cgraph_edge *edge = new_edges.pop ();
1745
1746 gcc_assert (!edge->aux);
1747 gcc_assert (edge->callee);
1748 if (edge->inline_failed
1749 && can_inline_edge_p (edge, true)
1750 && want_inline_small_function_p (edge, true)
1751 && can_inline_edge_by_limits_p (edge, true))
1752 edge->aux = heap->insert (edge_badness (edge, false), edge);
1753 }
1754 }
1755
1756 /* Remove EDGE from the fibheap. */
1757
1758 static void
1759 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1760 {
1761 if (e->aux)
1762 {
1763 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1764 e->aux = NULL;
1765 }
1766 }
1767
1768 /* Return true if speculation of edge E seems useful.
1769 If ANTICIPATE_INLINING is true, be conservative and hope that E
1770 may get inlined. */
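/* Roughly: speculation is kept when the edge was already inlined; otherwise
   it must be hot, and it is kept if IPA analysis found a CONST/PURE target
   not already known on the indirect edge, or if (the target is local or
   inlining is still anticipated) and the edge passes the usual
   inlinability checks.  */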
1771
1772 bool
1773 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1774 {
1775 /* If we have already decided to inline the edge, it seems useful. */
1776 if (!e->inline_failed)
1777 return true;
1778
1779 enum availability avail;
1780 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1781 e->caller);
1782
1783 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1784
1785 if (!e->maybe_hot_p ())
1786 return false;
1787
1788 /* See if IP optimizations found something potentially useful about the
1789 function. For now we look only for CONST/PURE flags. Almost everything
1790 else we propagate is useless. */
1791 if (avail >= AVAIL_AVAILABLE)
1792 {
1793 int ecf_flags = flags_from_decl_or_type (target->decl);
1794 if (ecf_flags & ECF_CONST)
1795 {
1796 if (!(e->speculative_call_indirect_edge ()->indirect_info
1797 ->ecf_flags & ECF_CONST))
1798 return true;
1799 }
1800 else if (ecf_flags & ECF_PURE)
1801 {
1802 if (!(e->speculative_call_indirect_edge ()->indirect_info
1803 ->ecf_flags & ECF_PURE))
1804 return true;
1805 }
1806 }
1807 /* If we did not manage to inline the function nor redirect
1808 to an ipa-cp clone (those are recognized by having the local flag set),
1809 it is probably pointless to keep the speculation unless the hardware is
1810 missing an indirect call predictor. */
1811 if (!anticipate_inlining && !target->local)
1812 return false;
1813 /* For overwritable targets there is not much to do. */
1814 if (!can_inline_edge_p (e, false)
1815 || !can_inline_edge_by_limits_p (e, false, true))
1816 return false;
1817 /* OK, speculation seems interesting. */
1818 return true;
1819 }
1820
1821 /* We know that EDGE is not going to be inlined.
1822 See if we can remove speculation. */
1823
1824 static void
1825 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1826 {
1827 if (edge->speculative && !speculation_useful_p (edge, false))
1828 {
1829 struct cgraph_node *node = edge->caller;
1830 struct cgraph_node *where = node->inlined_to
1831 ? node->inlined_to : node;
1832 auto_bitmap updated_nodes;
1833
1834 if (edge->count.ipa ().initialized_p ())
1835 spec_rem += edge->count.ipa ();
1836 cgraph_edge::resolve_speculation (edge);
1837 reset_edge_caches (where);
1838 ipa_update_overall_fn_summary (where);
1839 update_caller_keys (edge_heap, where,
1840 updated_nodes, NULL);
1841 update_callee_keys (edge_heap, where, NULL,
1842 updated_nodes);
1843 }
1844 }
1845
1846 /* Return true if NODE should be accounted in the overall size estimate.
1847 Skip all nodes optimized for size so we can measure the growth of the hot
1848 part of the program regardless of the padding. */
1849
1850 bool
1851 inline_account_function_p (struct cgraph_node *node)
1852 {
1853 return (!DECL_EXTERNAL (node->decl)
1854 && !opt_for_fn (node->decl, optimize_size)
1855 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1856 }
1857
1858 /* Count the number of callers of NODE and store it into DATA (which
1859 points to an int). Worker for cgraph_for_node_and_aliases. */
1860
1861 static bool
1862 sum_callers (struct cgraph_node *node, void *data)
1863 {
1864 struct cgraph_edge *e;
1865 int *num_calls = (int *)data;
1866
1867 for (e = node->callers; e; e = e->next_caller)
1868 (*num_calls)++;
1869 return false;
1870 }
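/* Typical use, as done in inline_small_functions and ipa_inline below:

     int num_calls = 0;
     node->call_for_symbol_and_aliases (sum_callers, &num_calls, true);  */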
1871
1872 /* We only propagate across edges with non-interposable callee. */
1873
1874 inline bool
1875 ignore_edge_p (struct cgraph_edge *e)
1876 {
1877 enum availability avail;
1878 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1879 return (avail <= AVAIL_INTERPOSABLE);
1880 }
1881
1882 /* We use a greedy algorithm for inlining of small functions:
1883 all inline candidates are put into a prioritized heap ordered by
1884 increasing badness.
1885
1886 The inlining of small functions is bounded by unit growth parameters. */
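/* In outline: compute the initial unit size, populate the heap with every
   inlinable edge keyed by badness, then repeatedly extract the edge with
   the smallest badness, re-validate its key and the growth limits, inline
   it (delegating recursive edges to recursive_inlining) and update the
   keys of the affected callers and callees.  */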
1887
1888 static void
1889 inline_small_functions (void)
1890 {
1891 struct cgraph_node *node;
1892 struct cgraph_edge *edge;
1893 edge_heap_t edge_heap (sreal::min ());
1894 auto_bitmap updated_nodes;
1895 int min_size;
1896 auto_vec<cgraph_edge *> new_indirect_edges;
1897 int initial_size = 0;
1898 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1899 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1900 new_indirect_edges.create (8);
1901
1902 edge_removal_hook_holder
1903 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1904
1905 /* Compute overall unit size and other global parameters used by badness
1906 metrics. */
1907
1908 max_count = profile_count::uninitialized ();
1909 ipa_reduced_postorder (order, true, ignore_edge_p);
1910 free (order);
1911
1912 FOR_EACH_DEFINED_FUNCTION (node)
1913 if (!node->inlined_to)
1914 {
1915 if (!node->alias && node->analyzed
1916 && (node->has_gimple_body_p () || node->thunk.thunk_p)
1917 && opt_for_fn (node->decl, optimize))
1918 {
1919 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1920 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1921
1922 /* Do not account external functions, they will be optimized out
1923 if not inlined. Also only count the non-cold portion of program. */
1924 if (inline_account_function_p (node))
1925 initial_size += ipa_size_summaries->get (node)->size;
1926 info->growth = estimate_growth (node);
1927
1928 int num_calls = 0;
1929 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1930 true);
1931 if (num_calls == 1)
1932 info->single_caller = true;
1933 if (dfs && dfs->next_cycle)
1934 {
1935 struct cgraph_node *n2;
1936 int id = dfs->scc_no + 1;
1937 for (n2 = node; n2;
1938 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1939 if (opt_for_fn (n2->decl, optimize))
1940 {
1941 ipa_fn_summary *info2 = ipa_fn_summaries->get
1942 (n2->inlined_to ? n2->inlined_to : n2);
1943 if (info2->scc_no)
1944 break;
1945 info2->scc_no = id;
1946 }
1947 }
1948 }
1949
1950 for (edge = node->callers; edge; edge = edge->next_caller)
1951 max_count = max_count.max (edge->count.ipa ());
1952 }
1953 ipa_free_postorder_info ();
1954 initialize_growth_caches ();
1955
1956 if (dump_file)
1957 fprintf (dump_file,
1958 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1959 initial_size);
1960
1961 overall_size = initial_size;
1962 min_size = overall_size;
1963
1964 /* Populate the heap with all edges we might inline. */
1965
1966 FOR_EACH_DEFINED_FUNCTION (node)
1967 {
1968 bool update = false;
1969 struct cgraph_edge *next = NULL;
1970 bool has_speculative = false;
1971
1972 if (!opt_for_fn (node->decl, optimize))
1973 continue;
1974
1975 if (dump_file)
1976 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1977
1978 for (edge = node->callees; edge; edge = edge->next_callee)
1979 {
1980 if (edge->inline_failed
1981 && !edge->aux
1982 && can_inline_edge_p (edge, true)
1983 && want_inline_small_function_p (edge, true)
1984 && can_inline_edge_by_limits_p (edge, true)
1985 && edge->inline_failed)
1986 {
1987 gcc_assert (!edge->aux);
1988 update_edge_key (&edge_heap, edge);
1989 }
1990 if (edge->speculative)
1991 has_speculative = true;
1992 }
1993 if (has_speculative)
1994 for (edge = node->callees; edge; edge = next)
1995 {
1996 next = edge->next_callee;
1997 if (edge->speculative
1998 && !speculation_useful_p (edge, edge->aux != NULL))
1999 {
2000 cgraph_edge::resolve_speculation (edge);
2001 update = true;
2002 }
2003 }
2004 if (update)
2005 {
2006 struct cgraph_node *where = node->inlined_to
2007 ? node->inlined_to : node;
2008 ipa_update_overall_fn_summary (where);
2009 reset_edge_caches (where);
2010 update_caller_keys (&edge_heap, where,
2011 updated_nodes, NULL);
2012 update_callee_keys (&edge_heap, where, NULL,
2013 updated_nodes);
2014 bitmap_clear (updated_nodes);
2015 }
2016 }
2017
2018 gcc_assert (in_lto_p
2019 || !(max_count > 0)
2020 || (profile_info && flag_branch_probabilities));
2021
2022 while (!edge_heap.empty ())
2023 {
2024 int old_size = overall_size;
2025 struct cgraph_node *where, *callee;
2026 sreal badness = edge_heap.min_key ();
2027 sreal current_badness;
2028 int growth;
2029
2030 edge = edge_heap.extract_min ();
2031 gcc_assert (edge->aux);
2032 edge->aux = NULL;
2033 if (!edge->inline_failed || !edge->callee->analyzed)
2034 continue;
2035
2036 /* Be sure that the caches are maintained consistent.
2037 This check is affected by scaling roundoff errors when compiling with an
2038 IPA profile, thus we skip it in that case. */
2039 if (flag_checking && !edge->callee->count.ipa_p ()
2040 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2041 {
2042 sreal cached_badness = edge_badness (edge, false);
2043
2044 int old_size_est = estimate_edge_size (edge);
2045 sreal old_time_est = estimate_edge_time (edge);
2046 int old_hints_est = estimate_edge_hints (edge);
2047
2048 if (edge_growth_cache != NULL)
2049 edge_growth_cache->remove (edge);
2050 reset_node_cache (edge->caller->inlined_to
2051 ? edge->caller->inlined_to
2052 : edge->caller);
2053 gcc_assert (old_size_est == estimate_edge_size (edge));
2054 gcc_assert (old_time_est == estimate_edge_time (edge));
2055 /* FIXME:
2056
2057 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2058
2059 fails with profile feedback because some hints depend on the
2060 maybe_hot_edge_p predicate, and because the callee gets inlined into
2061 other calls the edge may become cold.
2062 This ought to be fixed by computing relative probabilities
2063 for the given invocation, but that will be better done once the whole
2064 code is converted to sreals. Disable for now and revert to the "wrong"
2065 value so that the enable/disable checking paths agree. */
2066 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2067
2068 /* When updating the edge costs, we only decrease badness in the keys.
2069 Increases of badness are handled lazily; when we see a key with an
2070 out-of-date value on it, we re-insert it now. */
2071 current_badness = edge_badness (edge, false);
2072 gcc_assert (cached_badness == current_badness);
2073 gcc_assert (current_badness >= badness);
2074 }
2075 else
2076 current_badness = edge_badness (edge, false);
2077 if (current_badness != badness)
2078 {
2079 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2080 {
2081 edge->aux = edge_heap.insert (current_badness, edge);
2082 continue;
2083 }
2084 else
2085 badness = current_badness;
2086 }
2087
2088 if (!can_inline_edge_p (edge, true)
2089 || !can_inline_edge_by_limits_p (edge, true))
2090 {
2091 resolve_noninline_speculation (&edge_heap, edge);
2092 continue;
2093 }
2094
2095 callee = edge->callee->ultimate_alias_target ();
2096 growth = estimate_edge_growth (edge);
2097 if (dump_file)
2098 {
2099 fprintf (dump_file,
2100 "\nConsidering %s with %i size\n",
2101 callee->dump_name (),
2102 ipa_size_summaries->get (callee)->size);
2103 fprintf (dump_file,
2104 " to be inlined into %s in %s:%i\n"
2105 " Estimated badness is %f, frequency %.2f.\n",
2106 edge->caller->dump_name (),
2107 edge->call_stmt
2108 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2109 edge->call_stmt))
2110 > BUILTINS_LOCATION)
2111 ? gimple_filename ((const gimple *) edge->call_stmt)
2112 : "unknown",
2113 edge->call_stmt
2114 ? gimple_lineno ((const gimple *) edge->call_stmt)
2115 : -1,
2116 badness.to_double (),
2117 edge->sreal_frequency ().to_double ());
2118 if (edge->count.ipa ().initialized_p ())
2119 {
2120 fprintf (dump_file, " Called ");
2121 edge->count.ipa ().dump (dump_file);
2122 fprintf (dump_file, " times\n");
2123 }
2124 if (dump_flags & TDF_DETAILS)
2125 edge_badness (edge, true);
2126 }
2127
2128 where = edge->caller;
2129
2130 if (overall_size + growth > compute_max_insns (where, min_size)
2131 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2132 {
2133 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2134 report_inline_failed_reason (edge);
2135 resolve_noninline_speculation (&edge_heap, edge);
2136 continue;
2137 }
2138
2139 if (!want_inline_small_function_p (edge, true))
2140 {
2141 resolve_noninline_speculation (&edge_heap, edge);
2142 continue;
2143 }
2144
2145 profile_count old_count = callee->count;
2146
2147 /* Heuristics for inlining small functions work poorly for
2148 recursive calls, where we achieve effects similar to loop unrolling.
2149 When inlining such an edge seems profitable, leave the decision to the
2150 special-purpose recursive inliner. */
2151 if (edge->recursive_p ())
2152 {
2153 if (where->inlined_to)
2154 where = where->inlined_to;
2155 if (!recursive_inlining (edge,
2156 opt_for_fn (edge->caller->decl,
2157 flag_indirect_inlining)
2158 ? &new_indirect_edges : NULL))
2159 {
2160 edge->inline_failed = CIF_RECURSIVE_INLINING;
2161 resolve_noninline_speculation (&edge_heap, edge);
2162 continue;
2163 }
2164 reset_edge_caches (where);
2165 /* Recursive inliner inlines all recursive calls of the function
2166 at once. Consequently we need to update all callee keys. */
2167 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2168 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2169 update_callee_keys (&edge_heap, where, where, updated_nodes);
2170 bitmap_clear (updated_nodes);
2171 }
2172 else
2173 {
2174 struct cgraph_node *outer_node = NULL;
2175 int depth = 0;
2176
2177 /* Consider the case where a self-recursive function A is inlined
2178 into B. This is a desired optimization in some cases, since it
2179 leads to an effect similar to loop peeling and we might completely
2180 optimize out the recursive call. However we must be extra
2181 selective. */
2182
2183 where = edge->caller;
2184 while (where->inlined_to)
2185 {
2186 if (where->decl == callee->decl)
2187 outer_node = where, depth++;
2188 where = where->callers->caller;
2189 }
2190 if (outer_node
2191 && !want_inline_self_recursive_call_p (edge, outer_node,
2192 true, depth))
2193 {
2194 edge->inline_failed
2195 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2196 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2197 resolve_noninline_speculation (&edge_heap, edge);
2198 continue;
2199 }
2200 else if (depth && dump_file)
2201 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2202
2203 gcc_checking_assert (!callee->inlined_to);
2204
2205 int old_size = ipa_size_summaries->get (where)->size;
2206 sreal old_time = ipa_fn_summaries->get (where)->time;
2207
2208 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2209 reset_edge_caches (edge->callee);
2210 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2211
2212 /* If caller's size and time increased we do not need to update
2213 all edges because badness is not going to decrease. */
2214 if (old_size <= ipa_size_summaries->get (where)->size
2215 && old_time <= ipa_fn_summaries->get (where)->time
2216 /* Wrapper penalty may be non-monotonous in this respect.
2217 Fortunately it only affects small functions. */
2218 && !wrapper_heuristics_may_apply (where, old_size))
2219 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2220 updated_nodes);
2221 else
2222 update_callee_keys (&edge_heap, where,
2223 edge->callee,
2224 updated_nodes);
2225 }
2226 where = edge->caller;
2227 if (where->inlined_to)
2228 where = where->inlined_to;
2229
2230 /* Our profitability metric can depend on local properties
2231 such as the number of inlinable calls and the size of the function body.
2232 After inlining these properties might change for the function we
2233 inlined into (since its body size changed) and for the functions
2234 called by the function we inlined (since the number of their inlinable
2235 callers might change). */
2236 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2237 /* Offline copy count has possibly changed, recompute if profile is
2238 available. */
2239 struct cgraph_node *n
2240 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2241 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2242 && n->count.ipa_p ())
2243 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2244 bitmap_clear (updated_nodes);
2245
2246 if (dump_enabled_p ())
2247 {
2248 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2249
2250 /* dump_printf can't handle %+i. */
2251 char buf_net_change[100];
2252 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2253 overall_size - old_size);
2254
2255 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2256 " Inlined %C into %C which now has time %f and "
2257 "size %i, net change of %s%s.\n",
2258 edge->callee, edge->caller,
2259 s->time.to_double (),
2260 ipa_size_summaries->get (edge->caller)->size,
2261 buf_net_change,
2262 cross_module_call_p (edge) ? " (cross module)":"");
2263 }
2264 if (min_size > overall_size)
2265 {
2266 min_size = overall_size;
2267
2268 if (dump_file)
2269 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2270 }
2271 }
2272
2273 free_growth_caches ();
2274 if (dump_enabled_p ())
2275 dump_printf (MSG_NOTE,
2276 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2277 initial_size, overall_size,
2278 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2279 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2280 }
2281
2282 /* Flatten NODE. Performed both during early inlining and
2283 at IPA inlining time. */
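/* NODE->aux is used as a visiting marker below, so that cycles in the
   (possibly already partially inlined) callgraph stop the recursion
   instead of looping forever.  */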
2284
2285 static void
2286 flatten_function (struct cgraph_node *node, bool early, bool update)
2287 {
2288 struct cgraph_edge *e;
2289
2290 /* We shouldn't be called recursively when we are being processed. */
2291 gcc_assert (node->aux == NULL);
2292
2293 node->aux = (void *) node;
2294
2295 for (e = node->callees; e; e = e->next_callee)
2296 {
2297 struct cgraph_node *orig_callee;
2298 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2299
2300 /* We've hit a cycle? It is time to give up. */
2301 if (callee->aux)
2302 {
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2305 "Not inlining %C into %C to avoid cycle.\n",
2306 callee, e->caller);
2307 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2308 e->inline_failed = CIF_RECURSIVE_INLINING;
2309 continue;
2310 }
2311
2312 /* When the edge is already inlined, we just need to recurse into
2313 it in order to fully flatten the leaves. */
2314 if (!e->inline_failed)
2315 {
2316 flatten_function (callee, early, false);
2317 continue;
2318 }
2319
2320 /* The flatten attribute needs to be processed during late inlining. For
2321 extra code quality, however, we also do flattening during early
2322 optimization. */
2323 if (!early
2324 ? !can_inline_edge_p (e, true)
2325 && !can_inline_edge_by_limits_p (e, true)
2326 : !can_early_inline_edge_p (e))
2327 continue;
2328
2329 if (e->recursive_p ())
2330 {
2331 if (dump_enabled_p ())
2332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2333 "Not inlining: recursive call.\n");
2334 continue;
2335 }
2336
2337 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2338 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2339 {
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2342 "Not inlining: SSA form does not match.\n");
2343 continue;
2344 }
2345
2346 /* Inline the edge and flatten the inline clone. Avoid
2347 recursing through the original node if the node was cloned. */
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2350 " Inlining %C into %C.\n",
2351 callee, e->caller);
2352 orig_callee = callee;
2353 inline_call (e, true, NULL, NULL, false);
2354 if (e->callee != orig_callee)
2355 orig_callee->aux = (void *) node;
2356 flatten_function (e->callee, early, false);
2357 if (e->callee != orig_callee)
2358 orig_callee->aux = NULL;
2359 }
2360
2361 node->aux = NULL;
2362 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2363 if (update && opt_for_fn (where->decl, optimize))
2364 ipa_update_overall_fn_summary (where);
2365 }
2366
2367 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2368 DATA points to the number of calls originally found so we avoid infinite
2369 recursion. */
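/* Note that *NUM_CALLS is the caller count computed by sum_callers before
   any inlining happened; once more calls than that have been inlined we
   give up, since new calls are being discovered faster than we consume
   them.  */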
2370
2371 static bool
2372 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2373 hash_set<cgraph_node *> *callers)
2374 {
2375 int *num_calls = (int *)data;
2376 bool callee_removed = false;
2377
2378 while (node->callers && !node->inlined_to)
2379 {
2380 struct cgraph_node *caller = node->callers->caller;
2381
2382 if (!can_inline_edge_p (node->callers, true)
2383 || !can_inline_edge_by_limits_p (node->callers, true)
2384 || node->callers->recursive_p ())
2385 {
2386 if (dump_file)
2387 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2388 *num_calls = 0;
2389 return false;
2390 }
2391
2392 if (dump_file)
2393 {
2394 cgraph_node *ultimate = node->ultimate_alias_target ();
2395 fprintf (dump_file,
2396 "\nInlining %s size %i.\n",
2397 ultimate->dump_name (),
2398 ipa_size_summaries->get (ultimate)->size);
2399 fprintf (dump_file,
2400 " Called once from %s %i insns.\n",
2401 node->callers->caller->dump_name (),
2402 ipa_size_summaries->get (node->callers->caller)->size);
2403 }
2404
2405 /* Remember which callers we inlined to, delaying updating the
2406 overall summary. */
2407 callers->add (node->callers->caller);
2408 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2409 if (dump_file)
2410 fprintf (dump_file,
2411 " Inlined into %s which now has %i size\n",
2412 caller->dump_name (),
2413 ipa_size_summaries->get (caller)->size);
2414 if (!(*num_calls)--)
2415 {
2416 if (dump_file)
2417 fprintf (dump_file, "New calls found; giving up.\n");
2418 return callee_removed;
2419 }
2420 if (callee_removed)
2421 return true;
2422 }
2423 return false;
2424 }
2425
2426 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2427 update. */
2428
2429 static bool
2430 inline_to_all_callers (struct cgraph_node *node, void *data)
2431 {
2432 hash_set<cgraph_node *> callers;
2433 bool res = inline_to_all_callers_1 (node, data, &callers);
2434 /* Perform the delayed update of the overall summary of all callers
2435 processed. This avoids quadratic behavior in the cases where
2436 we have a lot of calls to the same function. */
2437 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2438 i != callers.end (); ++i)
2439 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2440 return res;
2441 }
2442
2443 /* Output overall time estimate. */
2444 static void
2445 dump_overall_stats (void)
2446 {
2447 sreal sum_weighted = 0, sum = 0;
2448 struct cgraph_node *node;
2449
2450 FOR_EACH_DEFINED_FUNCTION (node)
2451 if (!node->inlined_to
2452 && !node->alias)
2453 {
2454 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2455 if (s != NULL)
2456 {
2457 sum += s->time;
2458 if (node->count.ipa ().initialized_p ())
2459 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2460 }
2461 }
2462 fprintf (dump_file, "Overall time estimate: "
2463 "%f weighted by profile: "
2464 "%f\n", sum.to_double (), sum_weighted.to_double ());
2465 }
2466
2467 /* Output some useful stats about inlining. */
2468
2469 static void
2470 dump_inline_stats (void)
2471 {
2472 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2473 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2474 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2475 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2476 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2477 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2478 int64_t reason[CIF_N_REASONS][2];
2479 sreal reason_freq[CIF_N_REASONS];
2480 int i;
2481 struct cgraph_node *node;
2482
2483 memset (reason, 0, sizeof (reason));
2484 for (i=0; i < CIF_N_REASONS; i++)
2485 reason_freq[i] = 0;
2486 FOR_EACH_DEFINED_FUNCTION (node)
2487 {
2488 struct cgraph_edge *e;
2489 for (e = node->callees; e; e = e->next_callee)
2490 {
2491 if (e->inline_failed)
2492 {
2493 if (e->count.ipa ().initialized_p ())
2494 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2495 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2496 reason[(int) e->inline_failed][1] ++;
2497 if (DECL_VIRTUAL_P (e->callee->decl)
2498 && e->count.ipa ().initialized_p ())
2499 {
2500 if (e->indirect_inlining_edge)
2501 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2502 else
2503 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2504 }
2505 else if (e->count.ipa ().initialized_p ())
2506 {
2507 if (e->indirect_inlining_edge)
2508 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2509 else
2510 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2511 }
2512 }
2513 else if (e->count.ipa ().initialized_p ())
2514 {
2515 if (e->speculative)
2516 {
2517 if (DECL_VIRTUAL_P (e->callee->decl))
2518 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2519 else
2520 inlined_speculative += e->count.ipa ().to_gcov_type ();
2521 }
2522 else if (DECL_VIRTUAL_P (e->callee->decl))
2523 {
2524 if (e->indirect_inlining_edge)
2525 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2526 else
2527 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2528 }
2529 else
2530 {
2531 if (e->indirect_inlining_edge)
2532 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2533 else
2534 inlined_cnt += e->count.ipa ().to_gcov_type ();
2535 }
2536 }
2537 }
2538 for (e = node->indirect_calls; e; e = e->next_callee)
2539 if (e->indirect_info->polymorphic
2540 && e->count.ipa ().initialized_p ())
2541 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2542 else if (e->count.ipa ().initialized_p ())
2543 indirect_cnt += e->count.ipa ().to_gcov_type ();
2544 }
2545 if (max_count.initialized_p ())
2546 {
2547 fprintf (dump_file,
2548 "Inlined %" PRId64 " + speculative "
2549 "%" PRId64 " + speculative polymorphic "
2550 "%" PRId64 " + previously indirect "
2551 "%" PRId64 " + virtual "
2552 "%" PRId64 " + virtual and previously indirect "
2553 "%" PRId64 "\n" "Not inlined "
2554 "%" PRId64 " + previously indirect "
2555 "%" PRId64 " + virtual "
2556 "%" PRId64 " + virtual and previously indirect "
2557 "%" PRId64 " + still indirect "
2558 "%" PRId64 " + still indirect polymorphic "
2559 "%" PRId64 "\n", inlined_cnt,
2560 inlined_speculative, inlined_speculative_ply,
2561 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2562 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2563 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2564 fprintf (dump_file, "Removed speculations ");
2565 spec_rem.dump (dump_file);
2566 fprintf (dump_file, "\n");
2567 }
2568 dump_overall_stats ();
2569 fprintf (dump_file, "\nWhy inlining failed?\n");
2570 for (i = 0; i < CIF_N_REASONS; i++)
2571 if (reason[i][1])
2572 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2573 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2574 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2575 }
2576
2577 /* Called when node is removed. */
2578
2579 static void
2580 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2581 {
2582 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2583 return;
2584
2585 hash_set<struct cgraph_node *> *removed
2586 = (hash_set<struct cgraph_node *> *) data;
2587 removed->add (node);
2588 }
2589
2590 /* Decide on the inlining. We do so in topological order to avoid the
2591 expense of updating data structures. */
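/* In outline: flatten functions carrying the flatten attribute, run the
   greedy small-function inliner, remove functions that became unreachable,
   and finally (hot candidates first, then cold ones) inline functions into
   all their callers where that eliminates the offline copy, dropping
   speculations that turned out not to be useful along the way.  */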
2592
2593 static unsigned int
2594 ipa_inline (void)
2595 {
2596 struct cgraph_node *node;
2597 int nnodes;
2598 struct cgraph_node **order;
2599 int i, j;
2600 int cold;
2601 bool remove_functions = false;
2602
2603 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2604
2605 if (dump_file)
2606 ipa_dump_fn_summaries (dump_file);
2607
2608 nnodes = ipa_reverse_postorder (order);
2609 spec_rem = profile_count::zero ();
2610
2611 FOR_EACH_FUNCTION (node)
2612 {
2613 node->aux = 0;
2614
2615 /* Recompute the default reasons for inlining because they may have
2616 changed during merging. */
2617 if (in_lto_p)
2618 {
2619 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2620 {
2621 gcc_assert (e->inline_failed);
2622 initialize_inline_failed (e);
2623 }
2624 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2625 initialize_inline_failed (e);
2626 }
2627 }
2628
2629 if (dump_file)
2630 fprintf (dump_file, "\nFlattening functions:\n");
2631
2632 /* First shrink order array, so that it only contains nodes with
2633 flatten attribute. */
2634 for (i = nnodes - 1, j = i; i >= 0; i--)
2635 {
2636 node = order[i];
2637 if (node->definition
2638 /* Do not try to flatten aliases. These may happen for example when
2639 creating local aliases. */
2640 && !node->alias
2641 && lookup_attribute ("flatten",
2642 DECL_ATTRIBUTES (node->decl)) != NULL)
2643 order[j--] = order[i];
2644 }
2645
2646 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2647 nodes with flatten attribute. If there is more than one such
2648 node, we need to register a node removal hook, as flatten_function
2649 could remove other nodes with flatten attribute. See PR82801. */
2650 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2651 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2652 if (j < nnodes - 2)
2653 {
2654 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2655 node_removal_hook_holder
2656 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2657 flatten_removed_nodes);
2658 }
2659
2660 /* In the first pass handle functions to be flattened. Do this with
2661 a priority so none of our later choices will make this impossible. */
2662 for (i = nnodes - 1; i > j; i--)
2663 {
2664 node = order[i];
2665 if (flatten_removed_nodes
2666 && flatten_removed_nodes->contains (node))
2667 continue;
2668
2669 /* Handle nodes to be flattened.
2670 Ideally when processing callees we would stop inlining at the
2671 entry of cycles, possibly cloning that entry point and
2672 trying to flatten it, turning it into a self-recursive
2673 function. */
2674 if (dump_file)
2675 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2676 flatten_function (node, false, true);
2677 }
2678
2679 if (j < nnodes - 2)
2680 {
2681 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2682 delete flatten_removed_nodes;
2683 }
2684 free (order);
2685
2686 if (dump_file)
2687 dump_overall_stats ();
2688
2689 inline_small_functions ();
2690
2691 gcc_assert (symtab->state == IPA_SSA);
2692 symtab->state = IPA_SSA_AFTER_INLINING;
2693 /* Do first after-inlining removal. We want to remove all "stale" extern
2694 inline functions and virtual functions so we really know what is called
2695 once. */
2696 symtab->remove_unreachable_nodes (dump_file);
2697
2698 /* Inline functions with the property that after inlining into all callers
2699 the code size will shrink because the out-of-line copy is eliminated.
2700 We do this regardless of the callee size as long as the function growth
2701 limits are met. */
2702 if (dump_file)
2703 fprintf (dump_file,
2704 "\nDeciding on functions to be inlined into all callers and "
2705 "removing useless speculations:\n");
2706
2707 /* Inlining one function called once has a good chance of preventing
2708 inlining of another function into the same caller. Ideally we should
2709 work in priority order, but inlining hot functions first is probably a
2710 good cut without the extra pain of maintaining the queue.
2711 
2712 ??? This is not really fitting the bill perfectly: inlining a function
2713 into its caller often leads to better optimization of the caller due to
2714 the increased context available for optimization.
2715 For example if the main() function calls a function that outputs help
2716 and then a function that does the actual work, we should inline
2717 the second with priority even if both calls are cold by themselves.
2718 
2719 We probably want to implement a new predicate replacing our use of
2720 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2721 to be hot. */
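/* The loop below runs twice: first with COLD == 0 and then with COLD == 1,
   so that the hot candidates described above are handled before the cold
   ones.  */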
2722 for (cold = 0; cold <= 1; cold ++)
2723 {
2724 FOR_EACH_DEFINED_FUNCTION (node)
2725 {
2726 struct cgraph_edge *edge, *next;
2727 bool update=false;
2728
2729 if (!opt_for_fn (node->decl, optimize)
2730 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2731 continue;
2732
2733 for (edge = node->callees; edge; edge = next)
2734 {
2735 next = edge->next_callee;
2736 if (edge->speculative && !speculation_useful_p (edge, false))
2737 {
2738 if (edge->count.ipa ().initialized_p ())
2739 spec_rem += edge->count.ipa ();
2740 cgraph_edge::resolve_speculation (edge);
2741 update = true;
2742 remove_functions = true;
2743 }
2744 }
2745 if (update)
2746 {
2747 struct cgraph_node *where = node->inlined_to
2748 ? node->inlined_to : node;
2749 reset_edge_caches (where);
2750 ipa_update_overall_fn_summary (where);
2751 }
2752 if (want_inline_function_to_all_callers_p (node, cold))
2753 {
2754 int num_calls = 0;
2755 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2756 true);
2757 while (node->call_for_symbol_and_aliases
2758 (inline_to_all_callers, &num_calls, true))
2759 ;
2760 remove_functions = true;
2761 }
2762 }
2763 }
2764
2765 /* Free ipa-prop structures if they are no longer needed. */
2766 ipa_free_all_structures_after_iinln ();
2767
2768 if (dump_enabled_p ())
2769 dump_printf (MSG_NOTE,
2770 "\nInlined %i calls, eliminated %i functions\n\n",
2771 ncalls_inlined, nfunctions_inlined);
2772 if (dump_file)
2773 dump_inline_stats ();
2774
2775 if (dump_file)
2776 ipa_dump_fn_summaries (dump_file);
2777 return remove_functions ? TODO_remove_functions : 0;
2778 }
2779
2780 /* Inline always-inline function calls in NODE. */
2781
2782 static bool
2783 inline_always_inline_functions (struct cgraph_node *node)
2784 {
2785 struct cgraph_edge *e;
2786 bool inlined = false;
2787
2788 for (e = node->callees; e; e = e->next_callee)
2789 {
2790 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2791 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2792 continue;
2793
2794 if (e->recursive_p ())
2795 {
2796 if (dump_enabled_p ())
2797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2798 " Not inlining recursive call to %C.\n",
2799 e->callee);
2800 e->inline_failed = CIF_RECURSIVE_INLINING;
2801 continue;
2802 }
2803
2804 if (!can_early_inline_edge_p (e))
2805 {
2806 /* Set inlined to true if the callee is marked "always_inline" but
2807 is not inlinable. This will allow flagging an error later in
2808 expand_call_inline in tree-inline.c. */
2809 if (lookup_attribute ("always_inline",
2810 DECL_ATTRIBUTES (callee->decl)) != NULL)
2811 inlined = true;
2812 continue;
2813 }
2814
2815 if (dump_enabled_p ())
2816 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2817 " Inlining %C into %C (always_inline).\n",
2818 e->callee, e->caller);
2819 inline_call (e, true, NULL, NULL, false);
2820 inlined = true;
2821 }
2822 if (inlined)
2823 ipa_update_overall_fn_summary (node);
2824
2825 return inlined;
2826 }
2827
2828 /* Decide on the inlining. We do so in topological order to avoid the
2829 expense of updating data structures. */
2830
2831 static bool
2832 early_inline_small_functions (struct cgraph_node *node)
2833 {
2834 struct cgraph_edge *e;
2835 bool inlined = false;
2836
2837 for (e = node->callees; e; e = e->next_callee)
2838 {
2839 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2840
2841 /* We can encounter a not-yet-analyzed function during
2842 early inlining on callgraphs with strongly
2843 connected components. */
2844 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2845 if (s == NULL || !s->inlinable || !e->inline_failed)
2846 continue;
2847
2848 /* Do not consider functions not declared inline. */
2849 if (!DECL_DECLARED_INLINE_P (callee->decl)
2850 && !opt_for_fn (node->decl, flag_inline_small_functions)
2851 && !opt_for_fn (node->decl, flag_inline_functions))
2852 continue;
2853
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_NOTE, e->call_stmt,
2856 "Considering inline candidate %C.\n",
2857 callee);
2858
2859 if (!can_early_inline_edge_p (e))
2860 continue;
2861
2862 if (e->recursive_p ())
2863 {
2864 if (dump_enabled_p ())
2865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2866 " Not inlining: recursive call.\n");
2867 continue;
2868 }
2869
2870 if (!want_early_inline_function_p (e))
2871 continue;
2872
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2875 " Inlining %C into %C.\n",
2876 callee, e->caller);
2877 inline_call (e, true, NULL, NULL, false);
2878 inlined = true;
2879 }
2880
2881 if (inlined)
2882 ipa_update_overall_fn_summary (node);
2883
2884 return inlined;
2885 }
2886
2887 unsigned int
2888 early_inliner (function *fun)
2889 {
2890 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2891 struct cgraph_edge *edge;
2892 unsigned int todo = 0;
2893 int iterations = 0;
2894 bool inlined = false;
2895
2896 if (seen_error ())
2897 return 0;
2898
2899 /* Do nothing if the data structures for the IPA inliner are already
2900 computed. This happens when some pass decides to construct a new
2901 function and cgraph_add_new_function calls lowering passes and early
2902 optimization on it. This may confuse us when the early inliner decides
2903 to inline a call to a function clone, because function clones don't have
2904 a parameter list in ipa-prop matching their signature. */
2905 if (ipa_node_params_sum)
2906 return 0;
2907
2908 if (flag_checking)
2909 node->verify ();
2910 node->remove_all_references ();
2911
2912 /* Even when not optimizing or not inlining, inline always-inline
2913 functions. */
2914 inlined = inline_always_inline_functions (node);
2915
2916 if (!optimize
2917 || flag_no_inline
2918 || !flag_early_inlining
2919 /* Never inline regular functions into always-inline functions
2920 during incremental inlining. This sucks as functions calling
2921 always-inline functions will get less optimized, but at the
2922 same time inlining functions that call always-inline functions
2923 into an always-inline function might introduce
2924 cycles of edges to be always inlined in the callgraph.
2925
2926 We might want to be smarter and just avoid this type of inlining. */
2927 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2928 && lookup_attribute ("always_inline",
2929 DECL_ATTRIBUTES (node->decl))))
2930 ;
2931 else if (lookup_attribute ("flatten",
2932 DECL_ATTRIBUTES (node->decl)) != NULL)
2933 {
2934 /* When the function is marked to be flattened, recursively inline
2935 all calls in it. */
2936 if (dump_enabled_p ())
2937 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2938 "Flattening %C\n", node);
2939 flatten_function (node, true, true);
2940 inlined = true;
2941 }
2942 else
2943 {
2944 /* If some always_inline functions were inlined, apply the changes.
2945 This way we will not account always-inlines into the growth limits, and
2946 moreover we will inline calls from always-inlines that we skipped
2947 previously because of the conditional above. */
2948 if (inlined)
2949 {
2950 timevar_push (TV_INTEGRATION);
2951 todo |= optimize_inline_calls (current_function_decl);
2952 /* optimize_inline_calls call above might have introduced new
2953 statements that don't have inline parameters computed. */
2954 for (edge = node->callees; edge; edge = edge->next_callee)
2955 {
2956 /* We can encounter a not-yet-analyzed function during
2957 early inlining on callgraphs with strongly
2958 connected components. */
2959 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2960 es->call_stmt_size
2961 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2962 es->call_stmt_time
2963 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2964 }
2965 ipa_update_overall_fn_summary (node);
2966 inlined = false;
2967 timevar_pop (TV_INTEGRATION);
2968 }
2969 /* We iterate incremental inlining to get trivial cases of indirect
2970 inlining. */
2971 while (iterations < opt_for_fn (node->decl,
2972 param_early_inliner_max_iterations)
2973 && early_inline_small_functions (node))
2974 {
2975 timevar_push (TV_INTEGRATION);
2976 todo |= optimize_inline_calls (current_function_decl);
2977
2978 /* Technically we ought to recompute inline parameters so the new
2979 iteration of the early inliner works as expected. However, the values
2980 are approximately right, and thus we only need to update the edge
2981 info that might be cleared out for newly discovered edges. */
2982 for (edge = node->callees; edge; edge = edge->next_callee)
2983 {
2984 /* We have no summary for new bound store calls yet. */
2985 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2986 es->call_stmt_size
2987 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2988 es->call_stmt_time
2989 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2990 }
2991 if (iterations < opt_for_fn (node->decl,
2992 param_early_inliner_max_iterations) - 1)
2993 ipa_update_overall_fn_summary (node);
2994 timevar_pop (TV_INTEGRATION);
2995 iterations++;
2996 inlined = false;
2997 }
2998 if (dump_file)
2999 fprintf (dump_file, "Iterations: %i\n", iterations);
3000 }
3001
3002 if (inlined)
3003 {
3004 timevar_push (TV_INTEGRATION);
3005 todo |= optimize_inline_calls (current_function_decl);
3006 timevar_pop (TV_INTEGRATION);
3007 }
3008
3009 fun->always_inline_functions_inlined = true;
3010
3011 return todo;
3012 }
3013
3014 /* Do inlining of small functions. Doing so early helps profiling and other
3015 passes to be somewhat more effective and avoids some code duplication in
3016 the later real inlining pass for testcases with very many function calls. */
3017
3018 namespace {
3019
3020 const pass_data pass_data_early_inline =
3021 {
3022 GIMPLE_PASS, /* type */
3023 "einline", /* name */
3024 OPTGROUP_INLINE, /* optinfo_flags */
3025 TV_EARLY_INLINING, /* tv_id */
3026 PROP_ssa, /* properties_required */
3027 0, /* properties_provided */
3028 0, /* properties_destroyed */
3029 0, /* todo_flags_start */
3030 0, /* todo_flags_finish */
3031 };
3032
3033 class pass_early_inline : public gimple_opt_pass
3034 {
3035 public:
3036 pass_early_inline (gcc::context *ctxt)
3037 : gimple_opt_pass (pass_data_early_inline, ctxt)
3038 {}
3039
3040 /* opt_pass methods: */
3041 virtual unsigned int execute (function *);
3042
3043 }; // class pass_early_inline
3044
3045 unsigned int
3046 pass_early_inline::execute (function *fun)
3047 {
3048 return early_inliner (fun);
3049 }
3050
3051 } // anon namespace
3052
3053 gimple_opt_pass *
3054 make_pass_early_inline (gcc::context *ctxt)
3055 {
3056 return new pass_early_inline (ctxt);
3057 }
3058
3059 namespace {
3060
3061 const pass_data pass_data_ipa_inline =
3062 {
3063 IPA_PASS, /* type */
3064 "inline", /* name */
3065 OPTGROUP_INLINE, /* optinfo_flags */
3066 TV_IPA_INLINING, /* tv_id */
3067 0, /* properties_required */
3068 0, /* properties_provided */
3069 0, /* properties_destroyed */
3070 0, /* todo_flags_start */
3071 ( TODO_dump_symtab ), /* todo_flags_finish */
3072 };
3073
3074 class pass_ipa_inline : public ipa_opt_pass_d
3075 {
3076 public:
3077 pass_ipa_inline (gcc::context *ctxt)
3078 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3079 NULL, /* generate_summary */
3080 NULL, /* write_summary */
3081 NULL, /* read_summary */
3082 NULL, /* write_optimization_summary */
3083 NULL, /* read_optimization_summary */
3084 NULL, /* stmt_fixup */
3085 0, /* function_transform_todo_flags_start */
3086 inline_transform, /* function_transform */
3087 NULL) /* variable_transform */
3088 {}
3089
3090 /* opt_pass methods: */
3091 virtual unsigned int execute (function *) { return ipa_inline (); }
3092
3093 }; // class pass_ipa_inline
3094
3095 } // anon namespace
3096
3097 ipa_opt_pass_d *
3098 make_pass_ipa_inline (gcc::context *ctxt)
3099 {
3100 return new pass_ipa_inline (ctxt);
3101 }