gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks whether a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass cannot really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) Inlining of small functions.  This is implemented by a greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          a special purpose recursive inliner is executed on the function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "profile.h"
 109 #include "symbol-summary.h"
 110 #include "tree-vrp.h"
 111 #include "ipa-prop.h"
 112 #include "ipa-fnsummary.h"
 113 #include "ipa-inline.h"
 114 #include "ipa-utils.h"
 115 #include "sreal.h"
 116 #include "auto-profile.h"
 117 #include "builtins.h"
 118 #include "fibonacci_heap.h"
 119 #include "stringpool.h"
 120 #include "attribs.h"
 121 #include "asan.h"
 122
 123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
     /* Node type matching edge_heap_t (same key and data types).
        NOTE(review): the sreal key is presumably the edge "badness" mentioned
        in the overview comment at the top of the file -- confirm at the use
        sites.  */
 124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 125
 126 /* Statistics we collect about the inlining algorithm.  */
 127 static int overall_size;
 128 static profile_count max_count;
 129 static profile_count spec_rem;
 130
 131 /* Return false when inlining edge E would violate the limits on
 132    function body growth or stack usage growth; E->inline_failed is then
 133    set to the matching CIF_LARGE_*_GROWTH_LIMIT reason.  Return true otherwise.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into a tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth on the stack usage
 141    of the callers.  We want to prevent applications from segfaulting on
 142    stack overflow when functions with huge stack frames get inlined.  */
 143
 144 static bool
 145 caller_growth_limits (struct cgraph_edge *e)
 146 {
 147   struct cgraph_node *to = e->caller;
 148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 149   int newsize;
 150   int limit = 0;
 151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
       /* Size summary of e->caller itself, fetched before TO is walked up
          the chain of inlined_to callers in the loop below.  */
 152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
 153
 154   /* Look for function e->caller is inlined to.  While doing
 155      so work out the largest function body on the way.  As
 156      described above, we want to base our function growth
 157      limits based on that.  Not on the self size of the
 158      outer function, not on the self size of inline code
 159      we immediately inline to.  This is the most relaxed
 160      interpretation of the rule "do not grow large functions
 161      too much in order to prevent compiler from exploding".  */
 162   while (true)
 163     {
 164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
 165       if (limit < size_info->self_size)
 166         limit = size_info->self_size;
 167       if (stack_size_limit < size_info->estimated_self_stack_size)
 168         stack_size_limit = size_info->estimated_self_stack_size;
 169       if (to->inlined_to)
 170         to = to->callers->caller;
 171       else
 172         break;
 173     }
 174
 175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
 176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
 177
       /* The callee's own size takes part in the maximum as well.  */
 178   if (limit < what_size_info->self_size)
 179     limit = what_size_info->self_size;
 180
       /* Allow growth by param_large_function_growth percent over the
          largest body found above.  */
 181   limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
 182
 183   /* Check the size after inlining against the function limits.  But allow
 184      the function to shrink if it went over the limits by forced inlining.  */
 185   newsize = estimate_size_after_inlining (to, e);
 186   if (newsize >= ipa_size_summaries->get (what)->size
 187       && newsize > opt_for_fn (to->decl, param_large_function_insns)
 188       && newsize > limit)
 189     {
 190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 191       return false;
 192     }
 193
       /* A callee with zero estimated stack size cannot violate the stack
          limits, so the remaining checks are skipped.  */
 194   if (!what_info->estimated_stack_size)
 195     return true;
 196
 197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 198      due to large i/o datastructures used by the Fortran front-end.
 199      We ought to ignore this limit when we know that the edge is executed
 200      on every invocation of the caller (i.e. its call statement dominates
 201      exit block).  We do not track this information, yet.  */
 202   stack_size_limit += ((gcov_type)stack_size_limit
 203                        * opt_for_fn (to->decl, param_stack_frame_growth)
 204                        / 100);
 205
       /* Sum of ipa_get_stack_frame_offset (TO), the immediate caller's self
          stack size and the callee's estimated stack size.  */
 206   inlined_stack = (ipa_get_stack_frame_offset (to)
 207                    + outer_info->estimated_self_stack_size
 208                    + what_info->estimated_stack_size);
 209   /* Check new stack consumption with stack consumption at the place
 210      stack is used.  */
 211   if (inlined_stack > stack_size_limit
 212       /* If function already has large stack usage from sibling
 213          inline call, we can inline, too.
 214          This bit overoptimistically assume that we are good at stack
 215          packing.  */
 216       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
 217       && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
 218     {
 219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 220       return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Dump the reason why edge E was not inlined, if dumping is enabled.  */
 226
 227 static void
 228 report_inline_failed_reason (struct cgraph_edge *e)
 229 {
 230   if (!dump_enabled_p ())
 231     return;
 232
 233   dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 234                    "  not inlinable: %C -> %C, %s\n", e->caller, e->callee,
 235                    cgraph_inline_failed_string (e->inline_failed));
 236
 237   const bool target_mismatch = e->inline_failed == CIF_TARGET_OPTION_MISMATCH;
 238   const bool opt_mismatch = e->inline_failed == CIF_OPTIMIZATION_MISMATCH;
 239   /* For option mismatches, name the LTO objects when both ends have them.  */
 240   if ((target_mismatch || opt_mismatch)
 241       && e->caller->lto_file_data
 242       && e->callee->ultimate_alias_target ()->lto_file_data)
 243     dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 244                      "  LTO objects: %s, %s\n",
 245                      e->caller->lto_file_data->file_name,
 246                      e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 247
 248   /* The option differences are written directly to DUMP_FILE when set.  */
 249   if (target_mismatch && dump_file)
 250     cl_target_option_print_diff
 251       (dump_file, 2, target_opts_for_fn (e->caller->decl),
 252        target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 253   if (opt_mismatch && dump_file)
 254     cl_optimization_print_diff
 255       (dump_file, 2, opts_for_fn (e->caller->decl),
 256        opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 257 }
 258
 259 /* Return true when the sanitizer settings of function decls CALLER
 260    and CALLEE allow inlining CALLEE into CALLER.  */
 261
 262 static bool
 263 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 264 {
 265   if (caller == NULL || callee == NULL)
 266     return true;
 267   /* always_inline callees may go into non-address-sanitized callers.  */
 268   const bool caller_asan = sanitize_flags_p (SANITIZE_ADDRESS, caller);
 269   if (!caller_asan
 270       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
 271     return true;
 272   /* Otherwise each of the checked sanitizer flags has to agree.  */
 273   if (caller_asan != sanitize_flags_p (SANITIZE_ADDRESS, callee))
 274     return false;
 275   return ((sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 276            == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 277           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 278               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 279 }
 280
 281 /* Used for flags where it is safe to inline when caller's value is
 282    greater than callee's.  Evaluates to true on a blocking mismatch: the
        two values differ and either ALWAYS_INLINE is false or the caller's
        value is smaller.  Like the two macros below, it expects CALLER and
        CALLEE (and here ALWAYS_INLINE) to be in scope where it is used.  */
 283 #define check_maybe_up(flag) \
 284       (opts_for_fn (caller->decl)->x_##flag             \
 285        != opts_for_fn (callee->decl)->x_##flag          \
 286        && (!always_inline                               \
 287            || opts_for_fn (caller->decl)->x_##flag      \
 288               < opts_for_fn (callee->decl)->x_##flag))
 289 /* Used for flags where it is safe to inline when caller's value is
 290    smaller than callee's.  Mirror image of check_maybe_up: true when the
        values differ and either ALWAYS_INLINE is false or the caller's value
        is greater.  */
 291 #define check_maybe_down(flag) \
 292       (opts_for_fn (caller->decl)->x_##flag             \
 293        != opts_for_fn (callee->decl)->x_##flag          \
 294        && (!always_inline                               \
 295            || opts_for_fn (caller->decl)->x_##flag      \
 296               > opts_for_fn (callee->decl)->x_##flag))
 297 /* Used for flags where exact match is needed for correctness.  True
        whenever the caller's and callee's values differ.  */
 298 #define check_match(flag) \
 299       (opts_for_fn (caller->decl)->x_##flag             \
 300        != opts_for_fn (callee->decl)->x_##flag)
 301
 302 /* Decide if we can inline the edge E and possibly update its
 303    inline_failed reason.
 304    This only checks whether inlining is possible at all; no size or
 305    growth limit is consulted here.
 306
 307    If REPORT is true, output the reason to the dump file.
        If EARLY is true, do not require both functions to be optimized.
        Return true when no check failed.  */
 308
 309 static bool
 310 can_inline_edge_p (struct cgraph_edge *e, bool report,
 311                    bool early = false)
 312 {
 313   gcc_checking_assert (e->inline_failed);
 314
       /* A failure already classified as final is reported as is.  */
 315   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 316     {
 317       if (report)
 318         report_inline_failed_reason (e);
 319       return false;
 320     }
 321
 322   bool inlinable = true;
 323   enum availability avail;
       /* Checks are done against the function E->caller is inlined into,
          when there is one.  */
 324   cgraph_node *caller = (e->caller->inlined_to
 325                          ? e->caller->inlined_to : e->caller);
 326   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 327
 328   if (!callee->definition)
 329     {
 330       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 331       inlinable = false;
 332     }
       /* NOTE: this starts a separate if/else-if chain, so a later branch may
          replace the CIF_BODY_NOT_AVAILABLE reason set above.  At most one
          branch of the chain is taken.  */
 333   if (!early && (!opt_for_fn (callee->decl, optimize)
 334                  || !opt_for_fn (caller->decl, optimize)))
 335     {
 336       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 337       inlinable = false;
 338     }
 339   else if (callee->calls_comdat_local)
 340     {
 341       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 342       inlinable = false;
 343     }
 344   else if (avail <= AVAIL_INTERPOSABLE)
 345     {
 346       e->inline_failed = CIF_OVERWRITABLE;
 347       inlinable = false;
 348     }
 349   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 350      initialized to one of FINAL_ERROR reasons.  */
 351   else if (e->call_stmt_cannot_inline_p)
 352     gcc_unreachable ();
 353   /* Don't inline if the functions have different EH personalities.  */
 354   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 355            && DECL_FUNCTION_PERSONALITY (callee->decl)
 356            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 357                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 358     {
 359       e->inline_failed = CIF_EH_PERSONALITY;
 360       inlinable = false;
 361     }
 362   /* TM pure functions should not be inlined into non-TM_pure
 363      functions.  */
 364   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 365     {
 366       e->inline_failed = CIF_UNSPECIFIED;
 367       inlinable = false;
 368     }
 369   /* Check compatibility of target optimization options.  */
 370   else if (!targetm.target_option.can_inline_p (caller->decl,
 371                                                 callee->decl))
 372     {
 373       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 374       inlinable = false;
 375     }
       /* The callee needs a function summary that marks it inlinable.  */
 376   else if (ipa_fn_summaries->get (callee) == NULL
 377            || !ipa_fn_summaries->get (callee)->inlinable)
 378     {
 379       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 380       inlinable = false;
 381     }
 382   /* Don't inline a function with mismatched sanitization attributes. */
 383   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 384     {
 385       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 386       inlinable = false;
 387     }
 388   if (!inlinable && report)
 389     report_inline_failed_reason (e);
 390   return inlinable;
 391 }
 392
 393 /* Return the param_max_inline_insns_single limit in effect for function N.
 394    If HINT is true, scale it by param_inline_heuristics_hint_percent.  */
 395
 396 static int
 397 inline_insns_single (cgraph_node *n, bool hint)
 398 {
 399   const int cap = opt_for_fn (n->decl, param_max_inline_insns_single);
 400   if (!hint)
 401     return cap;
 402   return cap * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 403 }
 404
 405 /* Return the param_max_inline_insns_auto limit in effect for function N.
 406    If HINT is true, scale it by param_inline_heuristics_hint_percent.  */
 407
 408 static int
 409 inline_insns_auto (cgraph_node *n, bool hint)
 410 {
 411   const int cap = opt_for_fn (n->decl, param_max_inline_insns_auto);
 412   if (!hint)
 413     return cap;
 414   return cap * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 415 }
 417
 418 /* Decide if we can inline the edge E and possibly update its
 419    inline_failed reason.
 420    We check whether caller growth limits and the optimization options
 421    of caller and callee allow doing so.
 422
 423    If REPORT is true, output the reason to the dump file.
 424
 425    If DISREGARD_LIMITS is true, ignore size limits.
        If EARLY is true, the option checks are skipped for always_inline
        callees and the flag_devirtualize check is skipped.
        Return true when no check failed.  */
 426
 427 static bool
 428 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
 429                              bool disregard_limits = false, bool early = false)
 430 {
 431   gcc_checking_assert (e->inline_failed);
 432
       /* A failure already classified as final is reported as is.  */
 433   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 434     {
 435       if (report)
 436         report_inline_failed_reason (e);
 437       return false;
 438     }
 439
 440   bool inlinable = true;
       /* AVAIL is filled in below but not otherwise used in this function.  */
 441   enum availability avail;
 442   cgraph_node *caller = (e->caller->inlined_to
 443                          ? e->caller->inlined_to : e->caller);
 444   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 445   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
       /* NOTE(review): CALLEE is tested for NULL only here; every later use
          dereferences it unconditionally.  */
 446   tree callee_tree
 447     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 448   /* Check if caller growth allows the inlining.  */
 449   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 450       && !disregard_limits
 451       && !lookup_attribute ("flatten",
 452                  DECL_ATTRIBUTES (caller->decl))
 453       && !caller_growth_limits (e))
 454     inlinable = false;
       /* With live patching restricted to static functions, externally
          visible callees are refused unless they disregard inline limits.  */
 455   else if (callee->externally_visible
 456            && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 457            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
 458     {
 459       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
 460       inlinable = false;
 461     }
 462   /* Don't inline a function with a higher optimization level than the
 463      caller.  FIXME: this is really just tip of iceberg of handling
 464      optimization attribute.  */
 465   else if (caller_tree != callee_tree)
 466     {
 467       bool always_inline =
 468              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 469               && lookup_attribute ("always_inline",
 470                                    DECL_ATTRIBUTES (callee->decl)));
 471       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 472       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 473
 474      /* Until GCC 4.9 we did not check the semantics-altering flags
 475         below and inlined across optimization boundaries.
 476         Enabling checks below breaks several packages by refusing
 477         to inline library always_inline functions. See PR65873.
 478         Disable the check for early inlining for now until better solution
 479         is found.  */
 480      if (always_inline && early)
 481         ;
 482       /* There are some options that change IL semantics which means
 483          we cannot inline in these cases for correctness reason.
 484          Not even for always_inline declared functions.  */
 485      else if (check_match (flag_wrapv)
 486               || check_match (flag_trapv)
 487               || check_match (flag_pcc_struct_return)
 488               /* When caller or callee does FP math, be sure FP codegen flags
 489                  compatible.  */
 490               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 491                   && (check_maybe_up (flag_rounding_math)
 492                       || check_maybe_up (flag_trapping_math)
 493                       || check_maybe_down (flag_unsafe_math_optimizations)
 494                       || check_maybe_down (flag_finite_math_only)
 495                       || check_maybe_up (flag_signaling_nans)
 496                       || check_maybe_down (flag_cx_limited_range)
 497                       || check_maybe_up (flag_signed_zeros)
 498                       || check_maybe_down (flag_associative_math)
 499                       || check_maybe_down (flag_reciprocal_math)
 500                       || check_maybe_down (flag_fp_int_builtin_inexact)
 501                       /* Strictly speaking only when the callee contains function
 502                          calls that may end up setting errno.  */
 503                       || check_maybe_up (flag_errno_math)))
 504               /* We do not want to make code compiled with exceptions to be
 505                  brought into a non-EH function unless we know that the callee
 506                  does not throw.
 507                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 508               || (check_maybe_up (flag_non_call_exceptions)
 509                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 510               || (check_maybe_up (flag_exceptions)
 511                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 512               /* When devirtualization is disabled for callee, it is not safe
 513                  to inline it as we possibly mangled the type info.
 514                  Allow early inlining of always inlines.  */
 515               || (!early && check_maybe_down (flag_devirtualize)))
 516         {
 517           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 518           inlinable = false;
 519         }
 520       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 521       else if (always_inline)
 522         ;
 523       /* When user added an attribute to the callee honor it.  */
 524       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 525                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 526         {
 527           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 528           inlinable = false;
 529         }
 530       /* If explicit optimize attribute are not used, the mismatch is caused
 531          by different command line options used to build different units.
 532          Do not care about COMDAT functions - those are intended to be
 533          optimized with the optimization flags of module they are used in.
 534          Also do not care about mixing up size/speed optimization when
 535          DECL_DISREGARD_INLINE_LIMITS is set.  */
 536       else if ((callee->merged_comdat
 537                 && !lookup_attribute ("optimize",
 538                                       DECL_ATTRIBUTES (caller->decl)))
 539                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 540         ;
 541       /* If mismatch is caused by merging two LTO units with different
 542          optimization flags we want to be bit nicer.  However never inline
 543          if one of functions is not optimized at all.  */
 544       else if (!opt_for_fn (callee->decl, optimize)
 545                || !opt_for_fn (caller->decl, optimize))
 546         {
 547           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 548           inlinable = false;
 549         }
 550       /* If callee is optimized for size and caller is not, allow inlining if
 551          code shrinks or we are in param_max_inline_insns_single limit and
 552          callee is inline (and thus likely an unified comdat).
 553          This will allow caller to run faster.  */
 554       else if (opt_for_fn (callee->decl, optimize_size)
 555                > opt_for_fn (caller->decl, optimize_size))
 556         {
 557           int growth = estimate_edge_growth (e);
 558           if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
 559               && (!DECL_DECLARED_INLINE_P (callee->decl)
 560                   && growth >= MAX (inline_insns_single (caller, false),
 561                                     inline_insns_auto (caller, false))))
 562             {
 563               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 564               inlinable = false;
 565             }
 566         }
 567       /* If callee is more aggressively optimized for performance than caller,
 568          we generally want to inline only cheap (runtime wise) functions.  */
 569       else if (opt_for_fn (callee->decl, optimize_size)
 570                < opt_for_fn (caller->decl, optimize_size)
 571                || (opt_for_fn (callee->decl, optimize)
 572                    > opt_for_fn (caller->decl, optimize)))
 573         {
               /* Refuse when the estimated edge time is at least 20 above
                  the time of the call statement itself.  */
 574           if (estimate_edge_time (e)
 575               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 576             {
 577               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 578               inlinable = false;
 579             }
 580         }
 581
 582     }
 583
 584   if (!inlinable && report)
 585     report_inline_failed_reason (e);
 586   return inlinable;
 587 }
 588
 589
 590 /* Return true if the edge E is inlinable during early inlining.
 591    E->inline_failed may be updated with the reason when it is not.  */
 592
 593 static bool
 594 can_early_inline_edge_p (struct cgraph_edge *e)
 595 {
 596   cgraph_node *target = e->callee->ultimate_alias_target ();
 597   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 598     return false;
 599
 600   /* The early inliner might get called at WPA stage when an IPA pass
 601      adds a new function.  Function bodies are missing then, so no
 602      early inlining can be done.  */
 603   if (!gimple_has_body_p (target->decl))
 604     {
 605       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 606       return false;
 607     }
 608
 609   /* Some callees may not be in SSA form yet (the callgraph is cyclic and
 610      the early inliner did not process the callee yet).  There is no CIF
 611      code for this case; the real inliner re-does the decision later.  */
 612   if (!(gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 613         && gimple_in_ssa_p (DECL_STRUCT_FUNCTION (target->decl))))
 614     {
 615       if (dump_enabled_p ())
 616         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 617                          "  edge not inlinable: not in SSA form\n");
 618       return false;
 619     }
 620   return (can_inline_edge_p (e, true, true)
 621           && can_inline_edge_by_limits_p (e, true, false, true));
 622 }
 623
 624
 625 /* Return the number of callee edges of N, not counting cheap builtins.  */
 626
 627 static int
 628 num_calls (struct cgraph_node *n)
 629 {
 630   int count = 0;
 631   for (cgraph_edge *edge = n->callees; edge != NULL; edge = edge->next_callee)
 632     {
 633       const bool cheap = is_inexpensive_builtin (edge->callee->decl);
 634       count += cheap ? 0 : 1;
 635     }
 636   return count;
 637 }
 638
 639
 640 /* Return true if we are interested in early inlining of edge E.
        Callees with DECL_DISREGARD_INLINE_LIMITS set and, when
        flag_auto_profile is set, call sites accepted by
        afdo_callsite_hot_enough_for_early_inline are always wanted.
        Otherwise the decision is based on the estimated growth compared
        with param_max_inline_insns_size and param_early_inlining_insns.
        The reason for refusing is written to the dump when enabled.  */
 641
 642 static bool
 643 want_early_inline_function_p (struct cgraph_edge *e)
 644 {
 645   bool want_inline = true;
 646   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 647
 648   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 649     ;
 650   /* For AutoFDO, we need to make sure that before profile summary, all
 651      hot paths' IR look exactly the same as profiled binary. As a result,
 652      in einliner, we will disregard size limit and inline those callsites
 653      that are:
 654        * inlined in the profiled binary, and
 655        * the cloned callee has enough samples to be considered "hot".  */
 656   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 657     ;
       /* Callees not declared inline are candidates only when the caller is
          compiled with flag_inline_small_functions.  */
 658   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 659            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 660     {
 661       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 662       report_inline_failed_reason (e);
 663       want_inline = false;
 664     }
 665   else
 666     {
 667       /* First take care of very large functions.  */
 668       int min_growth = estimate_min_edge_growth (e), growth = 0;
 669       int n;
 670       int early_inlining_insns = param_early_inlining_insns;
 671
           /* NOTE(review): the message below says "call is cold" although
              the hotness of the call is not tested on this path.  */
 672       if (min_growth > early_inlining_insns)
 673         {
 674           if (dump_enabled_p ())
 675             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 676                              "  will not early inline: %C->%C, "
 677                              "call is cold and code would grow "
 678                              "at least by %i\n",
 679                              e->caller, callee,
 680                              min_growth);
 681           want_inline = false;
 682         }
 683       else
 684         growth = estimate_edge_growth (e);
 685
 686
           /* Already refused above, or the growth is within
              param_max_inline_insns_size: nothing more to check.  */
 687       if (!want_inline || growth <= param_max_inline_insns_size)
 688         ;
 689       else if (!e->maybe_hot_p ())
 690         {
 691           if (dump_enabled_p ())
 692             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 693                              "  will not early inline: %C->%C, "
 694                              "call is cold and code would grow by %i\n",
 695                              e->caller, callee,
 696                              growth);
 697           want_inline = false;
 698         }
 699       else if (growth > early_inlining_insns)
 700         {
 701           if (dump_enabled_p ())
 702             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 703                              "  will not early inline: %C->%C, "
 704                              "growth %i exceeds --param early-inlining-insns\n",
 705                              e->caller, callee, growth);
 706           want_inline = false;
 707         }
           /* When the callee itself makes N calls (cheap builtins aside),
              the growth is weighted by N + 1 before comparing.  */
 708       else if ((n = num_calls (callee)) != 0
 709                && growth * (n + 1) > early_inlining_insns)
 710         {
 711           if (dump_enabled_p ())
 712             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 713                              "  will not early inline: %C->%C, "
 714                              "growth %i exceeds --param early-inlining-insns "
 715                              "divided by number of calls\n",
 716                              e->caller, callee, growth);
 717           want_inline = false;
 718         }
 719     }
 720   return want_inline;
 721 }
 722
 723 /* Return the time of executing EDGE's caller and callee when the call is
 724    not inlined: UNINLINED_CALL_TIME weighted by FREQ plus the caller's time.  */
 725
 726 inline sreal
 727 compute_uninlined_call_time (struct cgraph_edge *edge,
 728                              sreal uninlined_call_time,
 729                              sreal freq)
 730 {
 731   cgraph_node *outer = edge->caller;
 732   if (outer->inlined_to)
 733     outer = outer->inlined_to;
 734   const sreal caller_time = ipa_fn_summaries->get (outer)->time;
 735
 736   sreal call_time = uninlined_call_time;
 737   if (freq > 0)
 738     call_time *= freq;
 739   else  /* FREQ is not positive: shift instead of multiplying.  */
 740     call_time = call_time >> 11;
 741   return call_time + caller_time;
 742 }
 743
 744 /* Same as compute_uinlined_call_time but compute time when inlining
 745    does happen.  */
 746
 747 inline sreal
 748 compute_inlined_call_time (struct cgraph_edge *edge,
 749                            sreal time,
 750                            sreal freq)
 751 {
 752   cgraph_node *caller = (edge->caller->inlined_to
 753                          ? edge->caller->inlined_to
 754                          : edge->caller);
 755   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 756
 757   if (freq > 0)
 758     time *= freq;
 759   else
 760     time = time >> 11;
 761
 762   /* This calculation should match one in ipa-inline-analysis.c
 763      (estimate_edge_size_and_time).  */
 764   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 765   time += caller_time;
 766   if (time <= 0)
 767     time = ((sreal) 1) >> 8;
 768   gcc_checking_assert (time >= 0);
 769   return time;
 770 }
 771
 772 /* Determine time saved by inlining EDGE of frequency FREQ
 773    where callee's runtime w/o inlining is UNINLINED_TYPE
 774    and with inlined is INLINED_TYPE.  */
 775
 776 inline sreal
 777 inlining_speedup (struct cgraph_edge *edge,
 778                   sreal freq,
 779                   sreal uninlined_time,
 780                   sreal inlined_time)
 781 {
 782   sreal speedup = uninlined_time - inlined_time;
 783   /* Handling of call_time should match one in ipa-inline-fnsummary.c
 784      (estimate_edge_size_and_time).  */
 785   sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
 786
 787   if (freq > 0)
 788     {
 789       speedup = (speedup + call_time);
 790       if (freq != 1)
 791        speedup = speedup * freq;
 792     }
 793   else if (freq == 0)
 794     speedup = speedup >> 11;
 795   gcc_checking_assert (speedup >= 0);
 796   return speedup;
 797 }
 798
 799 /* Return true if the speedup for inlining E is bigger than
 800    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 801
 802 static bool
 803 big_speedup_p (struct cgraph_edge *e)
 804 {
 805   sreal unspec_time;
 806   sreal spec_time = estimate_edge_time (e, &unspec_time);
 807   sreal freq = e->sreal_frequency ();
 808   sreal time = compute_uninlined_call_time (e, unspec_time, freq);
 809   sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
 810   cgraph_node *caller = (e->caller->inlined_to
 811                          ? e->caller->inlined_to
 812                          : e->caller);
 813   int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
 814
 815   if ((time - inlined_time) * 100 > time * limit)
 816     return true;
 817   return false;
 818 }
 819
 820 /* Return true if we are interested in inlining small function.
 821    When REPORT is true, report reason to dump file.  */
 822
 823 static bool
 824 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 825 {
 826   bool want_inline = true;
 827   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 828   cgraph_node *to  = (e->caller->inlined_to
 829                       ? e->caller->inlined_to : e->caller);
 830
 831   /* Allow this function to be called before can_inline_edge_p,
 832      since it's usually cheaper.  */
 833   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 834     want_inline = false;
 835   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 836     ;
 837   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 838            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 839     {
 840       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 841       want_inline = false;
 842     }
 843   /* Do fast and conservative check if the function can be good
 844      inline candidate.  */
 845   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 846            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 847            && ipa_fn_summaries->get (callee)->min_size
 848                 - ipa_call_summaries->get (e)->call_stmt_size
 849               > inline_insns_auto (e->caller, true))
 850     {
 851       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 852       want_inline = false;
 853     }
 854   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 855             || e->count.ipa ().nonzero_p ())
 856            && ipa_fn_summaries->get (callee)->min_size
 857                 - ipa_call_summaries->get (e)->call_stmt_size
 858               > inline_insns_single (e->caller, true))
 859     {
 860       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 861                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 862                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 863       want_inline = false;
 864     }
 865   else
 866     {
 867       int growth = estimate_edge_growth (e);
 868       ipa_hints hints = estimate_edge_hints (e);
 869       bool apply_hints = (hints & (INLINE_HINT_indirect_call
 870                                    | INLINE_HINT_known_hot
 871                                    | INLINE_HINT_loop_iterations
 872                                    | INLINE_HINT_loop_stride));
 873
 874       if (growth <= opt_for_fn (to->decl,
 875                                 param_max_inline_insns_size))
 876         ;
 877       /* Apply param_max_inline_insns_single limit.  Do not do so when
 878          hints suggests that inlining given function is very profitable.
 879          Avoid computation of big_speedup_p when not necessary to change
 880          outcome of decision.  */
 881       else if (DECL_DECLARED_INLINE_P (callee->decl)
 882                && growth >= inline_insns_single (e->caller, apply_hints)
 883                && (apply_hints
 884                    || growth >= inline_insns_single (e->caller, true)
 885                    || !big_speedup_p (e)))
 886         {
 887           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 888           want_inline = false;
 889         }
 890       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 891                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 892                && growth >= opt_for_fn (to->decl,
 893                                         param_max_inline_insns_small))
 894         {
 895           /* growth_positive_p is expensive, always test it last.  */
 896           if (growth >= inline_insns_single (e->caller, false)
 897               || growth_positive_p (callee, e, growth))
 898             {
 899               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 900               want_inline = false;
 901             }
 902         }
 903       /* Apply param_max_inline_insns_auto limit for functions not declared
 904          inline.  Bypass the limit when speedup seems big.  */
 905       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 906                && growth >= inline_insns_auto (e->caller, apply_hints)
 907                && (apply_hints
 908                    || growth >= inline_insns_auto (e->caller, true)
 909                    || !big_speedup_p (e)))
 910         {
 911           /* growth_positive_p is expensive, always test it last.  */
 912           if (growth >= inline_insns_single (e->caller, false)
 913               || growth_positive_p (callee, e, growth))
 914             {
 915               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 916               want_inline = false;
 917             }
 918         }
 919       /* If call is cold, do not inline when function body would grow. */
 920       else if (!e->maybe_hot_p ()
 921                && (growth >= inline_insns_single (e->caller, false)
 922                    || growth_positive_p (callee, e, growth)))
 923         {
 924           e->inline_failed = CIF_UNLIKELY_CALL;
 925           want_inline = false;
 926         }
 927     }
 928   if (!want_inline && report)
 929     report_inline_failed_reason (e);
 930   return want_inline;
 931 }
 932
 933 /* EDGE is self recursive edge.
 934    We handle two cases - when function A is inlining into itself
 935    or when function A is being inlined into another inliner copy of function
 936    A within function B.
 937
 938    In first case OUTER_NODE points to the toplevel copy of A, while
 939    in the second case OUTER_NODE points to the outermost copy of A in B.
 940
 941    In both cases we want to be extra selective since
 942    inlining the call will just introduce new recursive calls to appear.  */
 943
 944 static bool
 945 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 946                                    struct cgraph_node *outer_node,
 947                                    bool peeling,
 948                                    int depth)
 949 {
 950   char const *reason = NULL;
 951   bool want_inline = true;
 952   sreal caller_freq = 1;
 953   int max_depth = opt_for_fn (outer_node->decl,
 954                               param_max_inline_recursive_depth_auto);
 955
 956   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 957     max_depth = opt_for_fn (outer_node->decl,
 958                             param_max_inline_recursive_depth);
 959
 960   if (!edge->maybe_hot_p ())
 961     {
 962       reason = "recursive call is cold";
 963       want_inline = false;
 964     }
 965   else if (depth > max_depth)
 966     {
 967       reason = "--param max-inline-recursive-depth exceeded.";
 968       want_inline = false;
 969     }
 970   else if (outer_node->inlined_to
 971            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 972     {
 973       reason = "caller frequency is 0";
 974       want_inline = false;
 975     }
 976
 977   if (!want_inline)
 978     ;
 979   /* Inlining of self recursive function into copy of itself within other
 980      function is transformation similar to loop peeling.
 981
 982      Peeling is profitable if we can inline enough copies to make probability
 983      of actual call to the self recursive function very small.  Be sure that
 984      the probability of recursion is small.
 985
 986      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 987      This way the expected number of recursion is at most max_depth.  */
 988   else if (peeling)
 989     {
 990       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 991       int i;
 992       for (i = 1; i < depth; i++)
 993         max_prob = max_prob * max_prob;
 994       if (edge->sreal_frequency () >= max_prob * caller_freq)
 995         {
 996           reason = "frequency of recursive call is too large";
 997           want_inline = false;
 998         }
 999     }
1000   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1001      recursion depth is large.  We reduce function call overhead and increase
1002      chances that things fit in hardware return predictor.
1003
1004      Recursive inlining might however increase cost of stack frame setup
1005      actually slowing down functions whose recursion tree is wide rather than
1006      deep.
1007
1008      Deciding reliably on when to do recursive inlining without profile feedback
1009      is tricky.  For now we disable recursive inlining when probability of self
1010      recursion is low.
1011
1012      Recursive inlining of self recursive call within loop also results in
1013      large loop depths that generally optimize badly.  We may want to throttle
1014      down inlining in those cases.  In particular this seems to happen in one
1015      of libstdc++ rb tree methods.  */
1016   else
1017     {
1018       if (edge->sreal_frequency () * 100
1019           <= caller_freq
1020              * opt_for_fn (outer_node->decl,
1021                            param_min_inline_recursive_probability))
1022         {
1023           reason = "frequency of recursive call is too small";
1024           want_inline = false;
1025         }
1026     }
1027   if (!want_inline && dump_enabled_p ())
1028     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1029                      "   not inlining recursively: %s\n", reason);
1030   return want_inline;
1031 }
1032
1033 /* Return true when NODE has uninlinable caller;
1034    set HAS_HOT_CALL if it has hot call.
1035    Worker for cgraph_for_node_and_aliases.  */
1036
1037 static bool
1038 check_callers (struct cgraph_node *node, void *has_hot_call)
1039 {
1040   struct cgraph_edge *e;
1041    for (e = node->callers; e; e = e->next_caller)
1042      {
1043        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1044            || !opt_for_fn (e->caller->decl, optimize))
1045          return true;
1046        if (!can_inline_edge_p (e, true))
1047          return true;
1048        if (e->recursive_p ())
1049          return true;
1050        if (!can_inline_edge_by_limits_p (e, true))
1051          return true;
1052        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1053          *(bool *)has_hot_call = true;
1054      }
1055   return false;
1056 }
1057
1058 /* If NODE has a caller, return true.  */
1059
1060 static bool
1061 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1062 {
1063   if (node->callers)
1064     return true;
1065   return false;
1066 }
1067
1068 /* Decide if inlining NODE would reduce unit size by eliminating
1069    the offline copy of function.
1070    When COLD is true the cold calls are considered, too.  */
1071
1072 static bool
1073 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1074 {
1075   bool has_hot_call = false;
1076
1077   /* Aliases gets inlined along with the function they alias.  */
1078   if (node->alias)
1079     return false;
1080   /* Already inlined?  */
1081   if (node->inlined_to)
1082     return false;
1083   /* Does it have callers?  */
1084   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1085     return false;
1086   /* Inlining into all callers would increase size?  */
1087   if (growth_positive_p (node, NULL, INT_MIN) > 0)
1088     return false;
1089   /* All inlines must be possible.  */
1090   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1091                                          true))
1092     return false;
1093   if (!cold && !has_hot_call)
1094     return false;
1095   return true;
1096 }
1097
1098 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1099    in estimate_edge_badness.  */
1100
1101 static bool
1102 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1103 {
1104   return size < (DECL_DECLARED_INLINE_P (where->decl)
1105                  ? inline_insns_single (where, false)
1106                  : inline_insns_auto (where, false));
1107 }
1108
1109 /* A cost model driving the inlining heuristics in a way so the edges with
1110    smallest badness are inlined first.  After each inlining is performed
1111    the costs of all caller edges of nodes affected are recomputed so the
1112    metrics may accurately depend on values such as number of inlinable callers
1113    of the function or function body size.  */
1114
1115 static sreal
1116 edge_badness (struct cgraph_edge *edge, bool dump)
1117 {
1118   sreal badness;
1119   int growth;
1120   sreal edge_time, unspec_edge_time;
1121   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1122   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1123   ipa_hints hints;
1124   cgraph_node *caller = (edge->caller->inlined_to
1125                          ? edge->caller->inlined_to
1126                          : edge->caller);
1127
1128   growth = estimate_edge_growth (edge);
1129   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1130   hints = estimate_edge_hints (edge);
1131   gcc_checking_assert (edge_time >= 0);
1132   /* Check that inlined time is better, but tolerate some roundoff issues.
1133      FIXME: When callee profile drops to 0 we account calls more.  This
1134      should be fixed by never doing that.  */
1135   gcc_checking_assert ((edge_time * 100
1136                         - callee_info->time * 101).to_int () <= 0
1137                         || callee->count.ipa ().initialized_p ());
1138   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1139
1140   if (dump)
1141     {
1142       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1143                edge->caller->dump_name (),
1144                edge->callee->dump_name ());
1145       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1146                growth,
1147                edge_time.to_double (),
1148                unspec_edge_time.to_double ());
1149       ipa_dump_hints (dump_file, hints);
1150       if (big_speedup_p (edge))
1151         fprintf (dump_file, " big_speedup");
1152       fprintf (dump_file, "\n");
1153     }
1154
1155   /* Always prefer inlining saving code size.  */
1156   if (growth <= 0)
1157     {
1158       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1159       if (dump)
1160         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1161                  growth);
1162     }
1163    /* Inlining into EXTERNAL functions is not going to change anything unless
1164       they are themselves inlined.  */
1165    else if (DECL_EXTERNAL (caller->decl))
1166     {
1167       if (dump)
1168         fprintf (dump_file, "      max: function is external\n");
1169       return sreal::max ();
1170     }
1171   /* When profile is available. Compute badness as:
1172
1173                  time_saved * caller_count
1174      goodness =  -------------------------------------------------
1175                  growth_of_caller * overall_growth * combined_size
1176
1177      badness = - goodness
1178
1179      Again use negative value to make calls with profile appear hotter
1180      then calls without.
1181   */
1182   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1183            || caller->count.ipa ().nonzero_p ())
1184     {
1185       sreal numerator, denominator;
1186       int overall_growth;
1187       sreal freq = edge->sreal_frequency ();
1188
1189       numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1190       if (numerator <= 0)
1191         numerator = ((sreal) 1 >> 8);
1192       if (caller->count.ipa ().nonzero_p ())
1193         numerator *= caller->count.ipa ().to_gcov_type ();
1194       else if (caller->count.ipa ().initialized_p ())
1195         numerator = numerator >> 11;
1196       denominator = growth;
1197
1198       overall_growth = callee_info->growth;
1199
1200       /* Look for inliner wrappers of the form:
1201
1202          inline_caller ()
1203            {
1204              do_fast_job...
1205              if (need_more_work)
1206                noninline_callee ();
1207            }
1208          Without penalizing this case, we usually inline noninline_callee
1209          into the inline_caller because overall_growth is small preventing
1210          further inlining of inline_caller.
1211
1212          Penalize only callgraph edges to functions with small overall
1213          growth ...
1214         */
1215       if (growth > overall_growth
1216           /* ... and having only one caller which is not inlined ... */
1217           && callee_info->single_caller
1218           && !edge->caller->inlined_to
1219           /* ... and edges executed only conditionally ... */
1220           && freq < 1
1221           /* ... consider case where callee is not inline but caller is ... */
1222           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1223                && DECL_DECLARED_INLINE_P (caller->decl))
1224               /* ... or when early optimizers decided to split and edge
1225                  frequency still indicates splitting is a win ... */
1226               || (callee->split_part && !caller->split_part
1227                   && freq * 100
1228                          < opt_for_fn (caller->decl,
1229                                        param_partial_inlining_entry_probability)
1230                   /* ... and do not overwrite user specified hints.   */
1231                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1232                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1233         {
1234           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1235           int caller_growth = caller_info->growth;
1236
1237           /* Only apply the penalty when caller looks like inline candidate,
1238              and it is not called once.  */
1239           if (!caller_info->single_caller && overall_growth < caller_growth
1240               && caller_info->inlinable
1241               && wrapper_heuristics_may_apply
1242                  (caller, ipa_size_summaries->get (caller)->size))
1243             {
1244               if (dump)
1245                 fprintf (dump_file,
1246                          "     Wrapper penalty. Increasing growth %i to %i\n",
1247                          overall_growth, caller_growth);
1248               overall_growth = caller_growth;
1249             }
1250         }
1251       if (overall_growth > 0)
1252         {
1253           /* Strongly prefer functions with few callers that can be inlined
1254              fully.  The square root here leads to smaller binaries at average.
1255              Watch however for extreme cases and return to linear function
1256              when growth is large.  */
1257           if (overall_growth < 256)
1258             overall_growth *= overall_growth;
1259           else
1260             overall_growth += 256 * 256 - 256;
1261           denominator *= overall_growth;
1262         }
1263       denominator *= ipa_size_summaries->get (caller)->size + growth;
1264
1265       badness = - numerator / denominator;
1266
1267       if (dump)
1268         {
1269           fprintf (dump_file,
1270                    "      %f: guessed profile. frequency %f, count %" PRId64
1271                    " caller count %" PRId64
1272                    " time saved %f"
1273                    " overall growth %i (current) %i (original)"
1274                    " %i (compensated)\n",
1275                    badness.to_double (),
1276                    freq.to_double (),
1277                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1278                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1279                    inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1280                    estimate_growth (callee),
1281                    callee_info->growth, overall_growth);
1282         }
1283     }
1284   /* When function local profile is not available or it does not give
1285      useful information (i.e. frequency is zero), base the cost on
1286      loop nest and overall size growth, so we optimize for overall number
1287      of functions fully inlined in program.  */
1288   else
1289     {
1290       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1291       badness = growth;
1292
1293       /* Decrease badness if call is nested.  */
1294       if (badness > 0)
1295         badness = badness >> nest;
1296       else
1297         badness = badness << nest;
1298       if (dump)
1299         fprintf (dump_file, "      %f: no profile. nest %i\n",
1300                  badness.to_double (), nest);
1301     }
1302   gcc_checking_assert (badness != 0);
1303
1304   if (edge->recursive_p ())
1305     badness = badness.shift (badness > 0 ? 4 : -4);
1306   if ((hints & (INLINE_HINT_indirect_call
1307                 | INLINE_HINT_loop_iterations
1308                 | INLINE_HINT_loop_stride))
1309       || callee_info->growth <= 0)
1310     badness = badness.shift (badness > 0 ? -2 : 2);
1311   if (hints & (INLINE_HINT_same_scc))
1312     badness = badness.shift (badness > 0 ? 3 : -3);
1313   else if (hints & (INLINE_HINT_in_scc))
1314     badness = badness.shift (badness > 0 ? 2 : -2);
1315   else if (hints & (INLINE_HINT_cross_module))
1316     badness = badness.shift (badness > 0 ? 1 : -1);
1317   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1318     badness = badness.shift (badness > 0 ? -4 : 4);
1319   else if ((hints & INLINE_HINT_declared_inline))
1320     badness = badness.shift (badness > 0 ? -3 : 3);
1321   if (dump)
1322     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1323   return badness;
1324 }
1325
1326 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1327 static inline void
1328 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1329 {
1330   sreal badness = edge_badness (edge, false);
1331   if (edge->aux)
1332     {
1333       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1334       gcc_checking_assert (n->get_data () == edge);
1335
1336       /* fibonacci_heap::replace_key does busy updating of the
1337          heap that is unnecessarily expensive.
1338          We do lazy increases: after extracting minimum if the key
1339          turns out to be out of date, it is re-inserted into heap
1340          with correct value.  */
1341       if (badness < n->get_key ())
1342         {
1343           if (dump_file && (dump_flags & TDF_DETAILS))
1344             {
1345               fprintf (dump_file,
1346                        "  decreasing badness %s -> %s, %f to %f\n",
1347                        edge->caller->dump_name (),
1348                        edge->callee->dump_name (),
1349                        n->get_key ().to_double (),
1350                        badness.to_double ());
1351             }
1352           heap->decrease_key (n, badness);
1353         }
1354     }
1355   else
1356     {
1357        if (dump_file && (dump_flags & TDF_DETAILS))
1358          {
1359            fprintf (dump_file,
1360                     "  enqueuing call %s -> %s, badness %f\n",
1361                     edge->caller->dump_name (),
1362                     edge->callee->dump_name (),
1363                     badness.to_double ());
1364          }
1365       edge->aux = heap->insert (badness, edge);
1366     }
1367 }
1368
1369
1370 /* NODE was inlined.
1371    All caller edges needs to be reset because
1372    size estimates change. Similarly callees needs reset
1373    because better context may be known.  */
1374
1375 static void
1376 reset_edge_caches (struct cgraph_node *node)
1377 {
1378   struct cgraph_edge *edge;
1379   struct cgraph_edge *e = node->callees;
1380   struct cgraph_node *where = node;
1381   struct ipa_ref *ref;
1382
1383   if (where->inlined_to)
1384     where = where->inlined_to;
1385
1386   reset_node_cache (where);
1387
1388   if (edge_growth_cache != NULL)
1389     for (edge = where->callers; edge; edge = edge->next_caller)
1390       if (edge->inline_failed)
1391         edge_growth_cache->remove (edge);
1392
1393   FOR_EACH_ALIAS (where, ref)
1394     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1395
1396   if (!e)
1397     return;
1398
1399   while (true)
1400     if (!e->inline_failed && e->callee->callees)
1401       e = e->callee->callees;
1402     else
1403       {
1404         if (edge_growth_cache != NULL && e->inline_failed)
1405           edge_growth_cache->remove (e);
1406         if (e->next_callee)
1407           e = e->next_callee;
1408         else
1409           {
1410             do
1411               {
1412                 if (e->caller == node)
1413                   return;
1414                 e = e->caller->callers;
1415               }
1416             while (!e->next_callee);
1417             e = e->next_callee;
1418           }
1419       }
1420 }
1421
1422 /* Recompute HEAP nodes for each of caller of NODE.
1423    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1424    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1425    it is inlinable. Otherwise check all edges.  */
1426
1427 static void
1428 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1429                     bitmap updated_nodes,
1430                     struct cgraph_edge *check_inlinablity_for)
1431 {
1432   struct cgraph_edge *edge;
1433   struct ipa_ref *ref;
1434
1435   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1436       || node->inlined_to)
1437     return;
1438   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1439     return;
1440
1441   FOR_EACH_ALIAS (node, ref)
1442     {
1443       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1444       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1445     }
1446
1447   for (edge = node->callers; edge; edge = edge->next_caller)
1448     if (edge->inline_failed)
1449       {
1450         if (!check_inlinablity_for
1451             || check_inlinablity_for == edge)
1452           {
1453             if (can_inline_edge_p (edge, false)
1454                 && want_inline_small_function_p (edge, false)
1455                 && can_inline_edge_by_limits_p (edge, false))
1456               update_edge_key (heap, edge);
1457             else if (edge->aux)
1458               {
1459                 report_inline_failed_reason (edge);
1460                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1461                 edge->aux = NULL;
1462               }
1463           }
1464         else if (edge->aux)
1465           update_edge_key (heap, edge);
1466       }
1467 }
1468
1469 /* Recompute HEAP nodes for each uninlined call in NODE
1470    If UPDATE_SINCE is non-NULL check if edges called within that function
1471    are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1472    where all edges have new context).
1473
1474    This is used when we know that edge badnesses are going only to increase
1475    (we introduced new call site) and thus all we need is to insert newly
1476    created edges into heap.  */
1477
1478 static void
1479 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1480                     struct cgraph_node *update_since,
1481                     bitmap updated_nodes)
1482 {
1483   struct cgraph_edge *e = node->callees;
1484   bool check_inlinability = update_since == node;
1485
1486   if (!e)
1487     return;
1488   while (true)
1489     if (!e->inline_failed && e->callee->callees)
1490       {
1491         if (e->callee == update_since)
1492           check_inlinability = true;
1493         e = e->callee->callees;
1494       }
1495     else
1496       {
1497         enum availability avail;
1498         struct cgraph_node *callee;
1499         if (!check_inlinability)
1500           {
1501             if (e->aux
1502                 && !bitmap_bit_p (updated_nodes,
1503                                   e->callee->ultimate_alias_target
1504                                     (&avail, e->caller)->get_uid ()))
1505               update_edge_key (heap, e);
1506           }
1507         /* We do not reset callee growth cache here.  Since we added a new call,
1508            growth should have just increased and consequently badness metric
1509            don't need updating.  */
1510         else if (e->inline_failed
1511                  && (callee = e->callee->ultimate_alias_target (&avail,
1512                                                                 e->caller))
1513                  && avail >= AVAIL_AVAILABLE
1514                  && ipa_fn_summaries->get (callee) != NULL
1515                  && ipa_fn_summaries->get (callee)->inlinable
1516                  && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1517           {
1518             if (can_inline_edge_p (e, false)
1519                 && want_inline_small_function_p (e, false)
1520                 && can_inline_edge_by_limits_p (e, false))
1521               {
1522                 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1523                 gcc_checking_assert (check_inlinability || e->aux);
1524                 update_edge_key (heap, e);
1525               }
1526             else if (e->aux)
1527               {
1528                 report_inline_failed_reason (e);
1529                 heap->delete_node ((edge_heap_node_t *) e->aux);
1530                 e->aux = NULL;
1531               }
1532           }
1533         /* In case we redirected to unreachable node we only need to remove the
1534            fibheap entry.  */
1535         else if (e->aux)
1536           {
1537             heap->delete_node ((edge_heap_node_t *) e->aux);
1538             e->aux = NULL;
1539           }
1540         if (e->next_callee)
1541           e = e->next_callee;
1542         else
1543           {
1544             do
1545               {
1546                 if (e->caller == node)
1547                   return;
1548                 if (e->caller == update_since)
1549                   check_inlinability = false;
1550                 e = e->caller->callers;
1551               }
1552             while (!e->next_callee);
1553             e = e->next_callee;
1554           }
1555       }
1556 }
1557
1558 /* Enqueue all recursive calls from NODE into priority queue depending on
1559    how likely we want to recursively inline the call.  */
1560
1561 static void
1562 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1563                         edge_heap_t *heap)
1564 {
1565   struct cgraph_edge *e;
1566   enum availability avail;
1567
1568   for (e = where->callees; e; e = e->next_callee)
1569     if (e->callee == node
1570         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1571             && avail > AVAIL_INTERPOSABLE))
1572       heap->insert (-e->sreal_frequency (), e);
1573   for (e = where->callees; e; e = e->next_callee)
1574     if (!e->inline_failed)
1575       lookup_recursive_calls (node, e->callee, heap);
1576 }
1577
1578 /* Decide on recursive inlining: in the case function has recursive calls,
1579    inline until body size reaches given argument.  If any new indirect edges
1580    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1581    is NULL.  */
1582
1583 static bool
1584 recursive_inlining (struct cgraph_edge *edge,
1585                     vec<cgraph_edge *> *new_edges)
1586 {
1587   cgraph_node *to  = (edge->caller->inlined_to
1588                       ? edge->caller->inlined_to : edge->caller);
1589   int limit = opt_for_fn (to->decl,
1590                           param_max_inline_insns_recursive_auto);
1591   edge_heap_t heap (sreal::min ());
1592   struct cgraph_node *node;
1593   struct cgraph_edge *e;
1594   struct cgraph_node *master_clone = NULL, *next;
1595   int depth = 0;
1596   int n = 0;
1597
1598   node = edge->caller;
1599   if (node->inlined_to)
1600     node = node->inlined_to;
1601
1602   if (DECL_DECLARED_INLINE_P (node->decl))
1603     limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1604
1605   /* Make sure that function is small enough to be considered for inlining.  */
1606   if (estimate_size_after_inlining (node, edge)  >= limit)
1607     return false;
1608   lookup_recursive_calls (node, node, &heap);
1609   if (heap.empty ())
1610     return false;
1611
1612   if (dump_file)
1613     fprintf (dump_file,
1614              "  Performing recursive inlining on %s\n",
1615              node->name ());
1616
1617   /* Do the inlining and update list of recursive call during process.  */
1618   while (!heap.empty ())
1619     {
1620       struct cgraph_edge *curr = heap.extract_min ();
1621       struct cgraph_node *cnode, *dest = curr->callee;
1622
1623       if (!can_inline_edge_p (curr, true)
1624           || !can_inline_edge_by_limits_p (curr, true))
1625         continue;
1626
1627       /* MASTER_CLONE is produced in the case we already started modified
1628          the function. Be sure to redirect edge to the original body before
1629          estimating growths otherwise we will be seeing growths after inlining
1630          the already modified body.  */
1631       if (master_clone)
1632         {
1633           curr->redirect_callee (master_clone);
1634           if (edge_growth_cache != NULL)
1635             edge_growth_cache->remove (curr);
1636         }
1637
1638       if (estimate_size_after_inlining (node, curr) > limit)
1639         {
1640           curr->redirect_callee (dest);
1641           if (edge_growth_cache != NULL)
1642             edge_growth_cache->remove (curr);
1643           break;
1644         }
1645
1646       depth = 1;
1647       for (cnode = curr->caller;
1648            cnode->inlined_to; cnode = cnode->callers->caller)
1649         if (node->decl
1650             == curr->callee->ultimate_alias_target ()->decl)
1651           depth++;
1652
1653       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1654         {
1655           curr->redirect_callee (dest);
1656           if (edge_growth_cache != NULL)
1657             edge_growth_cache->remove (curr);
1658           continue;
1659         }
1660
1661       if (dump_file)
1662         {
1663           fprintf (dump_file,
1664                    "   Inlining call of depth %i", depth);
1665           if (node->count.nonzero_p () && curr->count.initialized_p ())
1666             {
1667               fprintf (dump_file, " called approx. %.2f times per call",
1668                        (double)curr->count.to_gcov_type ()
1669                        / node->count.to_gcov_type ());
1670             }
1671           fprintf (dump_file, "\n");
1672         }
1673       if (!master_clone)
1674         {
1675           /* We need original clone to copy around.  */
1676           master_clone = node->create_clone (node->decl, node->count,
1677             false, vNULL, true, NULL, NULL);
1678           for (e = master_clone->callees; e; e = e->next_callee)
1679             if (!e->inline_failed)
1680               clone_inlined_nodes (e, true, false, NULL);
1681           curr->redirect_callee (master_clone);
1682           if (edge_growth_cache != NULL)
1683             edge_growth_cache->remove (curr);
1684         }
1685
1686       inline_call (curr, false, new_edges, &overall_size, true);
1687       reset_node_cache (node);
1688       lookup_recursive_calls (node, curr->callee, &heap);
1689       n++;
1690     }
1691
1692   if (!heap.empty () && dump_file)
1693     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1694
1695   if (!master_clone)
1696     return false;
1697
1698   if (dump_enabled_p ())
1699     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1700                      "\n   Inlined %i times, "
1701                      "body grown from size %i to %i, time %f to %f\n", n,
1702                      ipa_size_summaries->get (master_clone)->size,
1703                      ipa_size_summaries->get (node)->size,
1704                      ipa_fn_summaries->get (master_clone)->time.to_double (),
1705                      ipa_fn_summaries->get (node)->time.to_double ());
1706
1707   /* Remove master clone we used for inlining.  We rely that clones inlined
1708      into master clone gets queued just before master clone so we don't
1709      need recursion.  */
1710   for (node = symtab->first_function (); node != master_clone;
1711        node = next)
1712     {
1713       next = symtab->next_function (node);
1714       if (node->inlined_to == master_clone)
1715         node->remove ();
1716     }
1717   master_clone->remove ();
1718   return true;
1719 }
1720
1721
1722 /* Given whole compilation unit estimate of INSNS, compute how large we can
1723    allow the unit to grow.  */
1724
1725 static int64_t
1726 compute_max_insns (cgraph_node *node, int insns)
1727 {
1728   int max_insns = insns;
1729   if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1730     max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1731
1732   return ((int64_t) max_insns
1733           * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1734 }
1735
1736
1737 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1738
1739 static void
1740 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1741 {
1742   while (new_edges.length () > 0)
1743     {
1744       struct cgraph_edge *edge = new_edges.pop ();
1745
1746       gcc_assert (!edge->aux);
1747       gcc_assert (edge->callee);
1748       if (edge->inline_failed
1749           && can_inline_edge_p (edge, true)
1750           && want_inline_small_function_p (edge, true)
1751           && can_inline_edge_by_limits_p (edge, true))
1752         edge->aux = heap->insert (edge_badness (edge, false), edge);
1753     }
1754 }
1755
1756 /* Remove EDGE from the fibheap.  */
1757
1758 static void
1759 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1760 {
1761   if (e->aux)
1762     {
1763       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1764       e->aux = NULL;
1765     }
1766 }
1767
1768 /* Return true if speculation of edge E seems useful.
1769    If ANTICIPATE_INLINING is true, be conservative and hope that E
1770    may get inlined.  */
1771
1772 bool
1773 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1774 {
1775   /* If we have already decided to inline the edge, it seems useful.  */
1776   if (!e->inline_failed)
1777     return true;
1778
1779   enum availability avail;
1780   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1781                                                                  e->caller);
1782   struct cgraph_edge *direct, *indirect;
1783   struct ipa_ref *ref;
1784
1785   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1786
1787   if (!e->maybe_hot_p ())
1788     return false;
1789
1790   /* See if IP optimizations found something potentially useful about the
1791      function.  For now we look only for CONST/PURE flags.  Almost everything
1792      else we propagate is useless.  */
1793   if (avail >= AVAIL_AVAILABLE)
1794     {
1795       int ecf_flags = flags_from_decl_or_type (target->decl);
1796       if (ecf_flags & ECF_CONST)
1797         {
1798           e->speculative_call_info (direct, indirect, ref);
1799           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1800             return true;
1801         }
1802       else if (ecf_flags & ECF_PURE)
1803         {
1804           e->speculative_call_info (direct, indirect, ref);
1805           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1806             return true;
1807         }
1808     }
1809   /* If we did not managed to inline the function nor redirect
1810      to an ipa-cp clone (that are seen by having local flag set),
1811      it is probably pointless to inline it unless hardware is missing
1812      indirect call predictor.  */
1813   if (!anticipate_inlining && !target->local)
1814     return false;
1815   /* For overwritable targets there is not much to do.  */
1816   if (!can_inline_edge_p (e, false)
1817       || !can_inline_edge_by_limits_p (e, false, true))
1818     return false;
1819   /* OK, speculation seems interesting.  */
1820   return true;
1821 }
1822
1823 /* We know that EDGE is not going to be inlined.
1824    See if we can remove speculation.  */
1825
1826 static void
1827 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1828 {
1829   if (edge->speculative && !speculation_useful_p (edge, false))
1830     {
1831       struct cgraph_node *node = edge->caller;
1832       struct cgraph_node *where = node->inlined_to
1833                                   ? node->inlined_to : node;
1834       auto_bitmap updated_nodes;
1835
1836       if (edge->count.ipa ().initialized_p ())
1837         spec_rem += edge->count.ipa ();
1838       edge->resolve_speculation ();
1839       reset_edge_caches (where);
1840       ipa_update_overall_fn_summary (where);
1841       update_caller_keys (edge_heap, where,
1842                           updated_nodes, NULL);
1843       update_callee_keys (edge_heap, where, NULL,
1844                           updated_nodes);
1845     }
1846 }
1847
1848 /* Return true if NODE should be accounted for overall size estimate.
1849    Skip all nodes optimized for size so we can measure the growth of hot
1850    part of program no matter of the padding.  */
1851
1852 bool
1853 inline_account_function_p (struct cgraph_node *node)
1854 {
1855    return (!DECL_EXTERNAL (node->decl)
1856            && !opt_for_fn (node->decl, optimize_size)
1857            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1858 }
1859
1860 /* Count number of callers of NODE and store it into DATA (that
1861    points to int.  Worker for cgraph_for_node_and_aliases.  */
1862
1863 static bool
1864 sum_callers (struct cgraph_node *node, void *data)
1865 {
1866   struct cgraph_edge *e;
1867   int *num_calls = (int *)data;
1868
1869   for (e = node->callers; e; e = e->next_caller)
1870     (*num_calls)++;
1871   return false;
1872 }
1873
1874 /* We only propagate across edges with non-interposable callee.  */
1875
1876 inline bool
1877 ignore_edge_p (struct cgraph_edge *e)
1878 {
1879   enum availability avail;
1880   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1881   return (avail <= AVAIL_INTERPOSABLE);
1882 }
1883
/* Inline small functions using a greedy algorithm: all inline candidates
   are put into a priority heap ordered by increasing badness and are taken
   from it one at a time.

   The function works in three phases:

     1. Walk all defined functions to compute the initial unit size, the
        growth estimate, single_caller flag and SCC number kept in each
        function summary, and the global MAX_COUNT.
     2. Populate the heap with every call edge that passes the inlining
        predicates, and resolve speculative edges that are not useful.
     3. Repeatedly extract the edge with the smallest key, validate it
        again, inline it (or hand it over to recursive_inlining) and update
        the keys of the edges affected by the change.

   The inlining of small functions is bounded by unit growth parameters
   (see compute_max_insns).  */

static void
inline_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  edge_heap_t edge_heap (sreal::min ());
  /* Scratch bitmap handed to update_caller_keys/update_callee_keys; it is
     cleared after each round of key updates.  */
  auto_bitmap updated_nodes;
  /* Smallest value OVERALL_SIZE has had so far; the growth limit is
     computed relative to it.  */
  int min_size;
  auto_vec<cgraph_edge *> new_indirect_edges;
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  struct cgraph_edge_hook_list *edge_removal_hook_holder;
  new_indirect_edges.create (8);

  /* Make sure an edge that gets removed is also dropped from EDGE_HEAP;
     the hook is unregistered again at the end of this function.  */
  edge_removal_hook_holder
    = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);

  /* Compute overall unit size and other global parameters used by badness
     metrics.  */

  max_count = profile_count::uninitialized ();
  /* ORDER itself is not needed and is released right away; the loop below
     reads the ipa_dfs_info hanging off each node's AUX field (presumably
     set up by ipa_reduced_postorder).  */
  ipa_reduced_postorder (order, true, ignore_edge_p);
  free (order);

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      {
        if (!node->alias && node->analyzed
            && (node->has_gimple_body_p () || node->thunk.thunk_p)
            && opt_for_fn (node->decl, optimize))
          {
            class ipa_fn_summary *info = ipa_fn_summaries->get (node);
            struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;

            /* Do not account external functions, they will be optimized out
               if not inlined.  Also only count the non-cold portion of program.  */
            if (inline_account_function_p (node))
              initial_size += ipa_size_summaries->get (node)->size;
            info->growth = estimate_growth (node);

            /* Count the callers of NODE and of its aliases to detect
               functions with exactly one caller.  */
            int num_calls = 0;
            node->call_for_symbol_and_aliases (sum_callers, &num_calls,
                                               true);
            if (num_calls == 1)
              info->single_caller = true;
            /* NODE is part of a cycle: give every optimized member of the
               cycle the same SCC number.  The number is offset by one, so a
               zero scc_no in the summary means "not numbered yet"; reaching
               a member that already has a number ends the walk.  */
            if (dfs && dfs->next_cycle)
              {
                struct cgraph_node *n2;
                int id = dfs->scc_no + 1;
                for (n2 = node; n2;
                     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
                  if (opt_for_fn (n2->decl, optimize))
                    {
                      ipa_fn_summary *info2 = ipa_fn_summaries->get
                         (n2->inlined_to ? n2->inlined_to : n2);
                      if (info2->scc_no)
                        break;
                      info2->scc_no = id;
                    }
              }
          }

        /* MAX_COUNT is the largest IPA count of any edge calling a
           function that is not an inline clone.  */
        for (edge = node->callers; edge; edge = edge->next_caller)
          max_count = max_count.max (edge->count.ipa ());
      }
  ipa_free_postorder_info ();
  initialize_growth_caches ();

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining of small functions.  Starting with size %i.\n",
             initial_size);

  overall_size = initial_size;
  min_size = overall_size;

  /* Populate the heap with all edges we might inline.  */

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      bool update = false;
      struct cgraph_edge *next = NULL;
      bool has_speculative = false;

      if (!opt_for_fn (node->decl, optimize))
        continue;

      if (dump_file)
        fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());

      for (edge = node->callees; edge; edge = edge->next_callee)
        {
          /* NOTE(review): the trailing inline_failed test repeats the first
             one; it looks redundant unless one of the predicates in between
             can clear the field -- confirm before removing.  */
          if (edge->inline_failed
              && !edge->aux
              && can_inline_edge_p (edge, true)
              && want_inline_small_function_p (edge, true)
              && can_inline_edge_by_limits_p (edge, true)
              && edge->inline_failed)
            {
              gcc_assert (!edge->aux);
              update_edge_key (&edge_heap, edge);
            }
          if (edge->speculative)
            has_speculative = true;
        }
      /* Drop speculations that are not useful.  An edge that made it into
         the heap (AUX is set) is given the benefit of the doubt.  NEXT is
         fetched before the edge is resolved so the walk does not depend on
         the edge afterwards.  */
      if (has_speculative)
        for (edge = node->callees; edge; edge = next)
          {
            next = edge->next_callee;
            if (edge->speculative
                && !speculation_useful_p (edge, edge->aux != NULL))
              {
                edge->resolve_speculation ();
                update = true;
              }
          }
      /* Resolving a speculation changed the body of the function NODE
         lives in; refresh its summary and the heap keys around it.  */
      if (update)
        {
          struct cgraph_node *where = node->inlined_to
                                      ? node->inlined_to : node;
          ipa_update_overall_fn_summary (where);
          reset_edge_caches (where);
          update_caller_keys (&edge_heap, where,
                              updated_nodes, NULL);
          update_callee_keys (&edge_heap, where, NULL,
                              updated_nodes);
          bitmap_clear (updated_nodes);
        }
    }

  /* A positive MAX_COUNT is only expected with LTO or when profile feedback
     is in use.  */
  gcc_assert (in_lto_p
              || !(max_count > 0)
              || (profile_info && flag_branch_probabilities));

  /* Main greedy loop: process candidates in order of increasing badness.  */
  while (!edge_heap.empty ())
    {
      /* Unit size before this iteration; used for the "net change" dump
         at the end of the iteration.  */
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      /* Key the edge was queued with; it may be out of date.  */
      sreal badness = edge_heap.min_key ();
      sreal current_badness;
      int growth;

      edge = edge_heap.extract_min ();
      gcc_assert (edge->aux);
      /* The edge is no longer in the heap.  */
      edge->aux = NULL;
      if (!edge->inline_failed || !edge->callee->analyzed)
        continue;

      /* Be sure that caches are maintained consistent.
         This check is affected by scaling roundoff errors when compiling
         for IPA, thus we skip it in that case.  */
      if (flag_checking && !edge->callee->count.ipa_p ()
          && (!max_count.initialized_p () || !max_count.nonzero_p ()))
        {
          sreal cached_badness = edge_badness (edge, false);

          int old_size_est = estimate_edge_size (edge);
          sreal old_time_est = estimate_edge_time (edge);
          int old_hints_est = estimate_edge_hints (edge);

          /* Throw the cached estimates away and verify that recomputing
             them from scratch gives the same answers.  */
          if (edge_growth_cache != NULL)
            edge_growth_cache->remove (edge);
          reset_node_cache (edge->caller->inlined_to
                            ? edge->caller->inlined_to
                            : edge->caller);
          gcc_assert (old_size_est == estimate_edge_size (edge));
          gcc_assert (old_time_est == estimate_edge_time (edge));
          /* FIXME:

             gcc_assert (old_hints_est == estimate_edge_hints (edge));

             fails with profile feedback because some hints depends on
             maybe_hot_edge_p predicate and because callee gets inlined to other
             calls, the edge may become cold.
             This ought to be fixed by computing relative probabilities
             for given invocation but that will be better done once whole
             code is converted to sreals.  Disable for now and revert to "wrong"
             value so enable/disable checking paths agree.  */
          /* NOTE(review): unlike the removal above, this dereferences
             edge_growth_cache without a NULL test -- confirm the cache is
             always present on this path.  */
          edge_growth_cache->get (edge)->hints = old_hints_est + 1;

          /* When updating the edge costs, we only decrease badness in the keys.
             Increases of badness are handled lazily; when we see key with out
             of date value on it, we re-insert it now.  */
          current_badness = edge_badness (edge, false);
          gcc_assert (cached_badness == current_badness);
          gcc_assert (current_badness >= badness);
        }
      else
        current_badness = edge_badness (edge, false);
      /* The key was stale.  If some other queued edge is now better than
         this one, put the edge back with its up-to-date key and look at it
         again later; otherwise carry on with the corrected badness.  */
      if (current_badness != badness)
        {
          if (edge_heap.min () && current_badness > edge_heap.min_key ())
            {
              edge->aux = edge_heap.insert (current_badness, edge);
              continue;
            }
          else
            badness = current_badness;
        }

      /* Conditions may have changed since the edge was queued; check
         inlinability once more.  */
      if (!can_inline_edge_p (edge, true)
          || !can_inline_edge_by_limits_p (edge, true))
        {
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }

      callee = edge->callee->ultimate_alias_target ();
      growth = estimate_edge_growth (edge);
      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i size\n",
                   callee->dump_name (),
                   ipa_size_summaries->get (callee)->size);
          fprintf (dump_file,
                   " to be inlined into %s in %s:%i\n"
                   " Estimated badness is %f, frequency %.2f.\n",
                   edge->caller->dump_name (),
                   edge->call_stmt
                   && (LOCATION_LOCUS (gimple_location ((const gimple *)
                                                        edge->call_stmt))
                       > BUILTINS_LOCATION)
                   ? gimple_filename ((const gimple *) edge->call_stmt)
                   : "unknown",
                   edge->call_stmt
                   ? gimple_lineno ((const gimple *) edge->call_stmt)
                   : -1,
                   badness.to_double (),
                   edge->sreal_frequency ().to_double ());
          if (edge->count.ipa ().initialized_p ())
            {
              fprintf (dump_file, " Called ");
              edge->count.ipa ().dump (dump_file);
              fprintf (dump_file, " times\n");
            }
          /* Called only for the dump it produces; the result is unused.  */
          if (dump_flags & TDF_DETAILS)
            edge_badness (edge, true);
        }

      where = edge->caller;

      /* Enforce the unit growth limit, measured against the smallest unit
         size seen so far, unless the callee disregards inline limits.  */
      if (overall_size + growth > compute_max_insns (where, min_size)
          && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        {
          edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
          report_inline_failed_reason (edge);
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }

      if (!want_inline_small_function_p (edge, true))
        {
          resolve_noninline_speculation (&edge_heap, edge);
          continue;
        }

      /* Remembered so we can tell below whether the count of the offline
         copy changed.  */
      profile_count old_count = callee->count;

      /* Heuristics for inlining small functions work poorly for
         recursive calls where we do effects similar to loop unrolling.
         When inlining such edge seems profitable, leave decision on
         specific inliner.  */
      if (edge->recursive_p ())
        {
          if (where->inlined_to)
            where = where->inlined_to;
          /* New indirect edges are only collected when indirect inlining
             is enabled for the caller.  */
          if (!recursive_inlining (edge,
                                   opt_for_fn (edge->caller->decl,
                                               flag_indirect_inlining)
                                   ? &new_indirect_edges : NULL))
            {
              edge->inline_failed = CIF_RECURSIVE_INLINING;
              resolve_noninline_speculation (&edge_heap, edge);
              continue;
            }
          reset_edge_caches (where);
          /* Recursive inliner inlines all recursive calls of the function
             at once. Consequently we need to update all callee keys.  */
          if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
            add_new_edges_to_heap (&edge_heap, new_indirect_edges);
          update_callee_keys (&edge_heap, where, where, updated_nodes);
          bitmap_clear (updated_nodes);
        }
      else
        {
          struct cgraph_node *outer_node = NULL;
          int depth = 0;

          /* Consider the case where self recursive function A is inlined
             into B.  This is desired optimization in some cases, since it
             leads to effect similar of loop peeling and we might completely
             optimize out the recursive call.  However we must be extra
             selective.  */

          /* Walk up the chain of inline clones.  DEPTH counts the clones
             that share the callee's declaration and OUTER_NODE ends up as
             the outermost of them; WHERE ends up as the function
             everything is inlined into.  */
          where = edge->caller;
          while (where->inlined_to)
            {
              if (where->decl == callee->decl)
                outer_node = where, depth++;
              where = where->callers->caller;
            }
          if (outer_node
              && !want_inline_self_recursive_call_p (edge, outer_node,
                                                     true, depth))
            {
              edge->inline_failed
                = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
                   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
              resolve_noninline_speculation (&edge_heap, edge);
              continue;
            }
          else if (depth && dump_file)
            fprintf (dump_file, " Peeling recursion with depth %i\n", depth);

          gcc_checking_assert (!callee->inlined_to);

          /* NOTE(review): this OLD_SIZE is the size of WHERE and shadows the
             loop-level OLD_SIZE (the unit size) within this block only; the
             "net change" dump further down uses the outer variable.  */
          int old_size = ipa_size_summaries->get (where)->size;
          sreal old_time = ipa_fn_summaries->get (where)->time;

          inline_call (edge, true, &new_indirect_edges, &overall_size, true);
          reset_edge_caches (edge->callee);
          add_new_edges_to_heap (&edge_heap, new_indirect_edges);

          /* If caller's size and time increased we do not need to update
             all edges because badness is not going to decrease.  */
          if (old_size <= ipa_size_summaries->get (where)->size
              && old_time <= ipa_fn_summaries->get (where)->time
              /* Wrapper penalty may be non-monotonous in this respect.
                 Fortunately it only affects small functions.  */
              && !wrapper_heuristics_may_apply (where, old_size))
            update_callee_keys (&edge_heap, edge->callee, edge->callee,
                                updated_nodes);
          else
            update_callee_keys (&edge_heap, where,
                                edge->callee,
                                updated_nodes);
        }
      where = edge->caller;
      if (where->inlined_to)
        where = where->inlined_to;

      /* Our profitability metric can depend on local properties
         such as number of inlinable calls and size of the function body.
         After inlining these properties might change for the function we
         inlined into (since its body size changed) and for the functions
         called by function we inlined (since number of its inlinable callers
         might change).  */
      update_caller_keys (&edge_heap, where, updated_nodes, NULL);
      /* Offline copy count has possibly changed, recompute if profile is
         available.  */
      struct cgraph_node *n
              = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
      if (n != edge->callee && n->analyzed && !(n->count == old_count)
          && n->count.ipa_p ())
        update_callee_keys (&edge_heap, n, NULL, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_enabled_p ())
        {
          ipa_fn_summary *s = ipa_fn_summaries->get (where);

          /* dump_printf can't handle %+i.  */
          char buf_net_change[100];
          snprintf (buf_net_change, sizeof buf_net_change, "%+i",
                    overall_size - old_size);

          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
                           " Inlined %C into %C which now has time %f and "
                           "size %i, net change of %s%s.\n",
                           edge->callee, edge->caller,
                           s->time.to_double (),
                           ipa_size_summaries->get (edge->caller)->size,
                           buf_net_change,
                           cross_module_call_p (edge) ? " (cross module)":"");
        }
      /* Inlining can shrink the unit; remember the new minimum since the
         growth limit is computed from it.  */
      if (min_size > overall_size)
        {
          min_size = overall_size;

          if (dump_file)
            fprintf (dump_file, "New minimal size reached: %i\n", min_size);
        }
    }

  free_growth_caches ();
  /* NOTE(review): the percentage is computed in int arithmetic;
     overall_size * 100 could overflow for extremely large units --
     confirm whether that is a practical concern.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "Unit growth for small function inlining: %i->%i (%i%%)\n",
                 initial_size, overall_size,
                 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
  symtab->remove_edge_removal_hook (edge_removal_hook_holder);
}
2283
2284 /* Flatten NODE.  Performed both during early inlining and
2285    at IPA inlining time.  */
2286
2287 static void
2288 flatten_function (struct cgraph_node *node, bool early, bool update)
2289 {
2290   struct cgraph_edge *e;
2291
2292   /* We shouldn't be called recursively when we are being processed.  */
2293   gcc_assert (node->aux == NULL);
2294
2295   node->aux = (void *) node;
2296
2297   for (e = node->callees; e; e = e->next_callee)
2298     {
2299       struct cgraph_node *orig_callee;
2300       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2301
2302       /* We've hit cycle?  It is time to give up.  */
2303       if (callee->aux)
2304         {
2305           if (dump_enabled_p ())
2306             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2307                              "Not inlining %C into %C to avoid cycle.\n",
2308                              callee, e->caller);
2309           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2310             e->inline_failed = CIF_RECURSIVE_INLINING;
2311           continue;
2312         }
2313
2314       /* When the edge is already inlined, we just need to recurse into
2315          it in order to fully flatten the leaves.  */
2316       if (!e->inline_failed)
2317         {
2318           flatten_function (callee, early, false);
2319           continue;
2320         }
2321
2322       /* Flatten attribute needs to be processed during late inlining. For
2323          extra code quality we however do flattening during early optimization,
2324          too.  */
2325       if (!early
2326           ? !can_inline_edge_p (e, true)
2327             && !can_inline_edge_by_limits_p (e, true)
2328           : !can_early_inline_edge_p (e))
2329         continue;
2330
2331       if (e->recursive_p ())
2332         {
2333           if (dump_enabled_p ())
2334             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2335                              "Not inlining: recursive call.\n");
2336           continue;
2337         }
2338
2339       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2340           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2341         {
2342           if (dump_enabled_p ())
2343             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2344                              "Not inlining: SSA form does not match.\n");
2345           continue;
2346         }
2347
2348       /* Inline the edge and flatten the inline clone.  Avoid
2349          recursing through the original node if the node was cloned.  */
2350       if (dump_enabled_p ())
2351         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2352                          " Inlining %C into %C.\n",
2353                          callee, e->caller);
2354       orig_callee = callee;
2355       inline_call (e, true, NULL, NULL, false);
2356       if (e->callee != orig_callee)
2357         orig_callee->aux = (void *) node;
2358       flatten_function (e->callee, early, false);
2359       if (e->callee != orig_callee)
2360         orig_callee->aux = NULL;
2361     }
2362
2363   node->aux = NULL;
2364   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2365   if (update && opt_for_fn (where->decl, optimize))
2366     ipa_update_overall_fn_summary (where);
2367 }
2368
2369 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2370    DATA points to number of calls originally found so we avoid infinite
2371    recursion.  */
2372
2373 static bool
2374 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2375                          hash_set<cgraph_node *> *callers)
2376 {
2377   int *num_calls = (int *)data;
2378   bool callee_removed = false;
2379
2380   while (node->callers && !node->inlined_to)
2381     {
2382       struct cgraph_node *caller = node->callers->caller;
2383
2384       if (!can_inline_edge_p (node->callers, true)
2385           || !can_inline_edge_by_limits_p (node->callers, true)
2386           || node->callers->recursive_p ())
2387         {
2388           if (dump_file)
2389             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2390           *num_calls = 0;
2391           return false;
2392         }
2393
2394       if (dump_file)
2395         {
2396           cgraph_node *ultimate = node->ultimate_alias_target ();
2397           fprintf (dump_file,
2398                    "\nInlining %s size %i.\n",
2399                    ultimate->name (),
2400                    ipa_size_summaries->get (ultimate)->size);
2401           fprintf (dump_file,
2402                    " Called once from %s %i insns.\n",
2403                    node->callers->caller->name (),
2404                    ipa_size_summaries->get (node->callers->caller)->size);
2405         }
2406
2407       /* Remember which callers we inlined to, delaying updating the
2408          overall summary.  */
2409       callers->add (node->callers->caller);
2410       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2411       if (dump_file)
2412         fprintf (dump_file,
2413                  " Inlined into %s which now has %i size\n",
2414                  caller->name (),
2415                  ipa_size_summaries->get (caller)->size);
2416       if (!(*num_calls)--)
2417         {
2418           if (dump_file)
2419             fprintf (dump_file, "New calls found; giving up.\n");
2420           return callee_removed;
2421         }
2422       if (callee_removed)
2423         return true;
2424     }
2425   return false;
2426 }
2427
2428 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2429    update.  */
2430
2431 static bool
2432 inline_to_all_callers (struct cgraph_node *node, void *data)
2433 {
2434   hash_set<cgraph_node *> callers;
2435   bool res = inline_to_all_callers_1 (node, data, &callers);
2436   /* Perform the delayed update of the overall summary of all callers
2437      processed.  This avoids quadratic behavior in the cases where
2438      we have a lot of calls to the same function.  */
2439   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2440        i != callers.end (); ++i)
2441     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2442   return res;
2443 }
2444
2445 /* Output overall time estimate.  */
2446 static void
2447 dump_overall_stats (void)
2448 {
2449   sreal sum_weighted = 0, sum = 0;
2450   struct cgraph_node *node;
2451
2452   FOR_EACH_DEFINED_FUNCTION (node)
2453     if (!node->inlined_to
2454         && !node->alias)
2455       {
2456         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2457         if (s != NULL)
2458           {
2459           sum += s->time;
2460           if (node->count.ipa ().initialized_p ())
2461             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2462           }
2463       }
2464   fprintf (dump_file, "Overall time estimate: "
2465            "%f weighted by profile: "
2466            "%f\n", sum.to_double (), sum_weighted.to_double ());
2467 }
2468
2469 /* Output some useful stats about inlining.  */
2470
2471 static void
2472 dump_inline_stats (void)
2473 {
2474   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2475   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2476   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2477   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2478   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2479   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2480   int64_t reason[CIF_N_REASONS][2];
2481   sreal reason_freq[CIF_N_REASONS];
2482   int i;
2483   struct cgraph_node *node;
2484
2485   memset (reason, 0, sizeof (reason));
2486   for (i=0; i < CIF_N_REASONS; i++)
2487     reason_freq[i] = 0;
2488   FOR_EACH_DEFINED_FUNCTION (node)
2489   {
2490     struct cgraph_edge *e;
2491     for (e = node->callees; e; e = e->next_callee)
2492       {
2493         if (e->inline_failed)
2494           {
2495             if (e->count.ipa ().initialized_p ())
2496               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2497             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2498             reason[(int) e->inline_failed][1] ++;
2499             if (DECL_VIRTUAL_P (e->callee->decl)
2500                 && e->count.ipa ().initialized_p ())
2501               {
2502                 if (e->indirect_inlining_edge)
2503                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2504                 else
2505                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2506               }
2507             else if (e->count.ipa ().initialized_p ())
2508               {
2509                 if (e->indirect_inlining_edge)
2510                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2511                 else
2512                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2513               }
2514           }
2515         else if (e->count.ipa ().initialized_p ())
2516           {
2517             if (e->speculative)
2518               {
2519                 if (DECL_VIRTUAL_P (e->callee->decl))
2520                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2521                 else
2522                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2523               }
2524             else if (DECL_VIRTUAL_P (e->callee->decl))
2525               {
2526                 if (e->indirect_inlining_edge)
2527                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2528                 else
2529                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2530               }
2531             else
2532               {
2533                 if (e->indirect_inlining_edge)
2534                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2535                 else
2536                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2537               }
2538           }
2539       }
2540     for (e = node->indirect_calls; e; e = e->next_callee)
2541       if (e->indirect_info->polymorphic
2542           & e->count.ipa ().initialized_p ())
2543         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2544       else if (e->count.ipa ().initialized_p ())
2545         indirect_cnt += e->count.ipa ().to_gcov_type ();
2546   }
2547   if (max_count.initialized_p ())
2548     {
2549       fprintf (dump_file,
2550                "Inlined %" PRId64 " + speculative "
2551                "%" PRId64 " + speculative polymorphic "
2552                "%" PRId64 " + previously indirect "
2553                "%" PRId64 " + virtual "
2554                "%" PRId64 " + virtual and previously indirect "
2555                "%" PRId64 "\n" "Not inlined "
2556                "%" PRId64 " + previously indirect "
2557                "%" PRId64 " + virtual "
2558                "%" PRId64 " + virtual and previously indirect "
2559                "%" PRId64 " + still indirect "
2560                "%" PRId64 " + still indirect polymorphic "
2561                "%" PRId64 "\n", inlined_cnt,
2562                inlined_speculative, inlined_speculative_ply,
2563                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2564                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2565                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2566       fprintf (dump_file, "Removed speculations ");
2567       spec_rem.dump (dump_file);
2568       fprintf (dump_file, "\n");
2569     }
2570   dump_overall_stats ();
2571   fprintf (dump_file, "\nWhy inlining failed?\n");
2572   for (i = 0; i < CIF_N_REASONS; i++)
2573     if (reason[i][1])
2574       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2575                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2576                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2577 }
2578
2579 /* Called when node is removed.  */
2580
2581 static void
2582 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2583 {
2584   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2585     return;
2586
2587   hash_set<struct cgraph_node *> *removed
2588     = (hash_set<struct cgraph_node *> *) data;
2589   removed->add (node);
2590 }
2591
2592 /* Decide on the inlining.  We do so in the topological order to avoid
2593    expenses on updating data structures.  */
2594
2595 static unsigned int
2596 ipa_inline (void)
2597 {
2598   struct cgraph_node *node;
2599   int nnodes;
2600   struct cgraph_node **order;
2601   int i, j;
2602   int cold;
2603   bool remove_functions = false;
2604
2605   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2606
2607   if (dump_file)
2608     ipa_dump_fn_summaries (dump_file);
2609
2610   nnodes = ipa_reverse_postorder (order);
2611   spec_rem = profile_count::zero ();
2612
2613   FOR_EACH_FUNCTION (node)
2614     {
2615       node->aux = 0;
2616
2617       /* Recompute the default reasons for inlining because they may have
2618          changed during merging.  */
2619       if (in_lto_p)
2620         {
2621           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2622             {
2623               gcc_assert (e->inline_failed);
2624               initialize_inline_failed (e);
2625             }
2626           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2627             initialize_inline_failed (e);
2628         }
2629     }
2630
2631   if (dump_file)
2632     fprintf (dump_file, "\nFlattening functions:\n");
2633
2634   /* First shrink order array, so that it only contains nodes with
2635      flatten attribute.  */
2636   for (i = nnodes - 1, j = i; i >= 0; i--)
2637     {
2638       node = order[i];
2639       if (node->definition
2640           && lookup_attribute ("flatten",
2641                                DECL_ATTRIBUTES (node->decl)) != NULL)
2642         order[j--] = order[i];
2643     }
2644
2645   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2646      nodes with flatten attribute.  If there is more than one such
2647      node, we need to register a node removal hook, as flatten_function
2648      could remove other nodes with flatten attribute.  See PR82801.  */
2649   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2650   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2651   if (j < nnodes - 2)
2652     {
2653       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2654       node_removal_hook_holder
2655         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2656                                            flatten_removed_nodes);
2657     }
2658
2659   /* In the first pass handle functions to be flattened.  Do this with
2660      a priority so none of our later choices will make this impossible.  */
2661   for (i = nnodes - 1; i > j; i--)
2662     {
2663       node = order[i];
2664       if (flatten_removed_nodes
2665           && flatten_removed_nodes->contains (node))
2666         continue;
2667
2668       /* Handle nodes to be flattened.
2669          Ideally when processing callees we stop inlining at the
2670          entry of cycles, possibly cloning that entry point and
2671          try to flatten itself turning it into a self-recursive
2672          function.  */
2673       if (dump_file)
2674         fprintf (dump_file, "Flattening %s\n", node->name ());
2675       flatten_function (node, false, true);
2676     }
2677
2678   if (j < nnodes - 2)
2679     {
2680       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2681       delete flatten_removed_nodes;
2682     }
2683   free (order);
2684
2685   if (dump_file)
2686     dump_overall_stats ();
2687
2688   inline_small_functions ();
2689
2690   gcc_assert (symtab->state == IPA_SSA);
2691   symtab->state = IPA_SSA_AFTER_INLINING;
2692   /* Do first after-inlining removal.  We want to remove all "stale" extern
2693      inline functions and virtual functions so we really know what is called
2694      once.  */
2695   symtab->remove_unreachable_nodes (dump_file);
2696
2697   /* Inline functions with a property that after inlining into all callers the
2698      code size will shrink because the out-of-line copy is eliminated.
2699      We do this regardless on the callee size as long as function growth limits
2700      are met.  */
2701   if (dump_file)
2702     fprintf (dump_file,
2703              "\nDeciding on functions to be inlined into all callers and "
2704              "removing useless speculations:\n");
2705
2706   /* Inlining one function called once has good chance of preventing
2707      inlining other function into the same callee.  Ideally we should
2708      work in priority order, but probably inlining hot functions first
2709      is good cut without the extra pain of maintaining the queue.
2710
2711      ??? this is not really fitting the bill perfectly: inlining function
2712      into callee often leads to better optimization of callee due to
2713      increased context for optimization.
2714      For example if main() function calls a function that outputs help
2715      and then function that does the main optimization, we should inline
2716      the second with priority even if both calls are cold by themselves.
2717
2718      We probably want to implement new predicate replacing our use of
2719      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2720      to be hot.  */
2721   for (cold = 0; cold <= 1; cold ++)
2722     {
2723       FOR_EACH_DEFINED_FUNCTION (node)
2724         {
2725           struct cgraph_edge *edge, *next;
2726           bool update=false;
2727
2728           if (!opt_for_fn (node->decl, optimize)
2729               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2730             continue;
2731
2732           for (edge = node->callees; edge; edge = next)
2733             {
2734               next = edge->next_callee;
2735               if (edge->speculative && !speculation_useful_p (edge, false))
2736                 {
2737                   if (edge->count.ipa ().initialized_p ())
2738                     spec_rem += edge->count.ipa ();
2739                   edge->resolve_speculation ();
2740                   update = true;
2741                   remove_functions = true;
2742                 }
2743             }
2744           if (update)
2745             {
2746               struct cgraph_node *where = node->inlined_to
2747                                           ? node->inlined_to : node;
2748               reset_edge_caches (where);
2749               ipa_update_overall_fn_summary (where);
2750             }
2751           if (want_inline_function_to_all_callers_p (node, cold))
2752             {
2753               int num_calls = 0;
2754               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2755                                                  true);
2756               while (node->call_for_symbol_and_aliases
2757                        (inline_to_all_callers, &num_calls, true))
2758                 ;
2759               remove_functions = true;
2760             }
2761         }
2762     }
2763
2764   /* Free ipa-prop structures if they are no longer needed.  */
2765   ipa_free_all_structures_after_iinln ();
2766
2767   if (dump_enabled_p ())
2768     dump_printf (MSG_NOTE,
2769                  "\nInlined %i calls, eliminated %i functions\n\n",
2770                  ncalls_inlined, nfunctions_inlined);
2771   if (dump_file)
2772     dump_inline_stats ();
2773
2774   if (dump_file)
2775     ipa_dump_fn_summaries (dump_file);
2776   return remove_functions ? TODO_remove_functions : 0;
2777 }
2778
2779 /* Inline always-inline function calls in NODE.  */
2780
2781 static bool
2782 inline_always_inline_functions (struct cgraph_node *node)
2783 {
2784   struct cgraph_edge *e;
2785   bool inlined = false;
2786
2787   for (e = node->callees; e; e = e->next_callee)
2788     {
2789       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2790       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2791         continue;
2792
2793       if (e->recursive_p ())
2794         {
2795           if (dump_enabled_p ())
2796             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2797                              "  Not inlining recursive call to %C.\n",
2798                              e->callee);
2799           e->inline_failed = CIF_RECURSIVE_INLINING;
2800           continue;
2801         }
2802
2803       if (!can_early_inline_edge_p (e))
2804         {
2805           /* Set inlined to true if the callee is marked "always_inline" but
2806              is not inlinable.  This will allow flagging an error later in
2807              expand_call_inline in tree-inline.c.  */
2808           if (lookup_attribute ("always_inline",
2809                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2810             inlined = true;
2811           continue;
2812         }
2813
2814       if (dump_enabled_p ())
2815         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2816                          "  Inlining %C into %C (always_inline).\n",
2817                          e->callee, e->caller);
2818       inline_call (e, true, NULL, NULL, false);
2819       inlined = true;
2820     }
2821   if (inlined)
2822     ipa_update_overall_fn_summary (node);
2823
2824   return inlined;
2825 }
2826
2827 /* Decide on the inlining.  We do so in the topological order to avoid
2828    expenses on updating data structures.  */
2829
2830 static bool
2831 early_inline_small_functions (struct cgraph_node *node)
2832 {
2833   struct cgraph_edge *e;
2834   bool inlined = false;
2835
2836   for (e = node->callees; e; e = e->next_callee)
2837     {
2838       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2839
2840       /* We can encounter not-yet-analyzed function during
2841          early inlining on callgraphs with strongly
2842          connected components.  */
2843       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2844       if (s == NULL || !s->inlinable || !e->inline_failed)
2845         continue;
2846
2847       /* Do not consider functions not declared inline.  */
2848       if (!DECL_DECLARED_INLINE_P (callee->decl)
2849           && !opt_for_fn (node->decl, flag_inline_small_functions)
2850           && !opt_for_fn (node->decl, flag_inline_functions))
2851         continue;
2852
2853       if (dump_enabled_p ())
2854         dump_printf_loc (MSG_NOTE, e->call_stmt,
2855                          "Considering inline candidate %C.\n",
2856                          callee);
2857
2858       if (!can_early_inline_edge_p (e))
2859         continue;
2860
2861       if (e->recursive_p ())
2862         {
2863           if (dump_enabled_p ())
2864             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2865                              "  Not inlining: recursive call.\n");
2866           continue;
2867         }
2868
2869       if (!want_early_inline_function_p (e))
2870         continue;
2871
2872       if (dump_enabled_p ())
2873         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2874                          " Inlining %C into %C.\n",
2875                          callee, e->caller);
2876       inline_call (e, true, NULL, NULL, false);
2877       inlined = true;
2878     }
2879
2880   if (inlined)
2881     ipa_update_overall_fn_summary (node);
2882
2883   return inlined;
2884 }
2885
2886 unsigned int
2887 early_inliner (function *fun)
2888 {
2889   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2890   struct cgraph_edge *edge;
2891   unsigned int todo = 0;
2892   int iterations = 0;
2893   bool inlined = false;
2894
2895   if (seen_error ())
2896     return 0;
2897
2898   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2899      happens when some pass decides to construct new function and
2900      cgraph_add_new_function calls lowering passes and early optimization on
2901      it.  This may confuse ourself when early inliner decide to inline call to
2902      function clone, because function clones don't have parameter list in
2903      ipa-prop matching their signature.  */
2904   if (ipa_node_params_sum)
2905     return 0;
2906
2907   if (flag_checking)
2908     node->verify ();
2909   node->remove_all_references ();
2910
2911   /* Even when not optimizing or not inlining inline always-inline
2912      functions.  */
2913   inlined = inline_always_inline_functions (node);
2914
2915   if (!optimize
2916       || flag_no_inline
2917       || !flag_early_inlining
2918       /* Never inline regular functions into always-inline functions
2919          during incremental inlining.  This sucks as functions calling
2920          always inline functions will get less optimized, but at the
2921          same time inlining of functions calling always inline
2922          function into an always inline function might introduce
2923          cycles of edges to be always inlined in the callgraph.
2924
2925          We might want to be smarter and just avoid this type of inlining.  */
2926       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2927           && lookup_attribute ("always_inline",
2928                                DECL_ATTRIBUTES (node->decl))))
2929     ;
2930   else if (lookup_attribute ("flatten",
2931                              DECL_ATTRIBUTES (node->decl)) != NULL)
2932     {
2933       /* When the function is marked to be flattened, recursively inline
2934          all calls in it.  */
2935       if (dump_enabled_p ())
2936         dump_printf (MSG_OPTIMIZED_LOCATIONS,
2937                      "Flattening %C\n", node);
2938       flatten_function (node, true, true);
2939       inlined = true;
2940     }
2941   else
2942     {
2943       /* If some always_inline functions was inlined, apply the changes.
2944          This way we will not account always inline into growth limits and
2945          moreover we will inline calls from always inlines that we skipped
2946          previously because of conditional above.  */
2947       if (inlined)
2948         {
2949           timevar_push (TV_INTEGRATION);
2950           todo |= optimize_inline_calls (current_function_decl);
2951           /* optimize_inline_calls call above might have introduced new
2952              statements that don't have inline parameters computed.  */
2953           for (edge = node->callees; edge; edge = edge->next_callee)
2954             {
2955               /* We can enounter not-yet-analyzed function during
2956                  early inlining on callgraphs with strongly
2957                  connected components.  */
2958               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2959               es->call_stmt_size
2960                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2961               es->call_stmt_time
2962                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2963             }
2964           ipa_update_overall_fn_summary (node);
2965           inlined = false;
2966           timevar_pop (TV_INTEGRATION);
2967         }
2968       /* We iterate incremental inlining to get trivial cases of indirect
2969          inlining.  */
2970       while (iterations < param_early_inliner_max_iterations
2971              && early_inline_small_functions (node))
2972         {
2973           timevar_push (TV_INTEGRATION);
2974           todo |= optimize_inline_calls (current_function_decl);
2975
2976           /* Technically we ought to recompute inline parameters so the new
2977              iteration of early inliner works as expected.  We however have
2978              values approximately right and thus we only need to update edge
2979              info that might be cleared out for newly discovered edges.  */
2980           for (edge = node->callees; edge; edge = edge->next_callee)
2981             {
2982               /* We have no summary for new bound store calls yet.  */
2983               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2984               es->call_stmt_size
2985                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2986               es->call_stmt_time
2987                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2988             }
2989           if (iterations < param_early_inliner_max_iterations - 1)
2990             ipa_update_overall_fn_summary (node);
2991           timevar_pop (TV_INTEGRATION);
2992           iterations++;
2993           inlined = false;
2994         }
2995       if (dump_file)
2996         fprintf (dump_file, "Iterations: %i\n", iterations);
2997     }
2998
2999   if (inlined)
3000     {
3001       timevar_push (TV_INTEGRATION);
3002       todo |= optimize_inline_calls (current_function_decl);
3003       timevar_pop (TV_INTEGRATION);
3004     }
3005
3006   fun->always_inline_functions_inlined = true;
3007
3008   return todo;
3009 }
3010
3011 /* Do inlining of small functions.  Doing so early helps profiling and other
3012    passes to be somewhat more effective and avoids some code duplication in
3013    later real inlining pass for testcases with very many function calls.  */
3014
3015 namespace {
3016
3017 const pass_data pass_data_early_inline =
3018 {
3019   GIMPLE_PASS, /* type */
3020   "einline", /* name */
3021   OPTGROUP_INLINE, /* optinfo_flags */
3022   TV_EARLY_INLINING, /* tv_id */
3023   PROP_ssa, /* properties_required */
3024   0, /* properties_provided */
3025   0, /* properties_destroyed */
3026   0, /* todo_flags_start */
3027   0, /* todo_flags_finish */
3028 };
3029
3030 class pass_early_inline : public gimple_opt_pass
3031 {
3032 public:
3033   pass_early_inline (gcc::context *ctxt)
3034     : gimple_opt_pass (pass_data_early_inline, ctxt)
3035   {}
3036
3037   /* opt_pass methods: */
3038   virtual unsigned int execute (function *);
3039
3040 }; // class pass_early_inline
3041
3042 unsigned int
3043 pass_early_inline::execute (function *fun)
3044 {
3045   return early_inliner (fun);
3046 }
3047
3048 } // anon namespace
3049
3050 gimple_opt_pass *
3051 make_pass_early_inline (gcc::context *ctxt)
3052 {
3053   return new pass_early_inline (ctxt);
3054 }
3055
3056 namespace {
3057
3058 const pass_data pass_data_ipa_inline =
3059 {
3060   IPA_PASS, /* type */
3061   "inline", /* name */
3062   OPTGROUP_INLINE, /* optinfo_flags */
3063   TV_IPA_INLINING, /* tv_id */
3064   0, /* properties_required */
3065   0, /* properties_provided */
3066   0, /* properties_destroyed */
3067   0, /* todo_flags_start */
3068   ( TODO_dump_symtab ), /* todo_flags_finish */
3069 };
3070
3071 class pass_ipa_inline : public ipa_opt_pass_d
3072 {
3073 public:
3074   pass_ipa_inline (gcc::context *ctxt)
3075     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3076                       NULL, /* generate_summary */
3077                       NULL, /* write_summary */
3078                       NULL, /* read_summary */
3079                       NULL, /* write_optimization_summary */
3080                       NULL, /* read_optimization_summary */
3081                       NULL, /* stmt_fixup */
3082                       0, /* function_transform_todo_flags_start */
3083                       inline_transform, /* function_transform */
3084                       NULL) /* variable_transform */
3085   {}
3086
3087   /* opt_pass methods: */
3088   virtual unsigned int execute (function *) { return ipa_inline (); }
3089
3090 }; // class pass_ipa_inline
3091
3092 } // anon namespace
3093
3094 ipa_opt_pass_d *
3095 make_pass_ipa_inline (gcc::context *ctxt)
3096 {
3097   return new pass_ipa_inline (ctxt);
3098 }