gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p allows checking that a particular inlining is
  28       allowed by the limits specified by the user (allowed function growth,
  29       stack growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass cannot really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs the following steps:
  72
  73          1) inlining of small functions.  This is implemented by a greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          a special purpose recursive inliner is executed on a function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "profile.h"
 109 #include "symbol-summary.h"
 110 #include "tree-vrp.h"
 111 #include "ipa-prop.h"
 112 #include "ipa-fnsummary.h"
 113 #include "ipa-inline.h"
 114 #include "ipa-utils.h"
 115 #include "sreal.h"
 116 #include "auto-profile.h"
 117 #include "builtins.h"
 118 #include "fibonacci_heap.h"
 119 #include "stringpool.h"
 120 #include "attribs.h"
 121 #include "asan.h"
 122
  123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
  124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
  125 /* Above: a Fibonacci heap of cgraph_edge keyed by sreal, and its node type.  */
  126 /* Statistics we collect about the inlining algorithm.  */
  127 static int overall_size;
  128 static profile_count max_count;
  129 static profile_count spec_rem;
 130
  131 /* Return false when inlining edge E would violate the limits on
  132    function body growth or stack frame growth, storing the reason in
  133    E->inline_failed.  Return true when both limits allow the inlining.
  134
  135    The relative function body growth limit is present generally
  136    to avoid problems with non-linear behavior of the compiler.
  137    To allow inlining huge functions into a tiny wrapper, the limit
  138    is always based on the bigger of the two functions considered.
  139
  140    Stack growth limits are always based on the stack usage of the
  141    callers.  We want to prevent applications from segfaulting on stack
  142    overflow when functions with huge stack frames get inlined.  */
  143
  144 static bool
  145 caller_growth_limits (struct cgraph_edge *e)
  146 {
  147   struct cgraph_node *to = e->caller;
  148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
  149   int newsize;
  150   int limit = 0;
  151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
  153
  154   /* Look for the function e->caller is inlined to.  While doing
  155      so, work out the largest function body and the largest self
  156      stack size on the way.  As described above, we want to base
  157      our growth limits on those, not on the self size of the
  158      outer function, nor on the self size of the inline code
  159      we immediately inline to.  This is the most relaxed
  160      interpretation of the rule "do not grow large functions
  161      too much in order to prevent compiler from exploding".  */
  162   while (true)
  163     {
  164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
  165       if (limit < size_info->self_size)
  166         limit = size_info->self_size;
  167       if (stack_size_limit < size_info->estimated_self_stack_size)
  168         stack_size_limit = size_info->estimated_self_stack_size;
  169       if (to->inlined_to)
  170         to = to->callers->caller;
  171       else
  172         break;
  173     }
  174   /* TO is now the outermost function, which is not inlined anywhere.  */
  175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
  177
  178   if (limit < what_size_info->self_size)
  179     limit = what_size_info->self_size;
  180
  181   limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
  182
  183   /* Check the size after inlining against the function limits.  But allow
  184      the function to shrink if it went over the limits by forced inlining.  */
  185   newsize = estimate_size_after_inlining (to, e);
  186   if (newsize >= ipa_size_summaries->get (what)->size
  187       && newsize > opt_for_fn (to->decl, param_large_function_insns)
  188       && newsize > limit)
  189     {
  190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
  191       return false;
  192     }
  193   /* A callee with no estimated stack use passes the stack check.  */
  194   if (!what_info->estimated_stack_size)
  195     return true;
  196
  197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
  198      due to large i/o datastructures used by the Fortran front-end.
  199      We ought to ignore this limit when we know that the edge is executed
  200      on every invocation of the caller (i.e. its call statement dominates
  201      exit block).  We do not track this information, yet.  */
  202   stack_size_limit += ((gcov_type)stack_size_limit
  203                        * opt_for_fn (to->decl, param_stack_frame_growth)
  204                        / 100);
  205
  206   inlined_stack = (ipa_get_stack_frame_offset (to)
  207                    + outer_info->estimated_self_stack_size
  208                    + what_info->estimated_stack_size);
  209   /* Check new stack consumption with stack consumption at the place
  210      stack is used.  */
  211   if (inlined_stack > stack_size_limit
  212       /* If function already has large stack usage from sibling
  213          inline call, we can inline, too.
  214          This bit overoptimistically assumes that we are good at stack
  215          packing.  */
  216       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
  217       && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
  218     {
  219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
  220       return false;
  221     }
  222   return true;
  223 }
 224
 225 /* Dump info about why inlining has failed.  */
 226
 227 static void
 228 report_inline_failed_reason (struct cgraph_edge *e)
 229 {
 230   if (dump_enabled_p ())
 231     {
 232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 233                        "  not inlinable: %C -> %C, %s\n",
 234                        e->caller, e->callee,
 235                        cgraph_inline_failed_string (e->inline_failed));
 236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 237            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 238           && e->caller->lto_file_data
 239           && e->callee->ultimate_alias_target ()->lto_file_data)
 240         {
 241           dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 242                            "  LTO objects: %s, %s\n",
 243                            e->caller->lto_file_data->file_name,
 244                            e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 245         }
 246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 247         if (dump_file)
 248           cl_target_option_print_diff
 249             (dump_file, 2, target_opts_for_fn (e->caller->decl),
 250              target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 252         if (dump_file)
 253           cl_optimization_print_diff
 254             (dump_file, 2, opts_for_fn (e->caller->decl),
 255              opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 256     }
 257 }
 258
 259  /* Decide whether sanitizer-related attributes allow inlining. */
 260
 261 static bool
 262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 263 {
 264   if (!caller || !callee)
 265     return true;
 266
 267   /* Allow inlining always_inline functions into no_sanitize_address
 268      functions.  */
 269   if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
 270       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
 271     return true;
 272
 273   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
 274            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
 275           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 276               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 277           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 278               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 279 }
 280
  281 /* True if FLAG differs in a way that blocks inlining.  When always_inline
  282    is set, a caller value greater than the callee's is considered safe.  */
  283 #define check_maybe_up(flag) \
  284       (opts_for_fn (caller->decl)->x_##flag             \
  285        != opts_for_fn (callee->decl)->x_##flag          \
  286        && (!always_inline                               \
  287            || opts_for_fn (caller->decl)->x_##flag      \
  288               < opts_for_fn (callee->decl)->x_##flag))
  289 /* Likewise, but when always_inline is set it is a caller value smaller
  290    than the callee's that is considered safe.  */
  291 #define check_maybe_down(flag) \
  292       (opts_for_fn (caller->decl)->x_##flag             \
  293        != opts_for_fn (callee->decl)->x_##flag          \
  294        && (!always_inline                               \
  295            || opts_for_fn (caller->decl)->x_##flag      \
  296               > opts_for_fn (callee->decl)->x_##flag))
  297 /* True if FLAG differs; an exact match is needed for correctness.  */
  298 #define check_match(flag) \
  299       (opts_for_fn (caller->decl)->x_##flag             \
  300        != opts_for_fn (callee->decl)->x_##flag)
  301 /* These macros expect `caller' and `callee' (and `always_inline') in scope.  */
  302 /* Decide if edge E can be inlined at all and, when it cannot, update
  303    E->inline_failed with the reason.  Size and growth limits are not
  304    checked here.  When EARLY is true, the requirement that both
  305    functions are compiled with optimization is skipped.
  306
  307    If REPORT is true, output the reason to the dump file.  */
  308
  309 static bool
  310 can_inline_edge_p (struct cgraph_edge *e, bool report,
  311                    bool early = false)
  312 {
  313   gcc_checking_assert (e->inline_failed);
  314
  315   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
  316     {
  317       if (report)
  318         report_inline_failed_reason (e);
  319       return false;
  320     }
  321   /* Judge against the function E->caller is inlined into, if any.  */
  322   bool inlinable = true;
  323   enum availability avail;
  324   cgraph_node *caller = (e->caller->inlined_to
  325                          ? e->caller->inlined_to : e->caller);
  326   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  327   /* A reason set here may be replaced by the else-if chain below.  */
  328   if (!callee->definition)
  329     {
  330       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  331       inlinable = false;
  332     }
  333   if (!early && (!opt_for_fn (callee->decl, optimize)
  334                  || !opt_for_fn (caller->decl, optimize)))
  335     {
  336       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
  337       inlinable = false;
  338     }
  339   else if (callee->calls_comdat_local)
  340     {
  341       e->inline_failed = CIF_USES_COMDAT_LOCAL;
  342       inlinable = false;
  343     }
  344   else if (avail <= AVAIL_INTERPOSABLE)
  345     {
  346       e->inline_failed = CIF_OVERWRITABLE;
  347       inlinable = false;
  348     }
  349   /* All edges with call_stmt_cannot_inline_p should have inline_failed
  350      initialized to one of FINAL_ERROR reasons.  */
  351   else if (e->call_stmt_cannot_inline_p)
  352     gcc_unreachable ();
  353   /* Don't inline if the functions have different EH personalities.  */
  354   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
  355            && DECL_FUNCTION_PERSONALITY (callee->decl)
  356            && (DECL_FUNCTION_PERSONALITY (caller->decl)
  357                != DECL_FUNCTION_PERSONALITY (callee->decl)))
  358     {
  359       e->inline_failed = CIF_EH_PERSONALITY;
  360       inlinable = false;
  361     }
  362   /* TM pure functions should not be inlined into non-TM_pure
  363      functions.  */
  364   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
  365     {
  366       e->inline_failed = CIF_UNSPECIFIED;
  367       inlinable = false;
  368     }
  369   /* Check compatibility of target optimization options.  */
  370   else if (!targetm.target_option.can_inline_p (caller->decl,
  371                                                 callee->decl))
  372     {
  373       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
  374       inlinable = false;
  375     }
  376   else if (ipa_fn_summaries->get (callee) == NULL
  377            || !ipa_fn_summaries->get (callee)->inlinable)
  378     {
  379       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
  380       inlinable = false;
  381     }
  382   /* Don't inline a function with mismatched sanitization attributes. */
  383   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
  384     {
  385       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
  386       inlinable = false;
  387     }
  388   if (!inlinable && report)
  389     report_inline_failed_reason (e);
  390   return inlinable;
  391 }
 392
 393 /* Return inlining_insns_single limit for function N. If HINT is true
 394    scale up the bound.  */
 395
 396 static int
 397 inline_insns_single (cgraph_node *n, bool hint)
 398 {
 399   if (hint)
 400     return opt_for_fn (n->decl, param_max_inline_insns_single)
 401            * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 402   return opt_for_fn (n->decl, param_max_inline_insns_single);
 403 }
 404
 405 /* Return inlining_insns_auto limit for function N. If HINT is true
 406    scale up the bound.   */
 407
 408 static int
 409 inline_insns_auto (cgraph_node *n, bool hint)
 410 {
 411   int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
 412   if (hint)
 413     return max_inline_insns_auto
 414            * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
 415   return max_inline_insns_auto;
 416 }
 417
  418 /* Decide if the caller growth limits and the optimization options of
  419    caller and callee allow inlining edge E, and possibly update the
  420    E->inline_failed reason.
  421
  422    If REPORT is true, output the reason to the dump file.
  423    If DISREGARD_LIMITS is true, skip the caller growth limits.
  424    EARLY relaxes the option checks: always_inline callees skip them and
  425    a flag_devirtualize difference is tolerated.  */
  426
  427 static bool
  428 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
  429                              bool disregard_limits = false, bool early = false)
  430 {
  431   gcc_checking_assert (e->inline_failed);
  432
  433   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
  434     {
  435       if (report)
  436         report_inline_failed_reason (e);
  437       return false;
  438     }
  439
  440   bool inlinable = true;
  441   enum availability avail;
  442   cgraph_node *caller = (e->caller->inlined_to
  443                          ? e->caller->inlined_to : e->caller);
  444   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  445   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  446   tree callee_tree
  447     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  448   /* Check if caller growth allows the inlining.  */
  449   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  450       && !disregard_limits
  451       && !lookup_attribute ("flatten",
  452                  DECL_ATTRIBUTES (caller->decl))
  453       && !caller_growth_limits (e))
  454     inlinable = false;
  455   else if (callee->externally_visible
  456            && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  457            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
  458     {
  459       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
  460       inlinable = false;
  461     }
  462   /* Don't inline a function with a higher optimization level than the
  463      caller.  FIXME: this is really just tip of iceberg of handling
  464      optimization attribute.  */
  465   else if (caller_tree != callee_tree)
  466     {
  467       bool always_inline =
  468              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  469               && lookup_attribute ("always_inline",
  470                                    DECL_ATTRIBUTES (callee->decl)));
  471       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
  472       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  473
  474      /* Until GCC 4.9 we did not check the semantics-altering flags
  475         below and inlined across optimization boundaries.
  476         Enabling checks below breaks several packages by refusing
  477         to inline library always_inline functions. See PR65873.
  478         Disable the check for early inlining for now until better solution
  479         is found.  */
  480      if (always_inline && early)
  481         ;
  482       /* There are some options that change IL semantics which means
  483          we cannot inline in these cases for correctness reason.
  484          Not even for always_inline declared functions.  */
  485      else if (check_match (flag_wrapv)
  486               || check_match (flag_trapv)
  487               || check_match (flag_pcc_struct_return)
  488               /* When caller or callee does FP math, be sure FP codegen flags
  489                  compatible.  */
  490               || ((caller_info->fp_expressions && callee_info->fp_expressions)
  491                   && (check_maybe_up (flag_rounding_math)
  492                       || check_maybe_up (flag_trapping_math)
  493                       || check_maybe_down (flag_unsafe_math_optimizations)
  494                       || check_maybe_down (flag_finite_math_only)
  495                       || check_maybe_up (flag_signaling_nans)
  496                       || check_maybe_down (flag_cx_limited_range)
  497                       || check_maybe_up (flag_signed_zeros)
  498                       || check_maybe_down (flag_associative_math)
  499                       || check_maybe_down (flag_reciprocal_math)
  500                       || check_maybe_down (flag_fp_int_builtin_inexact)
  501                       /* Strictly speaking only when the callee contains function
  502                          calls that may end up setting errno.  */
  503                       || check_maybe_up (flag_errno_math)))
  504               /* We do not want to make code compiled with exceptions to be
  505                  brought into a non-EH function unless we know that the callee
  506                  does not throw.
  507                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
  508               || (check_maybe_up (flag_non_call_exceptions)
  509                   && DECL_FUNCTION_PERSONALITY (callee->decl))
  510               || (check_maybe_up (flag_exceptions)
  511                   && DECL_FUNCTION_PERSONALITY (callee->decl))
  512               /* When devirtualization is disabled for callee, it is not safe
  513                  to inline it as we possibly mangled the type info.
  514                  Allow early inlining of always inlines.  */
  515               || (!early && check_maybe_down (flag_devirtualize)))
  516         {
  517           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  518           inlinable = false;
  519         }
  520       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
  521       else if (always_inline)
  522         ;
  523       /* When user added an attribute to the callee honor it.  */
  524       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
  525                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
  526         {
  527           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  528           inlinable = false;
  529         }
  530       /* If explicit optimize attributes are not used, the mismatch is caused
  531          by different command line options used to build different units.
  532          Do not care about COMDAT functions - those are intended to be
  533          optimized with the optimization flags of module they are used in.
  534          Also do not care about mixing up size/speed optimization when
  535          DECL_DISREGARD_INLINE_LIMITS is set.  */
  536       else if ((callee->merged_comdat
  537                 && !lookup_attribute ("optimize",
  538                                       DECL_ATTRIBUTES (caller->decl)))
  539                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
  540         ;
  541       /* If mismatch is caused by merging two LTO units with different
  542          optimization flags we want to be bit nicer.  However never inline
  543          if one of functions is not optimized at all.  */
  544       else if (!opt_for_fn (callee->decl, optimize)
  545                || !opt_for_fn (caller->decl, optimize))
  546         {
  547           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  548           inlinable = false;
  549         }
  550       /* If callee is optimized for size and caller is not, allow inlining if
  551          code shrinks or we are in param_max_inline_insns_single limit and
  552          callee is inline (and thus likely an unified comdat).
  553          This will allow caller to run faster.  */
  554       else if (opt_for_fn (callee->decl, optimize_size)
  555                > opt_for_fn (caller->decl, optimize_size))
  556         {
  557           int growth = estimate_edge_growth (e);
  558           if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
  559               && (!DECL_DECLARED_INLINE_P (callee->decl)
  560                   && growth >= MAX (inline_insns_single (caller, false),
  561                                     inline_insns_auto (caller, false))))
  562             {
  563               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  564               inlinable = false;
  565             }
  566         }
  567       /* If callee is more aggressively optimized for performance than caller,
  568          we generally want to inline only cheap (runtime wise) functions.  */
  569       else if (opt_for_fn (callee->decl, optimize_size)
  570                < opt_for_fn (caller->decl, optimize_size)
  571                || (opt_for_fn (callee->decl, optimize)
  572                    > opt_for_fn (caller->decl, optimize)))
  573         {
  574           if (estimate_edge_time (e)
  575               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
  576             {
  577               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  578               inlinable = false;
  579             }
  580         }
  581
  582     }
  583
  584   if (!inlinable && report)
  585     report_inline_failed_reason (e);
  586   return inlinable;
  587 }
 588
 589
 590 /* Return true if the edge E is inlinable during early inlining.  */
 591
 592 static bool
 593 can_early_inline_edge_p (struct cgraph_edge *e)
 594 {
 595   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 596   /* Early inliner might get called at WPA stage when IPA pass adds new
 597      function.  In this case we cannot really do any of early inlining
 598      because function bodies are missing.  */
 599   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 600     return false;
 601   if (!gimple_has_body_p (callee->decl))
 602     {
 603       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 604       return false;
 605     }
 606   /* In early inliner some of callees may not be in SSA form yet
 607      (i.e. the callgraph is cyclic and we did not process
 608      the callee by early inliner, yet).  We don't have CIF code for this
 609      case; later we will re-do the decision in the real inliner.  */
 610   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 611       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 612     {
 613       if (dump_enabled_p ())
 614         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 615                          "  edge not inlinable: not in SSA form\n");
 616       return false;
 617     }
 618   if (!can_inline_edge_p (e, true, true)
 619       || !can_inline_edge_by_limits_p (e, true, false, true))
 620     return false;
 621   return true;
 622 }
 623
 624
 625 /* Return number of calls in N.  Ignore cheap builtins.  */
 626
 627 static int
 628 num_calls (struct cgraph_node *n)
 629 {
 630   struct cgraph_edge *e;
 631   int num = 0;
 632
 633   for (e = n->callees; e; e = e->next_callee)
 634     if (!is_inexpensive_builtin (e->callee->decl))
 635       num++;
 636   return num;
 637 }
 638
 639
  640 /* Return true if early inlining of the call edge E looks worthwhile.  */
  641
  642 static bool
  643 want_early_inline_function_p (struct cgraph_edge *e)
  644 {
  645   bool want_inline = true;
  646   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  647
  648   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
  649     ;
  650   /* For AutoFDO, we need to make sure that before profile summary, all
  651      hot paths' IR look exactly the same as profiled binary. As a result,
  652      in einliner, we will disregard size limit and inline those callsites
  653      that are:
  654        * inlined in the profiled binary, and
  655        * the cloned callee has enough samples to be considered "hot".  */
  656   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
  657     ;
  658   else if (!DECL_DECLARED_INLINE_P (callee->decl)
  659            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
  660     {
  661       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
  662       report_inline_failed_reason (e);
  663       want_inline = false;
  664     }
  665   else
  666     {
  667       /* First take care of very large functions.  */
  668       int min_growth = estimate_min_edge_growth (e), growth = 0;
  669       int n;
  670       int early_inlining_insns = param_early_inlining_insns;
  671
  672       if (min_growth > early_inlining_insns)
  673         {
  674           if (dump_enabled_p ())
  675             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  676                              "  will not early inline: %C->%C, "
  677                              "call is cold and code would grow "
  678                              "at least by %i\n",
  679                              e->caller, callee,
  680                              min_growth);
  681           want_inline = false;
  682         }
  683       else
  684         growth = estimate_edge_growth (e);
  685
  686       /* Growth within param_max_inline_insns_size needs no further checks.  */
  687       if (!want_inline || growth <= param_max_inline_insns_size)
  688         ;
  689       else if (!e->maybe_hot_p ())
  690         {
  691           if (dump_enabled_p ())
  692             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  693                              "  will not early inline: %C->%C, "
  694                              "call is cold and code would grow by %i\n",
  695                              e->caller, callee,
  696                              growth);
  697           want_inline = false;
  698         }
  699       else if (growth > early_inlining_insns)
  700         {
  701           if (dump_enabled_p ())
  702             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  703                              "  will not early inline: %C->%C, "
  704                              "growth %i exceeds --param early-inlining-insns\n",
  705                              e->caller, callee, growth);
  706           want_inline = false;
  707         }
  708       else if ((n = num_calls (callee)) != 0
  709                && growth * (n + 1) > early_inlining_insns)
  710         {
  711           if (dump_enabled_p ())
  712             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  713                              "  will not early inline: %C->%C, "
  714                              "growth %i exceeds --param early-inlining-insns "
  715                              "divided by number of calls\n",
  716                              e->caller, callee, growth);
  717           want_inline = false;
  718         }
  719     }
  720   return want_inline;
  721 }
 722
 723 /* Return the time of EDGE's caller plus UNINLINED_CALL_TIME weighted by FREQ,
 724    i.e. the caller + callee time estimate when EDGE is not inlined.  */
 725
 726 inline sreal
 727 compute_uninlined_call_time (struct cgraph_edge *edge,
 728                              sreal uninlined_call_time,
 729                              sreal freq)
 730 {
 731   cgraph_node *root = edge->caller->inlined_to;
 732   if (!root)
 733     root = edge->caller;
 734
 735   if (!(freq > 0))
 736     uninlined_call_time = uninlined_call_time >> 11;
 737   else
 738     uninlined_call_time *= freq;
 739
 740   /* Add the time of the function the caller's body lives in.  */
 741   return uninlined_call_time + ipa_fn_summaries->get (root)->time;
 742 }
 743
 744 /* Same as compute_uninlined_call_time but compute time when inlining
 745    does happen.
        TIME is scaled by FREQ (or shifted right by 11 when FREQ is not
        positive), the call statement time weighted by FREQ is subtracted and
        the time of the function EDGE's caller is inlined to is added.  A
        result that is not positive is replaced by a small positive value.  */
 746
 747 inline sreal
 748 compute_inlined_call_time (struct cgraph_edge *edge,
 749                            sreal time,
 750                            sreal freq)
 751 {
      /* Use the summary of the function the caller was inlined to, if any.  */
 752   cgraph_node *caller = (edge->caller->inlined_to
 753                          ? edge->caller->inlined_to
 754                          : edge->caller);
 755   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 756
 757   if (freq > 0)
 758     time *= freq;
 759   else
 760     time = time >> 11;
 761
 762   /* This calculation should match one in ipa-inline-analysis.c
 763      (estimate_edge_size_and_time).  */
 764   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 765   time += caller_time;
      /* Never hand back zero or a negative time.  */
 766   if (time <= 0)
 767     time = ((sreal) 1) >> 8;
 768   gcc_checking_assert (time >= 0);
 769   return time;
 770 }
 771
 772 /* Determine time saved by inlining EDGE of frequency FREQ
 773    where callee's runtime w/o inlining is UNINLINED_TIME
 774    and with inlined is INLINED_TIME.
        For positive FREQ the call statement time is added to the difference
        and the sum is scaled by FREQ; for FREQ equal to zero the difference
        is shifted right by 11 instead.  */
 775
 776 inline sreal
 777 inlining_speedup (struct cgraph_edge *edge,
 778                   sreal freq,
 779                   sreal uninlined_time,
 780                   sreal inlined_time)
 781 {
 782   sreal speedup = uninlined_time - inlined_time;
 783   /* Handling of call_time should match one in ipa-inline-fnsummary.c
 784      (estimate_edge_size_and_time).  */
 785   sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
 786
 787   if (freq > 0)
 788     {
 789       speedup = (speedup + call_time);
          /* Skip the multiplication when it would be by one.  */
 790       if (freq != 1)
 791        speedup = speedup * freq;
 792     }
 793   else if (freq == 0)
 794     speedup = speedup >> 11;
 795   gcc_checking_assert (speedup >= 0);
 796   return speedup;
 797 }
 798
 799 /* Return true if inlining E saves more than param_inline_min_speedup
 800    percent of the uninlined caller + callee time.  */
 801
 802 static bool
 803 big_speedup_p (struct cgraph_edge *e)
 804 {
 805   sreal nonspec_time;
 806   sreal specialized_time = estimate_edge_time (e, &nonspec_time);
 807   sreal edge_freq = e->sreal_frequency ();
 808   sreal before = compute_uninlined_call_time (e, nonspec_time, edge_freq);
 809   sreal after = compute_inlined_call_time (e, specialized_time, edge_freq);
 810   cgraph_node *root = e->caller->inlined_to;
 811   if (!root)
 812     root = e->caller;
 813   int min_speedup = opt_for_fn (root->decl, param_inline_min_speedup);
 814
 815   /* Compare in multiplied-out form: the saving must exceed MIN_SPEEDUP
 816      percent of the time without inlining.  */
 817   return (before - after) * 100 > before * min_speedup;
 818 }
 819
 820 /* Return true if we are interested in inlining small function.
 821    When REPORT is true, report reason to dump file.
        E is the call edge being considered.  Every rejection except the
        CIF_FINAL_ERROR one records its reason in E->inline_failed.  */
 822
 823 static bool
 824 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 825 {
 826   bool want_inline = true;
 827   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
      /* Function the caller was inlined to, or the caller itself; its
         parameter settings are used for the size limits below.  */
 828   cgraph_node *to  = (e->caller->inlined_to
 829                       ? e->caller->inlined_to : e->caller);
 830
 831   /* Allow this function to be called before can_inline_edge_p,
 832      since it's usually cheaper.  */
 833   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 834     want_inline = false;
      /* Callees that disregard inline limits skip all remaining checks.  */
 835   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 836     ;
 837   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 838            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 839     {
 840       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 841       want_inline = false;
 842     }
 843   /* Do fast and conservative check if the function can be good
 844      inline candidate.  */
 845   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 846            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 847            && ipa_fn_summaries->get (callee)->min_size
 848                 - ipa_call_summaries->get (e)->call_stmt_size
 849               > inline_insns_auto (e->caller, true))
 850     {
 851       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 852       want_inline = false;
 853     }
      /* Same minimal-size test against the single-function limit for callees
         declared inline or edges with a nonzero IPA count.  */
 854   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 855             || e->count.ipa ().nonzero_p ())
 856            && ipa_fn_summaries->get (callee)->min_size
 857                 - ipa_call_summaries->get (e)->call_stmt_size
 858               > inline_insns_single (e->caller, true))
 859     {
 860       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 861                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 862                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 863       want_inline = false;
 864     }
 865   else
 866     {
 867       int growth = estimate_edge_growth (e);
 868       ipa_hints hints = estimate_edge_hints (e);
          /* True when any of the listed hints is present for this edge.  */
 869       bool apply_hints = (hints & (INLINE_HINT_indirect_call
 870                                    | INLINE_HINT_known_hot
 871                                    | INLINE_HINT_loop_iterations
 872                                    | INLINE_HINT_loop_stride));
 873
          /* Growth within param_max_inline_insns_size is always accepted.  */
 874       if (growth <= opt_for_fn (to->decl,
 875                                 param_max_inline_insns_size))
 876         ;
 877       /* Apply param_max_inline_insns_single limit.  Do not do so when
 878          hints suggests that inlining given function is very profitable.
 879          Avoid computation of big_speedup_p when not necessary to change
 880          outcome of decision.  */
 881       else if (DECL_DECLARED_INLINE_P (callee->decl)
 882                && growth >= inline_insns_single (e->caller, apply_hints)
 883                && (apply_hints
 884                    || growth >= inline_insns_single (e->caller, true)
 885                    || !big_speedup_p (e)))
 886         {
 887           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 888           want_inline = false;
 889         }
          /* Callee not declared inline and -finline-functions is off for the
             caller: apply the param_max_inline_insns_small limit.  */
 890       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 891                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 892                && growth >= opt_for_fn (to->decl,
 893                                         param_max_inline_insns_small))
 894         {
 895           /* growth_positive_p is expensive, always test it last.  */
 896           if (growth >= inline_insns_single (e->caller, false)
 897               || growth_positive_p (callee, e, growth))
 898             {
 899               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 900               want_inline = false;
 901             }
 902         }
 903       /* Apply param_max_inline_insns_auto limit for functions not declared
 904          inline.  Bypass the limit when speedup seems big.  */
 905       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 906                && growth >= inline_insns_auto (e->caller, apply_hints)
 907                && (apply_hints
 908                    || growth >= inline_insns_auto (e->caller, true)
 909                    || !big_speedup_p (e)))
 910         {
 911           /* growth_positive_p is expensive, always test it last.  */
 912           if (growth >= inline_insns_single (e->caller, false)
 913               || growth_positive_p (callee, e, growth))
 914             {
 915               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 916               want_inline = false;
 917             }
 918         }
 919       /* If call is cold, do not inline when function body would grow. */
 920       else if (!e->maybe_hot_p ()
 921                && (growth >= inline_insns_single (e->caller, false)
 922                    || growth_positive_p (callee, e, growth)))
 923         {
 924           e->inline_failed = CIF_UNLIKELY_CALL;
 925           want_inline = false;
 926         }
 927     }
 928   if (!want_inline && report)
 929     report_inline_failed_reason (e);
 930   return want_inline;
 931 }
 932
 933 /* EDGE is self recursive edge.
 934    We handle two cases - when function A is inlining into itself
 935    or when function A is being inlined into another inliner copy of function
 936    A within function B.
 937
 938    In first case OUTER_NODE points to the toplevel copy of A, while
 939    in the second case OUTER_NODE points to the outermost copy of A in B.
 940
 941    In both cases we want to be extra selective since
 942    inlining the call will just introduce new recursive calls to appear.
        PEELING selects the peeling test below rather than the unrolling one;
        DEPTH is compared against the applicable max-inline-recursive-depth
        parameter.  Return true when the call should be inlined; otherwise the
        reason is printed when dumping is enabled.  */
 943
 944 static bool
 945 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 946                                    struct cgraph_node *outer_node,
 947                                    bool peeling,
 948                                    int depth)
 949 {
 950   char const *reason = NULL;
 951   bool want_inline = true;
 952   sreal caller_freq = 1;
 953   int max_depth = opt_for_fn (outer_node->decl,
 954                               param_max_inline_recursive_depth_auto);
 955
      /* Callers declared inline use the non-auto depth limit.  */
 956   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 957     max_depth = opt_for_fn (outer_node->decl,
 958                             param_max_inline_recursive_depth);
 959
 960   if (!edge->maybe_hot_p ())
 961     {
 962       reason = "recursive call is cold";
 963       want_inline = false;
 964     }
 965   else if (depth > max_depth)
 966     {
 967       reason = "--param max-inline-recursive-depth exceeded.";
 968       want_inline = false;
 969     }
      /* When OUTER_NODE is itself an inline copy, CALLER_FREQ becomes the
         frequency of its first caller edge; a zero frequency rejects.  */
 970   else if (outer_node->inlined_to
 971            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 972     {
 973       reason = "caller frequency is 0";
 974       want_inline = false;
 975     }
 976
 977   if (!want_inline)
 978     ;
 979   /* Inlining of self recursive function into copy of itself within other
 980      function is transformation similar to loop peeling.
 981
 982      Peeling is profitable if we can inline enough copies to make probability
 983      of actual call to the self recursive function very small.  Be sure that
 984      the probability of recursion is small.
 985
 986      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 987      This way the expected number of recursion is at most max_depth.  */
 988   else if (peeling)
 989     {
 990       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 991       int i;
          /* NOTE(review): each iteration squares MAX_PROB, so after the loop
             it is (1 - 1/max_depth) raised to 2^(DEPTH-1), not to DEPTH;
             confirm this is the intended bound.  */
 992       for (i = 1; i < depth; i++)
 993         max_prob = max_prob * max_prob;
 994       if (edge->sreal_frequency () >= max_prob * caller_freq)
 995         {
 996           reason = "frequency of recursive call is too large";
 997           want_inline = false;
 998         }
 999     }
1000   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
1001      recursion depth is large.  We reduce function call overhead and increase
1002      chances that things fit in hardware return predictor.
1003
1004      Recursive inlining might however increase cost of stack frame setup
1005      actually slowing down functions whose recursion tree is wide rather than
1006      deep.
1007
1008      Deciding reliably on when to do recursive inlining without profile feedback
1009      is tricky.  For now we disable recursive inlining when probability of self
1010      recursion is low.
1011
1012      Recursive inlining of self recursive call within loop also results in
1013      large loop depths that generally optimize badly.  We may want to throttle
1014      down inlining in those cases.  In particular this seems to happen in one
1015      of libstdc++ rb tree methods.  */
1016   else
1017     {
          /* Reject when the edge frequency relative to CALLER_FREQ is at most
             param_min_inline_recursive_probability percent.  */
1018       if (edge->sreal_frequency () * 100
1019           <= caller_freq
1020              * opt_for_fn (outer_node->decl,
1021                            param_min_inline_recursive_probability))
1022         {
1023           reason = "frequency of recursive call is too small";
1024           want_inline = false;
1025         }
1026     }
1027   if (!want_inline && dump_enabled_p ())
1028     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1029                      "   not inlining recursively: %s\n", reason);
1030   return want_inline;
1031 }
1032
1033 /* Return true when NODE has uninlinable caller;
1034    set HAS_HOT_CALL if it has hot call.
1035    Worker for call_for_symbol_and_aliases.
        HAS_HOT_CALL points to a bool; it is only ever set to true here, never
        cleared.  */
1036
1037 static bool
1038 check_callers (struct cgraph_node *node, void *has_hot_call)
1039 {
1040   struct cgraph_edge *e;
1041    for (e = node->callers; e; e = e->next_caller)
1042      {
          /* The caller must be optimized with -finline-functions-called-once
             enabled.  */
1043        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1044            || !opt_for_fn (e->caller->decl, optimize))
1045          return true;
1046        if (!can_inline_edge_p (e, true))
1047          return true;
1048        if (e->recursive_p ())
1049          return true;
1050        if (!can_inline_edge_by_limits_p (e, true))
1051          return true;
          /* Record the first hot call seen.  */
1052        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1053          *(bool *)has_hot_call = true;
1054      }
1055   return false;
1056 }
1057
1058 /* Return true when NODE has at least one caller edge; DATA is ignored.  */
1059
1060 static bool
1061 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1062 {
1063   /* Shaped as a call_for_symbol_and_aliases callback, hence the unused
1064      DATA argument.  */
1065   return node->callers ? true : false;
1066 }
1067
1068 /* Decide if inlining NODE would reduce unit size by eliminating
1069    the offline copy of function.
1070    When COLD is true the cold calls are considered, too.
        Return false for aliases, for nodes already inlined, for nodes without
        callers, when growth is positive, or when check_callers rejects any
        caller.  */
1071
1072 static bool
1073 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1074 {
1075   bool has_hot_call = false;
1076
1077   /* Aliases gets inlined along with the function they alias.  */
1078   if (node->alias)
1079     return false;
1080   /* Already inlined?  */
1081   if (node->inlined_to)
1082     return false;
1083   /* Does it have callers?  */
1084   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1085     return false;
1086   /* Inlining into all callers would increase size?
         NOTE(review): growth_positive_p is used as a plain truth value
         elsewhere in this file, so the `> 0' looks redundant - confirm.  */
1087   if (growth_positive_p (node, NULL, INT_MIN) > 0)
1088     return false;
1089   /* All inlines must be possible.  */
1090   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1091                                          true))
1092     return false;
      /* Without COLD at least one hot call is required.  */
1093   if (!cold && !has_hot_call)
1094     return false;
1095   return true;
1096 }
1097
1098 /* Tell whether WHERE, whose body has SIZE, is small enough for the wrapper
1099    penalty in edge_badness to be considered.  */
1100
1101 static bool
1102 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1103 {
1104   if (DECL_DECLARED_INLINE_P (where->decl))
1105     return size < inline_insns_single (where, false);
1106   return size < inline_insns_auto (where, false);
1107 }
1108
1109 /* A cost model driving the inlining heuristics in a way so the edges with
1110    smallest badness are inlined first.  After each inlining is performed
1111    the costs of all caller edges of nodes affected are recomputed so the
1112    metrics may accurately depend on values such as number of inlinable callers
1113    of the function or function body size.
        EDGE is the call being rated.  When DUMP is true the computation is
        printed to dump_file.  Returns sreal::max () when inlining would grow
        an external caller; otherwise the badness after hint adjustments.  */
1114
1115 static sreal
1116 edge_badness (struct cgraph_edge *edge, bool dump)
1117 {
1118   sreal badness;
1119   int growth;
1120   sreal edge_time, unspec_edge_time;
1121   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1122   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1123   ipa_hints hints;
      /* Function the caller was inlined to, or the caller itself.  */
1124   cgraph_node *caller = (edge->caller->inlined_to
1125                          ? edge->caller->inlined_to
1126                          : edge->caller);
1127
1128   growth = estimate_edge_growth (edge);
1129   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1130   hints = estimate_edge_hints (edge);
1131   gcc_checking_assert (edge_time >= 0);
1132   /* Check that inlined time is better, but tolerate some roundoff issues.
1133      FIXME: When callee profile drops to 0 we account calls more.  This
1134      should be fixed by never doing that.  */
1135   gcc_checking_assert ((edge_time * 100
1136                         - callee_info->time * 101).to_int () <= 0
1137                         || callee->count.ipa ().initialized_p ());
1138   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1139
1140   if (dump)
1141     {
1142       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1143                edge->caller->dump_name (),
1144                edge->callee->dump_name ());
1145       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1146                growth,
1147                edge_time.to_double (),
1148                unspec_edge_time.to_double ());
1149       ipa_dump_hints (dump_file, hints);
1150       if (big_speedup_p (edge))
1151         fprintf (dump_file, " big_speedup");
1152       fprintf (dump_file, "\n");
1153     }
1154
1155   /* Always prefer inlining saving code size.  */
1156   if (growth <= 0)
1157     {
1158       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1159       if (dump)
1160         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1161                  growth);
1162     }
1163    /* Inlining into EXTERNAL functions is not going to change anything unless
1164       they are themselves inlined.  */
1165    else if (DECL_EXTERNAL (caller->decl))
1166     {
1167       if (dump)
1168         fprintf (dump_file, "      max: function is external\n");
1169       return sreal::max ();
1170     }
1171   /* When profile is available. Compute badness as:
1172
1173                  time_saved * caller_count
1174      goodness =  -------------------------------------------------
1175                  growth_of_caller * overall_growth * combined_size
1176
1177      badness = - goodness
1178
1179      Again use negative value to make calls with profile appear hotter
1180      then calls without.
1181   */
1182   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1183            || caller->count.ipa ().nonzero_p ())
1184     {
1185       sreal numerator, denominator;
1186       int overall_growth;
1187       sreal freq = edge->sreal_frequency ();
1188
1189       numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
          /* Keep the numerator strictly positive.  */
1190       if (numerator <= 0)
1191         numerator = ((sreal) 1 >> 8);
          /* Weight by the caller's IPA count when it is nonzero; scale down
             when the count is initialized but zero.  */
1192       if (caller->count.ipa ().nonzero_p ())
1193         numerator *= caller->count.ipa ().to_gcov_type ();
1194       else if (caller->count.ipa ().initialized_p ())
1195         numerator = numerator >> 11;
1196       denominator = growth;
1197
1198       overall_growth = callee_info->growth;
1199
1200       /* Look for inliner wrappers of the form:
1201
1202          inline_caller ()
1203            {
1204              do_fast_job...
1205              if (need_more_work)
1206                noninline_callee ();
1207            }
1208          Without penalizing this case, we usually inline noninline_callee
1209          into the inline_caller because overall_growth is small preventing
1210          further inlining of inline_caller.
1211
1212          Penalize only callgraph edges to functions with small overall
1213          growth ...
1214         */
1215       if (growth > overall_growth
1216           /* ... and having only one caller which is not inlined ... */
1217           && callee_info->single_caller
1218           && !edge->caller->inlined_to
1219           /* ... and edges executed only conditionally ... */
1220           && freq < 1
1221           /* ... consider case where callee is not inline but caller is ... */
1222           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1223                && DECL_DECLARED_INLINE_P (caller->decl))
1224               /* ... or when early optimizers decided to split and edge
1225                  frequency still indicates splitting is a win ... */
1226               || (callee->split_part && !caller->split_part
1227                   && freq * 100
1228                          < opt_for_fn (caller->decl,
1229                                        param_partial_inlining_entry_probability)
1230                   /* ... and do not overwrite user specified hints.   */
1231                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1232                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1233         {
1234           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1235           int caller_growth = caller_info->growth;
1236
1237           /* Only apply the penalty when caller looks like inline candidate,
1238              and it is not called once.  */
1239           if (!caller_info->single_caller && overall_growth < caller_growth
1240               && caller_info->inlinable
1241               && wrapper_heuristics_may_apply
1242                  (caller, ipa_size_summaries->get (caller)->size))
1243             {
1244               if (dump)
1245                 fprintf (dump_file,
1246                          "     Wrapper penalty. Increasing growth %i to %i\n",
1247                          overall_growth, caller_growth);
1248               overall_growth = caller_growth;
1249             }
1250         }
1251       if (overall_growth > 0)
1252         {
1253           /* Strongly prefer functions with few callers that can be inlined
1254              fully.  The square root here leads to smaller binaries at average.
1255              Watch however for extreme cases and return to linear function
1256              when growth is large.  */
1257           if (overall_growth < 256)
1258             overall_growth *= overall_growth;
1259           else
1260             overall_growth += 256 * 256 - 256;
1261           denominator *= overall_growth;
1262         }
          /* Combined size: the caller's current size plus this edge's growth.  */
1263       denominator *= ipa_size_summaries->get (caller)->size + growth;
1264
1265       badness = - numerator / denominator;
1266
1267       if (dump)
1268         {
1269           fprintf (dump_file,
1270                    "      %f: guessed profile. frequency %f, count %" PRId64
1271                    " caller count %" PRId64
1272                    " time saved %f"
1273                    " overall growth %i (current) %i (original)"
1274                    " %i (compensated)\n",
1275                    badness.to_double (),
1276                    freq.to_double (),
1277                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1278                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1279                    inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1280                    estimate_growth (callee),
1281                    callee_info->growth, overall_growth);
1282         }
1283     }
1284   /* When function local profile is not available or it does not give
1285      useful information (i.e. frequency is zero), base the cost on
1286      loop nest and overall size growth, so we optimize for overall number
1287      of functions fully inlined in program.  */
1288   else
1289     {
          /* Loop depth of the call, capped at 8.  */
1290       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1291       badness = growth;
1292
1293       /* Decrease badness if call is nested.  */
1294       if (badness > 0)
1295         badness = badness >> nest;
1296       else
1297         badness = badness << nest;
1298       if (dump)
1299         fprintf (dump_file, "      %f: no profile. nest %i\n",
1300                  badness.to_double (), nest);
1301     }
1302   gcc_checking_assert (badness != 0);
1303
      /* Adjust by hints.  Presumably sreal::shift (N) scales by 2^N (confirm
         against sreal.h), so the sign-dependent shift counts below move
         positive and negative badness in the same direction.  */
1304   if (edge->recursive_p ())
1305     badness = badness.shift (badness > 0 ? 4 : -4);
1306   if ((hints & (INLINE_HINT_indirect_call
1307                 | INLINE_HINT_loop_iterations
1308                 | INLINE_HINT_loop_stride))
1309       || callee_info->growth <= 0)
1310     badness = badness.shift (badness > 0 ? -2 : 2);
1311   if (hints & (INLINE_HINT_same_scc))
1312     badness = badness.shift (badness > 0 ? 3 : -3);
1313   else if (hints & (INLINE_HINT_in_scc))
1314     badness = badness.shift (badness > 0 ? 2 : -2);
1315   else if (hints & (INLINE_HINT_cross_module))
1316     badness = badness.shift (badness > 0 ? 1 : -1);
1317   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1318     badness = badness.shift (badness > 0 ? -4 : 4);
1319   else if ((hints & INLINE_HINT_declared_inline))
1320     badness = badness.shift (badness > 0 ? -3 : 3);
1321   if (dump)
1322     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1323   return badness;
1324 }
1325
1326 /* Recompute badness of EDGE and update its key in HEAP if needed.
        EDGE->aux holds the heap node when EDGE is already queued; otherwise
        EDGE is inserted and EDGE->aux is set to the new node.  */
1327 static inline void
1328 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1329 {
1330   sreal badness = edge_badness (edge, false);
1331   if (edge->aux)
1332     {
1333       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1334       gcc_checking_assert (n->get_data () == edge);
1335
1336       /* fibonacci_heap::replace_key does busy updating of the
1337          heap that is unnecessarily expensive.
1338          We do lazy increases: after extracting minimum if the key
1339          turns out to be out of date, it is re-inserted into heap
1340          with correct value.  */
1341       if (badness < n->get_key ())
1342         {
1343           if (dump_file && (dump_flags & TDF_DETAILS))
1344             {
1345               fprintf (dump_file,
1346                        "  decreasing badness %s -> %s, %f to %f\n",
1347                        edge->caller->dump_name (),
1348                        edge->callee->dump_name (),
1349                        n->get_key ().to_double (),
1350                        badness.to_double ());
1351             }
1352           heap->decrease_key (n, badness);
1353         }
1354     }
1355   else
1356     {
1357        if (dump_file && (dump_flags & TDF_DETAILS))
1358          {
1359            fprintf (dump_file,
1360                     "  enqueuing call %s -> %s, badness %f\n",
1361                     edge->caller->dump_name (),
1362                     edge->callee->dump_name (),
1363                     badness.to_double ());
1364          }
1365       edge->aux = heap->insert (badness, edge);
1366     }
1367 }
1368
1369
1370 /* NODE was inlined.
1371    All caller edges needs to be reset because
1372    size estimates change. Similarly callees needs reset
1373    because better context may be known.
        Entries are removed from edge_growth_cache (when it exists) for
        uninlined edges only.  */
1374
1375 static void
1376 reset_edge_caches (struct cgraph_node *node)
1377 {
1378   struct cgraph_edge *edge;
1379   struct cgraph_edge *e = node->callees;
1380   struct cgraph_node *where = node;
1381   struct ipa_ref *ref;
1382
      /* Work on the function NODE was inlined to, if any.  */
1383   if (where->inlined_to)
1384     where = where->inlined_to;
1385
1386   reset_node_cache (where);
1387
1388   if (edge_growth_cache != NULL)
1389     for (edge = where->callers; edge; edge = edge->next_caller)
1390       if (edge->inline_failed)
1391         edge_growth_cache->remove (edge);
1392
      /* Do the same for every alias of WHERE.  */
1393   FOR_EACH_ALIAS (where, ref)
1394     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1395
1396   if (!e)
1397     return;
1398
      /* Non-recursive walk over NODE's callees and over the callees of every
         body inlined below it.  */
1399   while (true)
1400     if (!e->inline_failed && e->callee->callees)
1401       e = e->callee->callees;
1402     else
1403       {
1404         if (edge_growth_cache != NULL && e->inline_failed)
1405           edge_growth_cache->remove (e);
1406         if (e->next_callee)
1407           e = e->next_callee;
1408         else
1409           {
                /* Callee list exhausted: climb via the first caller edge of the
                   current function until a sibling exists or NODE is reached.  */
1410             do
1411               {
1412                 if (e->caller == node)
1413                   return;
1414                 e = e->caller->callers;
1415               }
1416             while (!e->next_callee);
1417             e = e->next_callee;
1418           }
1419       }
1420 }
1421
1422 /* Recompute HEAP nodes for each of caller of NODE.
1423    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1424    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1425    it is inlinable. Otherwise check all edges.
        Edges that fail the re-check are removed from HEAP and have their aux
        pointer cleared.  */
1426
1427 static void
1428 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1429                     bitmap updated_nodes,
1430                     struct cgraph_edge *check_inlinablity_for)
1431 {
1432   struct cgraph_edge *edge;
1433   struct ipa_ref *ref;
1434
      /* Nothing to do for non-inlinable functions and for inline clones.  */
1435   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1436       || node->inlined_to)
1437     return;
      /* Stop when NODE's bit was already set in UPDATED_NODES.  */
1438   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1439     return;
1440
1441   FOR_EACH_ALIAS (node, ref)
1442     {
1443       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1444       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1445     }
1446
1447   for (edge = node->callers; edge; edge = edge->next_caller)
1448     if (edge->inline_failed)
1449       {
1450         if (!check_inlinablity_for
1451             || check_inlinablity_for == edge)
1452           {
1453             if (can_inline_edge_p (edge, false)
1454                 && want_inline_small_function_p (edge, false)
1455                 && can_inline_edge_by_limits_p (edge, false))
1456               update_edge_key (heap, edge);
1457             else if (edge->aux)
1458               {
1459                 report_inline_failed_reason (edge);
1460                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1461                 edge->aux = NULL;
1462               }
1463           }
            /* Not re-checked: only refresh the key of edges already in HEAP.  */
1464         else if (edge->aux)
1465           update_edge_key (heap, edge);
1466       }
1467 }
1468
1469 /* Recompute HEAP nodes for each uninlined call in NODE
1470    If UPDATE_SINCE is non-NULL check if edges called within that function
1471    are inlinable (typically UPDATE_SINCE is the inline clone we introduced
1472    where all edges have new context).
1473
1474    This is used when we know that edge badnesses are going only to increase
1475    (we introduced new call site) and thus all we need is to insert newly
1476    created edges into heap.
        Edges whose callee has its bit set in UPDATED_NODES are left
        untouched.  */
1477
1478 static void
1479 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1480                     struct cgraph_node *update_since,
1481                     bitmap updated_nodes)
1482 {
1483   struct cgraph_edge *e = node->callees;
      /* True while the walk is inside the body rooted at UPDATE_SINCE.  */
1484   bool check_inlinability = update_since == node;
1485
1486   if (!e)
1487     return;
      /* Non-recursive walk over NODE's callees and over the callees of every
         body inlined below it.  */
1488   while (true)
1489     if (!e->inline_failed && e->callee->callees)
1490       {
1491         if (e->callee == update_since)
1492           check_inlinability = true;
1493         e = e->callee->callees;
1494       }
1495     else
1496       {
1497         enum availability avail;
1498         struct cgraph_node *callee;
1499         if (!check_inlinability)
1500           {
                /* Outside UPDATE_SINCE only edges already in HEAP get their
                   key refreshed.  */
1501             if (e->aux
1502                 && !bitmap_bit_p (updated_nodes,
1503                                   e->callee->ultimate_alias_target
1504                                     (&avail, e->caller)->get_uid ()))
1505               update_edge_key (heap, e);
1506           }
1507         /* We do not reset callee growth cache here.  Since we added a new call,
1508            growth should have just increased and consequently badness metric
1509            don't need updating.  */
1510         else if (e->inline_failed
1511                  && (callee = e->callee->ultimate_alias_target (&avail,
1512                                                                 e->caller))
1513                  && avail >= AVAIL_AVAILABLE
1514                  && ipa_fn_summaries->get (callee) != NULL
1515                  && ipa_fn_summaries->get (callee)->inlinable
1516                  && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1517           {
1518             if (can_inline_edge_p (e, false)
1519                 && want_inline_small_function_p (e, false)
1520                 && can_inline_edge_by_limits_p (e, false))
1521               {
1522                 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1523                 gcc_checking_assert (check_inlinability || e->aux);
1524                 update_edge_key (heap, e);
1525               }
1526             else if (e->aux)
1527               {
1528                 report_inline_failed_reason (e);
1529                 heap->delete_node ((edge_heap_node_t *) e->aux);
1530                 e->aux = NULL;
1531               }
1532           }
1533         /* In case we redirected to unreachable node we only need to remove the
1534            fibheap entry.  */
1535         else if (e->aux)
1536           {
1537             heap->delete_node ((edge_heap_node_t *) e->aux);
1538             e->aux = NULL;
1539           }
1540         if (e->next_callee)
1541           e = e->next_callee;
1542         else
1543           {
                /* Callee list exhausted: climb via the first caller edge of the
                   current function until a sibling exists or NODE is reached;
                   leaving UPDATE_SINCE switches the re-check off again.  */
1544             do
1545               {
1546                 if (e->caller == node)
1547                   return;
1548                 if (e->caller == update_since)
1549                   check_inlinability = false;
1550                 e = e->caller->callers;
1551               }
1552             while (!e->next_callee);
1553             e = e->next_callee;
1554           }
1555       }
1556 }
1557
1558 /* Enqueue all recursive calls from NODE into priority queue depending on
1559    how likely we want to recursively inline the call.  */
1560
1561 static void
1562 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1563                         edge_heap_t *heap)
1564 {
1565   struct cgraph_edge *e;
1566   enum availability avail;
1567
1568   for (e = where->callees; e; e = e->next_callee)
1569     if (e->callee == node
1570         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1571             && avail > AVAIL_INTERPOSABLE))
1572       heap->insert (-e->sreal_frequency (), e);
1573   for (e = where->callees; e; e = e->next_callee)
1574     if (!e->inline_failed)
1575       lookup_recursive_calls (node, e->callee, heap);
1576 }
1577
1578 /* Decide on recursive inlining: in the case function has recursive calls,
1579    inline until body size reaches the limit taken from
        param_max_inline_insns_recursive (for functions declared inline) or
        param_max_inline_insns_recursive_auto.  If any new indirect edges
1580    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1581    is NULL.

        EDGE identifies the function to work on: it is EDGE's caller, or the
        function that caller is inlined into.  Return true if at least one
        recursive call was inlined, false otherwise.  */
1582
1583 static bool
1584 recursive_inlining (struct cgraph_edge *edge,
1585                     vec<cgraph_edge *> *new_edges)
1586 {
1587   cgraph_node *to  = (edge->caller->inlined_to
1588                       ? edge->caller->inlined_to : edge->caller);
1589   int limit = opt_for_fn (to->decl,
1590                           param_max_inline_insns_recursive_auto);
1591   edge_heap_t heap (sreal::min ());
1592   struct cgraph_node *node;
1593   struct cgraph_edge *e;
1594   struct cgraph_node *master_clone = NULL, *next;
1595   int depth = 0;
       /* Number of calls inlined so far; only used for the dump.  */
1596   int n = 0;
1597
       /* NODE is the function whose body is being grown; it is computed the
          same way as TO above.  */
1598   node = edge->caller;
1599   if (node->inlined_to)
1600     node = node->inlined_to;
1601
1602   if (DECL_DECLARED_INLINE_P (node->decl))
1603     limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1604
1605   /* Make sure that function is small enough to be considered for inlining.  */
1606   if (estimate_size_after_inlining (node, edge)  >= limit)
1607     return false;
1608   lookup_recursive_calls (node, node, &heap);
1609   if (heap.empty ())
1610     return false;
1611
1612   if (dump_file)
1613     fprintf (dump_file,
1614              "  Performing recursive inlining on %s\n", node->dump_name ());
1615
1616   /* Do the inlining and update list of recursive call during process.  */
1617   while (!heap.empty ())
1618     {
1619       struct cgraph_edge *curr = heap.extract_min ();
           /* DEST remembers the current callee so that it can be restored if
              the edge is redirected to MASTER_CLONE but then not inlined.  */
1620       struct cgraph_node *cnode, *dest = curr->callee;
1621
           /* Edges that cannot be inlined are simply dropped from the queue.  */
1622       if (!can_inline_edge_p (curr, true)
1623           || !can_inline_edge_by_limits_p (curr, true))
1624         continue;
1625
1626       /* MASTER_CLONE is produced in the case we already started modifying
1627          the function. Be sure to redirect edge to the original body before
1628          estimating growths otherwise we will be seeing growths after inlining
1629          the already modified body.  */
1630       if (master_clone)
1631         {
1632           curr->redirect_callee (master_clone);
1633           if (edge_growth_cache != NULL)
1634             edge_growth_cache->remove (curr);
1635         }
1636
           /* Size limit reached: restore the callee and stop.  Edges still
              left in the heap trigger the "growth limit met" dump below.  */
1637       if (estimate_size_after_inlining (node, curr) > limit)
1638         {
1639           curr->redirect_callee (dest);
1640           if (edge_growth_cache != NULL)
1641             edge_growth_cache->remove (curr);
1642           break;
1643         }
1644
           /* NOTE(review): the test inside this loop does not reference
              CNODE, so when it holds DEPTH grows by one for every level of
              the inlined_to chain above CURR's caller -- confirm this is the
              intended recursion depth.  */
1645       depth = 1;
1646       for (cnode = curr->caller;
1647            cnode->inlined_to; cnode = cnode->callers->caller)
1648         if (node->decl
1649             == curr->callee->ultimate_alias_target ()->decl)
1650           depth++;
1651
           /* Not profitable: restore the callee and try the next edge.  */
1652       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1653         {
1654           curr->redirect_callee (dest);
1655           if (edge_growth_cache != NULL)
1656             edge_growth_cache->remove (curr);
1657           continue;
1658         }
1659
1660       if (dump_file)
1661         {
1662           fprintf (dump_file,
1663                    "   Inlining call of depth %i", depth);
1664           if (node->count.nonzero_p () && curr->count.initialized_p ())
1665             {
1666               fprintf (dump_file, " called approx. %.2f times per call",
1667                        (double)curr->count.to_gcov_type ()
1668                        / node->count.to_gcov_type ());
1669             }
1670           fprintf (dump_file, "\n");
1671         }
1672       if (!master_clone)
1673         {
1674           /* We need original clone to copy around.  */
1675           master_clone = node->create_clone (node->decl, node->count,
1676             false, vNULL, true, NULL, NULL);
               /* Also clone the bodies already inlined into the clone.  */
1677           for (e = master_clone->callees; e; e = e->next_callee)
1678             if (!e->inline_failed)
1679               clone_inlined_nodes (e, true, false, NULL);
1680           curr->redirect_callee (master_clone);
1681           if (edge_growth_cache != NULL)
1682             edge_growth_cache->remove (curr);
1683         }
1684
1685       inline_call (curr, false, new_edges, &overall_size, true);
1686       reset_node_cache (node);
           /* Queue the recursive calls found in the copy just inlined.  */
1687       lookup_recursive_calls (node, curr->callee, &heap);
1688       n++;
1689     }
1690
1691   if (!heap.empty () && dump_file)
1692     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1693
       /* MASTER_CLONE is created right before the first inline_call, so a
          NULL value means nothing was inlined.  */
1694   if (!master_clone)
1695     return false;
1696
1697   if (dump_enabled_p ())
1698     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1699                      "\n   Inlined %i times, "
1700                      "body grown from size %i to %i, time %f to %f\n", n,
1701                      ipa_size_summaries->get (master_clone)->size,
1702                      ipa_size_summaries->get (node)->size,
1703                      ipa_fn_summaries->get (master_clone)->time.to_double (),
1704                      ipa_fn_summaries->get (node)->time.to_double ());
1705
1706   /* Remove master clone we used for inlining.  We rely that clones inlined
1707      into master clone gets queued just before master clone so we don't
1708      need recursion.  */
1709   for (node = symtab->first_function (); node != master_clone;
1710        node = next)
1711     {
           /* Fetch the successor first; NODE may be removed below.  */
1712       next = symtab->next_function (node);
1713       if (node->inlined_to == master_clone)
1714         node->remove ();
1715     }
1716   master_clone->remove ();
1717   return true;
1718 }
1719
1720
1721 /* Given whole compilation unit estimate of INSNS, compute how large we can
1722    allow the unit to grow.  */
1723
1724 static int64_t
1725 compute_max_insns (cgraph_node *node, int insns)
1726 {
1727   int max_insns = insns;
1728   if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1729     max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1730
1731   return ((int64_t) max_insns
1732           * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1733 }
1734
1735
1736 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1737
1738 static void
1739 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1740 {
1741   while (new_edges.length () > 0)
1742     {
1743       struct cgraph_edge *edge = new_edges.pop ();
1744
1745       gcc_assert (!edge->aux);
1746       gcc_assert (edge->callee);
1747       if (edge->inline_failed
1748           && can_inline_edge_p (edge, true)
1749           && want_inline_small_function_p (edge, true)
1750           && can_inline_edge_by_limits_p (edge, true))
1751         edge->aux = heap->insert (edge_badness (edge, false), edge);
1752     }
1753 }
1754
1755 /* Remove EDGE from the fibheap.  */
1756
1757 static void
1758 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1759 {
1760   if (e->aux)
1761     {
1762       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1763       e->aux = NULL;
1764     }
1765 }
1766
1767 /* Return true if speculation of edge E seems useful.
1768    If ANTICIPATE_INLINING is true, be conservative and hope that E
1769    may get inlined.  */
1770
1771 bool
1772 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1773 {
1774   /* If we have already decided to inline the edge, it seems useful.  */
1775   if (!e->inline_failed)
1776     return true;
1777
1778   enum availability avail;
1779   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1780                                                                  e->caller);
1781   struct cgraph_edge *direct, *indirect;
1782   struct ipa_ref *ref;
1783
1784   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1785
1786   if (!e->maybe_hot_p ())
1787     return false;
1788
1789   /* See if IP optimizations found something potentially useful about the
1790      function.  For now we look only for CONST/PURE flags.  Almost everything
1791      else we propagate is useless.  */
1792   if (avail >= AVAIL_AVAILABLE)
1793     {
1794       int ecf_flags = flags_from_decl_or_type (target->decl);
1795       if (ecf_flags & ECF_CONST)
1796         {
1797           e->speculative_call_info (direct, indirect, ref);
1798           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1799             return true;
1800         }
1801       else if (ecf_flags & ECF_PURE)
1802         {
1803           e->speculative_call_info (direct, indirect, ref);
1804           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1805             return true;
1806         }
1807     }
1808   /* If we did not managed to inline the function nor redirect
1809      to an ipa-cp clone (that are seen by having local flag set),
1810      it is probably pointless to inline it unless hardware is missing
1811      indirect call predictor.  */
1812   if (!anticipate_inlining && !target->local)
1813     return false;
1814   /* For overwritable targets there is not much to do.  */
1815   if (!can_inline_edge_p (e, false)
1816       || !can_inline_edge_by_limits_p (e, false, true))
1817     return false;
1818   /* OK, speculation seems interesting.  */
1819   return true;
1820 }
1821
1822 /* We know that EDGE is not going to be inlined.
1823    See if we can remove speculation.  */
1824
1825 static void
1826 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1827 {
1828   if (edge->speculative && !speculation_useful_p (edge, false))
1829     {
1830       struct cgraph_node *node = edge->caller;
1831       struct cgraph_node *where = node->inlined_to
1832                                   ? node->inlined_to : node;
1833       auto_bitmap updated_nodes;
1834
1835       if (edge->count.ipa ().initialized_p ())
1836         spec_rem += edge->count.ipa ();
1837       cgraph_edge::resolve_speculation (edge);
1838       reset_edge_caches (where);
1839       ipa_update_overall_fn_summary (where);
1840       update_caller_keys (edge_heap, where,
1841                           updated_nodes, NULL);
1842       update_callee_keys (edge_heap, where, NULL,
1843                           updated_nodes);
1844     }
1845 }
1846
1847 /* Return true if NODE should be accounted for overall size estimate.
1848    Skip all nodes optimized for size so we can measure the growth of hot
1849    part of program no matter of the padding.  */
1850
1851 bool
1852 inline_account_function_p (struct cgraph_node *node)
1853 {
1854    return (!DECL_EXTERNAL (node->decl)
1855            && !opt_for_fn (node->decl, optimize_size)
1856            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1857 }
1858
1859 /* Count number of callers of NODE and store it into DATA (that
1860    points to int.  Worker for cgraph_for_node_and_aliases.  */
1861
1862 static bool
1863 sum_callers (struct cgraph_node *node, void *data)
1864 {
1865   struct cgraph_edge *e;
1866   int *num_calls = (int *)data;
1867
1868   for (e = node->callers; e; e = e->next_caller)
1869     (*num_calls)++;
1870   return false;
1871 }
1872
1873 /* We only propagate across edges with non-interposable callee.  */
1874
1875 inline bool
1876 ignore_edge_p (struct cgraph_edge *e)
1877 {
1878   enum availability avail;
1879   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1880   return (avail <= AVAIL_INTERPOSABLE);
1881 }
1882
1883 /* We use greedy algorithm for inlining of small functions:
1884    All inline candidates are put into prioritized heap ordered in
1885    increasing badness.
1886
1887    The inlining of small functions is bounded by unit growth parameters.

        The function works in three stages: it computes the initial unit size
        and per-function data used by the badness metric, fills the heap with
        every call edge that passes the inlining checks, and then repeatedly
        extracts the edge with the smallest badness and inlines it, updating
        the keys of the affected edges after each step.  */
1888
1889 static void
1890 inline_small_functions (void)
1891 {
1892   struct cgraph_node *node;
1893   struct cgraph_edge *edge;
1894   edge_heap_t edge_heap (sreal::min ());
       /* Scratch bitmap shared by the update_*_keys calls; cleared after each
          round of updates.  */
1895   auto_bitmap updated_nodes;
       /* Smallest overall_size seen so far; it is the base from which the
          unit growth limit is computed.  */
1896   int min_size;
1897   auto_vec<cgraph_edge *> new_indirect_edges;
1898   int initial_size = 0;
1899   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1900   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1901   new_indirect_edges.create (8);
1902
       /* Make sure edges removed from the callgraph also leave the heap.  */
1903   edge_removal_hook_holder
1904     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1905
1906   /* Compute overall unit size and other global parameters used by badness
1907      metrics.  */
1908
1909   max_count = profile_count::uninitialized ();
       /* ORDER is freed right away; what is used below is the ipa_dfs_info
          found in each node's AUX field.  */
1910   ipa_reduced_postorder (order, true, ignore_edge_p);
1911   free (order);
1912
1913   FOR_EACH_DEFINED_FUNCTION (node)
1914     if (!node->inlined_to)
1915       {
1916         if (!node->alias && node->analyzed
1917             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1918             && opt_for_fn (node->decl, optimize))
1919           {
1920             class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1921             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1922
1923             /* Do not account external functions, they will be optimized out
1924                if not inlined.  Also only count the non-cold portion of program.  */
1925             if (inline_account_function_p (node))
1926               initial_size += ipa_size_summaries->get (node)->size;
1927             info->growth = estimate_growth (node);
1928
                 /* Record whether NODE and its aliases together have exactly
                    one caller.  */
1929             int num_calls = 0;
1930             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1931                                                true);
1932             if (num_calls == 1)
1933               info->single_caller = true;
                 /* NODE has a successor on its cycle list: give the members
                    of the list a common non-zero scc_no, stopping at the
                    first one that already has a number.  */
1934             if (dfs && dfs->next_cycle)
1935               {
1936                 struct cgraph_node *n2;
1937                 int id = dfs->scc_no + 1;
1938                 for (n2 = node; n2;
1939                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1940                   if (opt_for_fn (n2->decl, optimize))
1941                     {
1942                       ipa_fn_summary *info2 = ipa_fn_summaries->get
1943                          (n2->inlined_to ? n2->inlined_to : n2);
1944                       if (info2->scc_no)
1945                         break;
1946                       info2->scc_no = id;
1947                     }
1948               }
1949           }
1950
             /* Track the largest IPA count seen on any incoming edge.  */
1951         for (edge = node->callers; edge; edge = edge->next_caller)
1952           max_count = max_count.max (edge->count.ipa ());
1953       }
1954   ipa_free_postorder_info ();
1955   initialize_growth_caches ();
1956
1957   if (dump_file)
1958     fprintf (dump_file,
1959              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1960              initial_size);
1961
1962   overall_size = initial_size;
1963   min_size = overall_size;
1964
1965   /* Populate the heap with all edges we might inline.  */
1966
1967   FOR_EACH_DEFINED_FUNCTION (node)
1968     {
1969       bool update = false;
1970       struct cgraph_edge *next = NULL;
1971       bool has_speculative = false;
1972
1973       if (!opt_for_fn (node->decl, optimize))
1974         continue;
1975
1976       if (dump_file)
1977         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1978
1979       for (edge = node->callees; edge; edge = edge->next_callee)
1980         {
               /* INLINE_FAILED is tested again after the checks, which take
                  the edge as a non-const argument.  */
1981           if (edge->inline_failed
1982               && !edge->aux
1983               && can_inline_edge_p (edge, true)
1984               && want_inline_small_function_p (edge, true)
1985               && can_inline_edge_by_limits_p (edge, true)
1986               && edge->inline_failed)
1987             {
1988               gcc_assert (!edge->aux);
1989               update_edge_key (&edge_heap, edge);
1990             }
1991           if (edge->speculative)
1992             has_speculative = true;
1993         }
           /* Drop speculations that are not useful.  NEXT is fetched up front
              because the current edge is handed to resolve_speculation.  */
1994       if (has_speculative)
1995         for (edge = node->callees; edge; edge = next)
1996           {
1997             next = edge->next_callee;
1998             if (edge->speculative
1999                 && !speculation_useful_p (edge, edge->aux != NULL))
2000               {
2001                 cgraph_edge::resolve_speculation (edge);
2002                 update = true;
2003               }
2004           }
           /* Some speculation was resolved: refresh the summary and keys of
              the function NODE belongs to.  */
2005       if (update)
2006         {
2007           struct cgraph_node *where = node->inlined_to
2008                                       ? node->inlined_to : node;
2009           ipa_update_overall_fn_summary (where);
2010           reset_edge_caches (where);
2011           update_caller_keys (&edge_heap, where,
2012                               updated_nodes, NULL);
2013           update_callee_keys (&edge_heap, where, NULL,
2014                               updated_nodes);
2015           bitmap_clear (updated_nodes);
2016         }
2017     }
2018
2019   gcc_assert (in_lto_p
2020               || !(max_count > 0)
2021               || (profile_info && flag_branch_probabilities));
2022
       /* Main loop: take the edge with the smallest badness, re-validate it
          and inline it if it still qualifies.  */
2023   while (!edge_heap.empty ())
2024     {
           /* Overall unit size before this step, used in the dump below.  */
2025       int old_size = overall_size;
2026       struct cgraph_node *where, *callee;
           /* Key the edge was queued with.  */
2027       sreal badness = edge_heap.min_key ();
2028       sreal current_badness;
2029       int growth;
2030
2031       edge = edge_heap.extract_min ();
2032       gcc_assert (edge->aux);
2033       edge->aux = NULL;
2034       if (!edge->inline_failed || !edge->callee->analyzed)
2035         continue;
2036
2037       /* Be sure that caches are maintained consistent.
2038          This check is affected by scaling roundoff errors when compiling for
2039          IPA, thus we skip it in that case.  */
2040       if (flag_checking && !edge->callee->count.ipa_p ()
2041           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2042         {
2043           sreal cached_badness = edge_badness (edge, false);
2044
2045           int old_size_est = estimate_edge_size (edge);
2046           sreal old_time_est = estimate_edge_time (edge);
2047           int old_hints_est = estimate_edge_hints (edge);
2048
               /* Throw the cached estimates away and recompute them; the
                  results must agree with the cached values.  */
2049           if (edge_growth_cache != NULL)
2050             edge_growth_cache->remove (edge);
2051           reset_node_cache (edge->caller->inlined_to
2052                             ? edge->caller->inlined_to
2053                             : edge->caller);
2054           gcc_assert (old_size_est == estimate_edge_size (edge));
2055           gcc_assert (old_time_est == estimate_edge_time (edge));
2056           /* FIXME:
2057
2058              gcc_assert (old_hints_est == estimate_edge_hints (edge));
2059
2060              fails with profile feedback because some hints depends on
2061              maybe_hot_edge_p predicate and because callee gets inlined to other
2062              calls, the edge may become cold.
2063              This ought to be fixed by computing relative probabilities
2064              for given invocation but that will be better done once whole
2065              code is converted to sreals.  Disable for now and revert to "wrong"
2066              value so enable/disable checking paths agree.  */
               /* NOTE(review): edge_growth_cache is dereferenced here without
                  the NULL test used a few lines above; presumably it cannot
                  be NULL after initialize_growth_caches -- confirm.  */
2067           edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2068
2069           /* When updating the edge costs, we only decrease badness in the keys.
2070              Increases of badness are handled lazily; when we see key with out
2071              of date value on it, we re-insert it now.  */
2072           current_badness = edge_badness (edge, false);
2073           gcc_assert (cached_badness == current_badness);
2074           gcc_assert (current_badness >= badness);
2075         }
2076       else
2077         current_badness = edge_badness (edge, false);
           /* The key was stale.  If the edge is no longer the best candidate
              put it back with its current badness, otherwise go on with the
              up-to-date value.  */
2078       if (current_badness != badness)
2079         {
2080           if (edge_heap.min () && current_badness > edge_heap.min_key ())
2081             {
2082               edge->aux = edge_heap.insert (current_badness, edge);
2083               continue;
2084             }
2085           else
2086             badness = current_badness;
2087         }
2088
2089       if (!can_inline_edge_p (edge, true)
2090           || !can_inline_edge_by_limits_p (edge, true))
2091         {
2092           resolve_noninline_speculation (&edge_heap, edge);
2093           continue;
2094         }
2095
2096       callee = edge->callee->ultimate_alias_target ();
2097       growth = estimate_edge_growth (edge);
2098       if (dump_file)
2099         {
2100           fprintf (dump_file,
2101                    "\nConsidering %s with %i size\n",
2102                    callee->dump_name (),
2103                    ipa_size_summaries->get (callee)->size);
2104           fprintf (dump_file,
2105                    " to be inlined into %s in %s:%i\n"
2106                    " Estimated badness is %f, frequency %.2f.\n",
2107                    edge->caller->dump_name (),
2108                    edge->call_stmt
2109                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
2110                                                         edge->call_stmt))
2111                        > BUILTINS_LOCATION)
2112                    ? gimple_filename ((const gimple *) edge->call_stmt)
2113                    : "unknown",
2114                    edge->call_stmt
2115                    ? gimple_lineno ((const gimple *) edge->call_stmt)
2116                    : -1,
2117                    badness.to_double (),
2118                    edge->sreal_frequency ().to_double ());
2119           if (edge->count.ipa ().initialized_p ())
2120             {
2121               fprintf (dump_file, " Called ");
2122               edge->count.ipa ().dump (dump_file);
2123               fprintf (dump_file, " times\n");
2124             }
2125           if (dump_flags & TDF_DETAILS)
2126             edge_badness (edge, true);
2127         }
2128
2129       where = edge->caller;
2130
           /* Enforce the unit growth limit, except for callees that disregard
              inline limits.  */
2131       if (overall_size + growth > compute_max_insns (where, min_size)
2132           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2133         {
2134           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2135           report_inline_failed_reason (edge);
2136           resolve_noninline_speculation (&edge_heap, edge);
2137           continue;
2138         }
2139
2140       if (!want_inline_small_function_p (edge, true))
2141         {
2142           resolve_noninline_speculation (&edge_heap, edge);
2143           continue;
2144         }
2145
           /* Remembered to detect below whether the offline copy's count
              changed.  */
2146       profile_count old_count = callee->count;
2147
2148       /* Heuristics for inlining small functions work poorly for
2149          recursive calls where we do effects similar to loop unrolling.
2150          When inlining such edge seems profitable, leave decision on
2151          specific inliner.  */
2152       if (edge->recursive_p ())
2153         {
2154           if (where->inlined_to)
2155             where = where->inlined_to;
2156           if (!recursive_inlining (edge,
2157                                    opt_for_fn (edge->caller->decl,
2158                                                flag_indirect_inlining)
2159                                    ? &new_indirect_edges : NULL))
2160             {
2161               edge->inline_failed = CIF_RECURSIVE_INLINING;
2162               resolve_noninline_speculation (&edge_heap, edge);
2163               continue;
2164             }
2165           reset_edge_caches (where);
2166           /* Recursive inliner inlines all recursive calls of the function
2167              at once. Consequently we need to update all callee keys.  */
2168           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2169             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2170           update_callee_keys (&edge_heap, where, where, updated_nodes);
2171           bitmap_clear (updated_nodes);
2172         }
2173       else
2174         {
2175           struct cgraph_node *outer_node = NULL;
2176           int depth = 0;
2177
2178           /* Consider the case where self recursive function A is inlined
2179              into B.  This is desired optimization in some cases, since it
2180              leads to effect similar of loop peeling and we might completely
2181              optimize out the recursive call.  However we must be extra
2182              selective.  */
2183
               /* Walk up the chain of inline clones, counting those that are
                  copies of CALLEE; WHERE ends up at the function that is not
                  inlined into anything.  */
2184           where = edge->caller;
2185           while (where->inlined_to)
2186             {
2187               if (where->decl == callee->decl)
2188                 outer_node = where, depth++;
2189               where = where->callers->caller;
2190             }
2191           if (outer_node
2192               && !want_inline_self_recursive_call_p (edge, outer_node,
2193                                                      true, depth))
2194             {
2195               edge->inline_failed
2196                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2197                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2198               resolve_noninline_speculation (&edge_heap, edge);
2199               continue;
2200             }
2201           else if (depth && dump_file)
2202             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2203
2204           gcc_checking_assert (!callee->inlined_to);
2205
               /* This OLD_SIZE is WHERE's body size and shadows the OLD_SIZE
                  declared at the top of the loop (overall unit size); the
                  dump after this block uses the outer one.  */
2206           int old_size = ipa_size_summaries->get (where)->size;
2207           sreal old_time = ipa_fn_summaries->get (where)->time;
2208
2209           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2210           reset_edge_caches (edge->callee);
2211           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2212
2213           /* If caller's size and time increased we do not need to update
2214              all edges because badness is not going to decrease.  */
2215           if (old_size <= ipa_size_summaries->get (where)->size
2216               && old_time <= ipa_fn_summaries->get (where)->time
2217               /* Wrapper penalty may be non-monotonous in this respect.
2218                  Fortunately it only affects small functions.  */
2219               && !wrapper_heuristics_may_apply (where, old_size))
2220             update_callee_keys (&edge_heap, edge->callee, edge->callee,
2221                                 updated_nodes);
2222           else
2223             update_callee_keys (&edge_heap, where,
2224                                 edge->callee,
2225                                 updated_nodes);
2226         }
2227       where = edge->caller;
2228       if (where->inlined_to)
2229         where = where->inlined_to;
2230
2231       /* Our profitability metric can depend on local properties
2232          such as number of inlinable calls and size of the function body.
2233          After inlining these properties might change for the function we
2234          inlined into (since its body size changed) and for the functions
2235          called by function we inlined (since number of its inlinable callers
2236          might change).  */
2237       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2238       /* Offline copy count has possibly changed, recompute if profile is
2239          available.  */
2240       struct cgraph_node *n
2241               = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2242       if (n != edge->callee && n->analyzed && !(n->count == old_count)
2243           && n->count.ipa_p ())
2244         update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2245       bitmap_clear (updated_nodes);
2246
2247       if (dump_enabled_p ())
2248         {
2249           ipa_fn_summary *s = ipa_fn_summaries->get (where);
2250
2251           /* dump_printf can't handle %+i.  */
2252           char buf_net_change[100];
2253           snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2254                     overall_size - old_size);
2255
2256           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2257                            " Inlined %C into %C which now has time %f and "
2258                            "size %i, net change of %s%s.\n",
2259                            edge->callee, edge->caller,
2260                            s->time.to_double (),
2261                            ipa_size_summaries->get (edge->caller)->size,
2262                            buf_net_change,
2263                            cross_module_call_p (edge) ? " (cross module)":"");
2264         }
           /* A smaller unit lowers the base for the growth limit.  */
2265       if (min_size > overall_size)
2266         {
2267           min_size = overall_size;
2268
2269           if (dump_file)
2270             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2271         }
2272     }
2273
2274   free_growth_caches ();
2275   if (dump_enabled_p ())
2276     dump_printf (MSG_NOTE,
2277                  "Unit growth for small function inlining: %i->%i (%i%%)\n",
2278                  initial_size, overall_size,
2279                  initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2280   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2281 }
2282
2283 /* Flatten NODE.  Performed both during early inlining and
2284    at IPA inlining time.  */
2285
2286 static void
2287 flatten_function (struct cgraph_node *node, bool early, bool update)
2288 {
2289   struct cgraph_edge *e;
2290
2291   /* We shouldn't be called recursively when we are being processed.  */
2292   gcc_assert (node->aux == NULL);
2293
2294   node->aux = (void *) node;
2295
2296   for (e = node->callees; e; e = e->next_callee)
2297     {
2298       struct cgraph_node *orig_callee;
2299       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2300
2301       /* We've hit cycle?  It is time to give up.  */
2302       if (callee->aux)
2303         {
2304           if (dump_enabled_p ())
2305             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2306                              "Not inlining %C into %C to avoid cycle.\n",
2307                              callee, e->caller);
2308           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2309             e->inline_failed = CIF_RECURSIVE_INLINING;
2310           continue;
2311         }
2312
2313       /* When the edge is already inlined, we just need to recurse into
2314          it in order to fully flatten the leaves.  */
2315       if (!e->inline_failed)
2316         {
2317           flatten_function (callee, early, false);
2318           continue;
2319         }
2320
2321       /* Flatten attribute needs to be processed during late inlining. For
2322          extra code quality we however do flattening during early optimization,
2323          too.  */
2324       if (!early
2325           ? !can_inline_edge_p (e, true)
2326             && !can_inline_edge_by_limits_p (e, true)
2327           : !can_early_inline_edge_p (e))
2328         continue;
2329
2330       if (e->recursive_p ())
2331         {
2332           if (dump_enabled_p ())
2333             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2334                              "Not inlining: recursive call.\n");
2335           continue;
2336         }
2337
2338       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2339           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2340         {
2341           if (dump_enabled_p ())
2342             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2343                              "Not inlining: SSA form does not match.\n");
2344           continue;
2345         }
2346
2347       /* Inline the edge and flatten the inline clone.  Avoid
2348          recursing through the original node if the node was cloned.  */
2349       if (dump_enabled_p ())
2350         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2351                          " Inlining %C into %C.\n",
2352                          callee, e->caller);
2353       orig_callee = callee;
2354       inline_call (e, true, NULL, NULL, false);
2355       if (e->callee != orig_callee)
2356         orig_callee->aux = (void *) node;
2357       flatten_function (e->callee, early, false);
2358       if (e->callee != orig_callee)
2359         orig_callee->aux = NULL;
2360     }
2361
2362   node->aux = NULL;
2363   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2364   if (update && opt_for_fn (where->decl, optimize))
2365     ipa_update_overall_fn_summary (where);
2366 }
2367
2368 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2369    DATA points to number of calls originally found so we avoid infinite
2370    recursion.  */
2371
2372 static bool
2373 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2374                          hash_set<cgraph_node *> *callers)
2375 {
2376   int *num_calls = (int *)data;
2377   bool callee_removed = false;
2378
2379   while (node->callers && !node->inlined_to)
2380     {
2381       struct cgraph_node *caller = node->callers->caller;
2382
2383       if (!can_inline_edge_p (node->callers, true)
2384           || !can_inline_edge_by_limits_p (node->callers, true)
2385           || node->callers->recursive_p ())
2386         {
2387           if (dump_file)
2388             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2389           *num_calls = 0;
2390           return false;
2391         }
2392
2393       if (dump_file)
2394         {
2395           cgraph_node *ultimate = node->ultimate_alias_target ();
2396           fprintf (dump_file,
2397                    "\nInlining %s size %i.\n",
2398                    ultimate->dump_name (),
2399                    ipa_size_summaries->get (ultimate)->size);
2400           fprintf (dump_file,
2401                    " Called once from %s %i insns.\n",
2402                    node->callers->caller->dump_name (),
2403                    ipa_size_summaries->get (node->callers->caller)->size);
2404         }
2405
2406       /* Remember which callers we inlined to, delaying updating the
2407          overall summary.  */
2408       callers->add (node->callers->caller);
2409       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2410       if (dump_file)
2411         fprintf (dump_file,
2412                  " Inlined into %s which now has %i size\n",
2413                  caller->dump_name (),
2414                  ipa_size_summaries->get (caller)->size);
2415       if (!(*num_calls)--)
2416         {
2417           if (dump_file)
2418             fprintf (dump_file, "New calls found; giving up.\n");
2419           return callee_removed;
2420         }
2421       if (callee_removed)
2422         return true;
2423     }
2424   return false;
2425 }
2426
2427 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2428    update.  */
2429
2430 static bool
2431 inline_to_all_callers (struct cgraph_node *node, void *data)
2432 {
2433   hash_set<cgraph_node *> callers;
2434   bool res = inline_to_all_callers_1 (node, data, &callers);
2435   /* Perform the delayed update of the overall summary of all callers
2436      processed.  This avoids quadratic behavior in the cases where
2437      we have a lot of calls to the same function.  */
2438   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2439        i != callers.end (); ++i)
2440     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2441   return res;
2442 }
2443
2444 /* Output overall time estimate.  */
2445 static void
2446 dump_overall_stats (void)
2447 {
2448   sreal sum_weighted = 0, sum = 0;
2449   struct cgraph_node *node;
2450
2451   FOR_EACH_DEFINED_FUNCTION (node)
2452     if (!node->inlined_to
2453         && !node->alias)
2454       {
2455         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2456         if (s != NULL)
2457           {
2458           sum += s->time;
2459           if (node->count.ipa ().initialized_p ())
2460             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2461           }
2462       }
2463   fprintf (dump_file, "Overall time estimate: "
2464            "%f weighted by profile: "
2465            "%f\n", sum.to_double (), sum_weighted.to_double ());
2466 }
2467
2468 /* Output some useful stats about inlining.  */
2469
2470 static void
2471 dump_inline_stats (void)
2472 {
2473   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2474   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2475   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2476   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2477   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2478   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2479   int64_t reason[CIF_N_REASONS][2];
2480   sreal reason_freq[CIF_N_REASONS];
2481   int i;
2482   struct cgraph_node *node;
2483
2484   memset (reason, 0, sizeof (reason));
2485   for (i=0; i < CIF_N_REASONS; i++)
2486     reason_freq[i] = 0;
2487   FOR_EACH_DEFINED_FUNCTION (node)
2488   {
2489     struct cgraph_edge *e;
2490     for (e = node->callees; e; e = e->next_callee)
2491       {
2492         if (e->inline_failed)
2493           {
2494             if (e->count.ipa ().initialized_p ())
2495               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2496             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2497             reason[(int) e->inline_failed][1] ++;
2498             if (DECL_VIRTUAL_P (e->callee->decl)
2499                 && e->count.ipa ().initialized_p ())
2500               {
2501                 if (e->indirect_inlining_edge)
2502                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2503                 else
2504                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2505               }
2506             else if (e->count.ipa ().initialized_p ())
2507               {
2508                 if (e->indirect_inlining_edge)
2509                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2510                 else
2511                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2512               }
2513           }
2514         else if (e->count.ipa ().initialized_p ())
2515           {
2516             if (e->speculative)
2517               {
2518                 if (DECL_VIRTUAL_P (e->callee->decl))
2519                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2520                 else
2521                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2522               }
2523             else if (DECL_VIRTUAL_P (e->callee->decl))
2524               {
2525                 if (e->indirect_inlining_edge)
2526                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2527                 else
2528                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2529               }
2530             else
2531               {
2532                 if (e->indirect_inlining_edge)
2533                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2534                 else
2535                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2536               }
2537           }
2538       }
2539     for (e = node->indirect_calls; e; e = e->next_callee)
2540       if (e->indirect_info->polymorphic
2541           & e->count.ipa ().initialized_p ())
2542         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2543       else if (e->count.ipa ().initialized_p ())
2544         indirect_cnt += e->count.ipa ().to_gcov_type ();
2545   }
2546   if (max_count.initialized_p ())
2547     {
2548       fprintf (dump_file,
2549                "Inlined %" PRId64 " + speculative "
2550                "%" PRId64 " + speculative polymorphic "
2551                "%" PRId64 " + previously indirect "
2552                "%" PRId64 " + virtual "
2553                "%" PRId64 " + virtual and previously indirect "
2554                "%" PRId64 "\n" "Not inlined "
2555                "%" PRId64 " + previously indirect "
2556                "%" PRId64 " + virtual "
2557                "%" PRId64 " + virtual and previously indirect "
2558                "%" PRId64 " + still indirect "
2559                "%" PRId64 " + still indirect polymorphic "
2560                "%" PRId64 "\n", inlined_cnt,
2561                inlined_speculative, inlined_speculative_ply,
2562                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2563                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2564                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2565       fprintf (dump_file, "Removed speculations ");
2566       spec_rem.dump (dump_file);
2567       fprintf (dump_file, "\n");
2568     }
2569   dump_overall_stats ();
2570   fprintf (dump_file, "\nWhy inlining failed?\n");
2571   for (i = 0; i < CIF_N_REASONS; i++)
2572     if (reason[i][1])
2573       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2574                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2575                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2576 }
2577
2578 /* Called when node is removed.  */
2579
2580 static void
2581 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2582 {
2583   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2584     return;
2585
2586   hash_set<struct cgraph_node *> *removed
2587     = (hash_set<struct cgraph_node *> *) data;
2588   removed->add (node);
2589 }
2590
2591 /* Decide on the inlining.  We do so in the topological order to avoid
2592    expenses on updating data structures.  */
2593
2594 static unsigned int
2595 ipa_inline (void)
2596 {
2597   struct cgraph_node *node;
2598   int nnodes;
2599   struct cgraph_node **order;
2600   int i, j;
2601   int cold;
2602   bool remove_functions = false;
2603
2604   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2605
2606   if (dump_file)
2607     ipa_dump_fn_summaries (dump_file);
2608
2609   nnodes = ipa_reverse_postorder (order);
2610   spec_rem = profile_count::zero ();
2611
2612   FOR_EACH_FUNCTION (node)
2613     {
2614       node->aux = 0;
2615
2616       /* Recompute the default reasons for inlining because they may have
2617          changed during merging.  */
2618       if (in_lto_p)
2619         {
2620           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2621             {
2622               gcc_assert (e->inline_failed);
2623               initialize_inline_failed (e);
2624             }
2625           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2626             initialize_inline_failed (e);
2627         }
2628     }
2629
2630   if (dump_file)
2631     fprintf (dump_file, "\nFlattening functions:\n");
2632
2633   /* First shrink order array, so that it only contains nodes with
2634      flatten attribute.  */
2635   for (i = nnodes - 1, j = i; i >= 0; i--)
2636     {
2637       node = order[i];
2638       if (node->definition
2639           && lookup_attribute ("flatten",
2640                                DECL_ATTRIBUTES (node->decl)) != NULL)
2641         order[j--] = order[i];
2642     }
2643
2644   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2645      nodes with flatten attribute.  If there is more than one such
2646      node, we need to register a node removal hook, as flatten_function
2647      could remove other nodes with flatten attribute.  See PR82801.  */
2648   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2649   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2650   if (j < nnodes - 2)
2651     {
2652       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2653       node_removal_hook_holder
2654         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2655                                            flatten_removed_nodes);
2656     }
2657
2658   /* In the first pass handle functions to be flattened.  Do this with
2659      a priority so none of our later choices will make this impossible.  */
2660   for (i = nnodes - 1; i > j; i--)
2661     {
2662       node = order[i];
2663       if (flatten_removed_nodes
2664           && flatten_removed_nodes->contains (node))
2665         continue;
2666
2667       /* Handle nodes to be flattened.
2668          Ideally when processing callees we stop inlining at the
2669          entry of cycles, possibly cloning that entry point and
2670          try to flatten itself turning it into a self-recursive
2671          function.  */
2672       if (dump_file)
2673         fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2674       flatten_function (node, false, true);
2675     }
2676
2677   if (j < nnodes - 2)
2678     {
2679       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2680       delete flatten_removed_nodes;
2681     }
2682   free (order);
2683
2684   if (dump_file)
2685     dump_overall_stats ();
2686
2687   inline_small_functions ();
2688
2689   gcc_assert (symtab->state == IPA_SSA);
2690   symtab->state = IPA_SSA_AFTER_INLINING;
2691   /* Do first after-inlining removal.  We want to remove all "stale" extern
2692      inline functions and virtual functions so we really know what is called
2693      once.  */
2694   symtab->remove_unreachable_nodes (dump_file);
2695
2696   /* Inline functions with a property that after inlining into all callers the
2697      code size will shrink because the out-of-line copy is eliminated.
2698      We do this regardless on the callee size as long as function growth limits
2699      are met.  */
2700   if (dump_file)
2701     fprintf (dump_file,
2702              "\nDeciding on functions to be inlined into all callers and "
2703              "removing useless speculations:\n");
2704
2705   /* Inlining one function called once has good chance of preventing
2706      inlining other function into the same callee.  Ideally we should
2707      work in priority order, but probably inlining hot functions first
2708      is good cut without the extra pain of maintaining the queue.
2709
2710      ??? this is not really fitting the bill perfectly: inlining function
2711      into callee often leads to better optimization of callee due to
2712      increased context for optimization.
2713      For example if main() function calls a function that outputs help
2714      and then function that does the main optimization, we should inline
2715      the second with priority even if both calls are cold by themselves.
2716
2717      We probably want to implement new predicate replacing our use of
2718      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2719      to be hot.  */
2720   for (cold = 0; cold <= 1; cold ++)
2721     {
2722       FOR_EACH_DEFINED_FUNCTION (node)
2723         {
2724           struct cgraph_edge *edge, *next;
2725           bool update=false;
2726
2727           if (!opt_for_fn (node->decl, optimize)
2728               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2729             continue;
2730
2731           for (edge = node->callees; edge; edge = next)
2732             {
2733               next = edge->next_callee;
2734               if (edge->speculative && !speculation_useful_p (edge, false))
2735                 {
2736                   if (edge->count.ipa ().initialized_p ())
2737                     spec_rem += edge->count.ipa ();
2738                   cgraph_edge::resolve_speculation (edge);
2739                   update = true;
2740                   remove_functions = true;
2741                 }
2742             }
2743           if (update)
2744             {
2745               struct cgraph_node *where = node->inlined_to
2746                                           ? node->inlined_to : node;
2747               reset_edge_caches (where);
2748               ipa_update_overall_fn_summary (where);
2749             }
2750           if (want_inline_function_to_all_callers_p (node, cold))
2751             {
2752               int num_calls = 0;
2753               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2754                                                  true);
2755               while (node->call_for_symbol_and_aliases
2756                        (inline_to_all_callers, &num_calls, true))
2757                 ;
2758               remove_functions = true;
2759             }
2760         }
2761     }
2762
2763   /* Free ipa-prop structures if they are no longer needed.  */
2764   ipa_free_all_structures_after_iinln ();
2765
2766   if (dump_enabled_p ())
2767     dump_printf (MSG_NOTE,
2768                  "\nInlined %i calls, eliminated %i functions\n\n",
2769                  ncalls_inlined, nfunctions_inlined);
2770   if (dump_file)
2771     dump_inline_stats ();
2772
2773   if (dump_file)
2774     ipa_dump_fn_summaries (dump_file);
2775   return remove_functions ? TODO_remove_functions : 0;
2776 }
2777
2778 /* Inline always-inline function calls in NODE.  */
2779
2780 static bool
2781 inline_always_inline_functions (struct cgraph_node *node)
2782 {
2783   struct cgraph_edge *e;
2784   bool inlined = false;
2785
2786   for (e = node->callees; e; e = e->next_callee)
2787     {
2788       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2789       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2790         continue;
2791
2792       if (e->recursive_p ())
2793         {
2794           if (dump_enabled_p ())
2795             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2796                              "  Not inlining recursive call to %C.\n",
2797                              e->callee);
2798           e->inline_failed = CIF_RECURSIVE_INLINING;
2799           continue;
2800         }
2801
2802       if (!can_early_inline_edge_p (e))
2803         {
2804           /* Set inlined to true if the callee is marked "always_inline" but
2805              is not inlinable.  This will allow flagging an error later in
2806              expand_call_inline in tree-inline.c.  */
2807           if (lookup_attribute ("always_inline",
2808                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2809             inlined = true;
2810           continue;
2811         }
2812
2813       if (dump_enabled_p ())
2814         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2815                          "  Inlining %C into %C (always_inline).\n",
2816                          e->callee, e->caller);
2817       inline_call (e, true, NULL, NULL, false);
2818       inlined = true;
2819     }
2820   if (inlined)
2821     ipa_update_overall_fn_summary (node);
2822
2823   return inlined;
2824 }
2825
2826 /* Decide on the inlining.  We do so in the topological order to avoid
2827    expenses on updating data structures.  */
2828
2829 static bool
2830 early_inline_small_functions (struct cgraph_node *node)
2831 {
2832   struct cgraph_edge *e;
2833   bool inlined = false;
2834
2835   for (e = node->callees; e; e = e->next_callee)
2836     {
2837       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2838
2839       /* We can encounter not-yet-analyzed function during
2840          early inlining on callgraphs with strongly
2841          connected components.  */
2842       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2843       if (s == NULL || !s->inlinable || !e->inline_failed)
2844         continue;
2845
2846       /* Do not consider functions not declared inline.  */
2847       if (!DECL_DECLARED_INLINE_P (callee->decl)
2848           && !opt_for_fn (node->decl, flag_inline_small_functions)
2849           && !opt_for_fn (node->decl, flag_inline_functions))
2850         continue;
2851
2852       if (dump_enabled_p ())
2853         dump_printf_loc (MSG_NOTE, e->call_stmt,
2854                          "Considering inline candidate %C.\n",
2855                          callee);
2856
2857       if (!can_early_inline_edge_p (e))
2858         continue;
2859
2860       if (e->recursive_p ())
2861         {
2862           if (dump_enabled_p ())
2863             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2864                              "  Not inlining: recursive call.\n");
2865           continue;
2866         }
2867
2868       if (!want_early_inline_function_p (e))
2869         continue;
2870
2871       if (dump_enabled_p ())
2872         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2873                          " Inlining %C into %C.\n",
2874                          callee, e->caller);
2875       inline_call (e, true, NULL, NULL, false);
2876       inlined = true;
2877     }
2878
2879   if (inlined)
2880     ipa_update_overall_fn_summary (node);
2881
2882   return inlined;
2883 }
2884
2885 unsigned int
2886 early_inliner (function *fun)
2887 {
2888   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2889   struct cgraph_edge *edge;
2890   unsigned int todo = 0;
2891   int iterations = 0;
2892   bool inlined = false;
2893
2894   if (seen_error ())
2895     return 0;
2896
2897   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2898      happens when some pass decides to construct new function and
2899      cgraph_add_new_function calls lowering passes and early optimization on
2900      it.  This may confuse ourself when early inliner decide to inline call to
2901      function clone, because function clones don't have parameter list in
2902      ipa-prop matching their signature.  */
2903   if (ipa_node_params_sum)
2904     return 0;
2905
2906   if (flag_checking)
2907     node->verify ();
2908   node->remove_all_references ();
2909
2910   /* Even when not optimizing or not inlining inline always-inline
2911      functions.  */
2912   inlined = inline_always_inline_functions (node);
2913
2914   if (!optimize
2915       || flag_no_inline
2916       || !flag_early_inlining
2917       /* Never inline regular functions into always-inline functions
2918          during incremental inlining.  This sucks as functions calling
2919          always inline functions will get less optimized, but at the
2920          same time inlining of functions calling always inline
2921          function into an always inline function might introduce
2922          cycles of edges to be always inlined in the callgraph.
2923
2924          We might want to be smarter and just avoid this type of inlining.  */
2925       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2926           && lookup_attribute ("always_inline",
2927                                DECL_ATTRIBUTES (node->decl))))
2928     ;
2929   else if (lookup_attribute ("flatten",
2930                              DECL_ATTRIBUTES (node->decl)) != NULL)
2931     {
2932       /* When the function is marked to be flattened, recursively inline
2933          all calls in it.  */
2934       if (dump_enabled_p ())
2935         dump_printf (MSG_OPTIMIZED_LOCATIONS,
2936                      "Flattening %C\n", node);
2937       flatten_function (node, true, true);
2938       inlined = true;
2939     }
2940   else
2941     {
2942       /* If some always_inline functions was inlined, apply the changes.
2943          This way we will not account always inline into growth limits and
2944          moreover we will inline calls from always inlines that we skipped
2945          previously because of conditional above.  */
2946       if (inlined)
2947         {
2948           timevar_push (TV_INTEGRATION);
2949           todo |= optimize_inline_calls (current_function_decl);
2950           /* optimize_inline_calls call above might have introduced new
2951              statements that don't have inline parameters computed.  */
2952           for (edge = node->callees; edge; edge = edge->next_callee)
2953             {
2954               /* We can enounter not-yet-analyzed function during
2955                  early inlining on callgraphs with strongly
2956                  connected components.  */
2957               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2958               es->call_stmt_size
2959                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2960               es->call_stmt_time
2961                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2962             }
2963           ipa_update_overall_fn_summary (node);
2964           inlined = false;
2965           timevar_pop (TV_INTEGRATION);
2966         }
2967       /* We iterate incremental inlining to get trivial cases of indirect
2968          inlining.  */
2969       while (iterations < opt_for_fn (node->decl,
2970                                       param_early_inliner_max_iterations)
2971              && early_inline_small_functions (node))
2972         {
2973           timevar_push (TV_INTEGRATION);
2974           todo |= optimize_inline_calls (current_function_decl);
2975
2976           /* Technically we ought to recompute inline parameters so the new
2977              iteration of early inliner works as expected.  We however have
2978              values approximately right and thus we only need to update edge
2979              info that might be cleared out for newly discovered edges.  */
2980           for (edge = node->callees; edge; edge = edge->next_callee)
2981             {
2982               /* We have no summary for new bound store calls yet.  */
2983               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2984               es->call_stmt_size
2985                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2986               es->call_stmt_time
2987                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2988             }
2989           if (iterations < opt_for_fn (node->decl,
2990                                        param_early_inliner_max_iterations) - 1)
2991             ipa_update_overall_fn_summary (node);
2992           timevar_pop (TV_INTEGRATION);
2993           iterations++;
2994           inlined = false;
2995         }
2996       if (dump_file)
2997         fprintf (dump_file, "Iterations: %i\n", iterations);
2998     }
2999
3000   if (inlined)
3001     {
3002       timevar_push (TV_INTEGRATION);
3003       todo |= optimize_inline_calls (current_function_decl);
3004       timevar_pop (TV_INTEGRATION);
3005     }
3006
3007   fun->always_inline_functions_inlined = true;
3008
3009   return todo;
3010 }
3011
3012 /* Do inlining of small functions.  Doing so early helps profiling and other
3013    passes to be somewhat more effective and avoids some code duplication in
3014    later real inlining pass for testcases with very many function calls.  */
3015
3016 namespace {
3017
3018 const pass_data pass_data_early_inline =
3019 {
3020   GIMPLE_PASS, /* type */
3021   "einline", /* name */
3022   OPTGROUP_INLINE, /* optinfo_flags */
3023   TV_EARLY_INLINING, /* tv_id */
3024   PROP_ssa, /* properties_required */
3025   0, /* properties_provided */
3026   0, /* properties_destroyed */
3027   0, /* todo_flags_start */
3028   0, /* todo_flags_finish */
3029 };
3030
3031 class pass_early_inline : public gimple_opt_pass
3032 {
3033 public:
3034   pass_early_inline (gcc::context *ctxt)
3035     : gimple_opt_pass (pass_data_early_inline, ctxt)
3036   {}
3037
3038   /* opt_pass methods: */
3039   virtual unsigned int execute (function *);
3040
3041 }; // class pass_early_inline
3042
3043 unsigned int
3044 pass_early_inline::execute (function *fun)
3045 {
3046   return early_inliner (fun);
3047 }
3048
3049 } // anon namespace
3050
3051 gimple_opt_pass *
3052 make_pass_early_inline (gcc::context *ctxt)
3053 {
3054   return new pass_early_inline (ctxt);
3055 }
3056
3057 namespace {
3058
3059 const pass_data pass_data_ipa_inline =
3060 {
3061   IPA_PASS, /* type */
3062   "inline", /* name */
3063   OPTGROUP_INLINE, /* optinfo_flags */
3064   TV_IPA_INLINING, /* tv_id */
3065   0, /* properties_required */
3066   0, /* properties_provided */
3067   0, /* properties_destroyed */
3068   0, /* todo_flags_start */
3069   ( TODO_dump_symtab ), /* todo_flags_finish */
3070 };
3071
3072 class pass_ipa_inline : public ipa_opt_pass_d
3073 {
3074 public:
3075   pass_ipa_inline (gcc::context *ctxt)
3076     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3077                       NULL, /* generate_summary */
3078                       NULL, /* write_summary */
3079                       NULL, /* read_summary */
3080                       NULL, /* write_optimization_summary */
3081                       NULL, /* read_optimization_summary */
3082                       NULL, /* stmt_fixup */
3083                       0, /* function_transform_todo_flags_start */
3084                       inline_transform, /* function_transform */
3085                       NULL) /* variable_transform */
3086   {}
3087
3088   /* opt_pass methods: */
3089   virtual unsigned int execute (function *) { return ipa_inline (); }
3090
3091 }; // class pass_ipa_inline
3092
3093 } // anon namespace
3094
3095 ipa_opt_pass_d *
3096 make_pass_ipa_inline (gcc::context *ctxt)
3097 {
3098   return new pass_ipa_inline (ctxt);
3099 }