gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
   27       can_inline_edge_p checks that a particular inlining is allowed
   28       by the limits specified by the user (allowed function growth, stack
   29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
   32       as functions called once or when inlining reduces code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
   49          callees of the function seen by the early inliner were already optimized
   50          and the results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
   53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
   57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass cannot really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
   71          knowledge. It performs the following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
   75          inlining them in this order as long as inline limits allow doing so.
  76
   77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "profile.h"
 109 #include "symbol-summary.h"
 110 #include "tree-vrp.h"
 111 #include "ipa-prop.h"
 112 #include "ipa-fnsummary.h"
 113 #include "ipa-inline.h"
 114 #include "ipa-utils.h"
 115 #include "sreal.h"
 116 #include "auto-profile.h"
 117 #include "builtins.h"
 118 #include "fibonacci_heap.h"
 119 #include "stringpool.h"
 120 #include "attribs.h"
 121 #include "asan.h"
 122
  123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
  124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
      /* NOTE(review): both typedefs instantiate the Fibonacci heap templates
         with sreal and cgraph_edge; presumably the sreal is the priority key
         (edge badness) and cgraph_edge the payload - confirm against
         fibonacci_heap.h.  */
  125
  126 /* Statistics we collect about the inlining algorithm.  */
  127 static int overall_size;
  128 static profile_count max_count;
  129 static profile_count spec_rem;
 130
  131 /* Return false when inlining edge E would lead to violating
  132    limits on function unit growth or stack usage growth.
         In that case E->inline_failed is set to
         CIF_LARGE_FUNCTION_GROWTH_LIMIT or CIF_LARGE_STACK_FRAME_GROWTH_LIMIT.
         When true is returned E->inline_failed is not modified.
  133
  134    The relative function body growth limit is present generally
  135    to avoid problems with non-linear behavior of the compiler.
  136    To allow inlining huge functions into tiny wrapper, the limit
  137    is always based on the bigger of the two functions considered.
  138
  139    For stack growth limits we always base the growth in stack usage
  140    of the callers.  We want to prevent applications from segfaulting
  141    on stack overflow when functions with huge stack frames get
  142    inlined. */
  143
  144 static bool
  145 caller_growth_limits (struct cgraph_edge *e)
  146 {
  147   struct cgraph_node *to = e->caller;
  148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
  149   int newsize;
  150   int limit = 0;
  151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
        /* Size summary of the immediate caller, captured before TO is walked
           outwards by the loop below.  */
  152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
  153
  154   /* Look for function e->caller is inlined to.  While doing
  155      so work out the largest function body on the way.  As
  156      described above, we want to base our function growth
  157      limits based on that.  Not on the self size of the
  158      outer function, not on the self size of inline code
  159      we immediately inline to.  This is the most relaxed
  160      interpretation of the rule "do not grow large functions
  161      too much in order to prevent compiler from exploding".  */
  162   while (true)
  163     {
  164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
  165       if (limit < size_info->self_size)
  166         limit = size_info->self_size;
  167       if (stack_size_limit < size_info->estimated_self_stack_size)
  168         stack_size_limit = size_info->estimated_self_stack_size;
  169       if (to->inlined_to)
  170         to = to->callers->caller;
  171       else
  172         break;
  173     }
  174
        /* TO is now the outermost function, i.e. the one with no inlined_to;
           WHAT is the function whose body would be inlined.  */
  175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
  177
  178   if (limit < what_size_info->self_size)
  179     limit = what_size_info->self_size;
  180
        /* Let the largest body seen grow by param_large_function_growth
           percent.  */
  181   limit += limit * param_large_function_growth / 100;
  182
  183   /* Check the size after inlining against the function limits.  But allow
  184      the function to shrink if it went over the limits by forced inlining.  */
  185   newsize = estimate_size_after_inlining (to, e);
  186   if (newsize >= ipa_size_summaries->get (what)->size
  187       && newsize > param_large_function_insns
  188       && newsize > limit)
  189     {
  190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
  191       return false;
  192     }
  193
        /* Callees with zero estimated stack size skip the stack checks.  */
  194   if (!what_info->estimated_stack_size)
  195     return true;
  196
  197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
  198      due to large i/o datastructures used by the Fortran front-end.
  199      We ought to ignore this limit when we know that the edge is executed
  200      on every invocation of the caller (i.e. its call statement dominates
  201      exit block).  We do not track this information, yet.  */
  202   stack_size_limit += ((gcov_type)stack_size_limit
  203                        * param_stack_frame_growth / 100);
  204
  205   inlined_stack = (ipa_get_stack_frame_offset (to)
  206                    + outer_info->estimated_self_stack_size
  207                    + what_info->estimated_stack_size);
  208   /* Check new stack consumption with stack consumption at the place
  209      stack is used.  */
  210   if (inlined_stack > stack_size_limit
  211       /* If function already has large stack usage from sibling
  212          inline call, we can inline, too.
  213          This bit overoptimistically assume that we are good at stack
  214          packing.  */
  215       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
  216       && inlined_stack > param_large_stack_frame)
  217     {
  218       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
  219       return false;
  220     }
  221   return true;
  222 }
 223
  224 /* Dump info about why inlining of edge E has failed.
         Nothing is emitted unless dump_enabled_p ().  The message names the
         caller, the callee and the string for E->inline_failed.  For target
         option and optimization mismatches the LTO object file names are
         added when both sides have lto_file_data, and the option diff is
         printed when dump_file is set.  */
  225
  226 static void
  227 report_inline_failed_reason (struct cgraph_edge *e)
  228 {
  229   if (dump_enabled_p ())
  230     {
  231       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  232                        "  not inlinable: %C -> %C, %s\n",
  233                        e->caller, e->callee,
  234                        cgraph_inline_failed_string (e->inline_failed));
  235       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
  236            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
  237           && e->caller->lto_file_data
  238           && e->callee->ultimate_alias_target ()->lto_file_data)
  239         {
  240           dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  241                            "  LTO objects: %s, %s\n",
  242                            e->caller->lto_file_data->file_name,
  243                            e->callee->ultimate_alias_target ()->lto_file_data->file_name);
  244         }
            /* The option diffs below go to dump_file only, so they are guarded
               by dump_file rather than by dump_enabled_p alone.  */
  245       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
  246         if (dump_file)
  247           cl_target_option_print_diff
  248             (dump_file, 2, target_opts_for_fn (e->caller->decl),
  249              target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
  250       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
  251         if (dump_file)
  252           cl_optimization_print_diff
  253             (dump_file, 2, opts_for_fn (e->caller->decl),
  254              opts_for_fn (e->callee->ultimate_alias_target ()->decl));
  255     }
  256 }
 257
  258  /* Decide whether sanitizer-related attributes allow inlining.
          CALLER and CALLEE are the function decls of both sides of the edge.
          Return true when either decl is NULL, or when CALLEE is
          always_inline and SANITIZE_ADDRESS is not enabled for CALLER.
          Otherwise return true only if SANITIZE_ADDRESS,
          SANITIZE_POINTER_COMPARE and SANITIZE_POINTER_SUBTRACT each have the
          same setting for CALLER and CALLEE.  */
  259
  260 static bool
  261 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
  262 {
  263   if (!caller || !callee)
  264     return true;
  265
  266   /* Allow inlining always_inline functions into no_sanitize_address
  267      functions.  */
  268   if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
  269       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
  270     return true;
  271
        /* Every one of the three sanitizer settings has to agree.  */
  272   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
  273            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
  274           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
  275               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
  276           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
  277               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
  278 }
 279
  280 /* Used for flags where it is safe to inline when caller's value is
  281    greater than callee's.
         All three macros below expand to an expression that is true when the
         flag prevents inlining.  They expect variables named caller and
         callee (with a decl member) at the point of use; check_maybe_up and
         check_maybe_down also expect a boolean always_inline.  When
         always_inline is false any difference in the flag counts as a
         mismatch.  */
  282 #define check_maybe_up(flag) \
  283       (opts_for_fn (caller->decl)->x_##flag             \
  284        != opts_for_fn (callee->decl)->x_##flag          \
  285        && (!always_inline                               \
  286            || opts_for_fn (caller->decl)->x_##flag      \
  287               < opts_for_fn (callee->decl)->x_##flag))
  288 /* Used for flags where it is safe to inline when caller's value is
  289    smaller than callee's.  */
  290 #define check_maybe_down(flag) \
  291       (opts_for_fn (caller->decl)->x_##flag             \
  292        != opts_for_fn (callee->decl)->x_##flag          \
  293        && (!always_inline                               \
  294            || opts_for_fn (caller->decl)->x_##flag      \
  295               > opts_for_fn (callee->decl)->x_##flag))
  296 /* Used for flags where exact match is needed for correctness.  */
  297 #define check_match(flag) \
  298       (opts_for_fn (caller->decl)->x_##flag             \
  299        != opts_for_fn (callee->decl)->x_##flag)
 300
  301 /* Decide if we can inline the edge and possibly update
  302    inline_failed reason.
  303    We check whether inlining is possible at all and whether
  304    caller growth limits allow doing so.
         NOTE(review): no growth limit is consulted in this function; in this
         file caller_growth_limits is called from can_inline_edge_by_limits_p.
  305
  306    if REPORT is true, output reason to the dump file.

         if EARLY is true, the requirement that both caller and callee are
         compiled with optimization enabled is skipped.  */
  307
  308 static bool
  309 can_inline_edge_p (struct cgraph_edge *e, bool report,
  310                    bool early = false)
  311 {
  312   gcc_checking_assert (e->inline_failed);
  313
        /* A final error recorded on the edge is kept as is.  */
  314   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
  315     {
  316       if (report)
  317         report_inline_failed_reason (e);
  318       return false;
  319     }
  320
  321   bool inlinable = true;
  322   enum availability avail;
        /* CALLER is the function the edge's caller is inlined to, if any.  */
  323   cgraph_node *caller = (e->caller->inlined_to
  324                          ? e->caller->inlined_to : e->caller);
  325   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  326
  327   if (!callee->definition)
  328     {
  329       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  330       inlinable = false;
  331     }
        /* NOTE(review): the next test is a plain if rather than an else-if, so
           the chain below still runs after the check above and may replace
           CIF_BODY_NOT_AVAILABLE with another reason.  */
  332   if (!early && (!opt_for_fn (callee->decl, optimize)
  333                  || !opt_for_fn (caller->decl, optimize)))
  334     {
  335       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
  336       inlinable = false;
  337     }
  338   else if (callee->calls_comdat_local)
  339     {
  340       e->inline_failed = CIF_USES_COMDAT_LOCAL;
  341       inlinable = false;
  342     }
  343   else if (avail <= AVAIL_INTERPOSABLE)
  344     {
  345       e->inline_failed = CIF_OVERWRITABLE;
  346       inlinable = false;
  347     }
  348   /* All edges with call_stmt_cannot_inline_p should have inline_failed
  349      initialized to one of FINAL_ERROR reasons.  */
  350   else if (e->call_stmt_cannot_inline_p)
  351     gcc_unreachable ();
  352   /* Don't inline if the functions have different EH personalities.  */
  353   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
  354            && DECL_FUNCTION_PERSONALITY (callee->decl)
  355            && (DECL_FUNCTION_PERSONALITY (caller->decl)
  356                != DECL_FUNCTION_PERSONALITY (callee->decl)))
  357     {
  358       e->inline_failed = CIF_EH_PERSONALITY;
  359       inlinable = false;
  360     }
  361   /* TM pure functions should not be inlined into non-TM_pure
  362      functions.  */
  363   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
  364     {
  365       e->inline_failed = CIF_UNSPECIFIED;
  366       inlinable = false;
  367     }
  368   /* Check compatibility of target optimization options.  */
  369   else if (!targetm.target_option.can_inline_p (caller->decl,
  370                                                 callee->decl))
  371     {
  372       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
  373       inlinable = false;
  374     }
        /* The callee needs a function summary that marks it inlinable.  */
  375   else if (ipa_fn_summaries->get (callee) == NULL
  376            || !ipa_fn_summaries->get (callee)->inlinable)
  377     {
  378       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
  379       inlinable = false;
  380     }
  381   /* Don't inline a function with mismatched sanitization attributes. */
  382   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
  383     {
  384       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
  385       inlinable = false;
  386     }
  387   if (!inlinable && report)
  388     report_inline_failed_reason (e);
  389   return inlinable;
  390 }
 391
  392 /* Return inlining_insns_single limit for function N. If HINT is true
  393    scale up the bound.
         Functions whose optimize level is at least 3 use
         param_max_inline_insns_single and
         param_inline_heuristics_hint_percent; all others use the _o2
         variants of both parameters.  With HINT the limit is multiplied by
         the hint percentage and divided by 100.  */
  394
  395 static int
  396 inline_insns_single (cgraph_node *n, bool hint)
  397 {
  398   if (opt_for_fn (n->decl, optimize) >= 3)
  399     {
  400       if (hint)
  401         return param_max_inline_insns_single
  402                * param_inline_heuristics_hint_percent / 100;
  403       return param_max_inline_insns_single;
  404     }
  405   else
  406     {
  407       if (hint)
  408         return param_max_inline_insns_single_o2
  409                * param_inline_heuristics_hint_percent_o2 / 100;
  410       return param_max_inline_insns_single_o2;
  411     }
  412 }
 413
  414 /* Return inlining_insns_auto limit for function N. If HINT is true
  415    scale up the bound.
         Functions whose optimize level is at least 3 use
         param_max_inline_insns_auto and param_inline_heuristics_hint_percent;
         all others use the _o2 variants of both parameters.  With HINT the
         limit is multiplied by the hint percentage and divided by 100.  */
  416
  417 static int
  418 inline_insns_auto (cgraph_node *n, bool hint)
  419 {
  420   if (opt_for_fn (n->decl, optimize) >= 3)
  421     {
  422       if (hint)
  423         return param_max_inline_insns_auto
  424                * param_inline_heuristics_hint_percent / 100;
  425       return param_max_inline_insns_auto;
  426     }
  427   else
  428     {
  429       if (hint)
  430         return param_max_inline_insns_auto_o2
  431                * param_inline_heuristics_hint_percent_o2 / 100;
  432       return param_max_inline_insns_auto_o2;
  433     }
  434 }
 435
  436 /* Decide if we can inline the edge and possibly update
  437    inline_failed reason.
  438    We check whether inlining is possible at all and whether
  439    caller growth limits allow doing so.
  440
  441    if REPORT is true, output reason to the dump file.
  442
  443    if DISREGARD_LIMITS is true, ignore size limits.

         if EARLY is true, the optimization flag mismatch checks are skipped
         for always_inline callees and the flag_devirtualize check is skipped
         altogether.  */
  444
  445 static bool
  446 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
  447                              bool disregard_limits = false, bool early = false)
  448 {
  449   gcc_checking_assert (e->inline_failed);
  450
  451   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
  452     {
  453       if (report)
  454         report_inline_failed_reason (e);
  455       return false;
  456     }
  457
  458   bool inlinable = true;
  459   enum availability avail;
  460   cgraph_node *caller = (e->caller->inlined_to
  461                          ? e->caller->inlined_to : e->caller);
  462   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  463   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
        /* NOTE(review): CALLEE is tested for NULL here but dereferenced
           unconditionally in the checks below.  */
  464   tree callee_tree
  465     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  466   /* Check if caller growth allows the inlining.  */
  467   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  468       && !disregard_limits
  469       && !lookup_attribute ("flatten",
  470                  DECL_ATTRIBUTES (caller->decl))
  471       && !caller_growth_limits (e))
  472     inlinable = false;
  473   else if (callee->externally_visible
  474            && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  475            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
  476     {
  477       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
  478       inlinable = false;
  479     }
  480   /* Don't inline a function with a higher optimization level than the
  481      caller.  FIXME: this is really just tip of iceberg of handling
  482      optimization attribute.  */
  483   else if (caller_tree != callee_tree)
  484     {
            /* ALWAYS_INLINE is read by the check_maybe_up and check_maybe_down
               macros used below.  */
  485       bool always_inline =
  486              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
  487               && lookup_attribute ("always_inline",
  488                                    DECL_ATTRIBUTES (callee->decl)));
  489       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
  490       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  491
  492      /* Until GCC 4.9 we did not check the semantics-altering flags
  493         below and inlined across optimization boundaries.
  494         Enabling checks below breaks several packages by refusing
  495         to inline library always_inline functions. See PR65873.
  496         Disable the check for early inlining for now until better solution
  497         is found.  */
  498      if (always_inline && early)
  499         ;
  500       /* There are some options that change IL semantics which means
  501          we cannot inline in these cases for correctness reason.
  502          Not even for always_inline declared functions.  */
  503      else if (check_match (flag_wrapv)
  504               || check_match (flag_trapv)
  505               || check_match (flag_pcc_struct_return)
  506               /* When caller or callee does FP math, be sure FP codegen flags
  507                  compatible.  */
  508               || ((caller_info->fp_expressions && callee_info->fp_expressions)
  509                   && (check_maybe_up (flag_rounding_math)
  510                       || check_maybe_up (flag_trapping_math)
  511                       || check_maybe_down (flag_unsafe_math_optimizations)
  512                       || check_maybe_down (flag_finite_math_only)
  513                       || check_maybe_up (flag_signaling_nans)
  514                       || check_maybe_down (flag_cx_limited_range)
  515                       || check_maybe_up (flag_signed_zeros)
  516                       || check_maybe_down (flag_associative_math)
  517                       || check_maybe_down (flag_reciprocal_math)
  518                       || check_maybe_down (flag_fp_int_builtin_inexact)
  519                       /* Strictly speaking only when the callee contains function
  520                          calls that may end up setting errno.  */
  521                       || check_maybe_up (flag_errno_math)))
  522               /* We do not want to make code compiled with exceptions to be
  523                  brought into a non-EH function unless we know that the callee
  524                  does not throw.
  525                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
  526               || (check_maybe_up (flag_non_call_exceptions)
  527                   && DECL_FUNCTION_PERSONALITY (callee->decl))
  528               || (check_maybe_up (flag_exceptions)
  529                   && DECL_FUNCTION_PERSONALITY (callee->decl))
  530               /* When devirtualization is disabled for callee, it is not safe
  531                  to inline it as we possibly mangled the type info.
  532                  Allow early inlining of always inlines.  */
  533               || (!early && check_maybe_down (flag_devirtualize)))
  534         {
  535           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  536           inlinable = false;
  537         }
  538       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
  539       else if (always_inline)
  540         ;
  541       /* When user added an attribute to the callee honor it.  */
  542       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
  543                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
  544         {
  545           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  546           inlinable = false;
  547         }
  548       /* If explicit optimize attribute are not used, the mismatch is caused
  549          by different command line options used to build different units.
  550          Do not care about COMDAT functions - those are intended to be
  551          optimized with the optimization flags of module they are used in.
  552          Also do not care about mixing up size/speed optimization when
  553          DECL_DISREGARD_INLINE_LIMITS is set.  */
  554       else if ((callee->merged_comdat
  555                 && !lookup_attribute ("optimize",
  556                                       DECL_ATTRIBUTES (caller->decl)))
  557                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
  558         ;
  559       /* If mismatch is caused by merging two LTO units with different
  560          optimization flags we want to be bit nicer.  However never inline
  561          if one of functions is not optimized at all.  */
  562       else if (!opt_for_fn (callee->decl, optimize)
  563                || !opt_for_fn (caller->decl, optimize))
  564         {
  565           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  566           inlinable = false;
  567         }
  568       /* If callee is optimized for size and caller is not, allow inlining if
  569          code shrinks or we are in param_max_inline_insns_single limit and
  570          callee is inline (and thus likely an unified comdat).
  571          This will allow caller to run faster.  */
  572       else if (opt_for_fn (callee->decl, optimize_size)
  573                > opt_for_fn (caller->decl, optimize_size))
  574         {
  575           int growth = estimate_edge_growth (e);
  576           if (growth > param_max_inline_insns_size
  577               && (!DECL_DECLARED_INLINE_P (callee->decl)
  578                   && growth >= MAX (inline_insns_single (caller, false),
  579                                     inline_insns_auto (caller, false))))
  580             {
  581               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  582               inlinable = false;
  583             }
  584         }
  585       /* If callee is more aggressively optimized for performance than caller,
  586          we generally want to inline only cheap (runtime wise) functions.  */
  587       else if (opt_for_fn (callee->decl, optimize_size)
  588                < opt_for_fn (caller->decl, optimize_size)
  589                || (opt_for_fn (callee->decl, optimize)
  590                    > opt_for_fn (caller->decl, optimize)))
  591         {
                /* Reject when the estimated edge time reaches the call
                   statement time plus a fixed slack of 20.  */
  592           if (estimate_edge_time (e)
  593               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
  594             {
  595               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
  596               inlinable = false;
  597             }
  598         }
  599
  600     }
  601
  602   if (!inlinable && report)
  603     report_inline_failed_reason (e);
  604   return inlinable;
  605 }
 606
 607
  608 /* Return true if the edge E is inlinable during early inlining.
         Edges with a final error, callees without a gimple body and
         caller/callee pairs not both in SSA form are rejected first.  The
         remaining decision is delegated to can_inline_edge_p and
         can_inline_edge_by_limits_p, both called with REPORT and EARLY set
         to true.  E->inline_failed is set here only for the missing body
         case.  */
  609
  610 static bool
  611 can_early_inline_edge_p (struct cgraph_edge *e)
  612 {
  613   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  614   /* Early inliner might get called at WPA stage when IPA pass adds new
  615      function.  In this case we cannot really do any of early inlining
  616      because function bodies are missing.  */
  617   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
  618     return false;
  619   if (!gimple_has_body_p (callee->decl))
  620     {
  621       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  622       return false;
  623     }
  624   /* In early inliner some of callees may not be in SSA form yet
  625      (i.e. the callgraph is cyclic and we did not process
  626      the callee by early inliner, yet).  We don't have CIF code for this
  627      case; later we will re-do the decision in the real inliner.  */
  628   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
  629       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
  630     {
  631       if (dump_enabled_p ())
  632         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  633                          "  edge not inlinable: not in SSA form\n");
  634       return false;
  635     }
  636   if (!can_inline_edge_p (e, true, true)
  637       || !can_inline_edge_by_limits_p (e, true, false, true))
  638     return false;
  639   return true;
  640 }
 641
 642
  643 /* Return number of calls in N.  Ignore cheap builtins.
         The count is taken over the N->callees edge list; an edge is skipped
         when is_inexpensive_builtin accepts its callee's decl.  */
  644
  645 static int
  646 num_calls (struct cgraph_node *n)
  647 {
  648   struct cgraph_edge *e;
  649   int num = 0;
  650
  651   for (e = n->callees; e; e = e->next_callee)
  652     if (!is_inexpensive_builtin (e->callee->decl))
  653       num++;
  654   return num;
  655 }
 656
 657
  658 /* Return true if we are interested in inlining the callee of edge E
         during early inlining.
         Callees with DECL_DISREGARD_INLINE_LIMITS and, under
         flag_auto_profile, call sites accepted by
         afdo_callsite_hot_enough_for_early_inline are always wanted.
         Otherwise the estimated edge growth is compared with
         param_max_inline_insns_size and with the early-inlining-insns
         parameter selected by the caller's optimize level.
         E->inline_failed is updated only for the
         CIF_FUNCTION_NOT_INLINE_CANDIDATE case; the other rejections are
         only written to the dump.  */
  659
  660 static bool
  661 want_early_inline_function_p (struct cgraph_edge *e)
  662 {
  663   bool want_inline = true;
  664   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  665
  666   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
  667     ;
  668   /* For AutoFDO, we need to make sure that before profile summary, all
  669      hot paths' IR look exactly the same as profiled binary. As a result,
  670      in einliner, we will disregard size limit and inline those callsites
  671      that are:
  672        * inlined in the profiled binary, and
  673        * the cloned callee has enough samples to be considered "hot".  */
  674   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
  675     ;
  676   else if (!DECL_DECLARED_INLINE_P (callee->decl)
  677            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
  678     {
  679       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
  680       report_inline_failed_reason (e);
  681       want_inline = false;
  682     }
  683   else
  684     {
  685       int growth = estimate_edge_growth (e);
  686       int n;
            /* Callers at optimize level 3 or above use
               param_early_inlining_insns, all others the _o2 variant.  */
  687       int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3
  688                                  ? param_early_inlining_insns
  689                                  : param_early_inlining_insns_o2;
  690
  691
            /* Growth within param_max_inline_insns_size is always accepted.  */
  692       if (growth <= param_max_inline_insns_size)
  693         ;
  694       else if (!e->maybe_hot_p ())
  695         {
  696           if (dump_enabled_p ())
  697             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  698                              "  will not early inline: %C->%C, "
  699                              "call is cold and code would grow by %i\n",
  700                              e->caller, callee,
  701                              growth);
  702           want_inline = false;
  703         }
  704       else if (growth > early_inlining_insns)
  705         {
  706           if (dump_enabled_p ())
  707             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  708                              "  will not early inline: %C->%C, "
  709                              "growth %i exceeds --param early-inlining-insns%s\n",
  710                              e->caller, callee, growth,
  711                              opt_for_fn (e->caller->decl, optimize) >= 3
  712                              ? "" : "-O2");
  713           want_inline = false;
  714         }
            /* When the callee itself makes N calls (see num_calls), the growth
               is weighted by N + 1 before comparing with the limit.  */
  715       else if ((n = num_calls (callee)) != 0
  716                && growth * (n + 1) > early_inlining_insns)
  717         {
  718           if (dump_enabled_p ())
  719             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
  720                              "  will not early inline: %C->%C, "
  721                              "growth %i exceeds --param early-inlining-insns%s "
  722                              "divided by number of calls\n",
  723                              e->caller, callee, growth,
  724                              opt_for_fn (e->caller->decl, optimize) >= 3
  725                              ? "" : "-O2");
  726           want_inline = false;
  727         }
  728     }
  729   return want_inline;
  730 }
 731
 732 /* Return the time of EDGE's caller plus callee when EDGE is not inlined:
 733    UNINLINED_CALL_TIME * FREQ (>> 11 if FREQ <= 0) plus the caller's time.  */
 734
 735 inline sreal
 736 compute_uninlined_call_time (struct cgraph_edge *edge,
 737                              sreal uninlined_call_time,
 738                              sreal freq)
 739 {
 740   cgraph_node *caller = (edge->caller->inlined_to
 741                          ? edge->caller->inlined_to
 742                          : edge->caller);
 743
 744   if (freq > 0)
 745     uninlined_call_time *= freq;
 746   else
 747     uninlined_call_time = uninlined_call_time >> 11;
 748
 749   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 750   return uninlined_call_time + caller_time;
 751 }
 752
 753 /* Same as compute_uninlined_call_time but compute time when inlining
 754    does happen.  */
 755
 756 inline sreal
 757 compute_inlined_call_time (struct cgraph_edge *edge,
 758                            sreal time,
 759                            sreal freq)
 760 {
 761   cgraph_node *caller = (edge->caller->inlined_to
 762                          ? edge->caller->inlined_to
 763                          : edge->caller);
 764   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 765
 766   if (freq > 0)
 767     time *= freq;
 768   else
 769     time = time >> 11;
 770
 771   /* This calculation should match one in ipa-inline-analysis.c
 772      (estimate_edge_size_and_time).  */
 773   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 774   time += caller_time;
 775   if (time <= 0)
 776     time = ((sreal) 1) >> 8;
 777   gcc_checking_assert (time >= 0);
 778   return time;
 779 }
 780
 781 /* Return true if inlining E saves more than param_inline_min_speedup percent
 782    of the uninlined time (param_inline_min_speedup_o2 when below -O3).  */
 783
 784 static bool
 785 big_speedup_p (struct cgraph_edge *e)
 786 {
 787   sreal unspec_time;
 788   sreal spec_time = estimate_edge_time (e, &unspec_time);
 789   sreal freq = e->sreal_frequency ();
 790   sreal time = compute_uninlined_call_time (e, unspec_time, freq);
 791   sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
 792   cgraph_node *caller = (e->caller->inlined_to
 793                          ? e->caller->inlined_to
 794                          : e->caller);
 795   int limit = opt_for_fn (caller->decl, optimize) >= 3
 796               ? param_inline_min_speedup
 797               : param_inline_min_speedup_o2;
 798
 799   if ((time - inlined_time) * 100 > time * limit)
 800     return true;
 801   return false;
 802 }
 803
 804 /* Return true if we want to inline the small function called by edge E.
 805    When REPORT is true, report the reason for refusing to inline.  */
 806
 807 static bool
 808 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 809 {
 810   bool want_inline = true;
 811   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 812
 813   /* Allow this function to be called before can_inline_edge_p,
 814      since it's usually cheaper.  */
 815   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 816     want_inline = false;
 817   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 818     ;
 819   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 820            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 821     {
 822       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 823       want_inline = false;
 824     }
 825   /* Do fast and conservative check if the function can be good
 826      inline candidate.  */
 827   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 828            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 829            && ipa_fn_summaries->get (callee)->min_size
 830                 - ipa_call_summaries->get (e)->call_stmt_size
 831               > inline_insns_auto (e->caller, true))
 832     {
 833       if (opt_for_fn (e->caller->decl, optimize) >= 3)
 834         e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 835       else
 836         e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT;
 837       want_inline = false;
 838     }
 839   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 840             || e->count.ipa ().nonzero_p ())
 841            && ipa_fn_summaries->get (callee)->min_size
 842                 - ipa_call_summaries->get (e)->call_stmt_size
 843               > inline_insns_single (e->caller, true))
 844     {
 845       if (opt_for_fn (e->caller->decl, optimize) >= 3)
 846         e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 847                             ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 848                             : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 849       else
 850         e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 851                             ? CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT
 852                             : CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT);
 853       want_inline = false;
 854     }
 855   else
 856     {
 857       int growth = estimate_edge_growth (e);
 858       ipa_hints hints = estimate_edge_hints (e);
 859       bool apply_hints = (hints & (INLINE_HINT_indirect_call
 860                                    | INLINE_HINT_known_hot
 861                                    | INLINE_HINT_loop_iterations
 862                                    | INLINE_HINT_loop_stride));
 863
 864       if (growth <= param_max_inline_insns_size)
 865         ;
 866       /* Apply param_max_inline_insns_single limit.  Do not do so when
 867          hints suggest that inlining given function is very profitable.
 868          Avoid computation of big_speedup_p when not necessary to change
 869          outcome of decision.  */
 870       else if (DECL_DECLARED_INLINE_P (callee->decl)
 871                && growth >= inline_insns_single (e->caller, apply_hints)
 872                && (apply_hints
 873                    || growth >= inline_insns_single (e->caller, true)
 874                    || !big_speedup_p (e)))
 875         {
 876           if (opt_for_fn (e->caller->decl, optimize) >= 3)
 877             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 878           else
 879             e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT;
 880           want_inline = false;
 881         }
 882       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 883                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 884                && growth >= param_max_inline_insns_small)
 885         {
 886           /* growth_positive_p is expensive, always test it last.  */
 887           if (growth >= inline_insns_single (e->caller, false)
 888               || growth_positive_p (callee, e, growth))
 889             {
 890               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 891               want_inline = false;
 892             }
 893         }
 894       /* Apply param_max_inline_insns_auto limit for functions not declared
 895          inline.  Bypass the limit when speedup seems big.  */
 896       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 897                && growth >= inline_insns_auto (e->caller, apply_hints)
 898                && (apply_hints
 899                    || growth >= inline_insns_auto (e->caller, true)
 900                    || !big_speedup_p (e)))
 901         {
 902           /* growth_positive_p is expensive, always test it last.  */
 903           if (growth >= inline_insns_single (e->caller, false)
 904               || growth_positive_p (callee, e, growth))
 905             {
 906               if (opt_for_fn (e->caller->decl, optimize) >= 3)
 907                 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 908               else
 909                 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT;
 910               want_inline = false;
 911             }
 912         }
 913       /* If call is cold, do not inline when function body would grow.  */
 914       else if (!e->maybe_hot_p ()
 915                && (growth >= inline_insns_single (e->caller, false)
 916                    || growth_positive_p (callee, e, growth)))
 917         {
 918           e->inline_failed = CIF_UNLIKELY_CALL;
 919           want_inline = false;
 920         }
 921     }
 922   if (!want_inline && report)
 923     report_inline_failed_reason (e);
 924   return want_inline;
 925 }
 926
 927 /* EDGE is a self recursive edge.
 928    We handle two cases - when function A is inlining into itself
 929    or when function A is being inlined into another inlined copy of function
 930    A within function B.
 931
 932    In first case OUTER_NODE points to the toplevel copy of A, while
 933    in the second case OUTER_NODE points to the outermost copy of A in B.
 934
 935    In both cases we want to be extra selective since
 936    inlining the call will just introduce new recursive calls to appear.  */
 937
 938 static bool
 939 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 940                                    struct cgraph_node *outer_node,
 941                                    bool peeling,
 942                                    int depth)
 943 {
 944   char const *reason = NULL;
 945   bool want_inline = true;
 946   sreal caller_freq = 1;
 947   int max_depth = param_max_inline_recursive_depth_auto;
 948
 949   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 950     max_depth = param_max_inline_recursive_depth;
 951
 952   if (!edge->maybe_hot_p ())
 953     {
 954       reason = "recursive call is cold";
 955       want_inline = false;
 956     }
 957   else if (depth > max_depth)
 958     {
 959       reason = "--param max-inline-recursive-depth exceeded.";
 960       want_inline = false;
 961     }
 962   else if (outer_node->inlined_to
 963            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 964     {
 965       reason = "caller frequency is 0";
 966       want_inline = false;
 967     }
 968
 969   if (!want_inline)
 970     ;
 971   /* Inlining of self recursive function into copy of itself within other
 972      function is transformation similar to loop peeling.
 973
 974      Peeling is profitable if we can inline enough copies to make probability
 975      of actual call to the self recursive function very small.  Be sure that
 976      the probability of recursion is small.
 977
 978      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 979      This way the expected number of recursion is at most max_depth.  */
 980   else if (peeling)
 981     {
 982       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 983       int i;
 984       for (i = 1; i < depth; i++)
 985         max_prob = max_prob * max_prob;
 986       if (edge->sreal_frequency () >= max_prob * caller_freq)
 987         {
 988           reason = "frequency of recursive call is too large";
 989           want_inline = false;
 990         }
 991     }
 992   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
 993      recursion depth is large.  We reduce function call overhead and increase
 994      chances that things fit in hardware return predictor.
 995
 996      Recursive inlining might however increase cost of stack frame setup
 997      actually slowing down functions whose recursion tree is wide rather than
 998      deep.
 999
1000      Deciding reliably on when to do recursive inlining without profile feedback
1001      is tricky.  For now we disable recursive inlining when probability of self
1002      recursion is low.
1003
1004      Recursive inlining of self recursive call within loop also results in
1005      large loop depths that generally optimize badly.  We may want to throttle
1006      down inlining in those cases.  In particular this seems to happen in one
1007      of libstdc++ rb tree methods.  */
1008   else
1009     {
1010       if (edge->sreal_frequency () * 100
1011           <= caller_freq
1012              * param_min_inline_recursive_probability)
1013         {
1014           reason = "frequency of recursive call is too small";
1015           want_inline = false;
1016         }
1017     }
1018   if (!want_inline && dump_enabled_p ())
1019     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1020                      "   not inlining recursively: %s\n", reason);
1021   return want_inline;
1022 }
1023
1024 /* Return true when NODE has uninlinable caller;
1025    set HAS_HOT_CALL if it has hot call.
1026    Worker for call_for_symbol_and_aliases.  */
1027
1028 static bool
1029 check_callers (struct cgraph_node *node, void *has_hot_call)
1030 {
1031   struct cgraph_edge *e;
1032    for (e = node->callers; e; e = e->next_caller)
1033      {
1034        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1035            || !opt_for_fn (e->caller->decl, optimize))
1036          return true;
1037        if (!can_inline_edge_p (e, true))
1038          return true;
1039        if (e->recursive_p ())
1040          return true;
1041        if (!can_inline_edge_by_limits_p (e, true))
1042          return true;
1043        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1044          *(bool *)has_hot_call = true;
1045      }
1046   return false;
1047 }
1048
1049 /* Return true if NODE has at least one caller; DATA is unused.  */
1050
1051 static bool
1052 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1053 {
1054   if (node->callers)
1055     return true;
1056   return false;
1057 }
1058
1059 /* Decide if inlining NODE into all its callers would reduce unit size by
1060    eliminating the offline copy of the function.  When COLD is false, at
1061    least one caller edge must be hot; when true, cold calls qualify too.  */
1062
1063 static bool
1064 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1065 {
1066   bool has_hot_call = false;
1067
1068   /* Aliases gets inlined along with the function they alias.  */
1069   if (node->alias)
1070     return false;
1071   /* Already inlined?  */
1072   if (node->inlined_to)
1073     return false;
1074   /* Does it have callers?  */
1075   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1076     return false;
1077   /* Inlining into all callers would increase size?  */
1078   if (growth_positive_p (node, NULL, INT_MIN))
1079     return false;
1080   /* All inlines must be possible.  */
1081   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1082                                          true))
1083     return false;
1084   if (!cold && !has_hot_call)
1085     return false;
1086   return true;
1087 }
1088
1089 /* A cost model driving the inlining heuristics in a way so the edges with
1090    smallest badness are inlined first.  After each inlining is performed
1091    the costs of all caller edges of nodes affected are recomputed so the
1092    metrics may accurately depend on values such as number of inlinable callers
1093    of the function or function body size.  */
1094
1095 static sreal
1096 edge_badness (struct cgraph_edge *edge, bool dump)
1097 {
1098   sreal badness;
1099   int growth;
1100   sreal edge_time, unspec_edge_time;
1101   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1102   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1103   ipa_hints hints;
1104   cgraph_node *caller = (edge->caller->inlined_to
1105                          ? edge->caller->inlined_to
1106                          : edge->caller);
1107
1108   growth = estimate_edge_growth (edge);
1109   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1110   hints = estimate_edge_hints (edge);
1111   gcc_checking_assert (edge_time >= 0);
1112   /* Check that inlined time is better, but tolerate some roundoff issues.
1113      FIXME: When callee profile drops to 0 we account calls more.  This
1114      should be fixed by never doing that.  */
1115   gcc_checking_assert ((edge_time * 100
1116                         - callee_info->time * 101).to_int () <= 0
1117                         || callee->count.ipa ().initialized_p ());
1118   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1119
1120   if (dump)
1121     {
1122       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1123                edge->caller->dump_name (),
1124                edge->callee->dump_name ());
1125       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1126                growth,
1127                edge_time.to_double (),
1128                unspec_edge_time.to_double ());
1129       ipa_dump_hints (dump_file, hints);
1130       if (big_speedup_p (edge))
1131         fprintf (dump_file, " big_speedup");
1132       fprintf (dump_file, "\n");
1133     }
1134
1135   /* Always prefer inlining saving code size.  */
1136   if (growth <= 0)
1137     {
1138       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1139       if (dump)
1140         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1141                  growth);
1142     }
1143    /* Inlining into EXTERNAL functions is not going to change anything unless
1144       they are themselves inlined.  */
1145    else if (DECL_EXTERNAL (caller->decl))
1146     {
1147       if (dump)
1148         fprintf (dump_file, "      max: function is external\n");
1149       return sreal::max ();
1150     }
1151   /* When profile is available, compute badness as:
1152
1153                  time_saved * caller_count
1154      goodness =  -------------------------------------------------
1155                  growth_of_caller * overall_growth * combined_size
1156
1157      badness = - goodness
1158
1159      Again use negative value to make calls with profile appear hotter
1160      than calls without.
1161   */
1162   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1163            || caller->count.ipa ().nonzero_p ())
1164     {
1165       sreal numerator, denominator;
1166       int overall_growth;
1167       sreal freq = edge->sreal_frequency ();
1168       sreal inlined_time = compute_inlined_call_time (edge, edge_time, freq);
1169
1170       numerator = (compute_uninlined_call_time (edge, unspec_edge_time, freq)
1171                    - inlined_time);
1172       if (numerator <= 0)
1173         numerator = ((sreal) 1 >> 8);
1174       if (caller->count.ipa ().nonzero_p ())
1175         numerator *= caller->count.ipa ().to_gcov_type ();
1176       else if (caller->count.ipa ().initialized_p ())
1177         numerator = numerator >> 11;
1178       denominator = growth;
1179
1180       overall_growth = callee_info->growth;
1181
1182       /* Look for inliner wrappers of the form:
1183
1184          inline_caller ()
1185            {
1186              do_fast_job...
1187              if (need_more_work)
1188                noninline_callee ();
1189            }
1190          Without penalizing this case, we usually inline noninline_callee
1191          into the inline_caller because overall_growth is small preventing
1192          further inlining of inline_caller.
1193
1194          Penalize only callgraph edges to functions with small overall
1195          growth ...
1196         */
1197       if (growth > overall_growth
1198           /* ... and having only one caller which is not inlined ... */
1199           && callee_info->single_caller
1200           && !edge->caller->inlined_to
1201           /* ... and edges executed only conditionally ... */
1202           && freq < 1
1203           /* ... consider case where callee is not inline but caller is ... */
1204           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1205                && DECL_DECLARED_INLINE_P (caller->decl))
1206               /* ... or when early optimizers decided to split and edge
1207                  frequency still indicates splitting is a win ... */
1208               || (callee->split_part && !caller->split_part
1209                   && freq * 100 < param_partial_inlining_entry_probability
1210                   /* ... and do not overwrite user specified hints.   */
1211                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1212                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1213         {
1214           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1215           int caller_growth = caller_info->growth;
1216
1217           /* Only apply the penalty when caller looks like inline candidate,
1218              and it is not called once.  */
1219           if (!caller_info->single_caller && overall_growth < caller_growth
1220               && caller_info->inlinable
1221               && ipa_size_summaries->get (caller)->size
1222                  < (DECL_DECLARED_INLINE_P (caller->decl)
1223                     ? inline_insns_single (caller, false)
1224                     : inline_insns_auto (caller, false)))
1225             {
1226               if (dump)
1227                 fprintf (dump_file,
1228                          "     Wrapper penalty. Increasing growth %i to %i\n",
1229                          overall_growth, caller_growth);
1230               overall_growth = caller_growth;
1231             }
1232         }
1233       if (overall_growth > 0)
1234         {
1235           /* Strongly prefer functions with few callers that can be inlined
1236              fully.  The square root here leads to smaller binaries on average.
1237              Watch however for extreme cases and return to linear function
1238              when growth is large.  */
1239           if (overall_growth < 256)
1240             overall_growth *= overall_growth;
1241           else
1242             overall_growth += 256 * 256 - 256;
1243           denominator *= overall_growth;
1244         }
1245       denominator *= ipa_size_summaries->get (caller)->size + growth;
1246
1247       badness = - numerator / denominator;
1248
1249       if (dump)
1250         {
1251           fprintf (dump_file,
1252                    "      %f: guessed profile. frequency %f, count %" PRId64
1253                    " caller count %" PRId64
1254                    " time w/o inlining %f, time with inlining %f"
1255                    " overall growth %i (current) %i (original)"
1256                    " %i (compensated)\n",
1257                    badness.to_double (),
1258                    freq.to_double (),
1259                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1260                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1261                    compute_uninlined_call_time (edge,
1262                                                 unspec_edge_time, freq).to_double (),
1263                    inlined_time.to_double (),
1264                    estimate_growth (callee),
1265                    callee_info->growth, overall_growth);
1266         }
1267     }
1268   /* When function local profile is not available or it does not give
1269      useful information (ie frequency is zero), base the cost on
1270      loop nest and overall size growth, so we optimize for overall number
1271      of functions fully inlined in program.  */
1272   else
1273     {
1274       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1275       badness = growth;
1276
1277       /* Decrease badness if call is nested.  */
1278       if (badness > 0)
1279         badness = badness >> nest;
1280       else
1281         badness = badness << nest;
1282       if (dump)
1283         fprintf (dump_file, "      %f: no profile. nest %i\n",
1284                  badness.to_double (), nest);
1285     }
1286   gcc_checking_assert (badness != 0);
1287
1288   if (edge->recursive_p ())
1289     badness = badness.shift (badness > 0 ? 4 : -4);
1290   if ((hints & (INLINE_HINT_indirect_call
1291                 | INLINE_HINT_loop_iterations
1292                 | INLINE_HINT_loop_stride))
1293       || callee_info->growth <= 0)
1294     badness = badness.shift (badness > 0 ? -2 : 2);
1295   if (hints & (INLINE_HINT_same_scc))
1296     badness = badness.shift (badness > 0 ? 3 : -3);
1297   else if (hints & (INLINE_HINT_in_scc))
1298     badness = badness.shift (badness > 0 ? 2 : -2);
1299   else if (hints & (INLINE_HINT_cross_module))
1300     badness = badness.shift (badness > 0 ? 1 : -1);
1301   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1302     badness = badness.shift (badness > 0 ? -4 : 4);
1303   else if ((hints & INLINE_HINT_declared_inline))
1304     badness = badness.shift (badness > 0 ? -3 : 3);
1305   if (dump)
1306     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1307   return badness;
1308 }
1309
1310 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1311 static inline void
1312 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1313 {
1314   sreal badness = edge_badness (edge, false);
1315   if (edge->aux)
1316     {
1317       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1318       gcc_checking_assert (n->get_data () == edge);
1319
1320       /* fibonacci_heap::replace_key does busy updating of the
1321          heap that is unnecessarily expensive.
1322          We do lazy increases: after extracting minimum if the key
1323          turns out to be out of date, it is re-inserted into heap
1324          with correct value.  */
1325       if (badness < n->get_key ())
1326         {
1327           if (dump_file && (dump_flags & TDF_DETAILS))
1328             {
1329               fprintf (dump_file,
1330                        "  decreasing badness %s -> %s, %f to %f\n",
1331                        edge->caller->dump_name (),
1332                        edge->callee->dump_name (),
1333                        n->get_key ().to_double (),
1334                        badness.to_double ());
1335             }
1336           heap->decrease_key (n, badness);
1337         }
1338     }
1339   else
1340     {
1341        if (dump_file && (dump_flags & TDF_DETAILS))
1342          {
1343            fprintf (dump_file,
1344                     "  enqueuing call %s -> %s, badness %f\n",
1345                     edge->caller->dump_name (),
1346                     edge->callee->dump_name (),
1347                     badness.to_double ());
1348          }
1349       edge->aux = heap->insert (badness, edge);
1350     }
1351 }
1352
1353
1354 /* NODE was inlined.
1355    All caller edges need to be reset because
1356    size estimates change.  Similarly callees need to be reset
1357    because better context may be known.  */
1358
1359 static void
1360 reset_edge_caches (struct cgraph_node *node)
1361 {
1362   struct cgraph_edge *edge;
1363   struct cgraph_edge *e = node->callees;
1364   struct cgraph_node *where = node;
1365   struct ipa_ref *ref;
1366
1367   if (where->inlined_to)
1368     where = where->inlined_to;
1369
1370   reset_node_cache (where);
1371
1372   if (edge_growth_cache != NULL)
1373     for (edge = where->callers; edge; edge = edge->next_caller)
1374       if (edge->inline_failed)
1375         edge_growth_cache->remove (edge);
1376
1377   FOR_EACH_ALIAS (where, ref)
1378     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1379
1380   if (!e)
1381     return;
1382
1383   while (true)
1384     if (!e->inline_failed && e->callee->callees)
1385       e = e->callee->callees;
1386     else
1387       {
1388         if (edge_growth_cache != NULL && e->inline_failed)
1389           edge_growth_cache->remove (e);
1390         if (e->next_callee)
1391           e = e->next_callee;
1392         else
1393           {
1394             do
1395               {
1396                 if (e->caller == node)
1397                   return;
1398                 e = e->caller->callers;
1399               }
1400             while (!e->next_callee);
1401             e = e->next_callee;
1402           }
1403       }
1404 }
1405
1406 /* Recompute HEAP nodes for each caller of NODE.
1407    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1408    When CHECK_INLINABLITY_FOR is set, re-check only for the specified edge
1409    that it is inlinable.  Otherwise check all edges.  */
1410
1411 static void
1412 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1413                     bitmap updated_nodes,
1414                     struct cgraph_edge *check_inlinablity_for)
1415 {
1416   struct cgraph_edge *edge;
1417   struct ipa_ref *ref;
1418
1419   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1420       || node->inlined_to)
1421     return;
1422   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1423     return;
1424
1425   FOR_EACH_ALIAS (node, ref)
1426     {
1427       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1428       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1429     }
1430
1431   for (edge = node->callers; edge; edge = edge->next_caller)
1432     if (edge->inline_failed)
1433       {
1434         if (!check_inlinablity_for
1435             || check_inlinablity_for == edge)
1436           {
1437             if (can_inline_edge_p (edge, false)
1438                 && want_inline_small_function_p (edge, false)
1439                 && can_inline_edge_by_limits_p (edge, false))
1440               update_edge_key (heap, edge);
1441             else if (edge->aux)
1442               {
1443                 report_inline_failed_reason (edge);
1444                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1445                 edge->aux = NULL;
1446               }
1447           }
1448         else if (edge->aux)
1449           update_edge_key (heap, edge);
1450       }
1451 }
1452
1453 /* Recompute HEAP nodes for each uninlined call in NODE.
1454    This is used when we know that edge badnesses are going only to increase
1455    (we introduced new call site) and thus all we need is to insert newly
1456    created edges into heap.  */
1457
1458 static void
1459 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1460                     bitmap updated_nodes)
1461 {
1462   struct cgraph_edge *e = node->callees;
1463
1464   if (!e)
1465     return;
1466   while (true)
1467     if (!e->inline_failed && e->callee->callees)
1468       e = e->callee->callees;
1469     else
1470       {
1471         enum availability avail;
1472         struct cgraph_node *callee;
1473         /* We do not reset callee growth cache here.  Since we added a new call,
1474            growth could only have increased and consequently the badness metric
1475            doesn't need updating.  */
1476         if (e->inline_failed
1477             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1478             && ipa_fn_summaries->get (callee) != NULL
1479             && ipa_fn_summaries->get (callee)->inlinable
1480             && avail >= AVAIL_AVAILABLE
1481             && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1482           {
1483             if (can_inline_edge_p (e, false)
1484                 && want_inline_small_function_p (e, false)
1485                 && can_inline_edge_by_limits_p (e, false))
1486               update_edge_key (heap, e);
1487             else if (e->aux)
1488               {
1489                 report_inline_failed_reason (e);
1490                 heap->delete_node ((edge_heap_node_t *) e->aux);
1491                 e->aux = NULL;
1492               }
1493           }
1494         if (e->next_callee)
1495           e = e->next_callee;
1496         else
1497           {
1498             do
1499               {
1500                 if (e->caller == node)
1501                   return;
1502                 e = e->caller->callers;
1503               }
1504             while (!e->next_callee);
1505             e = e->next_callee;
1506           }
1507       }
1508 }
1509
1510 /* Enqueue into HEAP every call to NODE made from WHERE or from bodies inlined
1511    into WHERE, using the negated call frequency as the key.  */
1512
1513 static void
1514 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1515                         edge_heap_t *heap)
1516 {
1517   struct cgraph_edge *e;
1518   enum availability avail;
1519
1520   for (e = where->callees; e; e = e->next_callee)
1521     if (e->callee == node
1522         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1523             && avail > AVAIL_INTERPOSABLE))
1524       heap->insert (-e->sreal_frequency (), e);
1525   for (e = where->callees; e; e = e->next_callee)
1526     if (!e->inline_failed)
1527       lookup_recursive_calls (node, e->callee, heap);
1528 }
1529
1530 /* Decide on recursive inlining: in the case function has recursive calls,
1531    inline until body size reaches given argument.  If any new indirect edges
1532    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1533    is NULL.  */
1534
1535 static bool
1536 recursive_inlining (struct cgraph_edge *edge,
1537                     vec<cgraph_edge *> *new_edges)
1538 {
1539   int limit = param_max_inline_insns_recursive_auto;
1540   edge_heap_t heap (sreal::min ());
1541   struct cgraph_node *node;
1542   struct cgraph_edge *e;
1543   struct cgraph_node *master_clone = NULL, *next;
1544   int depth = 0;
1545   int n = 0;
1546
1547   node = edge->caller;
1548   if (node->inlined_to)
1549     node = node->inlined_to;
1550
1551   if (DECL_DECLARED_INLINE_P (node->decl))
1552     limit = param_max_inline_insns_recursive;
1553
1554   /* Make sure that function is small enough to be considered for inlining.  */
1555   if (estimate_size_after_inlining (node, edge)  >= limit)
1556     return false;
1557   lookup_recursive_calls (node, node, &heap);
1558   if (heap.empty ())
1559     return false;
1560
1561   if (dump_file)
1562     fprintf (dump_file,
1563              "  Performing recursive inlining on %s\n",
1564              node->name ());
1565
1566   /* Do the inlining and update list of recursive call during process.  */
1567   while (!heap.empty ())
1568     {
1569       struct cgraph_edge *curr = heap.extract_min ();
1570       struct cgraph_node *cnode, *dest = curr->callee;
1571
1572       if (!can_inline_edge_p (curr, true)
1573           || !can_inline_edge_by_limits_p (curr, true))
1574         continue;
1575
1576       /* MASTER_CLONE is produced in the case we already started modified
1577          the function. Be sure to redirect edge to the original body before
1578          estimating growths otherwise we will be seeing growths after inlining
1579          the already modified body.  */
1580       if (master_clone)
1581         {
1582           curr->redirect_callee (master_clone);
1583           if (edge_growth_cache != NULL)
1584             edge_growth_cache->remove (curr);
1585         }
1586
1587       if (estimate_size_after_inlining (node, curr) > limit)
1588         {
1589           curr->redirect_callee (dest);
1590           if (edge_growth_cache != NULL)
1591             edge_growth_cache->remove (curr);
1592           break;
1593         }
1594
1595       depth = 1;
1596       for (cnode = curr->caller;
1597            cnode->inlined_to; cnode = cnode->callers->caller)
1598         if (node->decl
1599             == curr->callee->ultimate_alias_target ()->decl)
1600           depth++;
1601
1602       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1603         {
1604           curr->redirect_callee (dest);
1605           if (edge_growth_cache != NULL)
1606             edge_growth_cache->remove (curr);
1607           continue;
1608         }
1609
1610       if (dump_file)
1611         {
1612           fprintf (dump_file,
1613                    "   Inlining call of depth %i", depth);
1614           if (node->count.nonzero_p () && curr->count.initialized_p ())
1615             {
1616               fprintf (dump_file, " called approx. %.2f times per call",
1617                        (double)curr->count.to_gcov_type ()
1618                        / node->count.to_gcov_type ());
1619             }
1620           fprintf (dump_file, "\n");
1621         }
1622       if (!master_clone)
1623         {
1624           /* We need original clone to copy around.  */
1625           master_clone = node->create_clone (node->decl, node->count,
1626             false, vNULL, true, NULL, NULL);
1627           for (e = master_clone->callees; e; e = e->next_callee)
1628             if (!e->inline_failed)
1629               clone_inlined_nodes (e, true, false, NULL);
1630           curr->redirect_callee (master_clone);
1631           if (edge_growth_cache != NULL)
1632             edge_growth_cache->remove (curr);
1633         }
1634
1635       inline_call (curr, false, new_edges, &overall_size, true);
1636       reset_node_cache (node);
1637       lookup_recursive_calls (node, curr->callee, &heap);
1638       n++;
1639     }
1640
1641   if (!heap.empty () && dump_file)
1642     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1643
1644   if (!master_clone)
1645     return false;
1646
1647   if (dump_enabled_p ())
1648     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1649                      "\n   Inlined %i times, "
1650                      "body grown from size %i to %i, time %f to %f\n", n,
1651                      ipa_size_summaries->get (master_clone)->size,
1652                      ipa_size_summaries->get (node)->size,
1653                      ipa_fn_summaries->get (master_clone)->time.to_double (),
1654                      ipa_fn_summaries->get (node)->time.to_double ());
1655
1656   /* Remove master clone we used for inlining.  We rely that clones inlined
1657      into master clone gets queued just before master clone so we don't
1658      need recursion.  */
1659   for (node = symtab->first_function (); node != master_clone;
1660        node = next)
1661     {
1662       next = symtab->next_function (node);
1663       if (node->inlined_to == master_clone)
1664         node->remove ();
1665     }
1666   master_clone->remove ();
1667   return true;
1668 }
1669
1670
1671 /* Given whole compilation unit estimate of INSNS, compute how large we can
1672    allow the unit to grow.  */
1673
1674 static int
1675 compute_max_insns (int insns)
1676 {
1677   int max_insns = insns;
1678   if (max_insns < param_large_unit_insns)
1679     max_insns = param_large_unit_insns;
1680
1681   return ((int64_t) max_insns
1682           * (100 + param_inline_unit_growth) / 100);
1683 }
1684
1685
1686 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1687
1688 static void
1689 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1690 {
1691   while (new_edges.length () > 0)
1692     {
1693       struct cgraph_edge *edge = new_edges.pop ();
1694
1695       gcc_assert (!edge->aux);
1696       gcc_assert (edge->callee);
1697       if (edge->inline_failed
1698           && can_inline_edge_p (edge, true)
1699           && want_inline_small_function_p (edge, true)
1700           && can_inline_edge_by_limits_p (edge, true))
1701         edge->aux = heap->insert (edge_badness (edge, false), edge);
1702     }
1703 }
1704
1705 /* Remove EDGE from the fibheap.  */
1706
1707 static void
1708 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1709 {
1710   if (e->aux)
1711     {
1712       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1713       e->aux = NULL;
1714     }
1715 }
1716
1717 /* Return true if speculation of edge E seems useful.
1718    If ANTICIPATE_INLINING is true, be conservative and hope that E
1719    may get inlined.  */
1720
1721 bool
1722 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1723 {
1724   /* If we have already decided to inline the edge, it seems useful.  */
1725   if (!e->inline_failed)
1726     return true;
1727
1728   enum availability avail;
1729   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1730                                                                  e->caller);
1731   struct cgraph_edge *direct, *indirect;
1732   struct ipa_ref *ref;
1733
1734   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1735
1736   if (!e->maybe_hot_p ())
1737     return false;
1738
1739   /* See if IP optimizations found something potentially useful about the
1740      function.  For now we look only for CONST/PURE flags.  Almost everything
1741      else we propagate is useless.  */
1742   if (avail >= AVAIL_AVAILABLE)
1743     {
1744       int ecf_flags = flags_from_decl_or_type (target->decl);
1745       if (ecf_flags & ECF_CONST)
1746         {
1747           e->speculative_call_info (direct, indirect, ref);
1748           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1749             return true;
1750         }
1751       else if (ecf_flags & ECF_PURE)
1752         {
1753           e->speculative_call_info (direct, indirect, ref);
1754           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1755             return true;
1756         }
1757     }
1758   /* If we did not managed to inline the function nor redirect
1759      to an ipa-cp clone (that are seen by having local flag set),
1760      it is probably pointless to inline it unless hardware is missing
1761      indirect call predictor.  */
1762   if (!anticipate_inlining && !target->local)
1763     return false;
1764   /* For overwritable targets there is not much to do.  */
1765   if (!can_inline_edge_p (e, false)
1766       || !can_inline_edge_by_limits_p (e, false, true))
1767     return false;
1768   /* OK, speculation seems interesting.  */
1769   return true;
1770 }
1771
1772 /* We know that EDGE is not going to be inlined.
1773    See if we can remove speculation.  */
1774
1775 static void
1776 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1777 {
1778   if (edge->speculative && !speculation_useful_p (edge, false))
1779     {
1780       struct cgraph_node *node = edge->caller;
1781       struct cgraph_node *where = node->inlined_to
1782                                   ? node->inlined_to : node;
1783       auto_bitmap updated_nodes;
1784
1785       if (edge->count.ipa ().initialized_p ())
1786         spec_rem += edge->count.ipa ();
1787       edge->resolve_speculation ();
1788       reset_edge_caches (where);
1789       ipa_update_overall_fn_summary (where);
1790       update_caller_keys (edge_heap, where,
1791                           updated_nodes, NULL);
1792       update_callee_keys (edge_heap, where,
1793                           updated_nodes);
1794     }
1795 }
1796
1797 /* Return true if NODE should be accounted for overall size estimate.
1798    Skip all nodes optimized for size so we can measure the growth of hot
1799    part of program no matter of the padding.  */
1800
1801 bool
1802 inline_account_function_p (struct cgraph_node *node)
1803 {
1804    return (!DECL_EXTERNAL (node->decl)
1805            && !opt_for_fn (node->decl, optimize_size)
1806            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1807 }
1808
1809 /* Count number of callers of NODE and store it into DATA (that
1810    points to int.  Worker for cgraph_for_node_and_aliases.  */
1811
1812 static bool
1813 sum_callers (struct cgraph_node *node, void *data)
1814 {
1815   struct cgraph_edge *e;
1816   int *num_calls = (int *)data;
1817
1818   for (e = node->callers; e; e = e->next_caller)
1819     (*num_calls)++;
1820   return false;
1821 }
1822
1823 /* We only propagate across edges with non-interposable callee.  */
1824
1825 inline bool
1826 ignore_edge_p (struct cgraph_edge *e)
1827 {
1828   enum availability avail;
1829   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1830   return (avail <= AVAIL_INTERPOSABLE);
1831 }
1832
1833 /* We use greedy algorithm for inlining of small functions:
1834    All inline candidates are put into prioritized heap ordered in
1835    increasing badness.
1836
1837    The inlining of small functions is bounded by unit growth parameters.  */
1838
1839 static void
1840 inline_small_functions (void)
1841 {
1842   struct cgraph_node *node;
1843   struct cgraph_edge *edge;
1844   edge_heap_t edge_heap (sreal::min ());
1845   auto_bitmap updated_nodes;
1846   int min_size, max_size;
1847   auto_vec<cgraph_edge *> new_indirect_edges;
1848   int initial_size = 0;
1849   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1850   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1851   new_indirect_edges.create (8);
1852
1853   edge_removal_hook_holder
1854     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1855
1856   /* Compute overall unit size and other global parameters used by badness
1857      metrics.  */
1858
1859   max_count = profile_count::uninitialized ();
1860   ipa_reduced_postorder (order, true, ignore_edge_p);
1861   free (order);
1862
1863   FOR_EACH_DEFINED_FUNCTION (node)
1864     if (!node->inlined_to)
1865       {
1866         if (!node->alias && node->analyzed
1867             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1868             && opt_for_fn (node->decl, optimize))
1869           {
1870             class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1871             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1872
1873             /* Do not account external functions, they will be optimized out
1874                if not inlined.  Also only count the non-cold portion of program.  */
1875             if (inline_account_function_p (node))
1876               initial_size += ipa_size_summaries->get (node)->size;
1877             info->growth = estimate_growth (node);
1878
1879             int num_calls = 0;
1880             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1881                                                true);
1882             if (num_calls == 1)
1883               info->single_caller = true;
1884             if (dfs && dfs->next_cycle)
1885               {
1886                 struct cgraph_node *n2;
1887                 int id = dfs->scc_no + 1;
1888                 for (n2 = node; n2;
1889                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1890                   if (opt_for_fn (n2->decl, optimize))
1891                     {
1892                       ipa_fn_summary *info2 = ipa_fn_summaries->get
1893                          (n2->inlined_to ? n2->inlined_to : n2);
1894                       if (info2->scc_no)
1895                         break;
1896                       info2->scc_no = id;
1897                     }
1898               }
1899           }
1900
1901         for (edge = node->callers; edge; edge = edge->next_caller)
1902           max_count = max_count.max (edge->count.ipa ());
1903       }
1904   ipa_free_postorder_info ();
1905   initialize_growth_caches ();
1906
1907   if (dump_file)
1908     fprintf (dump_file,
1909              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1910              initial_size);
1911
1912   overall_size = initial_size;
1913   max_size = compute_max_insns (overall_size);
1914   min_size = overall_size;
1915
1916   /* Populate the heap with all edges we might inline.  */
1917
1918   FOR_EACH_DEFINED_FUNCTION (node)
1919     {
1920       bool update = false;
1921       struct cgraph_edge *next = NULL;
1922       bool has_speculative = false;
1923
1924       if (!opt_for_fn (node->decl, optimize))
1925         continue;
1926
1927       if (dump_file)
1928         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1929
1930       for (edge = node->callees; edge; edge = next)
1931         {
1932           next = edge->next_callee;
1933           if (edge->inline_failed
1934               && !edge->aux
1935               && can_inline_edge_p (edge, true)
1936               && want_inline_small_function_p (edge, true)
1937               && can_inline_edge_by_limits_p (edge, true)
1938               && edge->inline_failed)
1939             {
1940               gcc_assert (!edge->aux);
1941               update_edge_key (&edge_heap, edge);
1942             }
1943           if (edge->speculative)
1944             has_speculative = true;
1945         }
1946       if (has_speculative)
1947         for (edge = node->callees; edge; edge = next)
1948           if (edge->speculative && !speculation_useful_p (edge,
1949                                                           edge->aux != NULL))
1950             {
1951               edge->resolve_speculation ();
1952               update = true;
1953             }
1954       if (update)
1955         {
1956           struct cgraph_node *where = node->inlined_to
1957                                       ? node->inlined_to : node;
1958           ipa_update_overall_fn_summary (where);
1959           reset_edge_caches (where);
1960           update_caller_keys (&edge_heap, where,
1961                               updated_nodes, NULL);
1962           update_callee_keys (&edge_heap, where,
1963                               updated_nodes);
1964           bitmap_clear (updated_nodes);
1965         }
1966     }
1967
1968   gcc_assert (in_lto_p
1969               || !(max_count > 0)
1970               || (profile_info && flag_branch_probabilities));
1971
1972   while (!edge_heap.empty ())
1973     {
1974       int old_size = overall_size;
1975       struct cgraph_node *where, *callee;
1976       sreal badness = edge_heap.min_key ();
1977       sreal current_badness;
1978       int growth;
1979
1980       edge = edge_heap.extract_min ();
1981       gcc_assert (edge->aux);
1982       edge->aux = NULL;
1983       if (!edge->inline_failed || !edge->callee->analyzed)
1984         continue;
1985
1986       /* Be sure that caches are maintained consistent.
1987          This check is affected by scaling roundoff errors when compiling for
1988          IPA this we skip it in that case.  */
1989       if (flag_checking && !edge->callee->count.ipa_p ()
1990           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1991         {
1992           sreal cached_badness = edge_badness (edge, false);
1993
1994           int old_size_est = estimate_edge_size (edge);
1995           sreal old_time_est = estimate_edge_time (edge);
1996           int old_hints_est = estimate_edge_hints (edge);
1997
1998           if (edge_growth_cache != NULL)
1999             edge_growth_cache->remove (edge);
2000           reset_node_cache (edge->caller->inlined_to
2001                             ? edge->caller->inlined_to
2002                             : edge->caller);
2003           gcc_assert (old_size_est == estimate_edge_size (edge));
2004           gcc_assert (old_time_est == estimate_edge_time (edge));
2005           /* FIXME:
2006
2007              gcc_assert (old_hints_est == estimate_edge_hints (edge));
2008
2009              fails with profile feedback because some hints depends on
2010              maybe_hot_edge_p predicate and because callee gets inlined to other
2011              calls, the edge may become cold.
2012              This ought to be fixed by computing relative probabilities
2013              for given invocation but that will be better done once whole
2014              code is converted to sreals.  Disable for now and revert to "wrong"
2015              value so enable/disable checking paths agree.  */
2016           edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2017
2018           /* When updating the edge costs, we only decrease badness in the keys.
2019              Increases of badness are handled lazilly; when we see key with out
2020              of date value on it, we re-insert it now.  */
2021           current_badness = edge_badness (edge, false);
2022           gcc_assert (cached_badness == current_badness);
2023           gcc_assert (current_badness >= badness);
2024         }
2025       else
2026         current_badness = edge_badness (edge, false);
2027       if (current_badness != badness)
2028         {
2029           if (edge_heap.min () && current_badness > edge_heap.min_key ())
2030             {
2031               edge->aux = edge_heap.insert (current_badness, edge);
2032               continue;
2033             }
2034           else
2035             badness = current_badness;
2036         }
2037
2038       if (!can_inline_edge_p (edge, true)
2039           || !can_inline_edge_by_limits_p (edge, true))
2040         {
2041           resolve_noninline_speculation (&edge_heap, edge);
2042           continue;
2043         }
2044
2045       callee = edge->callee->ultimate_alias_target ();
2046       growth = estimate_edge_growth (edge);
2047       if (dump_file)
2048         {
2049           fprintf (dump_file,
2050                    "\nConsidering %s with %i size\n",
2051                    callee->dump_name (),
2052                    ipa_size_summaries->get (callee)->size);
2053           fprintf (dump_file,
2054                    " to be inlined into %s in %s:%i\n"
2055                    " Estimated badness is %f, frequency %.2f.\n",
2056                    edge->caller->dump_name (),
2057                    edge->call_stmt
2058                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
2059                                                         edge->call_stmt))
2060                        > BUILTINS_LOCATION)
2061                    ? gimple_filename ((const gimple *) edge->call_stmt)
2062                    : "unknown",
2063                    edge->call_stmt
2064                    ? gimple_lineno ((const gimple *) edge->call_stmt)
2065                    : -1,
2066                    badness.to_double (),
2067                    edge->sreal_frequency ().to_double ());
2068           if (edge->count.ipa ().initialized_p ())
2069             {
2070               fprintf (dump_file, " Called ");
2071               edge->count.ipa ().dump (dump_file);
2072               fprintf (dump_file, " times\n");
2073             }
2074           if (dump_flags & TDF_DETAILS)
2075             edge_badness (edge, true);
2076         }
2077
2078       if (overall_size + growth > max_size
2079           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2080         {
2081           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2082           report_inline_failed_reason (edge);
2083           resolve_noninline_speculation (&edge_heap, edge);
2084           continue;
2085         }
2086
2087       if (!want_inline_small_function_p (edge, true))
2088         {
2089           resolve_noninline_speculation (&edge_heap, edge);
2090           continue;
2091         }
2092
2093       /* Heuristics for inlining small functions work poorly for
2094          recursive calls where we do effects similar to loop unrolling.
2095          When inlining such edge seems profitable, leave decision on
2096          specific inliner.  */
2097       if (edge->recursive_p ())
2098         {
2099           where = edge->caller;
2100           if (where->inlined_to)
2101             where = where->inlined_to;
2102           if (!recursive_inlining (edge,
2103                                    opt_for_fn (edge->caller->decl,
2104                                                flag_indirect_inlining)
2105                                    ? &new_indirect_edges : NULL))
2106             {
2107               edge->inline_failed = CIF_RECURSIVE_INLINING;
2108               resolve_noninline_speculation (&edge_heap, edge);
2109               continue;
2110             }
2111           reset_edge_caches (where);
2112           /* Recursive inliner inlines all recursive calls of the function
2113              at once. Consequently we need to update all callee keys.  */
2114           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2115             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2116           update_callee_keys (&edge_heap, where, updated_nodes);
2117           bitmap_clear (updated_nodes);
2118         }
2119       else
2120         {
2121           struct cgraph_node *outer_node = NULL;
2122           int depth = 0;
2123
2124           /* Consider the case where self recursive function A is inlined
2125              into B.  This is desired optimization in some cases, since it
2126              leads to effect similar of loop peeling and we might completely
2127              optimize out the recursive call.  However we must be extra
2128              selective.  */
2129
2130           where = edge->caller;
2131           while (where->inlined_to)
2132             {
2133               if (where->decl == callee->decl)
2134                 outer_node = where, depth++;
2135               where = where->callers->caller;
2136             }
2137           if (outer_node
2138               && !want_inline_self_recursive_call_p (edge, outer_node,
2139                                                      true, depth))
2140             {
2141               edge->inline_failed
2142                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2143                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2144               resolve_noninline_speculation (&edge_heap, edge);
2145               continue;
2146             }
2147           else if (depth && dump_file)
2148             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2149
2150           gcc_checking_assert (!callee->inlined_to);
2151           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2152           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2153
2154           reset_edge_caches (edge->callee);
2155
2156           update_callee_keys (&edge_heap, where, updated_nodes);
2157         }
2158       where = edge->caller;
2159       if (where->inlined_to)
2160         where = where->inlined_to;
2161
2162       /* Our profitability metric can depend on local properties
2163          such as number of inlinable calls and size of the function body.
2164          After inlining these properties might change for the function we
2165          inlined into (since it's body size changed) and for the functions
2166          called by function we inlined (since number of it inlinable callers
2167          might change).  */
2168       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2169       /* Offline copy count has possibly changed, recompute if profile is
2170          available.  */
2171       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2172       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2173         update_callee_keys (&edge_heap, n, updated_nodes);
2174       bitmap_clear (updated_nodes);
2175
2176       if (dump_enabled_p ())
2177         {
2178           ipa_fn_summary *s = ipa_fn_summaries->get (where);
2179
2180           /* dump_printf can't handle %+i.  */
2181           char buf_net_change[100];
2182           snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2183                     overall_size - old_size);
2184
2185           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2186                            " Inlined %C into %C which now has time %f and "
2187                            "size %i, net change of %s.\n",
2188                            edge->callee, edge->caller,
2189                            s->time.to_double (),
2190                            ipa_size_summaries->get (edge->caller)->size,
2191                            buf_net_change);
2192         }
2193       if (min_size > overall_size)
2194         {
2195           min_size = overall_size;
2196           max_size = compute_max_insns (min_size);
2197
2198           if (dump_file)
2199             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2200         }
2201     }
2202
2203   free_growth_caches ();
2204   if (dump_enabled_p ())
2205     dump_printf (MSG_NOTE,
2206                  "Unit growth for small function inlining: %i->%i (%i%%)\n",
2207                  initial_size, overall_size,
2208                  initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2209   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2210 }
2211
2212 /* Flatten NODE.  Performed both during early inlining and
2213    at IPA inlining time.  */
2214
2215 static void
2216 flatten_function (struct cgraph_node *node, bool early, bool update)
2217 {
2218   struct cgraph_edge *e;
2219
2220   /* We shouldn't be called recursively when we are being processed.  */
2221   gcc_assert (node->aux == NULL);
2222
2223   node->aux = (void *) node;
2224
2225   for (e = node->callees; e; e = e->next_callee)
2226     {
2227       struct cgraph_node *orig_callee;
2228       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2229
2230       /* We've hit cycle?  It is time to give up.  */
2231       if (callee->aux)
2232         {
2233           if (dump_enabled_p ())
2234             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2235                              "Not inlining %C into %C to avoid cycle.\n",
2236                              callee, e->caller);
2237           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2238             e->inline_failed = CIF_RECURSIVE_INLINING;
2239           continue;
2240         }
2241
2242       /* When the edge is already inlined, we just need to recurse into
2243          it in order to fully flatten the leaves.  */
2244       if (!e->inline_failed)
2245         {
2246           flatten_function (callee, early, false);
2247           continue;
2248         }
2249
2250       /* Flatten attribute needs to be processed during late inlining. For
2251          extra code quality we however do flattening during early optimization,
2252          too.  */
2253       if (!early
2254           ? !can_inline_edge_p (e, true)
2255             && !can_inline_edge_by_limits_p (e, true)
2256           : !can_early_inline_edge_p (e))
2257         continue;
2258
2259       if (e->recursive_p ())
2260         {
2261           if (dump_enabled_p ())
2262             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2263                              "Not inlining: recursive call.\n");
2264           continue;
2265         }
2266
2267       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2268           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2269         {
2270           if (dump_enabled_p ())
2271             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2272                              "Not inlining: SSA form does not match.\n");
2273           continue;
2274         }
2275
2276       /* Inline the edge and flatten the inline clone.  Avoid
2277          recursing through the original node if the node was cloned.  */
2278       if (dump_enabled_p ())
2279         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2280                          " Inlining %C into %C.\n",
2281                          callee, e->caller);
2282       orig_callee = callee;
2283       inline_call (e, true, NULL, NULL, false);
2284       if (e->callee != orig_callee)
2285         orig_callee->aux = (void *) node;
2286       flatten_function (e->callee, early, false);
2287       if (e->callee != orig_callee)
2288         orig_callee->aux = NULL;
2289     }
2290
2291   node->aux = NULL;
2292   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2293   if (update && opt_for_fn (where->decl, optimize))
2294     ipa_update_overall_fn_summary (where);
2295 }
2296
2297 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2298    DATA points to number of calls originally found so we avoid infinite
2299    recursion.  */
2300
2301 static bool
2302 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2303                          hash_set<cgraph_node *> *callers)
2304 {
2305   int *num_calls = (int *)data;
2306   bool callee_removed = false;
2307
2308   while (node->callers && !node->inlined_to)
2309     {
2310       struct cgraph_node *caller = node->callers->caller;
2311
2312       if (!can_inline_edge_p (node->callers, true)
2313           || !can_inline_edge_by_limits_p (node->callers, true)
2314           || node->callers->recursive_p ())
2315         {
2316           if (dump_file)
2317             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2318           *num_calls = 0;
2319           return false;
2320         }
2321
2322       if (dump_file)
2323         {
2324           cgraph_node *ultimate = node->ultimate_alias_target ();
2325           fprintf (dump_file,
2326                    "\nInlining %s size %i.\n",
2327                    ultimate->name (),
2328                    ipa_size_summaries->get (ultimate)->size);
2329           fprintf (dump_file,
2330                    " Called once from %s %i insns.\n",
2331                    node->callers->caller->name (),
2332                    ipa_size_summaries->get (node->callers->caller)->size);
2333         }
2334
2335       /* Remember which callers we inlined to, delaying updating the
2336          overall summary.  */
2337       callers->add (node->callers->caller);
2338       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2339       if (dump_file)
2340         fprintf (dump_file,
2341                  " Inlined into %s which now has %i size\n",
2342                  caller->name (),
2343                  ipa_size_summaries->get (caller)->size);
2344       if (!(*num_calls)--)
2345         {
2346           if (dump_file)
2347             fprintf (dump_file, "New calls found; giving up.\n");
2348           return callee_removed;
2349         }
2350       if (callee_removed)
2351         return true;
2352     }
2353   return false;
2354 }
2355
2356 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2357    update.  */
2358
2359 static bool
2360 inline_to_all_callers (struct cgraph_node *node, void *data)
2361 {
2362   hash_set<cgraph_node *> callers;
2363   bool res = inline_to_all_callers_1 (node, data, &callers);
2364   /* Perform the delayed update of the overall summary of all callers
2365      processed.  This avoids quadratic behavior in the cases where
2366      we have a lot of calls to the same function.  */
2367   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2368        i != callers.end (); ++i)
2369     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2370   return res;
2371 }
2372
2373 /* Output overall time estimate.  */
2374 static void
2375 dump_overall_stats (void)
2376 {
2377   sreal sum_weighted = 0, sum = 0;
2378   struct cgraph_node *node;
2379
2380   FOR_EACH_DEFINED_FUNCTION (node)
2381     if (!node->inlined_to
2382         && !node->alias)
2383       {
2384         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2385         if (s != NULL)
2386           {
2387           sum += s->time;
2388           if (node->count.ipa ().initialized_p ())
2389             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2390           }
2391       }
2392   fprintf (dump_file, "Overall time estimate: "
2393            "%f weighted by profile: "
2394            "%f\n", sum.to_double (), sum_weighted.to_double ());
2395 }
2396
2397 /* Output some useful stats about inlining.  */
2398
2399 static void
2400 dump_inline_stats (void)
2401 {
2402   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2403   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2404   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2405   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2406   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2407   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2408   int64_t reason[CIF_N_REASONS][2];
2409   sreal reason_freq[CIF_N_REASONS];
2410   int i;
2411   struct cgraph_node *node;
2412
2413   memset (reason, 0, sizeof (reason));
2414   for (i=0; i < CIF_N_REASONS; i++)
2415     reason_freq[i] = 0;
2416   FOR_EACH_DEFINED_FUNCTION (node)
2417   {
2418     struct cgraph_edge *e;
2419     for (e = node->callees; e; e = e->next_callee)
2420       {
2421         if (e->inline_failed)
2422           {
2423             if (e->count.ipa ().initialized_p ())
2424               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2425             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2426             reason[(int) e->inline_failed][1] ++;
2427             if (DECL_VIRTUAL_P (e->callee->decl)
2428                 && e->count.ipa ().initialized_p ())
2429               {
2430                 if (e->indirect_inlining_edge)
2431                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2432                 else
2433                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2434               }
2435             else if (e->count.ipa ().initialized_p ())
2436               {
2437                 if (e->indirect_inlining_edge)
2438                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2439                 else
2440                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2441               }
2442           }
2443         else if (e->count.ipa ().initialized_p ())
2444           {
2445             if (e->speculative)
2446               {
2447                 if (DECL_VIRTUAL_P (e->callee->decl))
2448                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2449                 else
2450                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2451               }
2452             else if (DECL_VIRTUAL_P (e->callee->decl))
2453               {
2454                 if (e->indirect_inlining_edge)
2455                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2456                 else
2457                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2458               }
2459             else
2460               {
2461                 if (e->indirect_inlining_edge)
2462                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2463                 else
2464                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2465               }
2466           }
2467       }
2468     for (e = node->indirect_calls; e; e = e->next_callee)
2469       if (e->indirect_info->polymorphic
2470           & e->count.ipa ().initialized_p ())
2471         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2472       else if (e->count.ipa ().initialized_p ())
2473         indirect_cnt += e->count.ipa ().to_gcov_type ();
2474   }
2475   if (max_count.initialized_p ())
2476     {
2477       fprintf (dump_file,
2478                "Inlined %" PRId64 " + speculative "
2479                "%" PRId64 " + speculative polymorphic "
2480                "%" PRId64 " + previously indirect "
2481                "%" PRId64 " + virtual "
2482                "%" PRId64 " + virtual and previously indirect "
2483                "%" PRId64 "\n" "Not inlined "
2484                "%" PRId64 " + previously indirect "
2485                "%" PRId64 " + virtual "
2486                "%" PRId64 " + virtual and previously indirect "
2487                "%" PRId64 " + stil indirect "
2488                "%" PRId64 " + still indirect polymorphic "
2489                "%" PRId64 "\n", inlined_cnt,
2490                inlined_speculative, inlined_speculative_ply,
2491                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2492                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2493                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2494       fprintf (dump_file, "Removed speculations ");
2495       spec_rem.dump (dump_file);
2496       fprintf (dump_file, "\n");
2497     }
2498   dump_overall_stats ();
2499   fprintf (dump_file, "\nWhy inlining failed?\n");
2500   for (i = 0; i < CIF_N_REASONS; i++)
2501     if (reason[i][1])
2502       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2503                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2504                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2505 }
2506
2507 /* Called when node is removed.  */
2508
2509 static void
2510 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2511 {
2512   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2513     return;
2514
2515   hash_set<struct cgraph_node *> *removed
2516     = (hash_set<struct cgraph_node *> *) data;
2517   removed->add (node);
2518 }
2519
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.

   The work is done in this order:
     1) functions with the "flatten" attribute are flattened,
     2) inline_small_functions is run,
     3) unreachable nodes are removed and symtab->state advances from
        IPA_SSA to IPA_SSA_AFTER_INLINING,
     4) useless speculative edges are resolved and functions accepted by
        want_inline_function_to_all_callers_p are inlined into all their
        callers.

   Return TODO_remove_functions if step 4 resolved a speculation or
   attempted an inline-to-all-callers, and 0 otherwise.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i, j;
  int cold;
  bool remove_functions = false;

  /* ORDER is filled by ipa_reverse_postorder below and freed once the
     flattening pass is done.  */
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);
  spec_rem = profile_count::zero ();

  FOR_EACH_FUNCTION (node)
    {
      node->aux = 0;

      /* Recompute the default reasons for inlining because they may have
         changed during merging.  */
      if (in_lto_p)
        {
          for (cgraph_edge *e = node->callees; e; e = e->next_callee)
            {
              gcc_assert (e->inline_failed);
              initialize_inline_failed (e);
            }
          for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
            initialize_inline_failed (e);
        }
    }

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* First shrink order array, so that it only contains nodes with
     flatten attribute.  The selected nodes are compacted towards the end
     of the array, keeping their relative order.  */
  for (i = nnodes - 1, j = i; i >= 0; i--)
    {
      node = order[i];
      if (node->definition
          && lookup_attribute ("flatten",
                               DECL_ATTRIBUTES (node->decl)) != NULL)
        order[j--] = order[i];
    }

  /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
     nodes with flatten attribute.  If there is more than one such
     node, we need to register a node removal hook, as flatten_function
     could remove other nodes with flatten attribute.  See PR82801.  */
  struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
  hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
  /* nnodes - 1 - j nodes were selected, so this tests for more than
     one.  */
  if (j < nnodes - 2)
    {
      flatten_removed_nodes = new hash_set<struct cgraph_node *>;
      node_removal_hook_holder
        = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
                                           flatten_removed_nodes);
    }

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i > j; i--)
    {
      node = order[i];
      /* Skip nodes that the removal hook recorded as removed while an
         earlier node was being flattened.  */
      if (flatten_removed_nodes
          && flatten_removed_nodes->contains (node))
        continue;

      /* Handle nodes to be flattened.
         Ideally when processing callees we stop inlining at the
         entry of cycles, possibly cloning that entry point and
         try to flatten itself turning it into a self-recursive
         function.  */
      if (dump_file)
        fprintf (dump_file, "Flattening %s\n", node->name ());
      flatten_function (node, false, true);
    }

  /* Same condition as the one under which the hook was registered.  */
  if (j < nnodes - 2)
    {
      symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
      delete flatten_removed_nodes;
    }
  free (order);

  if (dump_file)
    dump_overall_stats ();

  inline_small_functions ();

  gcc_assert (symtab->state == IPA_SSA);
  symtab->state = IPA_SSA_AFTER_INLINING;
  /* Do first after-inlining removal.  We want to remove all "stale" extern
     inline functions and virtual functions so we really know what is called
     once.  */
  symtab->remove_unreachable_nodes (dump_file);

  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless of the callee size as long as function growth limits
     are met.  */
  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on functions to be inlined into all callers and "
             "removing useless speculations:\n");

  /* Inlining one function called once has good chance of preventing
     inlining other function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is a good cut without the extra pain of maintaining the queue.

     ??? this is not really fitting the bill perfectly: inlining function
     into callee often leads to better optimization of callee due to
     increased context for optimization.
     For example if main() function calls a function that outputs help
     and then function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.

     We probably want to implement new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
     to be hot.  */
  /* Two sweeps over all defined functions: COLD is 0 in the first and 1
     in the second, and is passed to
     want_inline_function_to_all_callers_p.  */
  for (cold = 0; cold <= 1; cold ++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
        {
          struct cgraph_edge *edge, *next;
          bool update=false;

          if (!opt_for_fn (node->decl, optimize)
              || !opt_for_fn (node->decl, flag_inline_functions_called_once))
            continue;

          /* Resolve speculative edges that speculation_useful_p rejects.
             NEXT is fetched before the edge is resolved.
             NOTE(review): presumably because resolve_speculation may
             remove EDGE from the callee list -- confirm.  */
          for (edge = node->callees; edge; edge = next)
            {
              next = edge->next_callee;
              if (edge->speculative && !speculation_useful_p (edge, false))
                {
                  if (edge->count.ipa ().initialized_p ())
                    spec_rem += edge->count.ipa ();
                  edge->resolve_speculation ();
                  update = true;
                  remove_functions = true;
                }
            }
          if (update)
            {
              /* Summaries are kept for the function NODE is inlined to,
                 if any, so that is the one to refresh.  */
              struct cgraph_node *where = node->inlined_to
                                          ? node->inlined_to : node;
              reset_edge_caches (where);
              ipa_update_overall_fn_summary (where);
            }
          if (want_inline_function_to_all_callers_p (node, cold))
            {
              /* NUM_CALLS is filled in by the sum_callers walk and then
                 used by inline_to_all_callers as the budget of calls it
                 may inline.  */
              int num_calls = 0;
              node->call_for_symbol_and_aliases (sum_callers, &num_calls,
                                                 true);
              /* Repeat the walk for as long as it returns true.
                 NOTE(review): presumably that means the callback
                 reported a removed callee and the walk has to be
                 restarted -- confirm against
                 call_for_symbol_and_aliases.  */
              while (node->call_for_symbol_and_aliases
                       (inline_to_all_callers, &num_calls, true))
                ;
              remove_functions = true;
            }
        }
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  ipa_free_all_structures_after_iinln ();

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "\nInlined %i calls, eliminated %i functions\n\n",
                 ncalls_inlined, nfunctions_inlined);
  if (dump_file)
    dump_inline_stats ();

  if (dump_file)
    ipa_dump_fn_summaries (dump_file);
  return remove_functions ? TODO_remove_functions : 0;
}
2706
2707 /* Inline always-inline function calls in NODE.  */
2708
2709 static bool
2710 inline_always_inline_functions (struct cgraph_node *node)
2711 {
2712   struct cgraph_edge *e;
2713   bool inlined = false;
2714
2715   for (e = node->callees; e; e = e->next_callee)
2716     {
2717       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2718       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2719         continue;
2720
2721       if (e->recursive_p ())
2722         {
2723           if (dump_enabled_p ())
2724             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2725                              "  Not inlining recursive call to %C.\n",
2726                              e->callee);
2727           e->inline_failed = CIF_RECURSIVE_INLINING;
2728           continue;
2729         }
2730
2731       if (!can_early_inline_edge_p (e))
2732         {
2733           /* Set inlined to true if the callee is marked "always_inline" but
2734              is not inlinable.  This will allow flagging an error later in
2735              expand_call_inline in tree-inline.c.  */
2736           if (lookup_attribute ("always_inline",
2737                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2738             inlined = true;
2739           continue;
2740         }
2741
2742       if (dump_enabled_p ())
2743         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2744                          "  Inlining %C into %C (always_inline).\n",
2745                          e->callee, e->caller);
2746       inline_call (e, true, NULL, NULL, false);
2747       inlined = true;
2748     }
2749   if (inlined)
2750     ipa_update_overall_fn_summary (node);
2751
2752   return inlined;
2753 }
2754
2755 /* Decide on the inlining.  We do so in the topological order to avoid
2756    expenses on updating data structures.  */
2757
2758 static bool
2759 early_inline_small_functions (struct cgraph_node *node)
2760 {
2761   struct cgraph_edge *e;
2762   bool inlined = false;
2763
2764   for (e = node->callees; e; e = e->next_callee)
2765     {
2766       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2767
2768       /* We can enounter not-yet-analyzed function during
2769          early inlining on callgraphs with strongly
2770          connected components.  */
2771       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2772       if (s == NULL || !s->inlinable || !e->inline_failed)
2773         continue;
2774
2775       /* Do not consider functions not declared inline.  */
2776       if (!DECL_DECLARED_INLINE_P (callee->decl)
2777           && !opt_for_fn (node->decl, flag_inline_small_functions)
2778           && !opt_for_fn (node->decl, flag_inline_functions))
2779         continue;
2780
2781       if (dump_enabled_p ())
2782         dump_printf_loc (MSG_NOTE, e->call_stmt,
2783                          "Considering inline candidate %C.\n",
2784                          callee);
2785
2786       if (!can_early_inline_edge_p (e))
2787         continue;
2788
2789       if (e->recursive_p ())
2790         {
2791           if (dump_enabled_p ())
2792             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2793                              "  Not inlining: recursive call.\n");
2794           continue;
2795         }
2796
2797       if (!want_early_inline_function_p (e))
2798         continue;
2799
2800       if (dump_enabled_p ())
2801         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2802                          " Inlining %C into %C.\n",
2803                          callee, e->caller);
2804       inline_call (e, true, NULL, NULL, false);
2805       inlined = true;
2806     }
2807
2808   if (inlined)
2809     ipa_update_overall_fn_summary (node);
2810
2811   return inlined;
2812 }
2813
2814 unsigned int
2815 early_inliner (function *fun)
2816 {
2817   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2818   struct cgraph_edge *edge;
2819   unsigned int todo = 0;
2820   int iterations = 0;
2821   bool inlined = false;
2822
2823   if (seen_error ())
2824     return 0;
2825
2826   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2827      happens when some pass decides to construct new function and
2828      cgraph_add_new_function calls lowering passes and early optimization on
2829      it.  This may confuse ourself when early inliner decide to inline call to
2830      function clone, because function clones don't have parameter list in
2831      ipa-prop matching their signature.  */
2832   if (ipa_node_params_sum)
2833     return 0;
2834
2835   if (flag_checking)
2836     node->verify ();
2837   node->remove_all_references ();
2838
2839   /* Even when not optimizing or not inlining inline always-inline
2840      functions.  */
2841   inlined = inline_always_inline_functions (node);
2842
2843   if (!optimize
2844       || flag_no_inline
2845       || !flag_early_inlining
2846       /* Never inline regular functions into always-inline functions
2847          during incremental inlining.  This sucks as functions calling
2848          always inline functions will get less optimized, but at the
2849          same time inlining of functions calling always inline
2850          function into an always inline function might introduce
2851          cycles of edges to be always inlined in the callgraph.
2852
2853          We might want to be smarter and just avoid this type of inlining.  */
2854       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2855           && lookup_attribute ("always_inline",
2856                                DECL_ATTRIBUTES (node->decl))))
2857     ;
2858   else if (lookup_attribute ("flatten",
2859                              DECL_ATTRIBUTES (node->decl)) != NULL)
2860     {
2861       /* When the function is marked to be flattened, recursively inline
2862          all calls in it.  */
2863       if (dump_enabled_p ())
2864         dump_printf (MSG_OPTIMIZED_LOCATIONS,
2865                      "Flattening %C\n", node);
2866       flatten_function (node, true, true);
2867       inlined = true;
2868     }
2869   else
2870     {
2871       /* If some always_inline functions was inlined, apply the changes.
2872          This way we will not account always inline into growth limits and
2873          moreover we will inline calls from always inlines that we skipped
2874          previously because of conditional above.  */
2875       if (inlined)
2876         {
2877           timevar_push (TV_INTEGRATION);
2878           todo |= optimize_inline_calls (current_function_decl);
2879           /* optimize_inline_calls call above might have introduced new
2880              statements that don't have inline parameters computed.  */
2881           for (edge = node->callees; edge; edge = edge->next_callee)
2882             {
2883               /* We can enounter not-yet-analyzed function during
2884                  early inlining on callgraphs with strongly
2885                  connected components.  */
2886               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2887               es->call_stmt_size
2888                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2889               es->call_stmt_time
2890                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2891             }
2892           ipa_update_overall_fn_summary (node);
2893           inlined = false;
2894           timevar_pop (TV_INTEGRATION);
2895         }
2896       /* We iterate incremental inlining to get trivial cases of indirect
2897          inlining.  */
2898       while (iterations < param_early_inliner_max_iterations
2899              && early_inline_small_functions (node))
2900         {
2901           timevar_push (TV_INTEGRATION);
2902           todo |= optimize_inline_calls (current_function_decl);
2903
2904           /* Technically we ought to recompute inline parameters so the new
2905              iteration of early inliner works as expected.  We however have
2906              values approximately right and thus we only need to update edge
2907              info that might be cleared out for newly discovered edges.  */
2908           for (edge = node->callees; edge; edge = edge->next_callee)
2909             {
2910               /* We have no summary for new bound store calls yet.  */
2911               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2912               es->call_stmt_size
2913                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2914               es->call_stmt_time
2915                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2916             }
2917           if (iterations < param_early_inliner_max_iterations - 1)
2918             ipa_update_overall_fn_summary (node);
2919           timevar_pop (TV_INTEGRATION);
2920           iterations++;
2921           inlined = false;
2922         }
2923       if (dump_file)
2924         fprintf (dump_file, "Iterations: %i\n", iterations);
2925     }
2926
2927   if (inlined)
2928     {
2929       timevar_push (TV_INTEGRATION);
2930       todo |= optimize_inline_calls (current_function_decl);
2931       timevar_pop (TV_INTEGRATION);
2932     }
2933
2934   fun->always_inline_functions_inlined = true;
2935
2936   return todo;
2937 }
2938
2939 /* Do inlining of small functions.  Doing so early helps profiling and other
2940    passes to be somewhat more effective and avoids some code duplication in
2941    later real inlining pass for testcases with very many function calls.  */
2942
2943 namespace {
2944
2945 const pass_data pass_data_early_inline =
2946 {
2947   GIMPLE_PASS, /* type */
2948   "einline", /* name */
2949   OPTGROUP_INLINE, /* optinfo_flags */
2950   TV_EARLY_INLINING, /* tv_id */
2951   PROP_ssa, /* properties_required */
2952   0, /* properties_provided */
2953   0, /* properties_destroyed */
2954   0, /* todo_flags_start */
2955   0, /* todo_flags_finish */
2956 };
2957
2958 class pass_early_inline : public gimple_opt_pass
2959 {
2960 public:
2961   pass_early_inline (gcc::context *ctxt)
2962     : gimple_opt_pass (pass_data_early_inline, ctxt)
2963   {}
2964
2965   /* opt_pass methods: */
2966   virtual unsigned int execute (function *);
2967
2968 }; // class pass_early_inline
2969
2970 unsigned int
2971 pass_early_inline::execute (function *fun)
2972 {
2973   return early_inliner (fun);
2974 }
2975
2976 } // anon namespace
2977
2978 gimple_opt_pass *
2979 make_pass_early_inline (gcc::context *ctxt)
2980 {
2981   return new pass_early_inline (ctxt);
2982 }
2983
/* The IPA inlining pass ("inline").  Its execute hook runs ipa_inline;
   inline_transform is registered as the per-function transform and all
   summary streaming hooks are left NULL.  */

namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
                      NULL, /* generate_summary */
                      NULL, /* write_summary */
                      NULL, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      inline_transform, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  /* The function argument is unused; ipa_inline works on the whole
     symbol table and returns the TODO flags of the pass.  */
  virtual unsigned int execute (function *) { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace
3021
3022 ipa_opt_pass_d *
3023 make_pass_ipa_inline (gcc::context *ctxt)
3024 {
3025   return new pass_ipa_inline (ctxt);
3026 }