/* Inlining decision heuristics.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth and so
     on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inlining

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result,
       the callees of the function seen by the early inliner were already
       optimized, and the results of early inlining add a lot of optimization
       opportunities for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate abstraction
       penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by the
       real IPA optimizers.

       Because of the lack of whole unit knowledge, the pass cannot really
       make good code size/performance tradeoffs.  It however does very
       simple speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the
       cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing
       so.

       This heuristic is not very good at inlining recursive calls.
       Recursive calls can be inlined with results similar to loop unrolling.
       To do so, a special purpose recursive inliner is executed on the
       function when a recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining
       leads to devirtualization and other modifications of the callgraph, so
       functions may become unreachable during the process.  Also functions
       declared as extern inline or virtual functions are removed, since
       after inlining we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by
       eliminating the need for an offline copy of the function.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "profile.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
#include "attribs.h"
#include "asan.h"

typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
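
/* Illustrative sketch (not part of the pass itself): the priority queue is
   keyed by edge badness, so extracting the minimum yields the currently
   most desirable inline candidate.  Assuming an edge E whose badness was
   computed by edge_badness below, typical usage looks like:

     edge_heap_t heap (sreal::min ());
     heap.insert (edge_badness (e, false), e);
     cgraph_edge *best = heap.extract_min ();  */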

/* Statistics we collect about the inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;

/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for the function e->caller is inlined into.  While doing
     so, work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, and not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
        limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
        stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * param_large_function_growth / 100;
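  /* For example, with a --param large-function-growth of 100 (a plausible
     default; the exact value is configurable) the combined body may grow to
     at most twice the size of the largest function involved.  */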

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > param_large_function_insns
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large I/O data structures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * param_stack_frame_growth / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
                   + outer_info->estimated_self_stack_size
                   + what_info->estimated_stack_size);
  /* Compare the new stack consumption with the stack consumption at
     the place the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit over-optimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > param_large_stack_frame)
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}

/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                       "  not inlinable: %C -> %C, %s\n",
                       e->caller, e->callee,
                       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
           || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
          && e->caller->lto_file_data
          && e->callee->ultimate_alias_target ()->lto_file_data)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                           "  LTO objects: %s, %s\n",
                           e->caller->lto_file_data->file_name,
                           e->callee->ultimate_alias_target ()->lto_file_data->file_name);
        }
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
        if (dump_file)
          cl_target_option_print_diff
            (dump_file, 2, target_opts_for_fn (e->caller->decl),
             target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
        if (dump_file)
          cl_optimization_print_diff
            (dump_file, 2, opts_for_fn (e->caller->decl),
             opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}

/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Allow inlining always_inline functions into no_sanitize_address
     functions.  */
  if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
      && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
           == sanitize_flags_p (SANITIZE_ADDRESS, callee))
          && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
              == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
          && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
              == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
}

/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where an exact match is needed for correctness.  */
#define check_match(flag) \
   (opts_for_fn (caller->decl)->x_##flag \
    != opts_for_fn (callee->decl)->x_##flag)
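/* As a concrete reading (illustrative, not an exhaustive specification):
   check_maybe_up (flag_errno_math) flags a mismatch when the flag differs
   between caller and callee and either the callee is not always_inline or
   the caller's value is smaller than the callee's - the direction in which
   inlining could weaken assumptions the callee was compiled with.  */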

/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
                   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
                 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of the FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
           || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }
  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}

/* Return the inline_insns_single limit for function N.  If HINT is true
   scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint)
{
  if (opt_for_fn (n->decl, optimize) >= 3)
    {
      if (hint)
        return param_max_inline_insns_single
               * param_inline_heuristics_hint_percent / 100;
      return param_max_inline_insns_single;
    }
  else
    {
      if (hint)
        return param_max_inline_insns_single_o2
               * param_inline_heuristics_hint_percent_o2 / 100;
      return param_max_inline_insns_single_o2;
    }
}

/* Return the inline_insns_auto limit for function N.  If HINT is true
   scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint)
    return max_inline_insns_auto * param_inline_heuristics_hint_percent / 100;
  return max_inline_insns_auto;
}
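
/* Worked example (numbers purely illustrative; both params are tunable):
   with --param max-inline-insns-auto=15 and
   --param inline-heuristics-hint-percent=400, inline_insns_auto returns
   15 without hints and 15 * 400 / 100 == 60 when inlining hints apply.  */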

/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
                             bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
                            DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
           && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
             (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
              && lookup_attribute ("always_inline",
                                   DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
         below and inlined across optimization boundaries.
         Enabling the checks below breaks several packages by refusing
         to inline library always_inline functions.  See PR65873.
         Disable the check for early inlining for now until a better
         solution is found.  */
      if (always_inline && early)
        ;
      /* There are some options that change IL semantics which means
         we cannot inline in these cases for correctness reasons.
         Not even for always_inline declared functions.  */
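      /* For example (an illustrative scenario, not an exhaustive list): if
         the caller is built with -ffast-math but the callee is not, then
         check_maybe_down (flag_unsafe_math_optimizations) fires below, since
         inlining would subject the callee's FP code to transformations it
         was not compiled to tolerate.  */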
      else if (check_match (flag_wrapv)
               || check_match (flag_trapv)
               || check_match (flag_pcc_struct_return)
               /* When the caller or callee does FP math, be sure FP codegen
                  flags are compatible.  */
               || ((caller_info->fp_expressions && callee_info->fp_expressions)
                   && (check_maybe_up (flag_rounding_math)
                       || check_maybe_up (flag_trapping_math)
                       || check_maybe_down (flag_unsafe_math_optimizations)
                       || check_maybe_down (flag_finite_math_only)
                       || check_maybe_up (flag_signaling_nans)
                       || check_maybe_down (flag_cx_limited_range)
                       || check_maybe_up (flag_signed_zeros)
                       || check_maybe_down (flag_associative_math)
                       || check_maybe_down (flag_reciprocal_math)
                       || check_maybe_down (flag_fp_int_builtin_inexact)
                       /* Strictly speaking only when the callee contains
                          function calls that may end up setting errno.  */
                       || check_maybe_up (flag_errno_math)))
               /* We do not want to make code compiled with exceptions to be
                  brought into a non-EH function unless we know that the
                  callee does not throw.
                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
               || (check_maybe_up (flag_non_call_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               || (check_maybe_up (flag_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               /* When devirtualization is disabled for the callee, it is not
                  safe to inline it as we possibly mangled the type info.
                  Allow early inlining of always inlines.  */
               || (!early && check_maybe_down (flag_devirtualize)))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
        ;
      /* When the user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If explicit optimize attributes are not used, the mismatch is caused
         by different command line options used to build different units.
         Do not care about COMDAT functions - those are intended to be
         optimized with the optimization flags of the module they are used
         in.  Also do not care about mixing up size/speed optimization when
         DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
                && !lookup_attribute ("optimize",
                                      DECL_ATTRIBUTES (caller->decl)))
               || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        ;
      /* If the mismatch is caused by merging two LTO units with different
         optimization flags we want to be a bit nicer.  However never inline
         if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
               || !opt_for_fn (caller->decl, optimize))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If the callee is optimized for size and the caller is not, allow
         inlining if code shrinks or we are within the
         param_max_inline_insns_single limit and the callee is inline (and
         thus likely a unified comdat).  This will allow the caller to run
         faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               > opt_for_fn (caller->decl, optimize_size))
        {
          int growth = estimate_edge_growth (e);
          if (growth > param_max_inline_insns_size
              && (!DECL_DECLARED_INLINE_P (callee->decl)
                  && growth >= MAX (inline_insns_single (caller, false),
                                    inline_insns_auto (caller, false))))
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
      /* If the callee is more aggressively optimized for performance than
         the caller, we generally want to inline only cheap (runtime wise)
         functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               < opt_for_fn (caller->decl, optimize_size)
               || (opt_for_fn (callee->decl, optimize)
                   > opt_for_fn (caller->decl, optimize)))
        {
          if (estimate_edge_time (e)
              >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }

    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}


/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* The early inliner might get called at the WPA stage when an IPA pass
     adds a new function.  In this case we cannot really do any early
     inlining because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner yet).  We don't have CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                         "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}


/* Return the number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}


/* Return true if we are interested in inlining the small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that, before the profile summary, all
     hot paths' IR looks exactly the same as in the profiled binary.  As a
     result, in the early inliner, we will disregard the size limit and
     inline those callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      int n;
      int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3
                                 ? param_early_inlining_insns
                                 : param_early_inlining_insns_o2;


      if (growth <= param_max_inline_insns_size)
        ;
      else if (!e->maybe_hot_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "call is cold and code would grow by %i\n",
                             e->caller, callee,
                             growth);
          want_inline = false;
        }
      else if (growth > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns%s\n",
                             e->caller, callee, growth,
                             opt_for_fn (e->caller->decl, optimize) >= 3
                             ? "" : "-O2");
          want_inline = false;
        }
      else if ((n = num_calls (callee)) != 0
               && growth * (n + 1) > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns%s "
                             "divided by number of calls\n",
                             e->caller, callee, growth,
                             opt_for_fn (e->caller->decl, optimize) >= 3
                             ? "" : "-O2");
          want_inline = false;
        }
    }
  return want_inline;
}

/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
                             sreal uninlined_call_time,
                             sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
                           sreal time,
                           sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match the one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
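
/* A worked example of the two estimates above (numbers made up for
   illustration): with caller_time == 100, freq == 2, an estimated call
   time of 10 and call_stmt_time == 1, the uninlined estimate is
   2*10 + 100 == 120 while the inlined estimate is 2*10 - 2*1 + 100 == 118,
   i.e. inlining saves the per-call overhead scaled by frequency.  */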

/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup (param_inline_min_speedup_o2 below -O3).  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to
                         : e->caller);
  int limit = opt_for_fn (caller->decl, optimize) >= 3
              ? param_inline_min_speedup
              : param_inline_min_speedup_o2;

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
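
/* E.g. with an uninlined time of 120, an inlined time of 100 and a
   min-speedup limit of 15 (illustrative values), the test reads
   (120 - 100) * 100 > 120 * 15, i.e. 2000 > 1800, so a speedup of roughly
   16% counts as big.  */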

/* Return true if we are interested in inlining the small function.
   When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check to see if the function can be a good
     inline candidate.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
           && ipa_fn_summaries->get (callee)->min_size
              - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_auto (e->caller, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
            || e->count.ipa ().nonzero_p ())
           && ipa_fn_summaries->get (callee)->min_size
              - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_single (e->caller, true))
    {
      if (opt_for_fn (e->caller->decl, optimize) >= 3)
        e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
                            ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
                            : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      else
        e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
                            ? CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT
                            : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
                                   | INLINE_HINT_known_hot
                                   | INLINE_HINT_loop_iterations
                                   | INLINE_HINT_loop_stride));

      if (growth <= param_max_inline_insns_size)
        ;
      /* Apply the param_max_inline_insns_single limit.  Do not do so when
         hints suggest that inlining the given function is very profitable.
         Avoid computation of big_speedup_p when it is not necessary to
         change the outcome of the decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_single (e->caller, apply_hints)
               && (apply_hints
                   || growth >= inline_insns_single (e->caller, true)
                   || !big_speedup_p (e)))
        {
          if (opt_for_fn (e->caller->decl, optimize) >= 3)
            e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          else
            e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !opt_for_fn (e->caller->decl, flag_inline_functions)
               && growth >= param_max_inline_insns_small)
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_NOT_DECLARED_INLINED;
              want_inline = false;
            }
        }
      /* Apply the param_max_inline_insns_auto limit for functions not
         declared inline.  Bypass the limit when the speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_auto (e->caller, apply_hints)
               && (apply_hints
                   || growth >= inline_insns_auto (e->caller, true)
                   || !big_speedup_p (e)))
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
              want_inline = false;
            }
        }
      /* If the call is cold, do not inline when the function body would
         grow.  */
      else if (!e->maybe_hot_p ()
               && (growth >= inline_insns_single (e->caller, false)
                   || growth_positive_p (callee, e, growth)))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}

/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just introduce new recursive calls.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = param_max_inline_recursive_depth_auto;

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = param_max_inline_recursive_depth;

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
           && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of a self recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self recursive function very
     small.  Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
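  /* A quick numeric reading (illustrative): with max_depth == 8 the allowed
     recursion probability starts at 1 - 1/8 == 0.875 and is squared for
     each already peeled level below, so deeper copies must be progressively
     less likely to recurse further.  */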
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     the recursion depth is large.  We reduce function call overhead and
     increase chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame
     setup, actually slowing down functions whose recursion tree is wide
     rather than deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self recursive call within a loop also results
     in large loop depths that generally optimize badly.  We may want to
     throttle down inlining in those cases.  In particular this seems to
     happen in one of the libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
          <= caller_freq
             * param_min_inline_recursive_probability)
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
                     "   not inlining recursively: %s\n", reason);
  return want_inline;
}

/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
          || !opt_for_fn (e->caller->decl, optimize))
        return true;
      if (!can_inline_edge_p (e, true))
        return true;
      if (e->recursive_p ())
        return true;
      if (!can_inline_edge_by_limits_p (e, true))
        return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
        *(bool *)has_hot_call = true;
    }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}

/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (growth_positive_p (node, NULL, INT_MIN) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
                                         true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}

/* A cost model driving the inlining heuristics in a way so the edges with
   the smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of the nodes affected are recomputed so the
   metrics may accurately depend on values such as the number of inlinable
   callers of the function or function body size.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When the callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
                        - callee_info->time * 101).to_int () <= 0
                       || callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
               edge->caller->dump_name (),
               edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
               growth,
               edge_time.to_double (),
               unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
        fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
        fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
                 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
        fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When profile is available, compute badness as:

                time_saved * caller_count
     goodness = -------------------------------------------------
                growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use a negative value to make calls with profile appear hotter
     than calls without.  */
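  /* Numerically (values purely illustrative): a call saving 2 time units
     with a caller count of 1000, on an edge growing the caller by 10
     instructions with an overall growth of 20 and a combined size of 100,
     gets goodness 2 * 1000 / (10 * 20 * 100) == 0.1, i.e. badness -0.1;
     a larger saving or hotter caller pushes badness further down, so the
     edge is extracted from the heap earlier.  */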
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
           || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();
      sreal inlined_time = compute_inlined_call_time (edge, edge_time, freq);

      numerator = (compute_uninlined_call_time (edge, unspec_edge_time, freq)
                   - inlined_time);
      if (numerator <= 0)
        numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
        numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
        numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

#if 1
      /* Look for inliner wrappers of the form:

         inline_caller ()
           {
             do_fast_job...
             if (need_more_work)
               noninline_callee ();
           }
         Without penalizing this case, we usually inline noninline_callee
         into the inline_caller because overall_growth is small, preventing
         further inlining of inline_caller.

         Penalize only callgraph edges to functions with small overall
         growth ...
         */
      if (growth > overall_growth
          /* ... and having only one caller which is not inlined ... */
          && callee_info->single_caller
          && !edge->caller->inlined_to
          /* ... and edges executed only conditionally ... */
          && freq < 1
          /* ... consider case where callee is not inline but caller is ... */
          && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
               && DECL_DECLARED_INLINE_P (caller->decl))
              /* ... or when early optimizers decided to split and edge
                 frequency still indicates splitting is a win ... */
              || (callee->split_part && !caller->split_part
                  && freq * 100 < param_partial_inlining_entry_probability
                  /* ... and do not overwrite user specified hints.  */
                  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
                      || DECL_DECLARED_INLINE_P (caller->decl)))))
        {
          ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
          int caller_growth = caller_info->growth;

          /* Only apply the penalty when the caller looks like an inline
             candidate, and it is not called once.  */
          if (!caller_info->single_caller && overall_growth < caller_growth
              && caller_info->inlinable
              && ipa_size_summaries->get (caller)->size
                 < (DECL_DECLARED_INLINE_P (caller->decl)
                    ? inline_insns_single (caller, false)
                    : inline_insns_auto (caller, false)))
            {
              if (dump)
                fprintf (dump_file,
                         "     Wrapper penalty.  Increasing growth %i to %i\n",
                         overall_growth, caller_growth);
              overall_growth = caller_growth;
            }
        }
#endif
      if (overall_growth > 0)
        {
          /* Strongly prefer functions with few callers that can be inlined
             fully.  The square root here leads to smaller binaries on
             average.  Watch however for extreme cases and return to a linear
             function when growth is large.  */
          if (overall_growth < 256)
            overall_growth *= overall_growth;
          else
            overall_growth += 256 * 256 - 256;
          denominator *= overall_growth;
        }
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
        {
          fprintf (dump_file,
                   "      %f: guessed profile. frequency %f, count %" PRId64
                   " caller count %" PRId64
                   " time w/o inlining %f, time with inlining %f"
                   " overall growth %i (current) %i (original)"
                   " %i (compensated)\n",
                   badness.to_double (),
                   freq.to_double (),
                   edge->count.ipa ().initialized_p ()
                   ? edge->count.ipa ().to_gcov_type () : -1,
                   caller->count.ipa ().initialized_p ()
                   ? caller->count.ipa ().to_gcov_type () : -1,
                   compute_uninlined_call_time (edge,
                                                unspec_edge_time, freq).to_double (),
                   inlined_time.to_double (),
                   estimate_growth (callee),
                   callee_info->growth, overall_growth);
        }
    }
  /* When a function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness = badness >> nest;
      else
        badness = badness << nest;
      if (dump)
        fprintf (dump_file, "      %f: no profile. nest %i\n",
                 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
                | INLINE_HINT_loop_iterations
                | INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}

/* Recompute badness of EDGE and update its key in HEAP if needed.  */
static inline void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
         heap that is unnecessarily expensive.
         We do lazy increases: after extracting the minimum, if the key
         turns out to be out of date, it is re-inserted into the heap
         with the correct value.  */
      if (badness < n->get_key ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file,
                       "  decreasing badness %s -> %s, %f to %f\n",
                       edge->caller->dump_name (),
                       edge->callee->dump_name (),
                       n->get_key ().to_double (),
                       badness.to_double ());
            }
          heap->decrease_key (n, badness);
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "  enqueuing call %s -> %s, badness %f\n",
                   edge->caller->dump_name (),
                   edge->callee->dump_name (),
                   badness.to_double ());
        }
      edge->aux = heap->insert (badness, edge);
    }
}


/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  struct ipa_ref *ref;

  if (where->inlined_to)
    where = where->inlined_to;

  reset_node_cache (where);

  if (edge_growth_cache != NULL)
    for (edge = where->callers; edge; edge = edge->next_caller)
      if (edge->inline_failed)
        edge_growth_cache->remove (edge);

  FOR_EACH_ALIAS (where, ref)
    reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        if (edge_growth_cache != NULL && e->inline_failed)
          edge_growth_cache->remove (e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}

/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
                    bitmap updated_nodes,
                    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  struct ipa_ref *ref;

  if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
      || node->inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
    return;

  FOR_EACH_ALIAS (node, ref)
    {
      struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        if (!check_inlinablity_for
            || check_inlinablity_for == edge)
          {
            if (can_inline_edge_p (edge, false)
                && want_inline_small_function_p (edge, false)
                && can_inline_edge_by_limits_p (edge, false))
              update_edge_key (heap, edge);
            else if (edge->aux)
              {
                report_inline_failed_reason (edge);
                heap->delete_node ((edge_heap_node_t *) edge->aux);
                edge->aux = NULL;
              }
          }
        else if (edge->aux)
          update_edge_key (heap, edge);
      }
}

/* Recompute HEAP nodes for each uninlined call in NODE.
   This is used when we know that edge badnesses are going only to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        enum availability avail;
        struct cgraph_node *callee;
        /* We do not reset the callee growth cache here.  Since we added a
           new call, growth should have just increased and consequently the
           badness metric doesn't need updating.  */
        if (e->inline_failed
            && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
            && ipa_fn_summaries->get (callee) != NULL
            && ipa_fn_summaries->get (callee)->inlinable
            && avail >= AVAIL_AVAILABLE
            && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
          {
            if (can_inline_edge_p (e, false)
                && want_inline_small_function_p (e, false)
                && can_inline_edge_by_limits_p (e, false))
              update_edge_key (heap, e);
            else if (e->aux)
              {
                report_inline_failed_reason (e);
                heap->delete_node ((edge_heap_node_t *) e->aux);
                e->aux = NULL;
              }
          }
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}

/* Enqueue all recursive calls from NODE into priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        edge_heap_t *heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
        || (e->callee->ultimate_alias_target (&avail, e->caller) == node
            && avail > AVAIL_INTERPOSABLE))
      heap->insert (-e->sreal_frequency (), e);
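  /* Note the negated frequency used as the key above: the heap always
     yields its minimum first, so negating makes the most frequently
     executed recursive calls come out first.  */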
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
1515
1516 /* Decide on recursive inlining: in the case function has recursive calls,
1517 inline until body size reaches given argument. If any new indirect edges
1518 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1519 is NULL. */
1520
1521 static bool
1522 recursive_inlining (struct cgraph_edge *edge,
1523 vec<cgraph_edge *> *new_edges)
1524 {
1525 int limit = param_max_inline_insns_recursive_auto;
1526 edge_heap_t heap (sreal::min ());
1527 struct cgraph_node *node;
1528 struct cgraph_edge *e;
1529 struct cgraph_node *master_clone = NULL, *next;
1530 int depth = 0;
1531 int n = 0;
1532
1533 node = edge->caller;
1534 if (node->inlined_to)
1535 node = node->inlined_to;
1536
1537 if (DECL_DECLARED_INLINE_P (node->decl))
1538 limit = param_max_inline_insns_recursive;
1539
1540 /* Make sure that function is small enough to be considered for inlining. */
1541 if (estimate_size_after_inlining (node, edge) >= limit)
1542 return false;
1543 lookup_recursive_calls (node, node, &heap);
1544 if (heap.empty ())
1545 return false;
1546
1547 if (dump_file)
1548 fprintf (dump_file,
1549 " Performing recursive inlining on %s\n",
1550 node->name ());
1551
1552 /* Do the inlining and update list of recursive call during process. */
1553 while (!heap.empty ())
1554 {
1555 struct cgraph_edge *curr = heap.extract_min ();
1556 struct cgraph_node *cnode, *dest = curr->callee;
1557
1558 if (!can_inline_edge_p (curr, true)
1559 || !can_inline_edge_by_limits_p (curr, true))
1560 continue;
1561
1562 /* MASTER_CLONE is produced in the case we already started modified
1563 the function. Be sure to redirect edge to the original body before
1564 estimating growths otherwise we will be seeing growths after inlining
1565 the already modified body. */
1566 if (master_clone)
1567 {
1568 curr->redirect_callee (master_clone);
1569 if (edge_growth_cache != NULL)
1570 edge_growth_cache->remove (curr);
1571 }
1572
1573 if (estimate_size_after_inlining (node, curr) > limit)
1574 {
1575 curr->redirect_callee (dest);
1576 if (edge_growth_cache != NULL)
1577 edge_growth_cache->remove (curr);
1578 break;
1579 }
1580
1581 depth = 1;
1582 for (cnode = curr->caller;
1583 cnode->inlined_to; cnode = cnode->callers->caller)
1584 if (node->decl
1585 == curr->callee->ultimate_alias_target ()->decl)
1586 depth++;
1587
1588 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1589 {
1590 curr->redirect_callee (dest);
1591 if (edge_growth_cache != NULL)
1592 edge_growth_cache->remove (curr);
1593 continue;
1594 }
1595
1596 if (dump_file)
1597 {
1598 fprintf (dump_file,
1599 " Inlining call of depth %i", depth);
1600 if (node->count.nonzero_p () && curr->count.initialized_p ())
1601 {
1602 fprintf (dump_file, " called approx. %.2f times per call",
1603 (double)curr->count.to_gcov_type ()
1604 / node->count.to_gcov_type ());
1605 }
1606 fprintf (dump_file, "\n");
1607 }
1608 if (!master_clone)
1609 {
1610 /* We need original clone to copy around. */
1611 master_clone = node->create_clone (node->decl, node->count,
1612 false, vNULL, true, NULL, NULL);
1613 for (e = master_clone->callees; e; e = e->next_callee)
1614 if (!e->inline_failed)
1615 clone_inlined_nodes (e, true, false, NULL);
1616 curr->redirect_callee (master_clone);
1617 if (edge_growth_cache != NULL)
1618 edge_growth_cache->remove (curr);
1619 }
1620
1621 inline_call (curr, false, new_edges, &overall_size, true);
1622 reset_node_cache (node);
1623 lookup_recursive_calls (node, curr->callee, &heap);
1624 n++;
1625 }
1626
1627 if (!heap.empty () && dump_file)
1628 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1629
1630 if (!master_clone)
1631 return false;
1632
1633 if (dump_enabled_p ())
1634 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1635 "\n Inlined %i times, "
1636 "body grown from size %i to %i, time %f to %f\n", n,
1637 ipa_size_summaries->get (master_clone)->size,
1638 ipa_size_summaries->get (node)->size,
1639 ipa_fn_summaries->get (master_clone)->time.to_double (),
1640 ipa_fn_summaries->get (node)->time.to_double ());
1641
/* Remove the master clone we used for inlining. We rely on the fact
that clones inlined into the master clone get queued just before the
master clone, so we don't need recursion. */
1645 for (node = symtab->first_function (); node != master_clone;
1646 node = next)
1647 {
1648 next = symtab->next_function (node);
1649 if (node->inlined_to == master_clone)
1650 node->remove ();
1651 }
1652 master_clone->remove ();
1653 return true;
1654 }
1655
1656
/* Given the whole compilation unit estimate of INSNS, compute how large
we can allow the unit to grow. */
1659
1660 static int
1661 compute_max_insns (int insns)
1662 {
1663 int max_insns = insns;
1664 if (max_insns < param_large_unit_insns)
1665 max_insns = param_large_unit_insns;
1666
1667 return ((int64_t) max_insns
1668 * (100 + param_inline_unit_growth) / 100);
1669 }
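/* A worked example with hypothetical numbers (not the actual parameter
defaults): with INSNS = 30000 and param_inline_unit_growth = 40 the
unit may grow to 30000 * (100 + 40) / 100 = 42000 instructions, while
a small unit of 4000 insns is first padded up to param_large_unit_insns
(say 10000) and may therefore grow to 10000 * 140 / 100 = 14000. */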
1670
1671
1672 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1673
1674 static void
1675 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1676 {
1677 while (new_edges.length () > 0)
1678 {
1679 struct cgraph_edge *edge = new_edges.pop ();
1680
1681 gcc_assert (!edge->aux);
1682 gcc_assert (edge->callee);
1683 if (edge->inline_failed
1684 && can_inline_edge_p (edge, true)
1685 && want_inline_small_function_p (edge, true)
1686 && can_inline_edge_by_limits_p (edge, true))
1687 edge->aux = heap->insert (edge_badness (edge, false), edge);
1688 }
1689 }
1690
1691 /* Remove EDGE from the fibheap. */
1692
1693 static void
1694 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1695 {
1696 if (e->aux)
1697 {
1698 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1699 e->aux = NULL;
1700 }
1701 }
1702
1703 /* Return true if speculation of edge E seems useful.
1704 If ANTICIPATE_INLINING is true, be conservative and hope that E
1705 may get inlined. */
1706
1707 bool
1708 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1709 {
1710 /* If we have already decided to inline the edge, it seems useful. */
1711 if (!e->inline_failed)
1712 return true;
1713
1714 enum availability avail;
1715 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1716 e->caller);
1717 struct cgraph_edge *direct, *indirect;
1718 struct ipa_ref *ref;
1719
1720 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1721
1722 if (!e->maybe_hot_p ())
1723 return false;
1724
1725 /* See if IP optimizations found something potentially useful about the
1726 function. For now we look only for CONST/PURE flags. Almost everything
1727 else we propagate is useless. */
1728 if (avail >= AVAIL_AVAILABLE)
1729 {
1730 int ecf_flags = flags_from_decl_or_type (target->decl);
1731 if (ecf_flags & ECF_CONST)
1732 {
1733 e->speculative_call_info (direct, indirect, ref);
1734 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1735 return true;
1736 }
1737 else if (ecf_flags & ECF_PURE)
1738 {
1739 e->speculative_call_info (direct, indirect, ref);
1740 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1741 return true;
1742 }
1743 }
/* If we did not manage to inline the function nor redirect
to an ipa-cp clone (these are seen by having the local flag set),
it is probably pointless to speculate unless the hardware is
missing an indirect call predictor. */
1748 if (!anticipate_inlining && !target->local)
1749 return false;
1750 /* For overwritable targets there is not much to do. */
1751 if (!can_inline_edge_p (e, false)
1752 || !can_inline_edge_by_limits_p (e, false, true))
1753 return false;
1754 /* OK, speculation seems interesting. */
1755 return true;
1756 }
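/* An illustrative source-level sketch (hypothetical names) of what a
speculative call looks like after the transformation:

if (fn == likely_target) // direct call; may be inlined or
r = likely_target (x); // known CONST/PURE
else
r = fn (x); // indirect fallback kept for correctness

Keeping the speculation is worthwhile when the direct branch can be
inlined or carries CONST/PURE information the indirect call lacks. */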
1757
1758 /* We know that EDGE is not going to be inlined.
1759 See if we can remove speculation. */
1760
1761 static void
1762 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1763 {
1764 if (edge->speculative && !speculation_useful_p (edge, false))
1765 {
1766 struct cgraph_node *node = edge->caller;
1767 struct cgraph_node *where = node->inlined_to
1768 ? node->inlined_to : node;
1769 auto_bitmap updated_nodes;
1770
1771 if (edge->count.ipa ().initialized_p ())
1772 spec_rem += edge->count.ipa ();
1773 edge->resolve_speculation ();
1774 reset_edge_caches (where);
1775 ipa_update_overall_fn_summary (where);
1776 update_caller_keys (edge_heap, where,
1777 updated_nodes, NULL);
1778 update_callee_keys (edge_heap, where,
1779 updated_nodes);
1780 }
1781 }
1782
/* Return true if NODE should be accounted for in the overall size estimate.
Skip all nodes optimized for size so we can measure the growth of the hot
part of the program regardless of the padding. */
1786
1787 bool
1788 inline_account_function_p (struct cgraph_node *node)
1789 {
1790 return (!DECL_EXTERNAL (node->decl)
1791 && !opt_for_fn (node->decl, optimize_size)
1792 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1793 }
1794
/* Count the number of callers of NODE and store it into DATA (which
points to an int). Worker for cgraph_for_node_and_aliases. */
1797
1798 static bool
1799 sum_callers (struct cgraph_node *node, void *data)
1800 {
1801 struct cgraph_edge *e;
1802 int *num_calls = (int *)data;
1803
1804 for (e = node->callers; e; e = e->next_caller)
1805 (*num_calls)++;
1806 return false;
1807 }
1808
/* We only propagate across edges with a non-interposable callee. */
1810
1811 inline bool
1812 ignore_edge_p (struct cgraph_edge *e)
1813 {
1814 enum availability avail;
1815 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1816 return (avail <= AVAIL_INTERPOSABLE);
1817 }
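/* For example (generic ELF behavior, nothing specific to this pass):
a default-visibility function in a shared library may be interposed
at load time, e.g. via LD_PRELOAD, so nothing derived from its
current body can be trusted across such an edge. */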
1818
/* We use a greedy algorithm for inlining small functions:
all inline candidates are put into a prioritized heap ordered by
increasing badness.

The inlining of small functions is bounded by the unit growth parameters. */
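/* A rough sketch of the machinery below (illustrative pseudocode only):

put all eligible edges into a heap keyed by badness;
while the heap is not empty:
take the edge with the smallest badness;
if its key is stale, re-insert it with the fresh badness;
if the unit growth limit is reached, fail the edge;
otherwise inline it and update the keys of affected edges. */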
1824
1825 static void
1826 inline_small_functions (void)
1827 {
1828 struct cgraph_node *node;
1829 struct cgraph_edge *edge;
1830 edge_heap_t edge_heap (sreal::min ());
1831 auto_bitmap updated_nodes;
1832 int min_size, max_size;
1833 auto_vec<cgraph_edge *> new_indirect_edges;
1834 int initial_size = 0;
1835 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1836 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1837 new_indirect_edges.create (8);
1838
1839 edge_removal_hook_holder
1840 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1841
1842 /* Compute overall unit size and other global parameters used by badness
1843 metrics. */
1844
1845 max_count = profile_count::uninitialized ();
1846 ipa_reduced_postorder (order, true, ignore_edge_p);
1847 free (order);
1848
1849 FOR_EACH_DEFINED_FUNCTION (node)
1850 if (!node->inlined_to)
1851 {
1852 if (!node->alias && node->analyzed
1853 && (node->has_gimple_body_p () || node->thunk.thunk_p)
1854 && opt_for_fn (node->decl, optimize))
1855 {
1856 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1857 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1858
/* Do not account external functions; they will be optimized out
if not inlined. Also only count the non-cold portion of the program. */
1861 if (inline_account_function_p (node))
1862 initial_size += ipa_size_summaries->get (node)->size;
1863 info->growth = estimate_growth (node);
1864
1865 int num_calls = 0;
1866 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1867 true);
1868 if (num_calls == 1)
1869 info->single_caller = true;
1870 if (dfs && dfs->next_cycle)
1871 {
1872 struct cgraph_node *n2;
1873 int id = dfs->scc_no + 1;
1874 for (n2 = node; n2;
1875 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1876 if (opt_for_fn (n2->decl, optimize))
1877 {
1878 ipa_fn_summary *info2 = ipa_fn_summaries->get
1879 (n2->inlined_to ? n2->inlined_to : n2);
1880 if (info2->scc_no)
1881 break;
1882 info2->scc_no = id;
1883 }
1884 }
1885 }
1886
1887 for (edge = node->callers; edge; edge = edge->next_caller)
1888 max_count = max_count.max (edge->count.ipa ());
1889 }
1890 ipa_free_postorder_info ();
1891 initialize_growth_caches ();
1892
1893 if (dump_file)
1894 fprintf (dump_file,
1895 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1896 initial_size);
1897
1898 overall_size = initial_size;
1899 max_size = compute_max_insns (overall_size);
1900 min_size = overall_size;
1901
1902 /* Populate the heap with all edges we might inline. */
1903
1904 FOR_EACH_DEFINED_FUNCTION (node)
1905 {
1906 bool update = false;
1907 struct cgraph_edge *next = NULL;
1908 bool has_speculative = false;
1909
1910 if (!opt_for_fn (node->decl, optimize))
1911 continue;
1912
1913 if (dump_file)
1914 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1915
1916 for (edge = node->callees; edge; edge = next)
1917 {
1918 next = edge->next_callee;
if (edge->inline_failed
&& !edge->aux
&& can_inline_edge_p (edge, true)
&& want_inline_small_function_p (edge, true)
&& can_inline_edge_by_limits_p (edge, true))
1925 {
1926 gcc_assert (!edge->aux);
1927 update_edge_key (&edge_heap, edge);
1928 }
1929 if (edge->speculative)
1930 has_speculative = true;
1931 }
1932 if (has_speculative)
1933 for (edge = node->callees; edge; edge = next)
1934 if (edge->speculative && !speculation_useful_p (edge,
1935 edge->aux != NULL))
1936 {
1937 edge->resolve_speculation ();
1938 update = true;
1939 }
1940 if (update)
1941 {
1942 struct cgraph_node *where = node->inlined_to
1943 ? node->inlined_to : node;
1944 ipa_update_overall_fn_summary (where);
1945 reset_edge_caches (where);
1946 update_caller_keys (&edge_heap, where,
1947 updated_nodes, NULL);
1948 update_callee_keys (&edge_heap, where,
1949 updated_nodes);
1950 bitmap_clear (updated_nodes);
1951 }
1952 }
1953
1954 gcc_assert (in_lto_p
1955 || !(max_count > 0)
1956 || (profile_info && flag_branch_probabilities));
1957
1958 while (!edge_heap.empty ())
1959 {
1960 int old_size = overall_size;
1961 struct cgraph_node *where, *callee;
1962 sreal badness = edge_heap.min_key ();
1963 sreal current_badness;
1964 int growth;
1965
1966 edge = edge_heap.extract_min ();
1967 gcc_assert (edge->aux);
1968 edge->aux = NULL;
1969 if (!edge->inline_failed || !edge->callee->analyzed)
1970 continue;
1971
/* Be sure that caches are kept consistent.
This check is affected by scaling roundoff errors when compiling for
IPA, thus we skip it in that case. */
1975 if (flag_checking && !edge->callee->count.ipa_p ()
1976 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1977 {
1978 sreal cached_badness = edge_badness (edge, false);
1979
1980 int old_size_est = estimate_edge_size (edge);
1981 sreal old_time_est = estimate_edge_time (edge);
1982 int old_hints_est = estimate_edge_hints (edge);
1983
1984 if (edge_growth_cache != NULL)
1985 edge_growth_cache->remove (edge);
1986 reset_node_cache (edge->caller->inlined_to
1987 ? edge->caller->inlined_to
1988 : edge->caller);
1989 gcc_assert (old_size_est == estimate_edge_size (edge));
1990 gcc_assert (old_time_est == estimate_edge_time (edge));
1991 /* FIXME:
1992
1993 gcc_assert (old_hints_est == estimate_edge_hints (edge));
1994
fails with profile feedback because some hints depend on the
maybe_hot_edge_p predicate, and because the callee gets inlined into
other calls, the edge may become cold.
This ought to be fixed by computing relative probabilities
for the given invocation, but that will be better done once the whole
code is converted to sreals. Disable for now and revert to the "wrong"
value so the enable/disable checking paths agree. */
2002 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2003
/* When updating the edge costs, we only decrease badness in the keys.
Increases of badness are handled lazily; when we see a key with an
out-of-date value on it, we re-insert it now. */
2007 current_badness = edge_badness (edge, false);
2008 gcc_assert (cached_badness == current_badness);
2009 gcc_assert (current_badness >= badness);
2010 }
2011 else
2012 current_badness = edge_badness (edge, false);
2013 if (current_badness != badness)
2014 {
2015 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2016 {
2017 edge->aux = edge_heap.insert (current_badness, edge);
2018 continue;
2019 }
2020 else
2021 badness = current_badness;
2022 }
2023
2024 if (!can_inline_edge_p (edge, true)
2025 || !can_inline_edge_by_limits_p (edge, true))
2026 {
2027 resolve_noninline_speculation (&edge_heap, edge);
2028 continue;
2029 }
2030
2031 callee = edge->callee->ultimate_alias_target ();
2032 growth = estimate_edge_growth (edge);
2033 if (dump_file)
2034 {
2035 fprintf (dump_file,
2036 "\nConsidering %s with %i size\n",
2037 callee->dump_name (),
2038 ipa_size_summaries->get (callee)->size);
2039 fprintf (dump_file,
2040 " to be inlined into %s in %s:%i\n"
2041 " Estimated badness is %f, frequency %.2f.\n",
2042 edge->caller->dump_name (),
2043 edge->call_stmt
2044 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2045 edge->call_stmt))
2046 > BUILTINS_LOCATION)
2047 ? gimple_filename ((const gimple *) edge->call_stmt)
2048 : "unknown",
2049 edge->call_stmt
2050 ? gimple_lineno ((const gimple *) edge->call_stmt)
2051 : -1,
2052 badness.to_double (),
2053 edge->sreal_frequency ().to_double ());
2054 if (edge->count.ipa ().initialized_p ())
2055 {
2056 fprintf (dump_file, " Called ");
2057 edge->count.ipa ().dump (dump_file);
2058 fprintf (dump_file, " times\n");
2059 }
2060 if (dump_flags & TDF_DETAILS)
2061 edge_badness (edge, true);
2062 }
2063
2064 if (overall_size + growth > max_size
2065 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2066 {
2067 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2068 report_inline_failed_reason (edge);
2069 resolve_noninline_speculation (&edge_heap, edge);
2070 continue;
2071 }
2072
2073 if (!want_inline_small_function_p (edge, true))
2074 {
2075 resolve_noninline_speculation (&edge_heap, edge);
2076 continue;
2077 }
2078
/* Heuristics for inlining small functions work poorly for
recursive calls where we have effects similar to loop unrolling.
When inlining such an edge seems profitable, leave the decision to
the special-purpose recursive inliner. */
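/* For instance, repeatedly inlining the recursive call of

long sum (long *a, long n) { return n ? a[0] + sum (a + 1, n - 1) : 0; }

into itself acts much like unrolling the equivalent loop (hypothetical
example; the actual decision is left to recursive_inlining below). */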
2083 if (edge->recursive_p ())
2084 {
2085 where = edge->caller;
2086 if (where->inlined_to)
2087 where = where->inlined_to;
2088 if (!recursive_inlining (edge,
2089 opt_for_fn (edge->caller->decl,
2090 flag_indirect_inlining)
2091 ? &new_indirect_edges : NULL))
2092 {
2093 edge->inline_failed = CIF_RECURSIVE_INLINING;
2094 resolve_noninline_speculation (&edge_heap, edge);
2095 continue;
2096 }
2097 reset_edge_caches (where);
2098 /* Recursive inliner inlines all recursive calls of the function
2099 at once. Consequently we need to update all callee keys. */
2100 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2101 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2102 update_callee_keys (&edge_heap, where, updated_nodes);
2103 bitmap_clear (updated_nodes);
2104 }
2105 else
2106 {
2107 struct cgraph_node *outer_node = NULL;
2108 int depth = 0;
2109
/* Consider the case where a self-recursive function A is inlined
into B. This is a desired optimization in some cases, since it
leads to an effect similar to loop peeling and we might completely
optimize out the recursive call. However we must be extra
selective. */
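/* E.g. if B calls a self-recursive A and we inline A into B, and then
the recursive call into that copy again, the first iterations of A are
peeled into B much like loop peeling; constant arguments may then allow
the remaining recursion to be optimized out entirely. */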
2115
2116 where = edge->caller;
2117 while (where->inlined_to)
2118 {
2119 if (where->decl == callee->decl)
2120 outer_node = where, depth++;
2121 where = where->callers->caller;
2122 }
2123 if (outer_node
2124 && !want_inline_self_recursive_call_p (edge, outer_node,
2125 true, depth))
2126 {
2127 edge->inline_failed
2128 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2129 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2130 resolve_noninline_speculation (&edge_heap, edge);
2131 continue;
2132 }
2133 else if (depth && dump_file)
2134 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2135
2136 gcc_checking_assert (!callee->inlined_to);
2137
2138 int old_size = ipa_size_summaries->get (where)->size;
2139 sreal old_time = ipa_fn_summaries->get (where)->time;
2140
2141 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2142 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2143
2144 reset_edge_caches (edge->callee);
2145
/* If the caller's size and time increased, we do not need to update
all edges because badness is not going to decrease. */
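/* Note the "&& 0" below: the cheaper single-callee update is currently
disabled, so we always fall back to updating WHERE. */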
2148 if (old_size <= ipa_size_summaries->get (where)->size
2149 && old_time <= ipa_fn_summaries->get (where)->time
2150 && 0)
2151 update_callee_keys (&edge_heap, edge->callee, updated_nodes);
2152 else
2153 update_callee_keys (&edge_heap, where, updated_nodes);
2154 }
2155 where = edge->caller;
2156 if (where->inlined_to)
2157 where = where->inlined_to;
2158
/* Our profitability metric can depend on local properties
such as the number of inlinable calls and the size of the function body.
After inlining these properties might change for the function we
inlined into (since its body size changed) and for the functions
called by the function we inlined (since the number of their inlinable
callers might change). */
2165 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2166 /* Offline copy count has possibly changed, recompute if profile is
2167 available. */
2168 struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2169 if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2170 update_callee_keys (&edge_heap, n, updated_nodes);
2171 bitmap_clear (updated_nodes);
2172
2173 if (dump_enabled_p ())
2174 {
2175 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2176
2177 /* dump_printf can't handle %+i. */
2178 char buf_net_change[100];
2179 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2180 overall_size - old_size);
2181
2182 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2183 " Inlined %C into %C which now has time %f and "
2184 "size %i, net change of %s.\n",
2185 edge->callee, edge->caller,
2186 s->time.to_double (),
2187 ipa_size_summaries->get (edge->caller)->size,
2188 buf_net_change);
2189 }
2190 if (min_size > overall_size)
2191 {
2192 min_size = overall_size;
2193 max_size = compute_max_insns (min_size);
2194
2195 if (dump_file)
2196 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2197 }
2198 }
2199
2200 free_growth_caches ();
2201 if (dump_enabled_p ())
2202 dump_printf (MSG_NOTE,
2203 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2204 initial_size, overall_size,
initial_size ? overall_size * 100 / initial_size - 100 : 0);
2206 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2207 }
2208
2209 /* Flatten NODE. Performed both during early inlining and
2210 at IPA inlining time. */
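/* For example, a function declared

__attribute__ ((flatten)) int f (void) { return g () + h (); }

gets the calls in its body (and, recursively, in the bodies inlined
into it) inlined wherever possible. */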
2211
2212 static void
2213 flatten_function (struct cgraph_node *node, bool early, bool update)
2214 {
2215 struct cgraph_edge *e;
2216
2217 /* We shouldn't be called recursively when we are being processed. */
2218 gcc_assert (node->aux == NULL);
2219
2220 node->aux = (void *) node;
2221
2222 for (e = node->callees; e; e = e->next_callee)
2223 {
2224 struct cgraph_node *orig_callee;
2225 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2226
/* We've hit a cycle? It is time to give up. */
2228 if (callee->aux)
2229 {
2230 if (dump_enabled_p ())
2231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2232 "Not inlining %C into %C to avoid cycle.\n",
2233 callee, e->caller);
2234 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2235 e->inline_failed = CIF_RECURSIVE_INLINING;
2236 continue;
2237 }
2238
2239 /* When the edge is already inlined, we just need to recurse into
2240 it in order to fully flatten the leaves. */
2241 if (!e->inline_failed)
2242 {
2243 flatten_function (callee, early, false);
2244 continue;
2245 }
2246
/* The flatten attribute needs to be processed during late inlining. For
extra code quality we however do flattening during early optimization,
too. */
2250 if (!early
2251 ? !can_inline_edge_p (e, true)
2252 && !can_inline_edge_by_limits_p (e, true)
2253 : !can_early_inline_edge_p (e))
2254 continue;
2255
2256 if (e->recursive_p ())
2257 {
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2260 "Not inlining: recursive call.\n");
2261 continue;
2262 }
2263
2264 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2265 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2266 {
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2269 "Not inlining: SSA form does not match.\n");
2270 continue;
2271 }
2272
2273 /* Inline the edge and flatten the inline clone. Avoid
2274 recursing through the original node if the node was cloned. */
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2277 " Inlining %C into %C.\n",
2278 callee, e->caller);
2279 orig_callee = callee;
2280 inline_call (e, true, NULL, NULL, false);
2281 if (e->callee != orig_callee)
2282 orig_callee->aux = (void *) node;
2283 flatten_function (e->callee, early, false);
2284 if (e->callee != orig_callee)
2285 orig_callee->aux = NULL;
2286 }
2287
2288 node->aux = NULL;
2289 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2290 if (update && opt_for_fn (where->decl, optimize))
2291 ipa_update_overall_fn_summary (where);
2292 }
2293
/* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
DATA points to the number of calls originally found so we avoid infinite
recursion. */
2297
2298 static bool
2299 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2300 hash_set<cgraph_node *> *callers)
2301 {
2302 int *num_calls = (int *)data;
2303 bool callee_removed = false;
2304
2305 while (node->callers && !node->inlined_to)
2306 {
2307 struct cgraph_node *caller = node->callers->caller;
2308
2309 if (!can_inline_edge_p (node->callers, true)
2310 || !can_inline_edge_by_limits_p (node->callers, true)
2311 || node->callers->recursive_p ())
2312 {
2313 if (dump_file)
2314 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2315 *num_calls = 0;
2316 return false;
2317 }
2318
2319 if (dump_file)
2320 {
2321 cgraph_node *ultimate = node->ultimate_alias_target ();
2322 fprintf (dump_file,
2323 "\nInlining %s size %i.\n",
2324 ultimate->name (),
2325 ipa_size_summaries->get (ultimate)->size);
2326 fprintf (dump_file,
2327 " Called once from %s %i insns.\n",
2328 node->callers->caller->name (),
2329 ipa_size_summaries->get (node->callers->caller)->size);
2330 }
2331
2332 /* Remember which callers we inlined to, delaying updating the
2333 overall summary. */
2334 callers->add (node->callers->caller);
2335 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2336 if (dump_file)
2337 fprintf (dump_file,
2338 " Inlined into %s which now has %i size\n",
2339 caller->name (),
2340 ipa_size_summaries->get (caller)->size);
2341 if (!(*num_calls)--)
2342 {
2343 if (dump_file)
2344 fprintf (dump_file, "New calls found; giving up.\n");
2345 return callee_removed;
2346 }
2347 if (callee_removed)
2348 return true;
2349 }
2350 return false;
2351 }
2352
2353 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2354 update. */
2355
2356 static bool
2357 inline_to_all_callers (struct cgraph_node *node, void *data)
2358 {
2359 hash_set<cgraph_node *> callers;
2360 bool res = inline_to_all_callers_1 (node, data, &callers);
2361 /* Perform the delayed update of the overall summary of all callers
2362 processed. This avoids quadratic behavior in the cases where
2363 we have a lot of calls to the same function. */
2364 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2365 i != callers.end (); ++i)
2366 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2367 return res;
2368 }
2369
2370 /* Output overall time estimate. */
2371 static void
2372 dump_overall_stats (void)
2373 {
2374 sreal sum_weighted = 0, sum = 0;
2375 struct cgraph_node *node;
2376
2377 FOR_EACH_DEFINED_FUNCTION (node)
2378 if (!node->inlined_to
2379 && !node->alias)
2380 {
2381 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2382 if (s != NULL)
2383 {
2384 sum += s->time;
2385 if (node->count.ipa ().initialized_p ())
2386 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2387 }
2388 }
2389 fprintf (dump_file, "Overall time estimate: "
2390 "%f weighted by profile: "
2391 "%f\n", sum.to_double (), sum_weighted.to_double ());
2392 }
2393
2394 /* Output some useful stats about inlining. */
2395
2396 static void
2397 dump_inline_stats (void)
2398 {
2399 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2400 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2401 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2402 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2403 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2404 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2405 int64_t reason[CIF_N_REASONS][2];
2406 sreal reason_freq[CIF_N_REASONS];
2407 int i;
2408 struct cgraph_node *node;
2409
2410 memset (reason, 0, sizeof (reason));
for (i = 0; i < CIF_N_REASONS; i++)
2412 reason_freq[i] = 0;
2413 FOR_EACH_DEFINED_FUNCTION (node)
2414 {
2415 struct cgraph_edge *e;
2416 for (e = node->callees; e; e = e->next_callee)
2417 {
2418 if (e->inline_failed)
2419 {
2420 if (e->count.ipa ().initialized_p ())
2421 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2422 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2423 reason[(int) e->inline_failed][1] ++;
2424 if (DECL_VIRTUAL_P (e->callee->decl)
2425 && e->count.ipa ().initialized_p ())
2426 {
2427 if (e->indirect_inlining_edge)
2428 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2429 else
2430 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2431 }
2432 else if (e->count.ipa ().initialized_p ())
2433 {
2434 if (e->indirect_inlining_edge)
2435 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2436 else
2437 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2438 }
2439 }
2440 else if (e->count.ipa ().initialized_p ())
2441 {
2442 if (e->speculative)
2443 {
2444 if (DECL_VIRTUAL_P (e->callee->decl))
2445 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2446 else
2447 inlined_speculative += e->count.ipa ().to_gcov_type ();
2448 }
2449 else if (DECL_VIRTUAL_P (e->callee->decl))
2450 {
2451 if (e->indirect_inlining_edge)
2452 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2453 else
2454 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2455 }
2456 else
2457 {
2458 if (e->indirect_inlining_edge)
2459 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2460 else
2461 inlined_cnt += e->count.ipa ().to_gcov_type ();
2462 }
2463 }
2464 }
2465 for (e = node->indirect_calls; e; e = e->next_callee)
if (e->indirect_info->polymorphic
&& e->count.ipa ().initialized_p ())
2468 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2469 else if (e->count.ipa ().initialized_p ())
2470 indirect_cnt += e->count.ipa ().to_gcov_type ();
2471 }
2472 if (max_count.initialized_p ())
2473 {
2474 fprintf (dump_file,
2475 "Inlined %" PRId64 " + speculative "
2476 "%" PRId64 " + speculative polymorphic "
2477 "%" PRId64 " + previously indirect "
2478 "%" PRId64 " + virtual "
2479 "%" PRId64 " + virtual and previously indirect "
2480 "%" PRId64 "\n" "Not inlined "
2481 "%" PRId64 " + previously indirect "
2482 "%" PRId64 " + virtual "
2483 "%" PRId64 " + virtual and previously indirect "
2484 "%" PRId64 " + stil indirect "
2485 "%" PRId64 " + still indirect polymorphic "
2486 "%" PRId64 "\n", inlined_cnt,
2487 inlined_speculative, inlined_speculative_ply,
2488 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2489 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2490 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2491 fprintf (dump_file, "Removed speculations ");
2492 spec_rem.dump (dump_file);
2493 fprintf (dump_file, "\n");
2494 }
2495 dump_overall_stats ();
2496 fprintf (dump_file, "\nWhy inlining failed?\n");
2497 for (i = 0; i < CIF_N_REASONS; i++)
2498 if (reason[i][1])
2499 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2500 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2501 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2502 }
2503
2504 /* Called when node is removed. */
2505
2506 static void
2507 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2508 {
2509 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2510 return;
2511
2512 hash_set<struct cgraph_node *> *removed
2513 = (hash_set<struct cgraph_node *> *) data;
2514 removed->add (node);
2515 }
2516
/* Decide on the inlining. We do so in topological order to avoid the
expense of updating data structures. */
2519
2520 static unsigned int
2521 ipa_inline (void)
2522 {
2523 struct cgraph_node *node;
2524 int nnodes;
2525 struct cgraph_node **order;
2526 int i, j;
2527 int cold;
2528 bool remove_functions = false;
2529
2530 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2531
2532 if (dump_file)
2533 ipa_dump_fn_summaries (dump_file);
2534
2535 nnodes = ipa_reverse_postorder (order);
2536 spec_rem = profile_count::zero ();
2537
2538 FOR_EACH_FUNCTION (node)
2539 {
2540 node->aux = 0;
2541
2542 /* Recompute the default reasons for inlining because they may have
2543 changed during merging. */
2544 if (in_lto_p)
2545 {
2546 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2547 {
2548 gcc_assert (e->inline_failed);
2549 initialize_inline_failed (e);
2550 }
2551 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2552 initialize_inline_failed (e);
2553 }
2554 }
2555
2556 if (dump_file)
2557 fprintf (dump_file, "\nFlattening functions:\n");
2558
/* First shrink the order array, so that it only contains nodes with
the flatten attribute. */
2561 for (i = nnodes - 1, j = i; i >= 0; i--)
2562 {
2563 node = order[i];
2564 if (node->definition
2565 && lookup_attribute ("flatten",
2566 DECL_ATTRIBUTES (node->decl)) != NULL)
2567 order[j--] = order[i];
2568 }
2569
/* After the above loop, order[j + 1] ... order[nnodes - 1] contain the
nodes with the flatten attribute. If there is more than one such
node, we need to register a node removal hook, as flatten_function
could remove other nodes with the flatten attribute. See PR82801. */
2574 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2575 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2576 if (j < nnodes - 2)
2577 {
2578 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2579 node_removal_hook_holder
2580 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2581 flatten_removed_nodes);
2582 }
2583
2584 /* In the first pass handle functions to be flattened. Do this with
2585 a priority so none of our later choices will make this impossible. */
2586 for (i = nnodes - 1; i > j; i--)
2587 {
2588 node = order[i];
2589 if (flatten_removed_nodes
2590 && flatten_removed_nodes->contains (node))
2591 continue;
2592
/* Handle nodes to be flattened.
Ideally when processing callees we would stop inlining at the
entry of cycles, possibly cloning that entry point and
trying to flatten it, turning it into a self-recursive
function. */
2598 if (dump_file)
2599 fprintf (dump_file, "Flattening %s\n", node->name ());
2600 flatten_function (node, false, true);
2601 }
2602
2603 if (j < nnodes - 2)
2604 {
2605 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2606 delete flatten_removed_nodes;
2607 }
2608 free (order);
2609
2610 if (dump_file)
2611 dump_overall_stats ();
2612
2613 inline_small_functions ();
2614
2615 gcc_assert (symtab->state == IPA_SSA);
2616 symtab->state = IPA_SSA_AFTER_INLINING;
2617 /* Do first after-inlining removal. We want to remove all "stale" extern
2618 inline functions and virtual functions so we really know what is called
2619 once. */
2620 symtab->remove_unreachable_nodes (dump_file);
2621
/* Inline functions with a property that after inlining into all callers the
code size will shrink because the out-of-line copy is eliminated.
We do this regardless of the callee size as long as function growth limits
are met. */
2626 if (dump_file)
2627 fprintf (dump_file,
2628 "\nDeciding on functions to be inlined into all callers and "
2629 "removing useless speculations:\n");
2630
/* Inlining one function called once has a good chance of preventing
inlining of another function into the same callee. Ideally we should
work in priority order, but probably inlining hot functions first
is a good cut without the extra pain of maintaining the queue.

??? This is not really fitting the bill perfectly: inlining a function
into a callee often leads to better optimization of the callee due to
increased context for optimization.
For example if the main() function calls a function that outputs help
and then a function that does the main optimization, we should inline
the second with priority even if both calls are cold by themselves.

We probably want to implement a new predicate replacing our use of
maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
to be hot. */
2646 for (cold = 0; cold <= 1; cold ++)
2647 {
2648 FOR_EACH_DEFINED_FUNCTION (node)
2649 {
2650 struct cgraph_edge *edge, *next;
bool update = false;
2652
2653 if (!opt_for_fn (node->decl, optimize)
2654 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2655 continue;
2656
2657 for (edge = node->callees; edge; edge = next)
2658 {
2659 next = edge->next_callee;
2660 if (edge->speculative && !speculation_useful_p (edge, false))
2661 {
2662 if (edge->count.ipa ().initialized_p ())
2663 spec_rem += edge->count.ipa ();
2664 edge->resolve_speculation ();
2665 update = true;
2666 remove_functions = true;
2667 }
2668 }
2669 if (update)
2670 {
2671 struct cgraph_node *where = node->inlined_to
2672 ? node->inlined_to : node;
2673 reset_edge_caches (where);
2674 ipa_update_overall_fn_summary (where);
2675 }
2676 if (want_inline_function_to_all_callers_p (node, cold))
2677 {
2678 int num_calls = 0;
2679 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2680 true);
2681 while (node->call_for_symbol_and_aliases
2682 (inline_to_all_callers, &num_calls, true))
2683 ;
2684 remove_functions = true;
2685 }
2686 }
2687 }
2688
2689 /* Free ipa-prop structures if they are no longer needed. */
2690 ipa_free_all_structures_after_iinln ();
2691
2692 if (dump_enabled_p ())
2693 dump_printf (MSG_NOTE,
2694 "\nInlined %i calls, eliminated %i functions\n\n",
2695 ncalls_inlined, nfunctions_inlined);
2696 if (dump_file)
2697 dump_inline_stats ();
2698
2699 if (dump_file)
2700 ipa_dump_fn_summaries (dump_file);
2701 return remove_functions ? TODO_remove_functions : 0;
2702 }
2703
2704 /* Inline always-inline function calls in NODE. */
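/* E.g. calls to a function declared

static inline int f (int) __attribute__ ((always_inline));

must be inlined here even at -O0; a failure to do so is diagnosed
later in expand_call_inline. */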
2705
2706 static bool
2707 inline_always_inline_functions (struct cgraph_node *node)
2708 {
2709 struct cgraph_edge *e;
2710 bool inlined = false;
2711
2712 for (e = node->callees; e; e = e->next_callee)
2713 {
2714 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2715 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2716 continue;
2717
2718 if (e->recursive_p ())
2719 {
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2722 " Not inlining recursive call to %C.\n",
2723 e->callee);
2724 e->inline_failed = CIF_RECURSIVE_INLINING;
2725 continue;
2726 }
2727
2728 if (!can_early_inline_edge_p (e))
2729 {
2730 /* Set inlined to true if the callee is marked "always_inline" but
2731 is not inlinable. This will allow flagging an error later in
2732 expand_call_inline in tree-inline.c. */
2733 if (lookup_attribute ("always_inline",
2734 DECL_ATTRIBUTES (callee->decl)) != NULL)
2735 inlined = true;
2736 continue;
2737 }
2738
2739 if (dump_enabled_p ())
2740 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2741 " Inlining %C into %C (always_inline).\n",
2742 e->callee, e->caller);
2743 inline_call (e, true, NULL, NULL, false);
2744 inlined = true;
2745 }
2746 if (inlined)
2747 ipa_update_overall_fn_summary (node);
2748
2749 return inlined;
2750 }
2751
/* Do early inlining of small functions called from NODE.
Return true if something was inlined. */
2754
2755 static bool
2756 early_inline_small_functions (struct cgraph_node *node)
2757 {
2758 struct cgraph_edge *e;
2759 bool inlined = false;
2760
2761 for (e = node->callees; e; e = e->next_callee)
2762 {
2763 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2764
/* We can encounter a not-yet-analyzed function during
early inlining on callgraphs with strongly
connected components. */
2768 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2769 if (s == NULL || !s->inlinable || !e->inline_failed)
2770 continue;
2771
2772 /* Do not consider functions not declared inline. */
2773 if (!DECL_DECLARED_INLINE_P (callee->decl)
2774 && !opt_for_fn (node->decl, flag_inline_small_functions)
2775 && !opt_for_fn (node->decl, flag_inline_functions))
2776 continue;
2777
2778 if (dump_enabled_p ())
2779 dump_printf_loc (MSG_NOTE, e->call_stmt,
2780 "Considering inline candidate %C.\n",
2781 callee);
2782
2783 if (!can_early_inline_edge_p (e))
2784 continue;
2785
2786 if (e->recursive_p ())
2787 {
2788 if (dump_enabled_p ())
2789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2790 " Not inlining: recursive call.\n");
2791 continue;
2792 }
2793
2794 if (!want_early_inline_function_p (e))
2795 continue;
2796
2797 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2799 " Inlining %C into %C.\n",
2800 callee, e->caller);
2801 inline_call (e, true, NULL, NULL, false);
2802 inlined = true;
2803 }
2804
2805 if (inlined)
2806 ipa_update_overall_fn_summary (node);
2807
2808 return inlined;
2809 }
2810
2811 unsigned int
2812 early_inliner (function *fun)
2813 {
2814 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2815 struct cgraph_edge *edge;
2816 unsigned int todo = 0;
2817 int iterations = 0;
2818 bool inlined = false;
2819
2820 if (seen_error ())
2821 return 0;
2822
/* Do nothing if the datastructures for the ipa-inliner are already
computed. This happens when some pass decides to construct a new
function and cgraph_add_new_function calls lowering passes and early
optimization on it. This may confuse us when the early inliner decides
to inline a call to a function clone, because function clones don't
have parameter lists in ipa-prop matching their signatures. */
2829 if (ipa_node_params_sum)
2830 return 0;
2831
2832 if (flag_checking)
2833 node->verify ();
2834 node->remove_all_references ();
2835
/* Even when not optimizing or not inlining, inline always-inline
functions. */
2838 inlined = inline_always_inline_functions (node);
2839
2840 if (!optimize
2841 || flag_no_inline
2842 || !flag_early_inlining
/* Never inline regular functions into always-inline functions
during incremental inlining. This sucks as functions calling
always-inline functions will get less optimized, but at the
same time inlining of functions calling an always-inline
function into an always-inline function might introduce
cycles of edges to be always inlined in the callgraph.

We might want to be smarter and just avoid this type of inlining. */
2851 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2852 && lookup_attribute ("always_inline",
2853 DECL_ATTRIBUTES (node->decl))))
2854 ;
2855 else if (lookup_attribute ("flatten",
2856 DECL_ATTRIBUTES (node->decl)) != NULL)
2857 {
2858 /* When the function is marked to be flattened, recursively inline
2859 all calls in it. */
2860 if (dump_enabled_p ())
2861 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2862 "Flattening %C\n", node);
2863 flatten_function (node, true, true);
2864 inlined = true;
2865 }
2866 else
2867 {
/* If some always_inline functions were inlined, apply the changes.
This way we will not account always-inlines in the growth limits and
moreover we will inline calls from always-inlines that we skipped
previously because of the conditional above. */
2872 if (inlined)
2873 {
2874 timevar_push (TV_INTEGRATION);
2875 todo |= optimize_inline_calls (current_function_decl);
2876 /* optimize_inline_calls call above might have introduced new
2877 statements that don't have inline parameters computed. */
2878 for (edge = node->callees; edge; edge = edge->next_callee)
2879 {
/* We can encounter a not-yet-analyzed function during
early inlining on callgraphs with strongly
connected components. */
2883 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2884 es->call_stmt_size
2885 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2886 es->call_stmt_time
2887 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2888 }
2889 ipa_update_overall_fn_summary (node);
2890 inlined = false;
2891 timevar_pop (TV_INTEGRATION);
2892 }
2893 /* We iterate incremental inlining to get trivial cases of indirect
2894 inlining. */
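/* For example (hypothetical code): after inlining

static int apply (int (*f) (int), int x) { return f (x); }

into a caller that passes a known function, the indirect call f (x)
becomes a direct call that the next iteration can inline as well. */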
2895 while (iterations < param_early_inliner_max_iterations
2896 && early_inline_small_functions (node))
2897 {
2898 timevar_push (TV_INTEGRATION);
2899 todo |= optimize_inline_calls (current_function_decl);
2900
2901 /* Technically we ought to recompute inline parameters so the new
2902 iteration of early inliner works as expected. We however have
2903 values approximately right and thus we only need to update edge
2904 info that might be cleared out for newly discovered edges. */
2905 for (edge = node->callees; edge; edge = edge->next_callee)
2906 {
2907 /* We have no summary for new bound store calls yet. */
2908 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2909 es->call_stmt_size
2910 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2911 es->call_stmt_time
2912 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2913 }
2914 if (iterations < param_early_inliner_max_iterations - 1)
2915 ipa_update_overall_fn_summary (node);
2916 timevar_pop (TV_INTEGRATION);
2917 iterations++;
2918 inlined = false;
2919 }
2920 if (dump_file)
2921 fprintf (dump_file, "Iterations: %i\n", iterations);
2922 }
2923
2924 if (inlined)
2925 {
2926 timevar_push (TV_INTEGRATION);
2927 todo |= optimize_inline_calls (current_function_decl);
2928 timevar_pop (TV_INTEGRATION);
2929 }
2930
2931 fun->always_inline_functions_inlined = true;
2932
2933 return todo;
2934 }
2935
/* Do inlining of small functions. Doing so early helps profiling and other
passes to be somewhat more effective and avoids some code duplication in
the later real inlining pass for testcases with very many function calls. */
2939
2940 namespace {
2941
2942 const pass_data pass_data_early_inline =
2943 {
2944 GIMPLE_PASS, /* type */
2945 "einline", /* name */
2946 OPTGROUP_INLINE, /* optinfo_flags */
2947 TV_EARLY_INLINING, /* tv_id */
2948 PROP_ssa, /* properties_required */
2949 0, /* properties_provided */
2950 0, /* properties_destroyed */
2951 0, /* todo_flags_start */
2952 0, /* todo_flags_finish */
2953 };
2954
2955 class pass_early_inline : public gimple_opt_pass
2956 {
2957 public:
2958 pass_early_inline (gcc::context *ctxt)
2959 : gimple_opt_pass (pass_data_early_inline, ctxt)
2960 {}
2961
2962 /* opt_pass methods: */
2963 virtual unsigned int execute (function *);
2964
2965 }; // class pass_early_inline
2966
2967 unsigned int
2968 pass_early_inline::execute (function *fun)
2969 {
2970 return early_inliner (fun);
2971 }
2972
2973 } // anon namespace
2974
2975 gimple_opt_pass *
2976 make_pass_early_inline (gcc::context *ctxt)
2977 {
2978 return new pass_early_inline (ctxt);
2979 }
2980
2981 namespace {
2982
2983 const pass_data pass_data_ipa_inline =
2984 {
2985 IPA_PASS, /* type */
2986 "inline", /* name */
2987 OPTGROUP_INLINE, /* optinfo_flags */
2988 TV_IPA_INLINING, /* tv_id */
2989 0, /* properties_required */
2990 0, /* properties_provided */
2991 0, /* properties_destroyed */
2992 0, /* todo_flags_start */
2993 ( TODO_dump_symtab ), /* todo_flags_finish */
2994 };
2995
2996 class pass_ipa_inline : public ipa_opt_pass_d
2997 {
2998 public:
2999 pass_ipa_inline (gcc::context *ctxt)
3000 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3001 NULL, /* generate_summary */
3002 NULL, /* write_summary */
3003 NULL, /* read_summary */
3004 NULL, /* write_optimization_summary */
3005 NULL, /* read_optimization_summary */
3006 NULL, /* stmt_fixup */
3007 0, /* function_transform_todo_flags_start */
3008 inline_transform, /* function_transform */
3009 NULL) /* variable_transform */
3010 {}
3011
3012 /* opt_pass methods: */
3013 virtual unsigned int execute (function *) { return ipa_inline (); }
3014
3015 }; // class pass_ipa_inline
3016
3017 } // anon namespace
3018
3019 ipa_opt_pass_d *
3020 make_pass_ipa_inline (gcc::context *ctxt)
3021 {
3022 return new pass_ipa_inline (ctxt);
3023 }