gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
      can_inline_edge_p checks whether a particular inlining is allowed
      by the limits specified by the user (allowed function growth, stack
      growth and so on).
  30
      Functions are inlined when it is obvious the result is profitable (such
      as functions called once or when inlining reduces code size).
      In addition to that we perform inlining of small functions and recursive
      inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
         form just before this pass and optimized subsequently. As a result, the
         callees of the function seen by the early inliner were already
         optimized and the results of early inlining add a lot of optimization
         opportunities for the local optimization.
  52
         The pass handles the obvious inlining decisions within the compilation
         unit - inlining auto inline functions, inlining for size and
         flattening.

         The main strength of the pass is the ability to eliminate abstraction
         penalty in C++ code (via combination of inlining and early
         optimization) and thus improve quality of analysis done by real IPA
         optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
         This heuristic is not very good at inlining recursive calls. Recursive
         calls can be inlined with results similar to loop unrolling. To do so,
         a special purpose recursive inliner is executed on a function when
         a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "tree-vrp.h"
 112 #include "ipa-prop.h"
 113 #include "ipa-fnsummary.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117 #include "auto-profile.h"
 118 #include "builtins.h"
 119 #include "fibonacci_heap.h"
 120 #include "stringpool.h"
 121 #include "attribs.h"
 122 #include "asan.h"
 123
 124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 126
 127 /* Statistics we collect about inlining algorithm.  */
 128 static int overall_size;
 129 static profile_count max_count;
 130 static profile_count spec_rem;
 131
 132 /* Return false when inlining edge E would lead to violating
 133    limits on function unit growth or stack usage growth.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth in stack usage
 141    of the callers.  We want to prevent applications from segfaulting
 142    on stack overflow when functions with huge stack frames gets
 143    inlined. */
 144
 145 static bool
 146 caller_growth_limits (struct cgraph_edge *e)
 147 {
 148   struct cgraph_node *to = e->caller;
 149   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 150   int newsize;
 151   int limit = 0;
 152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 153   ipa_fn_summary *info, *what_info;
 154   ipa_fn_summary *outer_info = ipa_fn_summaries->get (to);
 155
 156   /* Look for function e->caller is inlined to.  While doing
 157      so work out the largest function body on the way.  As
 158      described above, we want to base our function growth
 159      limits based on that.  Not on the self size of the
 160      outer function, not on the self size of inline code
 161      we immediately inline to.  This is the most relaxed
 162      interpretation of the rule "do not grow large functions
 163      too much in order to prevent compiler from exploding".  */
 164   while (true)
 165     {
 166       info = ipa_fn_summaries->get (to);
 167       if (limit < info->self_size)
 168         limit = info->self_size;
 169       if (stack_size_limit < info->estimated_self_stack_size)
 170         stack_size_limit = info->estimated_self_stack_size;
 171       if (to->global.inlined_to)
 172         to = to->callers->caller;
 173       else
 174         break;
 175     }
 176
 177   what_info = ipa_fn_summaries->get (what);
 178
 179   if (limit < what_info->self_size)
 180     limit = what_info->self_size;
 181
 182   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 183
 184   /* Check the size after inlining against the function limits.  But allow
 185      the function to shrink if it went over the limits by forced inlining.  */
 186   newsize = estimate_size_after_inlining (to, e);
 187   if (newsize >= info->size
 188       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 189       && newsize > limit)
 190     {
 191       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 192       return false;
 193     }
 194
 195   if (!what_info->estimated_stack_size)
 196     return true;
 197
 198   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 199      due to large i/o datastructures used by the Fortran front-end.
 200      We ought to ignore this limit when we know that the edge is executed
 201      on every invocation of the caller (i.e. its call statement dominates
 202      exit block).  We do not track this information, yet.  */
 203   stack_size_limit += ((gcov_type)stack_size_limit
 204                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 205
 206   inlined_stack = (outer_info->stack_frame_offset
 207                    + outer_info->estimated_self_stack_size
 208                    + what_info->estimated_stack_size);
 209   /* Check new stack consumption with stack consumption at the place
 210      stack is used.  */
 211   if (inlined_stack > stack_size_limit
 212       /* If function already has large stack usage from sibling
 213          inline call, we can inline, too.
 214          This bit overoptimistically assume that we are good at stack
 215          packing.  */
 216       && inlined_stack > info->estimated_stack_size
 217       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 218     {
 219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 220       return false;
 221     }
 222   return true;
 223 }
 224
 225 /* Dump info about why inlining has failed.  */
 226
 227 static void
 228 report_inline_failed_reason (struct cgraph_edge *e)
 229 {
 230   if (dump_enabled_p ())
 231     {
 232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 233                        "  not inlinable: %C -> %C, %s\n",
 234                        e->caller, e->callee,
 235                        cgraph_inline_failed_string (e->inline_failed));
 236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 237            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 238           && e->caller->lto_file_data
 239           && e->callee->ultimate_alias_target ()->lto_file_data)
 240         {
 241           dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 242                            "  LTO objects: %s, %s\n",
 243                            e->caller->lto_file_data->file_name,
 244                            e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 245         }
 246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 247         if (dump_file)
 248           cl_target_option_print_diff
 249             (dump_file, 2, target_opts_for_fn (e->caller->decl),
 250              target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 252         if (dump_file)
 253           cl_optimization_print_diff
 254             (dump_file, 2, opts_for_fn (e->caller->decl),
 255              opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 256     }
 257 }
 258
 259  /* Decide whether sanitizer-related attributes allow inlining. */
 260
 261 static bool
 262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 263 {
 264   if (!caller || !callee)
 265     return true;
 266
 267   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
 268            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
 269           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 270               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 271           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 272               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 273 }
 274
 275 /* Used for flags where it is safe to inline when caller's value is
 276    grater than callee's.  */
 277 #define check_maybe_up(flag) \
 278       (opts_for_fn (caller->decl)->x_##flag             \
 279        != opts_for_fn (callee->decl)->x_##flag          \
 280        && (!always_inline                               \
 281            || opts_for_fn (caller->decl)->x_##flag      \
 282               < opts_for_fn (callee->decl)->x_##flag))
 283 /* Used for flags where it is safe to inline when caller's value is
 284    smaller than callee's.  */
 285 #define check_maybe_down(flag) \
 286       (opts_for_fn (caller->decl)->x_##flag             \
 287        != opts_for_fn (callee->decl)->x_##flag          \
 288        && (!always_inline                               \
 289            || opts_for_fn (caller->decl)->x_##flag      \
 290               > opts_for_fn (callee->decl)->x_##flag))
 291 /* Used for flags where exact match is needed for correctness.  */
 292 #define check_match(flag) \
 293       (opts_for_fn (caller->decl)->x_##flag             \
 294        != opts_for_fn (callee->decl)->x_##flag)
 295
 296 /* Decide if we can inline the edge and possibly update
 297    inline_failed reason.
 298    We check whether inlining is possible at all and whether
 299    caller growth limits allow doing so.
 300
 301    if REPORT is true, output reason to the dump file. */
 302
 303 static bool
 304 can_inline_edge_p (struct cgraph_edge *e, bool report,
 305                    bool early = false)
 306 {
 307   gcc_checking_assert (e->inline_failed);
 308
 309   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 310     {
 311       if (report)
 312         report_inline_failed_reason (e);
 313       return false;
 314     }
 315
 316   bool inlinable = true;
 317   enum availability avail;
 318   cgraph_node *caller = e->caller->global.inlined_to
 319                         ? e->caller->global.inlined_to : e->caller;
 320   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 321
 322   if (!callee->definition)
 323     {
 324       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 325       inlinable = false;
 326     }
 327   if (!early && (!opt_for_fn (callee->decl, optimize)
 328                  || !opt_for_fn (caller->decl, optimize)))
 329     {
 330       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 331       inlinable = false;
 332     }
 333   else if (callee->calls_comdat_local)
 334     {
 335       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 336       inlinable = false;
 337     }
 338   else if (avail <= AVAIL_INTERPOSABLE)
 339     {
 340       e->inline_failed = CIF_OVERWRITABLE;
 341       inlinable = false;
 342     }
 343   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 344      initialized to one of FINAL_ERROR reasons.  */
 345   else if (e->call_stmt_cannot_inline_p)
 346     gcc_unreachable ();
 347   /* Don't inline if the functions have different EH personalities.  */
 348   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 349            && DECL_FUNCTION_PERSONALITY (callee->decl)
 350            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 351                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 352     {
 353       e->inline_failed = CIF_EH_PERSONALITY;
 354       inlinable = false;
 355     }
 356   /* TM pure functions should not be inlined into non-TM_pure
 357      functions.  */
 358   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 359     {
 360       e->inline_failed = CIF_UNSPECIFIED;
 361       inlinable = false;
 362     }
 363   /* Check compatibility of target optimization options.  */
 364   else if (!targetm.target_option.can_inline_p (caller->decl,
 365                                                 callee->decl))
 366     {
 367       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 368       inlinable = false;
 369     }
 370   else if (ipa_fn_summaries->get (callee) == NULL
 371            || !ipa_fn_summaries->get (callee)->inlinable)
 372     {
 373       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 374       inlinable = false;
 375     }
 376   /* Don't inline a function with mismatched sanitization attributes. */
 377   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 378     {
 379       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 380       inlinable = false;
 381     }
 382   else if (callee->externally_visible
 383            && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
 384     {
 385       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
 386       inlinable = false;
 387     }
 388   if (!inlinable && report)
 389     report_inline_failed_reason (e);
 390   return inlinable;
 391 }
 392
 393 /* Decide if we can inline the edge and possibly update
 394    inline_failed reason.
 395    We check whether inlining is possible at all and whether
 396    caller growth limits allow doing so.
 397
 398    if REPORT is true, output reason to the dump file.
 399
 400    if DISREGARD_LIMITS is true, ignore size limits.  */
 401
 402 static bool
 403 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
 404                              bool disregard_limits = false, bool early = false)
 405 {
 406   gcc_checking_assert (e->inline_failed);
 407
 408   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 409     {
 410       if (report)
 411         report_inline_failed_reason (e);
 412       return false;
 413     }
 414
 415   bool inlinable = true;
 416   enum availability avail;
 417   cgraph_node *caller = e->caller->global.inlined_to
 418                         ? e->caller->global.inlined_to : e->caller;
 419   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 420   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 421   tree callee_tree
 422     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 423   /* Check if caller growth allows the inlining.  */
 424   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 425       && !disregard_limits
 426       && !lookup_attribute ("flatten",
 427                  DECL_ATTRIBUTES (caller->decl))
 428       && !caller_growth_limits (e))
 429     inlinable = false;
 430   /* Don't inline a function with a higher optimization level than the
 431      caller.  FIXME: this is really just tip of iceberg of handling
 432      optimization attribute.  */
 433   else if (caller_tree != callee_tree)
 434     {
 435       bool always_inline =
 436              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 437               && lookup_attribute ("always_inline",
 438                                    DECL_ATTRIBUTES (callee->decl)));
 439       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 440       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 441
 442      /* Until GCC 4.9 we did not check the semantics-altering flags
 443         below and inlined across optimization boundaries.
 444         Enabling checks below breaks several packages by refusing
 445         to inline library always_inline functions. See PR65873.
 446         Disable the check for early inlining for now until better solution
 447         is found.  */
 448      if (always_inline && early)
 449         ;
 450       /* There are some options that change IL semantics which means
 451          we cannot inline in these cases for correctness reason.
 452          Not even for always_inline declared functions.  */
 453      else if (check_match (flag_wrapv)
 454               || check_match (flag_trapv)
 455               || check_match (flag_pcc_struct_return)
 456               /* When caller or callee does FP math, be sure FP codegen flags
 457                  compatible.  */
 458               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 459                   && (check_maybe_up (flag_rounding_math)
 460                       || check_maybe_up (flag_trapping_math)
 461                       || check_maybe_down (flag_unsafe_math_optimizations)
 462                       || check_maybe_down (flag_finite_math_only)
 463                       || check_maybe_up (flag_signaling_nans)
 464                       || check_maybe_down (flag_cx_limited_range)
 465                       || check_maybe_up (flag_signed_zeros)
 466                       || check_maybe_down (flag_associative_math)
 467                       || check_maybe_down (flag_reciprocal_math)
 468                       || check_maybe_down (flag_fp_int_builtin_inexact)
 469                       /* Strictly speaking only when the callee contains function
 470                          calls that may end up setting errno.  */
 471                       || check_maybe_up (flag_errno_math)))
 472               /* We do not want to make code compiled with exceptions to be
 473                  brought into a non-EH function unless we know that the callee
 474                  does not throw.
 475                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 476               || (check_maybe_up (flag_non_call_exceptions)
 477                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 478               || (check_maybe_up (flag_exceptions)
 479                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 480               /* When devirtualization is diabled for callee, it is not safe
 481                  to inline it as we possibly mangled the type info.
 482                  Allow early inlining of always inlines.  */
 483               || (!early && check_maybe_down (flag_devirtualize)))
 484         {
 485           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 486           inlinable = false;
 487         }
 488       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 489       else if (always_inline)
 490         ;
 491       /* When user added an attribute to the callee honor it.  */
 492       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 493                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 494         {
 495           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 496           inlinable = false;
 497         }
 498       /* If explicit optimize attribute are not used, the mismatch is caused
 499          by different command line options used to build different units.
 500          Do not care about COMDAT functions - those are intended to be
 501          optimized with the optimization flags of module they are used in.
 502          Also do not care about mixing up size/speed optimization when
 503          DECL_DISREGARD_INLINE_LIMITS is set.  */
 504       else if ((callee->merged_comdat
 505                 && !lookup_attribute ("optimize",
 506                                       DECL_ATTRIBUTES (caller->decl)))
 507                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 508         ;
 509       /* If mismatch is caused by merging two LTO units with different
 510          optimizationflags we want to be bit nicer.  However never inline
 511          if one of functions is not optimized at all.  */
 512       else if (!opt_for_fn (callee->decl, optimize)
 513                || !opt_for_fn (caller->decl, optimize))
 514         {
 515           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 516           inlinable = false;
 517         }
 518       /* If callee is optimized for size and caller is not, allow inlining if
 519          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 520          is inline (and thus likely an unified comdat).  This will allow caller
 521          to run faster.  */
 522       else if (opt_for_fn (callee->decl, optimize_size)
 523                > opt_for_fn (caller->decl, optimize_size))
 524         {
 525           int growth = estimate_edge_growth (e);
 526           if (growth > PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE)
 527               && (!DECL_DECLARED_INLINE_P (callee->decl)
 528                   && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
 529                                     MAX_INLINE_INSNS_AUTO)))
 530             {
 531               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 532               inlinable = false;
 533             }
 534         }
 535       /* If callee is more aggressively optimized for performance than caller,
 536          we generally want to inline only cheap (runtime wise) functions.  */
 537       else if (opt_for_fn (callee->decl, optimize_size)
 538                < opt_for_fn (caller->decl, optimize_size)
 539                || (opt_for_fn (callee->decl, optimize)
 540                    > opt_for_fn (caller->decl, optimize)))
 541         {
 542           if (estimate_edge_time (e)
 543               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 544             {
 545               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 546               inlinable = false;
 547             }
 548         }
 549
 550     }
 551
 552   if (!inlinable && report)
 553     report_inline_failed_reason (e);
 554   return inlinable;
 555 }
 556
 557
 558 /* Return true if the edge E is inlinable during early inlining.  */
 559
 560 static bool
 561 can_early_inline_edge_p (struct cgraph_edge *e)
 562 {
 563   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 564   /* Early inliner might get called at WPA stage when IPA pass adds new
 565      function.  In this case we can not really do any of early inlining
 566      because function bodies are missing.  */
 567   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 568     return false;
 569   if (!gimple_has_body_p (callee->decl))
 570     {
 571       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 572       return false;
 573     }
 574   /* In early inliner some of callees may not be in SSA form yet
 575      (i.e. the callgraph is cyclic and we did not process
 576      the callee by early inliner, yet).  We don't have CIF code for this
 577      case; later we will re-do the decision in the real inliner.  */
 578   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 579       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 580     {
 581       if (dump_enabled_p ())
 582         dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 583                          "  edge not inlinable: not in SSA form\n");
 584       return false;
 585     }
 586   if (!can_inline_edge_p (e, true, true)
 587       || !can_inline_edge_by_limits_p (e, true, false, true))
 588     return false;
 589   return true;
 590 }
 591
 592
 593 /* Return number of calls in N.  Ignore cheap builtins.  */
 594
 595 static int
 596 num_calls (struct cgraph_node *n)
 597 {
 598   struct cgraph_edge *e;
 599   int num = 0;
 600
 601   for (e = n->callees; e; e = e->next_callee)
 602     if (!is_inexpensive_builtin (e->callee->decl))
 603       num++;
 604   return num;
 605 }
 606
 607
 608 /* Return true if we are interested in inlining small function.  */
 609
 610 static bool
 611 want_early_inline_function_p (struct cgraph_edge *e)
 612 {
 613   bool want_inline = true;
 614   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 615
 616   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 617     ;
 618   /* For AutoFDO, we need to make sure that before profile summary, all
 619      hot paths' IR look exactly the same as profiled binary. As a result,
 620      in einliner, we will disregard size limit and inline those callsites
 621      that are:
 622        * inlined in the profiled binary, and
 623        * the cloned callee has enough samples to be considered "hot".  */
 624   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 625     ;
 626   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 627            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 628     {
 629       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 630       report_inline_failed_reason (e);
 631       want_inline = false;
 632     }
 633   else
 634     {
 635       int growth = estimate_edge_growth (e);
 636       int n;
 637
 638       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
 639         ;
 640       else if (!e->maybe_hot_p ())
 641         {
 642           if (dump_enabled_p ())
 643             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 644                              "  will not early inline: %C->%C, "
 645                              "call is cold and code would grow by %i\n",
 646                              e->caller, callee,
 647                              growth);
 648           want_inline = false;
 649         }
 650       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 651         {
 652           if (dump_enabled_p ())
 653             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 654                              "  will not early inline: %C->%C, "
 655                              "growth %i exceeds --param early-inlining-insns\n",
 656                              e->caller, callee,
 657                              growth);
 658           want_inline = false;
 659         }
 660       else if ((n = num_calls (callee)) != 0
 661                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 662         {
 663           if (dump_enabled_p ())
 664             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 665                              "  will not early inline: %C->%C, "
 666                              "growth %i exceeds --param early-inlining-insns "
 667                              "divided by number of calls\n",
 668                              e->caller, callee,
 669                              growth);
 670           want_inline = false;
 671         }
 672     }
 673   return want_inline;
 674 }
 675
 676 /* Compute time of the edge->caller + edge->callee execution when inlining
 677    does not happen.  */
 678
 679 inline sreal
 680 compute_uninlined_call_time (struct cgraph_edge *edge,
 681                              sreal uninlined_call_time)
 682 {
 683   cgraph_node *caller = (edge->caller->global.inlined_to
 684                          ? edge->caller->global.inlined_to
 685                          : edge->caller);
 686
 687   sreal freq = edge->sreal_frequency ();
 688   if (freq > 0)
 689     uninlined_call_time *= freq;
 690   else
 691     uninlined_call_time = uninlined_call_time >> 11;
 692
 693   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 694   return uninlined_call_time + caller_time;
 695 }
 696
 697 /* Same as compute_uinlined_call_time but compute time when inlining
 698    does happen.  */
 699
 700 inline sreal
 701 compute_inlined_call_time (struct cgraph_edge *edge,
 702                            sreal time)
 703 {
 704   cgraph_node *caller = (edge->caller->global.inlined_to
 705                          ? edge->caller->global.inlined_to
 706                          : edge->caller);
 707   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 708
 709   sreal freq = edge->sreal_frequency ();
 710   if (freq > 0)
 711     time *= freq;
 712   else
 713     time = time >> 11;
 714
 715   /* This calculation should match one in ipa-inline-analysis.c
 716      (estimate_edge_size_and_time).  */
 717   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 718   time += caller_time;
 719   if (time <= 0)
 720     time = ((sreal) 1) >> 8;
 721   gcc_checking_assert (time >= 0);
 722   return time;
 723 }
 724
 725 /* Return true if the speedup for inlining E is bigger than
 726    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 727
 728 static bool
 729 big_speedup_p (struct cgraph_edge *e)
 730 {
 731   sreal unspec_time;
 732   sreal spec_time = estimate_edge_time (e, &unspec_time);
 733   sreal time = compute_uninlined_call_time (e, unspec_time);
 734   sreal inlined_time = compute_inlined_call_time (e, spec_time);
 735
 736   if ((time - inlined_time) * 100
 737       > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
 738     return true;
 739   return false;
 740 }
 741
 742 /* Return true if we are interested in inlining small function.
 743    When REPORT is true, report reason to dump file.  */
 744
 745 static bool
 746 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 747 {
 748   bool want_inline = true;
 749   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 750
 751   /* Allow this function to be called before can_inline_edge_p,
 752      since it's usually cheaper.  */
 753   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 754     want_inline = false;
 755   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 756     ;
 757   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 758            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 759     {
 760       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 761       want_inline = false;
 762     }
 763   /* Do fast and conservative check if the function can be good
 764      inline candidate.  At the moment we allow inline hints to
 765      promote non-inline functions to inline and we increase
 766      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 767   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 768            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 769            && ipa_fn_summaries->get (callee)->min_size
 770                 - ipa_call_summaries->get (e)->call_stmt_size
 771               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 772     {
 773       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 774       want_inline = false;
 775     }
 776   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 777             || e->count.ipa ().nonzero_p ())
 778            && ipa_fn_summaries->get (callee)->min_size
 779                 - ipa_call_summaries->get (e)->call_stmt_size
 780               > 16 * MAX_INLINE_INSNS_SINGLE)
 781     {
 782       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 783                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 784                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 785       want_inline = false;
 786     }
 787   else
 788     {
 789       int growth = estimate_edge_growth (e);
 790       ipa_hints hints = estimate_edge_hints (e);
 791       int big_speedup = -1; /* compute this lazily */
 792
 793       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
 794         ;
 795       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 796          hints suggests that inlining given function is very profitable.  */
 797       else if (DECL_DECLARED_INLINE_P (callee->decl)
 798                && growth >= MAX_INLINE_INSNS_SINGLE
 799                && (growth >= MAX_INLINE_INSNS_SINGLE * 16
 800                    || (!(hints & (INLINE_HINT_indirect_call
 801                                   | INLINE_HINT_known_hot
 802                                   | INLINE_HINT_loop_iterations
 803                                   | INLINE_HINT_array_index
 804                                   | INLINE_HINT_loop_stride))
 805                        && !(big_speedup = big_speedup_p (e)))))
 806         {
 807           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 808           want_inline = false;
 809         }
 810       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 811                && !opt_for_fn (e->caller->decl, flag_inline_functions)
 812                && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL))
 813         {
 814           /* growth_likely_positive is expensive, always test it last.  */
 815           if (growth >= MAX_INLINE_INSNS_SINGLE
 816               || growth_likely_positive (callee, growth))
 817             {
 818               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 819               want_inline = false;
 820             }
 821         }
 822       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 823          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 824          inlining given function is very profitable.  */
 825       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 826                && !(hints & INLINE_HINT_known_hot)
 827                && growth >= ((hints & (INLINE_HINT_indirect_call
 828                                        | INLINE_HINT_loop_iterations
 829                                        | INLINE_HINT_array_index
 830                                        | INLINE_HINT_loop_stride))
 831                              ? MAX (MAX_INLINE_INSNS_AUTO,
 832                                     MAX_INLINE_INSNS_SINGLE)
 833                              : MAX_INLINE_INSNS_AUTO)
 834                && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup))
 835         {
 836           /* growth_likely_positive is expensive, always test it last.  */
 837           if (growth >= MAX_INLINE_INSNS_SINGLE
 838               || growth_likely_positive (callee, growth))
 839             {
 840               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 841               want_inline = false;
 842             }
 843         }
 844       /* If call is cold, do not inline when function body would grow. */
 845       else if (!e->maybe_hot_p ()
 846                && (growth >= MAX_INLINE_INSNS_SINGLE
 847                    || growth_likely_positive (callee, growth)))
 848         {
 849           e->inline_failed = CIF_UNLIKELY_CALL;
 850           want_inline = false;
 851         }
 852     }
 853   if (!want_inline && report)
 854     report_inline_failed_reason (e);
 855   return want_inline;
 856 }
 857
 858 /* EDGE is self recursive edge.
 859    We hand two cases - when function A is inlining into itself
 860    or when function A is being inlined into another inliner copy of function
 861    A within function B.
 862
 863    In first case OUTER_NODE points to the toplevel copy of A, while
 864    in the second case OUTER_NODE points to the outermost copy of A in B.
 865
 866    In both cases we want to be extra selective since
 867    inlining the call will just introduce new recursive calls to appear.  */
 868
 869 static bool
 870 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 871                                    struct cgraph_node *outer_node,
 872                                    bool peeling,
 873                                    int depth)
 874 {
 875   char const *reason = NULL;
 876   bool want_inline = true;
 877   sreal caller_freq = 1;
 878   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 879
 880   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 881     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 882
 883   if (!edge->maybe_hot_p ())
 884     {
 885       reason = "recursive call is cold";
 886       want_inline = false;
 887     }
 888   else if (depth > max_depth)
 889     {
 890       reason = "--param max-inline-recursive-depth exceeded.";
 891       want_inline = false;
 892     }
 893   else if (outer_node->global.inlined_to
 894            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 895     {
 896       reason = "caller frequency is 0";
 897       want_inline = false;
 898     }
 899
 900   if (!want_inline)
 901     ;
 902   /* Inlining of self recursive function into copy of itself within other
 903      function is transformation similar to loop peeling.
 904
 905      Peeling is profitable if we can inline enough copies to make probability
 906      of actual call to the self recursive function very small.  Be sure that
 907      the probability of recursion is small.
 908
 909      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 910      This way the expected number of recursion is at most max_depth.  */
 911   else if (peeling)
 912     {
 913       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 914       int i;
 915       for (i = 1; i < depth; i++)
 916         max_prob = max_prob * max_prob;
 917       if (edge->sreal_frequency () >= max_prob * caller_freq)
 918         {
 919           reason = "frequency of recursive call is too large";
 920           want_inline = false;
 921         }
 922     }
 923   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
 924      recursion depth is large.  We reduce function call overhead and increase
 925      chances that things fit in hardware return predictor.
 926
 927      Recursive inlining might however increase cost of stack frame setup
 928      actually slowing down functions whose recursion tree is wide rather than
 929      deep.
 930
 931      Deciding reliably on when to do recursive inlining without profile feedback
 932      is tricky.  For now we disable recursive inlining when probability of self
 933      recursion is low.
 934
 935      Recursive inlining of self recursive call within loop also results in
 936      large loop depths that generally optimize badly.  We may want to throttle
 937      down inlining in those cases.  In particular this seems to happen in one
 938      of libstdc++ rb tree methods.  */
 939   else
 940     {
 941       if (edge->sreal_frequency () * 100
 942           <= caller_freq
 943              * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
 944         {
 945           reason = "frequency of recursive call is too small";
 946           want_inline = false;
 947         }
 948     }
 949   if (!want_inline && dump_enabled_p ())
 950     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
 951                      "   not inlining recursively: %s\n", reason);
 952   return want_inline;
 953 }
 954
 955 /* Return true when NODE has uninlinable caller;
 956    set HAS_HOT_CALL if it has hot call.
 957    Worker for cgraph_for_node_and_aliases.  */
 958
 959 static bool
 960 check_callers (struct cgraph_node *node, void *has_hot_call)
 961 {
 962   struct cgraph_edge *e;
 963    for (e = node->callers; e; e = e->next_caller)
 964      {
 965        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
 966            || !opt_for_fn (e->caller->decl, optimize))
 967          return true;
 968        if (!can_inline_edge_p (e, true))
 969          return true;
 970        if (e->recursive_p ())
 971          return true;
 972        if (!can_inline_edge_by_limits_p (e, true))
 973          return true;
 974        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 975          *(bool *)has_hot_call = true;
 976      }
 977   return false;
 978 }
 979
 980 /* If NODE has a caller, return true.  */
 981
 982 static bool
 983 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 984 {
 985   if (node->callers)
 986     return true;
 987   return false;
 988 }
 989
 990 /* Decide if inlining NODE would reduce unit size by eliminating
 991    the offline copy of function.
 992    When COLD is true the cold calls are considered, too.  */
 993
 994 static bool
 995 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 996 {
 997   bool has_hot_call = false;
 998
 999   /* Aliases gets inlined along with the function they alias.  */
1000   if (node->alias)
1001     return false;
1002   /* Already inlined?  */
1003   if (node->global.inlined_to)
1004     return false;
1005   /* Does it have callers?  */
1006   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1007     return false;
1008   /* Inlining into all callers would increase size?  */
1009   if (estimate_growth (node) > 0)
1010     return false;
1011   /* All inlines must be possible.  */
1012   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1013                                          true))
1014     return false;
1015   if (!cold && !has_hot_call)
1016     return false;
1017   return true;
1018 }
1019
1020 /* A cost model driving the inlining heuristics in a way so the edges with
1021    smallest badness are inlined first.  After each inlining is performed
1022    the costs of all caller edges of nodes affected are recomputed so the
1023    metrics may accurately depend on values such as number of inlinable callers
1024    of the function or function body size.  */
1025
1026 static sreal
1027 edge_badness (struct cgraph_edge *edge, bool dump)
1028 {
1029   sreal badness;
1030   int growth;
1031   sreal edge_time, unspec_edge_time;
1032   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1033   struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1034   ipa_hints hints;
1035   cgraph_node *caller = (edge->caller->global.inlined_to
1036                          ? edge->caller->global.inlined_to
1037                          : edge->caller);
1038
1039   growth = estimate_edge_growth (edge);
1040   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1041   hints = estimate_edge_hints (edge);
1042   gcc_checking_assert (edge_time >= 0);
1043   /* Check that inlined time is better, but tolerate some roundoff issues.
1044      FIXME: When callee profile drops to 0 we account calls more.  This
1045      should be fixed by never doing that.  */
1046   gcc_checking_assert ((edge_time * 100
1047                         - callee_info->time * 101).to_int () <= 0
1048                         || callee->count.ipa ().initialized_p ());
1049   gcc_checking_assert (growth <= callee_info->size);
1050
1051   if (dump)
1052     {
1053       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1054                edge->caller->dump_name (),
1055                edge->callee->dump_name ());
1056       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1057                growth,
1058                edge_time.to_double (),
1059                unspec_edge_time.to_double ());
1060       ipa_dump_hints (dump_file, hints);
1061       if (big_speedup_p (edge))
1062         fprintf (dump_file, " big_speedup");
1063       fprintf (dump_file, "\n");
1064     }
1065
1066   /* Always prefer inlining saving code size.  */
1067   if (growth <= 0)
1068     {
1069       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1070       if (dump)
1071         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1072                  growth);
1073     }
1074    /* Inlining into EXTERNAL functions is not going to change anything unless
1075       they are themselves inlined.  */
1076    else if (DECL_EXTERNAL (caller->decl))
1077     {
1078       if (dump)
1079         fprintf (dump_file, "      max: function is external\n");
1080       return sreal::max ();
1081     }
1082   /* When profile is available. Compute badness as:
1083
1084                  time_saved * caller_count
1085      goodness =  -------------------------------------------------
1086                  growth_of_caller * overall_growth * combined_size
1087
1088      badness = - goodness
1089
1090      Again use negative value to make calls with profile appear hotter
1091      then calls without.
1092   */
1093   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1094            || caller->count.ipa ().nonzero_p ())
1095     {
1096       sreal numerator, denominator;
1097       int overall_growth;
1098       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1099
1100       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1101                    - inlined_time);
1102       if (numerator <= 0)
1103         numerator = ((sreal) 1 >> 8);
1104       if (caller->count.ipa ().nonzero_p ())
1105         numerator *= caller->count.ipa ().to_gcov_type ();
1106       else if (caller->count.ipa ().initialized_p ())
1107         numerator = numerator >> 11;
1108       denominator = growth;
1109
1110       overall_growth = callee_info->growth;
1111
1112       /* Look for inliner wrappers of the form:
1113
1114          inline_caller ()
1115            {
1116              do_fast_job...
1117              if (need_more_work)
1118                noninline_callee ();
1119            }
1120          Withhout panilizing this case, we usually inline noninline_callee
1121          into the inline_caller because overall_growth is small preventing
1122          further inlining of inline_caller.
1123
1124          Penalize only callgraph edges to functions with small overall
1125          growth ...
1126         */
1127       if (growth > overall_growth
1128           /* ... and having only one caller which is not inlined ... */
1129           && callee_info->single_caller
1130           && !edge->caller->global.inlined_to
1131           /* ... and edges executed only conditionally ... */
1132           && edge->sreal_frequency () < 1
1133           /* ... consider case where callee is not inline but caller is ... */
1134           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1135                && DECL_DECLARED_INLINE_P (caller->decl))
1136               /* ... or when early optimizers decided to split and edge
1137                  frequency still indicates splitting is a win ... */
1138               || (callee->split_part && !caller->split_part
1139                   && edge->sreal_frequency () * 100
1140                      < PARAM_VALUE
1141                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1142                   /* ... and do not overwrite user specified hints.   */
1143                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1144                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1145         {
1146           ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1147           int caller_growth = caller_info->growth;
1148
1149           /* Only apply the penalty when caller looks like inline candidate,
1150              and it is not called once and.  */
1151           if (!caller_info->single_caller && overall_growth < caller_growth
1152               && caller_info->inlinable
1153               && caller_info->size
1154                  < (DECL_DECLARED_INLINE_P (caller->decl)
1155                     ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1156             {
1157               if (dump)
1158                 fprintf (dump_file,
1159                          "     Wrapper penalty. Increasing growth %i to %i\n",
1160                          overall_growth, caller_growth);
1161               overall_growth = caller_growth;
1162             }
1163         }
1164       if (overall_growth > 0)
1165         {
1166           /* Strongly preffer functions with few callers that can be inlined
1167              fully.  The square root here leads to smaller binaries at average.
1168              Watch however for extreme cases and return to linear function
1169              when growth is large.  */
1170           if (overall_growth < 256)
1171             overall_growth *= overall_growth;
1172           else
1173             overall_growth += 256 * 256 - 256;
1174           denominator *= overall_growth;
1175         }
1176       denominator *= ipa_fn_summaries->get (caller)->self_size + growth;
1177
1178       badness = - numerator / denominator;
1179
1180       if (dump)
1181         {
1182           fprintf (dump_file,
1183                    "      %f: guessed profile. frequency %f, count %" PRId64
1184                    " caller count %" PRId64
1185                    " time w/o inlining %f, time with inlining %f"
1186                    " overall growth %i (current) %i (original)"
1187                    " %i (compensated)\n",
1188                    badness.to_double (),
1189                    edge->sreal_frequency ().to_double (),
1190                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1191                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1192                    compute_uninlined_call_time (edge,
1193                                                 unspec_edge_time).to_double (),
1194                    inlined_time.to_double (),
1195                    estimate_growth (callee),
1196                    callee_info->growth, overall_growth);
1197         }
1198     }
1199   /* When function local profile is not available or it does not give
1200      useful information (ie frequency is zero), base the cost on
1201      loop nest and overall size growth, so we optimize for overall number
1202      of functions fully inlined in program.  */
1203   else
1204     {
1205       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1206       badness = growth;
1207
1208       /* Decrease badness if call is nested.  */
1209       if (badness > 0)
1210         badness = badness >> nest;
1211       else
1212         badness = badness << nest;
1213       if (dump)
1214         fprintf (dump_file, "      %f: no profile. nest %i\n",
1215                  badness.to_double (), nest);
1216     }
1217   gcc_checking_assert (badness != 0);
1218
1219   if (edge->recursive_p ())
1220     badness = badness.shift (badness > 0 ? 4 : -4);
1221   if ((hints & (INLINE_HINT_indirect_call
1222                 | INLINE_HINT_loop_iterations
1223                 | INLINE_HINT_array_index
1224                 | INLINE_HINT_loop_stride))
1225       || callee_info->growth <= 0)
1226     badness = badness.shift (badness > 0 ? -2 : 2);
1227   if (hints & (INLINE_HINT_same_scc))
1228     badness = badness.shift (badness > 0 ? 3 : -3);
1229   else if (hints & (INLINE_HINT_in_scc))
1230     badness = badness.shift (badness > 0 ? 2 : -2);
1231   else if (hints & (INLINE_HINT_cross_module))
1232     badness = badness.shift (badness > 0 ? 1 : -1);
1233   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1234     badness = badness.shift (badness > 0 ? -4 : 4);
1235   else if ((hints & INLINE_HINT_declared_inline))
1236     badness = badness.shift (badness > 0 ? -3 : 3);
1237   if (dump)
1238     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1239   return badness;
1240 }
1241
1242 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1243 static inline void
1244 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1245 {
1246   sreal badness = edge_badness (edge, false);
1247   if (edge->aux)
1248     {
1249       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1250       gcc_checking_assert (n->get_data () == edge);
1251
1252       /* fibonacci_heap::replace_key does busy updating of the
1253          heap that is unnecesarily expensive.
1254          We do lazy increases: after extracting minimum if the key
1255          turns out to be out of date, it is re-inserted into heap
1256          with correct value.  */
1257       if (badness < n->get_key ())
1258         {
1259           if (dump_file && (dump_flags & TDF_DETAILS))
1260             {
1261               fprintf (dump_file,
1262                        "  decreasing badness %s -> %s, %f to %f\n",
1263                        edge->caller->dump_name (),
1264                        edge->callee->dump_name (),
1265                        n->get_key ().to_double (),
1266                        badness.to_double ());
1267             }
1268           heap->decrease_key (n, badness);
1269         }
1270     }
1271   else
1272     {
1273        if (dump_file && (dump_flags & TDF_DETAILS))
1274          {
1275            fprintf (dump_file,
1276                     "  enqueuing call %s -> %s, badness %f\n",
1277                     edge->caller->dump_name (),
1278                     edge->callee->dump_name (),
1279                     badness.to_double ());
1280          }
1281       edge->aux = heap->insert (badness, edge);
1282     }
1283 }
1284
1285
1286 /* NODE was inlined.
1287    All caller edges needs to be resetted because
1288    size estimates change. Similarly callees needs reset
1289    because better context may be known.  */
1290
1291 static void
1292 reset_edge_caches (struct cgraph_node *node)
1293 {
1294   struct cgraph_edge *edge;
1295   struct cgraph_edge *e = node->callees;
1296   struct cgraph_node *where = node;
1297   struct ipa_ref *ref;
1298
1299   if (where->global.inlined_to)
1300     where = where->global.inlined_to;
1301
1302   if (edge_growth_cache != NULL)
1303     for (edge = where->callers; edge; edge = edge->next_caller)
1304       if (edge->inline_failed)
1305         edge_growth_cache->remove (edge);
1306
1307   FOR_EACH_ALIAS (where, ref)
1308     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1309
1310   if (!e)
1311     return;
1312
1313   while (true)
1314     if (!e->inline_failed && e->callee->callees)
1315       e = e->callee->callees;
1316     else
1317       {
1318         if (edge_growth_cache != NULL && e->inline_failed)
1319           edge_growth_cache->remove (e);
1320         if (e->next_callee)
1321           e = e->next_callee;
1322         else
1323           {
1324             do
1325               {
1326                 if (e->caller == node)
1327                   return;
1328                 e = e->caller->callers;
1329               }
1330             while (!e->next_callee);
1331             e = e->next_callee;
1332           }
1333       }
1334 }
1335
1336 /* Recompute HEAP nodes for each of caller of NODE.
1337    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1338    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1339    it is inlinable. Otherwise check all edges.  */
1340
1341 static void
1342 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1343                     bitmap updated_nodes,
1344                     struct cgraph_edge *check_inlinablity_for)
1345 {
1346   struct cgraph_edge *edge;
1347   struct ipa_ref *ref;
1348
1349   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1350       || node->global.inlined_to)
1351     return;
1352   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1353     return;
1354
1355   FOR_EACH_ALIAS (node, ref)
1356     {
1357       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1358       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1359     }
1360
1361   for (edge = node->callers; edge; edge = edge->next_caller)
1362     if (edge->inline_failed)
1363       {
1364         if (!check_inlinablity_for
1365             || check_inlinablity_for == edge)
1366           {
1367             if (can_inline_edge_p (edge, false)
1368                 && want_inline_small_function_p (edge, false)
1369                 && can_inline_edge_by_limits_p (edge, false))
1370               update_edge_key (heap, edge);
1371             else if (edge->aux)
1372               {
1373                 report_inline_failed_reason (edge);
1374                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1375                 edge->aux = NULL;
1376               }
1377           }
1378         else if (edge->aux)
1379           update_edge_key (heap, edge);
1380       }
1381 }
1382
1383 /* Recompute HEAP nodes for each uninlined call in NODE.
1384    This is used when we know that edge badnesses are going only to increase
1385    (we introduced new call site) and thus all we need is to insert newly
1386    created edges into heap.  */
1387
1388 static void
1389 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1390                     bitmap updated_nodes)
1391 {
1392   struct cgraph_edge *e = node->callees;
1393
1394   if (!e)
1395     return;
1396   while (true)
1397     if (!e->inline_failed && e->callee->callees)
1398       e = e->callee->callees;
1399     else
1400       {
1401         enum availability avail;
1402         struct cgraph_node *callee;
1403         /* We do not reset callee growth cache here.  Since we added a new call,
1404            growth chould have just increased and consequentely badness metric
1405            don't need updating.  */
1406         if (e->inline_failed
1407             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1408             && ipa_fn_summaries->get (callee) != NULL
1409             && ipa_fn_summaries->get (callee)->inlinable
1410             && avail >= AVAIL_AVAILABLE
1411             && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1412           {
1413             if (can_inline_edge_p (e, false)
1414                 && want_inline_small_function_p (e, false)
1415                 && can_inline_edge_by_limits_p (e, false))
1416               update_edge_key (heap, e);
1417             else if (e->aux)
1418               {
1419                 report_inline_failed_reason (e);
1420                 heap->delete_node ((edge_heap_node_t *) e->aux);
1421                 e->aux = NULL;
1422               }
1423           }
1424         if (e->next_callee)
1425           e = e->next_callee;
1426         else
1427           {
1428             do
1429               {
1430                 if (e->caller == node)
1431                   return;
1432                 e = e->caller->callers;
1433               }
1434             while (!e->next_callee);
1435             e = e->next_callee;
1436           }
1437       }
1438 }
1439
1440 /* Enqueue all recursive calls from NODE into priority queue depending on
1441    how likely we want to recursively inline the call.  */
1442
1443 static void
1444 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1445                         edge_heap_t *heap)
1446 {
1447   struct cgraph_edge *e;
1448   enum availability avail;
1449
1450   for (e = where->callees; e; e = e->next_callee)
1451     if (e->callee == node
1452         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1453             && avail > AVAIL_INTERPOSABLE))
1454       heap->insert (-e->sreal_frequency (), e);
1455   for (e = where->callees; e; e = e->next_callee)
1456     if (!e->inline_failed)
1457       lookup_recursive_calls (node, e->callee, heap);
1458 }
1459
1460 /* Decide on recursive inlining: in the case function has recursive calls,
1461    inline until body size reaches given argument.  If any new indirect edges
1462    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1463    is NULL.  */
1464
1465 static bool
1466 recursive_inlining (struct cgraph_edge *edge,
1467                     vec<cgraph_edge *> *new_edges)
1468 {
1469   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1470   edge_heap_t heap (sreal::min ());
1471   struct cgraph_node *node;
1472   struct cgraph_edge *e;
1473   struct cgraph_node *master_clone = NULL, *next;
1474   int depth = 0;
1475   int n = 0;
1476
1477   node = edge->caller;
1478   if (node->global.inlined_to)
1479     node = node->global.inlined_to;
1480
1481   if (DECL_DECLARED_INLINE_P (node->decl))
1482     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1483
1484   /* Make sure that function is small enough to be considered for inlining.  */
1485   if (estimate_size_after_inlining (node, edge)  >= limit)
1486     return false;
1487   lookup_recursive_calls (node, node, &heap);
1488   if (heap.empty ())
1489     return false;
1490
1491   if (dump_file)
1492     fprintf (dump_file,
1493              "  Performing recursive inlining on %s\n",
1494              node->name ());
1495
1496   /* Do the inlining and update list of recursive call during process.  */
1497   while (!heap.empty ())
1498     {
1499       struct cgraph_edge *curr = heap.extract_min ();
1500       struct cgraph_node *cnode, *dest = curr->callee;
1501
1502       if (!can_inline_edge_p (curr, true)
1503           || can_inline_edge_by_limits_p (curr, true))
1504         continue;
1505
1506       /* MASTER_CLONE is produced in the case we already started modified
1507          the function. Be sure to redirect edge to the original body before
1508          estimating growths otherwise we will be seeing growths after inlining
1509          the already modified body.  */
1510       if (master_clone)
1511         {
1512           curr->redirect_callee (master_clone);
1513           if (edge_growth_cache != NULL)
1514             edge_growth_cache->remove (curr);
1515         }
1516
1517       if (estimate_size_after_inlining (node, curr) > limit)
1518         {
1519           curr->redirect_callee (dest);
1520           if (edge_growth_cache != NULL)
1521             edge_growth_cache->remove (curr);
1522           break;
1523         }
1524
1525       depth = 1;
1526       for (cnode = curr->caller;
1527            cnode->global.inlined_to; cnode = cnode->callers->caller)
1528         if (node->decl
1529             == curr->callee->ultimate_alias_target ()->decl)
1530           depth++;
1531
1532       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1533         {
1534           curr->redirect_callee (dest);
1535           if (edge_growth_cache != NULL)
1536             edge_growth_cache->remove (curr);
1537           continue;
1538         }
1539
1540       if (dump_file)
1541         {
1542           fprintf (dump_file,
1543                    "   Inlining call of depth %i", depth);
1544           if (node->count.nonzero_p ())
1545             {
1546               fprintf (dump_file, " called approx. %.2f times per call",
1547                        (double)curr->count.to_gcov_type ()
1548                        / node->count.to_gcov_type ());
1549             }
1550           fprintf (dump_file, "\n");
1551         }
1552       if (!master_clone)
1553         {
1554           /* We need original clone to copy around.  */
1555           master_clone = node->create_clone (node->decl, node->count,
1556             false, vNULL, true, NULL, NULL);
1557           for (e = master_clone->callees; e; e = e->next_callee)
1558             if (!e->inline_failed)
1559               clone_inlined_nodes (e, true, false, NULL);
1560           curr->redirect_callee (master_clone);
1561           if (edge_growth_cache != NULL)
1562             edge_growth_cache->remove (curr);
1563         }
1564
1565       inline_call (curr, false, new_edges, &overall_size, true);
1566       lookup_recursive_calls (node, curr->callee, &heap);
1567       n++;
1568     }
1569
1570   if (!heap.empty () && dump_file)
1571     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1572
1573   if (!master_clone)
1574     return false;
1575
1576   if (dump_enabled_p ())
1577     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1578                      "\n   Inlined %i times, "
1579                      "body grown from size %i to %i, time %f to %f\n", n,
1580                      ipa_fn_summaries->get (master_clone)->size,
1581                      ipa_fn_summaries->get (node)->size,
1582                      ipa_fn_summaries->get (master_clone)->time.to_double (),
1583                      ipa_fn_summaries->get (node)->time.to_double ());
1584
1585   /* Remove master clone we used for inlining.  We rely that clones inlined
1586      into master clone gets queued just before master clone so we don't
1587      need recursion.  */
1588   for (node = symtab->first_function (); node != master_clone;
1589        node = next)
1590     {
1591       next = symtab->next_function (node);
1592       if (node->global.inlined_to == master_clone)
1593         node->remove ();
1594     }
1595   master_clone->remove ();
1596   return true;
1597 }
1598
1599
1600 /* Given whole compilation unit estimate of INSNS, compute how large we can
1601    allow the unit to grow.  */
1602
1603 static int
1604 compute_max_insns (int insns)
1605 {
1606   int max_insns = insns;
1607   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1608     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1609
1610   return ((int64_t) max_insns
1611           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1612 }
1613
1614
1615 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1616
1617 static void
1618 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1619 {
1620   while (new_edges.length () > 0)
1621     {
1622       struct cgraph_edge *edge = new_edges.pop ();
1623
1624       gcc_assert (!edge->aux);
1625       if (edge->inline_failed
1626           && can_inline_edge_p (edge, true)
1627           && want_inline_small_function_p (edge, true)
1628           && can_inline_edge_by_limits_p (edge, true))
1629         edge->aux = heap->insert (edge_badness (edge, false), edge);
1630     }
1631 }
1632
1633 /* Remove EDGE from the fibheap.  */
1634
1635 static void
1636 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1637 {
1638   if (e->aux)
1639     {
1640       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1641       e->aux = NULL;
1642     }
1643 }
1644
1645 /* Return true if speculation of edge E seems useful.
1646    If ANTICIPATE_INLINING is true, be conservative and hope that E
1647    may get inlined.  */
1648
1649 bool
1650 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1651 {
1652   enum availability avail;
1653   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1654                                                                  e->caller);
1655   struct cgraph_edge *direct, *indirect;
1656   struct ipa_ref *ref;
1657
1658   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1659
1660   if (!e->maybe_hot_p ())
1661     return false;
1662
1663   /* See if IP optimizations found something potentially useful about the
1664      function.  For now we look only for CONST/PURE flags.  Almost everything
1665      else we propagate is useless.  */
1666   if (avail >= AVAIL_AVAILABLE)
1667     {
1668       int ecf_flags = flags_from_decl_or_type (target->decl);
1669       if (ecf_flags & ECF_CONST)
1670         {
1671           e->speculative_call_info (direct, indirect, ref);
1672           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1673             return true;
1674         }
1675       else if (ecf_flags & ECF_PURE)
1676         {
1677           e->speculative_call_info (direct, indirect, ref);
1678           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1679             return true;
1680         }
1681     }
1682   /* If we did not managed to inline the function nor redirect
1683      to an ipa-cp clone (that are seen by having local flag set),
1684      it is probably pointless to inline it unless hardware is missing
1685      indirect call predictor.  */
1686   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1687     return false;
1688   /* For overwritable targets there is not much to do.  */
1689   if (e->inline_failed
1690       && (!can_inline_edge_p (e, false)
1691           || !can_inline_edge_by_limits_p (e, false, true)))
1692     return false;
1693   /* OK, speculation seems interesting.  */
1694   return true;
1695 }
1696
1697 /* We know that EDGE is not going to be inlined.
1698    See if we can remove speculation.  */
1699
1700 static void
1701 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1702 {
1703   if (edge->speculative && !speculation_useful_p (edge, false))
1704     {
1705       struct cgraph_node *node = edge->caller;
1706       struct cgraph_node *where = node->global.inlined_to
1707                                   ? node->global.inlined_to : node;
1708       auto_bitmap updated_nodes;
1709
1710       if (edge->count.ipa ().initialized_p ())
1711         spec_rem += edge->count.ipa ();
1712       edge->resolve_speculation ();
1713       reset_edge_caches (where);
1714       ipa_update_overall_fn_summary (where);
1715       update_caller_keys (edge_heap, where,
1716                           updated_nodes, NULL);
1717       update_callee_keys (edge_heap, where,
1718                           updated_nodes);
1719     }
1720 }
1721
1722 /* Return true if NODE should be accounted for overall size estimate.
1723    Skip all nodes optimized for size so we can measure the growth of hot
1724    part of program no matter of the padding.  */
1725
1726 bool
1727 inline_account_function_p (struct cgraph_node *node)
1728 {
1729    return (!DECL_EXTERNAL (node->decl)
1730            && !opt_for_fn (node->decl, optimize_size)
1731            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1732 }
1733
1734 /* Count number of callers of NODE and store it into DATA (that
1735    points to int.  Worker for cgraph_for_node_and_aliases.  */
1736
1737 static bool
1738 sum_callers (struct cgraph_node *node, void *data)
1739 {
1740   struct cgraph_edge *e;
1741   int *num_calls = (int *)data;
1742
1743   for (e = node->callers; e; e = e->next_caller)
1744     (*num_calls)++;
1745   return false;
1746 }
1747
1748 /* We use greedy algorithm for inlining of small functions:
1749    All inline candidates are put into prioritized heap ordered in
1750    increasing badness.
1751
1752    The inlining of small functions is bounded by unit growth parameters.  */
1753
1754 static void
1755 inline_small_functions (void)
1756 {
1757   struct cgraph_node *node;
1758   struct cgraph_edge *edge;
1759   edge_heap_t edge_heap (sreal::min ());
1760   auto_bitmap updated_nodes;
1761   int min_size, max_size;
1762   auto_vec<cgraph_edge *> new_indirect_edges;
1763   int initial_size = 0;
1764   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1765   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1766   new_indirect_edges.create (8);
1767
1768   edge_removal_hook_holder
1769     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1770
1771   /* Compute overall unit size and other global parameters used by badness
1772      metrics.  */
1773
1774   max_count = profile_count::uninitialized ();
1775   ipa_reduced_postorder (order, true, true, NULL);
1776   free (order);
1777
1778   FOR_EACH_DEFINED_FUNCTION (node)
1779     if (!node->global.inlined_to)
1780       {
1781         if (!node->alias && node->analyzed
1782             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1783             && opt_for_fn (node->decl, optimize))
1784           {
1785             struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1786             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1787
1788             /* Do not account external functions, they will be optimized out
1789                if not inlined.  Also only count the non-cold portion of program.  */
1790             if (inline_account_function_p (node))
1791               initial_size += info->size;
1792             info->growth = estimate_growth (node);
1793
1794             int num_calls = 0;
1795             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1796                                                true);
1797             if (num_calls == 1)
1798               info->single_caller = true;
1799             if (dfs && dfs->next_cycle)
1800               {
1801                 struct cgraph_node *n2;
1802                 int id = dfs->scc_no + 1;
1803                 for (n2 = node; n2;
1804                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1805                   if (opt_for_fn (n2->decl, optimize))
1806                     {
1807                       ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1808                       if (info2->scc_no)
1809                         break;
1810                       info2->scc_no = id;
1811                     }
1812               }
1813           }
1814
1815         for (edge = node->callers; edge; edge = edge->next_caller)
1816           max_count = max_count.max (edge->count.ipa ());
1817       }
1818   ipa_free_postorder_info ();
1819   edge_growth_cache
1820     = new call_summary<edge_growth_cache_entry *> (symtab, false);
1821
1822   if (dump_file)
1823     fprintf (dump_file,
1824              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1825              initial_size);
1826
1827   overall_size = initial_size;
1828   max_size = compute_max_insns (overall_size);
1829   min_size = overall_size;
1830
1831   /* Populate the heap with all edges we might inline.  */
1832
1833   FOR_EACH_DEFINED_FUNCTION (node)
1834     {
1835       bool update = false;
1836       struct cgraph_edge *next = NULL;
1837       bool has_speculative = false;
1838
1839       if (!opt_for_fn (node->decl, optimize))
1840         continue;
1841
1842       if (dump_file)
1843         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1844
1845       for (edge = node->callees; edge; edge = next)
1846         {
1847           next = edge->next_callee;
1848           if (edge->inline_failed
1849               && !edge->aux
1850               && can_inline_edge_p (edge, true)
1851               && want_inline_small_function_p (edge, true)
1852               && can_inline_edge_by_limits_p (edge, true)
1853               && edge->inline_failed)
1854             {
1855               gcc_assert (!edge->aux);
1856               update_edge_key (&edge_heap, edge);
1857             }
1858           if (edge->speculative)
1859             has_speculative = true;
1860         }
1861       if (has_speculative)
1862         for (edge = node->callees; edge; edge = next)
1863           if (edge->speculative && !speculation_useful_p (edge,
1864                                                           edge->aux != NULL))
1865             {
1866               edge->resolve_speculation ();
1867               update = true;
1868             }
1869       if (update)
1870         {
1871           struct cgraph_node *where = node->global.inlined_to
1872                                       ? node->global.inlined_to : node;
1873           ipa_update_overall_fn_summary (where);
1874           reset_edge_caches (where);
1875           update_caller_keys (&edge_heap, where,
1876                               updated_nodes, NULL);
1877           update_callee_keys (&edge_heap, where,
1878                               updated_nodes);
1879           bitmap_clear (updated_nodes);
1880         }
1881     }
1882
1883   gcc_assert (in_lto_p
1884               || !(max_count > 0)
1885               || (profile_info && flag_branch_probabilities));
1886
1887   while (!edge_heap.empty ())
1888     {
1889       int old_size = overall_size;
1890       struct cgraph_node *where, *callee;
1891       sreal badness = edge_heap.min_key ();
1892       sreal current_badness;
1893       int growth;
1894
1895       edge = edge_heap.extract_min ();
1896       gcc_assert (edge->aux);
1897       edge->aux = NULL;
1898       if (!edge->inline_failed || !edge->callee->analyzed)
1899         continue;
1900
1901 #if CHECKING_P
1902       /* Be sure that caches are maintained consistent.
1903          This check is affected by scaling roundoff errors when compiling for
1904          IPA this we skip it in that case.  */
1905       if (!edge->callee->count.ipa_p ()
1906           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1907         {
1908           sreal cached_badness = edge_badness (edge, false);
1909
1910           int old_size_est = estimate_edge_size (edge);
1911           sreal old_time_est = estimate_edge_time (edge);
1912           int old_hints_est = estimate_edge_hints (edge);
1913
1914           if (edge_growth_cache != NULL)
1915             edge_growth_cache->remove (edge);
1916           gcc_assert (old_size_est == estimate_edge_size (edge));
1917           gcc_assert (old_time_est == estimate_edge_time (edge));
1918           /* FIXME:
1919
1920              gcc_assert (old_hints_est == estimate_edge_hints (edge));
1921
1922              fails with profile feedback because some hints depends on
1923              maybe_hot_edge_p predicate and because callee gets inlined to other
1924              calls, the edge may become cold.
1925              This ought to be fixed by computing relative probabilities
1926              for given invocation but that will be better done once whole
1927              code is converted to sreals.  Disable for now and revert to "wrong"
1928              value so enable/disable checking paths agree.  */
1929           edge_growth_cache->get (edge)->hints = old_hints_est + 1;
1930
1931           /* When updating the edge costs, we only decrease badness in the keys.
1932              Increases of badness are handled lazilly; when we see key with out
1933              of date value on it, we re-insert it now.  */
1934           current_badness = edge_badness (edge, false);
1935           gcc_assert (cached_badness == current_badness);
1936           gcc_assert (current_badness >= badness);
1937         }
1938       else
1939         current_badness = edge_badness (edge, false);
1940 #else
1941       current_badness = edge_badness (edge, false);
1942 #endif
1943       if (current_badness != badness)
1944         {
1945           if (edge_heap.min () && current_badness > edge_heap.min_key ())
1946             {
1947               edge->aux = edge_heap.insert (current_badness, edge);
1948               continue;
1949             }
1950           else
1951             badness = current_badness;
1952         }
1953
1954       if (!can_inline_edge_p (edge, true)
1955           || !can_inline_edge_by_limits_p (edge, true))
1956         {
1957           resolve_noninline_speculation (&edge_heap, edge);
1958           continue;
1959         }
1960
1961       callee = edge->callee->ultimate_alias_target ();
1962       growth = estimate_edge_growth (edge);
1963       if (dump_file)
1964         {
1965           fprintf (dump_file,
1966                    "\nConsidering %s with %i size\n",
1967                    callee->dump_name (),
1968                    ipa_fn_summaries->get (callee)->size);
1969           fprintf (dump_file,
1970                    " to be inlined into %s in %s:%i\n"
1971                    " Estimated badness is %f, frequency %.2f.\n",
1972                    edge->caller->dump_name (),
1973                    edge->call_stmt
1974                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
1975                                                         edge->call_stmt))
1976                        > BUILTINS_LOCATION)
1977                    ? gimple_filename ((const gimple *) edge->call_stmt)
1978                    : "unknown",
1979                    edge->call_stmt
1980                    ? gimple_lineno ((const gimple *) edge->call_stmt)
1981                    : -1,
1982                    badness.to_double (),
1983                    edge->sreal_frequency ().to_double ());
1984           if (edge->count.ipa ().initialized_p ())
1985             {
1986               fprintf (dump_file, " Called ");
1987               edge->count.ipa ().dump (dump_file);
1988               fprintf (dump_file, " times\n");
1989             }
1990           if (dump_flags & TDF_DETAILS)
1991             edge_badness (edge, true);
1992         }
1993
1994       if (overall_size + growth > max_size
1995           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1996         {
1997           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1998           report_inline_failed_reason (edge);
1999           resolve_noninline_speculation (&edge_heap, edge);
2000           continue;
2001         }
2002
2003       if (!want_inline_small_function_p (edge, true))
2004         {
2005           resolve_noninline_speculation (&edge_heap, edge);
2006           continue;
2007         }
2008
2009       /* Heuristics for inlining small functions work poorly for
2010          recursive calls where we do effects similar to loop unrolling.
2011          When inlining such edge seems profitable, leave decision on
2012          specific inliner.  */
2013       if (edge->recursive_p ())
2014         {
2015           where = edge->caller;
2016           if (where->global.inlined_to)
2017             where = where->global.inlined_to;
2018           if (!recursive_inlining (edge,
2019                                    opt_for_fn (edge->caller->decl,
2020                                                flag_indirect_inlining)
2021                                    ? &new_indirect_edges : NULL))
2022             {
2023               edge->inline_failed = CIF_RECURSIVE_INLINING;
2024               resolve_noninline_speculation (&edge_heap, edge);
2025               continue;
2026             }
2027           reset_edge_caches (where);
2028           /* Recursive inliner inlines all recursive calls of the function
2029              at once. Consequently we need to update all callee keys.  */
2030           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2031             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2032           update_callee_keys (&edge_heap, where, updated_nodes);
2033           bitmap_clear (updated_nodes);
2034         }
2035       else
2036         {
2037           struct cgraph_node *outer_node = NULL;
2038           int depth = 0;
2039
2040           /* Consider the case where self recursive function A is inlined
2041              into B.  This is desired optimization in some cases, since it
2042              leads to effect similar of loop peeling and we might completely
2043              optimize out the recursive call.  However we must be extra
2044              selective.  */
2045
2046           where = edge->caller;
2047           while (where->global.inlined_to)
2048             {
2049               if (where->decl == callee->decl)
2050                 outer_node = where, depth++;
2051               where = where->callers->caller;
2052             }
2053           if (outer_node
2054               && !want_inline_self_recursive_call_p (edge, outer_node,
2055                                                      true, depth))
2056             {
2057               edge->inline_failed
2058                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2059                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2060               resolve_noninline_speculation (&edge_heap, edge);
2061               continue;
2062             }
2063           else if (depth && dump_file)
2064             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2065
2066           gcc_checking_assert (!callee->global.inlined_to);
2067           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2068           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2069
2070           reset_edge_caches (edge->callee);
2071
2072           update_callee_keys (&edge_heap, where, updated_nodes);
2073         }
2074       where = edge->caller;
2075       if (where->global.inlined_to)
2076         where = where->global.inlined_to;
2077
2078       /* Our profitability metric can depend on local properties
2079          such as number of inlinable calls and size of the function body.
2080          After inlining these properties might change for the function we
2081          inlined into (since it's body size changed) and for the functions
2082          called by function we inlined (since number of it inlinable callers
2083          might change).  */
2084       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2085       /* Offline copy count has possibly changed, recompute if profile is
2086          available.  */
2087       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2088       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2089         update_callee_keys (&edge_heap, n, updated_nodes);
2090       bitmap_clear (updated_nodes);
2091
2092       if (dump_enabled_p ())
2093         {
2094           ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller);
2095
2096           /* dump_printf can't handle %+i.  */
2097           char buf_net_change[100];
2098           snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2099                     overall_size - old_size);
2100
2101           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2102                            " Inlined %C into %C which now has time %f and "
2103                            "size %i, net change of %s.\n",
2104                            edge->callee, edge->caller,
2105                            s->time.to_double (), s->size, buf_net_change);
2106         }
2107       if (min_size > overall_size)
2108         {
2109           min_size = overall_size;
2110           max_size = compute_max_insns (min_size);
2111
2112           if (dump_file)
2113             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2114         }
2115     }
2116
2117   free_growth_caches ();
2118   if (dump_enabled_p ())
2119     dump_printf (MSG_NOTE,
2120                  "Unit growth for small function inlining: %i->%i (%i%%)\n",
2121                  initial_size, overall_size,
2122                  initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2123   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2124 }
2125
2126 /* Flatten NODE.  Performed both during early inlining and
2127    at IPA inlining time.  */
2128
2129 static void
2130 flatten_function (struct cgraph_node *node, bool early)
2131 {
2132   struct cgraph_edge *e;
2133
2134   /* We shouldn't be called recursively when we are being processed.  */
2135   gcc_assert (node->aux == NULL);
2136
2137   node->aux = (void *) node;
2138
2139   for (e = node->callees; e; e = e->next_callee)
2140     {
2141       struct cgraph_node *orig_callee;
2142       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2143
2144       /* We've hit cycle?  It is time to give up.  */
2145       if (callee->aux)
2146         {
2147           if (dump_enabled_p ())
2148             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2149                              "Not inlining %C into %C to avoid cycle.\n",
2150                              callee, e->caller);
2151           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2152             e->inline_failed = CIF_RECURSIVE_INLINING;
2153           continue;
2154         }
2155
2156       /* When the edge is already inlined, we just need to recurse into
2157          it in order to fully flatten the leaves.  */
2158       if (!e->inline_failed)
2159         {
2160           flatten_function (callee, early);
2161           continue;
2162         }
2163
2164       /* Flatten attribute needs to be processed during late inlining. For
2165          extra code quality we however do flattening during early optimization,
2166          too.  */
2167       if (!early
2168           ? !can_inline_edge_p (e, true)
2169             && !can_inline_edge_by_limits_p (e, true)
2170           : !can_early_inline_edge_p (e))
2171         continue;
2172
2173       if (e->recursive_p ())
2174         {
2175           if (dump_enabled_p ())
2176             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2177                              "Not inlining: recursive call.\n");
2178           continue;
2179         }
2180
2181       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2182           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2183         {
2184           if (dump_enabled_p ())
2185             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2186                              "Not inlining: SSA form does not match.\n");
2187           continue;
2188         }
2189
2190       /* Inline the edge and flatten the inline clone.  Avoid
2191          recursing through the original node if the node was cloned.  */
2192       if (dump_enabled_p ())
2193         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2194                          " Inlining %C into %C.\n",
2195                          callee, e->caller);
2196       orig_callee = callee;
2197       inline_call (e, true, NULL, NULL, false);
2198       if (e->callee != orig_callee)
2199         orig_callee->aux = (void *) node;
2200       flatten_function (e->callee, early);
2201       if (e->callee != orig_callee)
2202         orig_callee->aux = NULL;
2203     }
2204
2205   node->aux = NULL;
2206   if (!node->global.inlined_to)
2207     ipa_update_overall_fn_summary (node);
2208 }
2209
2210 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2211    DATA points to number of calls originally found so we avoid infinite
2212    recursion.  */
2213
2214 static bool
2215 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2216                          hash_set<cgraph_node *> *callers)
2217 {
2218   int *num_calls = (int *)data;
2219   bool callee_removed = false;
2220
2221   while (node->callers && !node->global.inlined_to)
2222     {
2223       struct cgraph_node *caller = node->callers->caller;
2224
2225       if (!can_inline_edge_p (node->callers, true)
2226           || !can_inline_edge_by_limits_p (node->callers, true)
2227           || node->callers->recursive_p ())
2228         {
2229           if (dump_file)
2230             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2231           *num_calls = 0;
2232           return false;
2233         }
2234
2235       if (dump_file)
2236         {
2237           cgraph_node *ultimate = node->ultimate_alias_target ();
2238           fprintf (dump_file,
2239                    "\nInlining %s size %i.\n",
2240                    ultimate->name (),
2241                    ipa_fn_summaries->get (ultimate)->size);
2242           fprintf (dump_file,
2243                    " Called once from %s %i insns.\n",
2244                    node->callers->caller->name (),
2245                    ipa_fn_summaries->get (node->callers->caller)->size);
2246         }
2247
2248       /* Remember which callers we inlined to, delaying updating the
2249          overall summary.  */
2250       callers->add (node->callers->caller);
2251       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2252       if (dump_file)
2253         fprintf (dump_file,
2254                  " Inlined into %s which now has %i size\n",
2255                  caller->name (),
2256                  ipa_fn_summaries->get (caller)->size);
2257       if (!(*num_calls)--)
2258         {
2259           if (dump_file)
2260             fprintf (dump_file, "New calls found; giving up.\n");
2261           return callee_removed;
2262         }
2263       if (callee_removed)
2264         return true;
2265     }
2266   return false;
2267 }
2268
2269 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2270    update.  */
2271
2272 static bool
2273 inline_to_all_callers (struct cgraph_node *node, void *data)
2274 {
2275   hash_set<cgraph_node *> callers;
2276   bool res = inline_to_all_callers_1 (node, data, &callers);
2277   /* Perform the delayed update of the overall summary of all callers
2278      processed.  This avoids quadratic behavior in the cases where
2279      we have a lot of calls to the same function.  */
2280   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2281        i != callers.end (); ++i)
2282     ipa_update_overall_fn_summary (*i);
2283   return res;
2284 }
2285
2286 /* Output overall time estimate.  */
2287 static void
2288 dump_overall_stats (void)
2289 {
2290   sreal sum_weighted = 0, sum = 0;
2291   struct cgraph_node *node;
2292
2293   FOR_EACH_DEFINED_FUNCTION (node)
2294     if (!node->global.inlined_to
2295         && !node->alias)
2296       {
2297         ipa_fn_summary *s = ipa_fn_summaries->get (node);
2298         if (s != NULL)
2299           {
2300           sum += s->time;
2301           if (node->count.ipa ().initialized_p ())
2302             sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2303           }
2304       }
2305   fprintf (dump_file, "Overall time estimate: "
2306            "%f weighted by profile: "
2307            "%f\n", sum.to_double (), sum_weighted.to_double ());
2308 }
2309
2310 /* Output some useful stats about inlining.  */
2311
2312 static void
2313 dump_inline_stats (void)
2314 {
2315   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2316   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2317   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2318   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2319   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2320   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2321   int64_t reason[CIF_N_REASONS][2];
2322   sreal reason_freq[CIF_N_REASONS];
2323   int i;
2324   struct cgraph_node *node;
2325
2326   memset (reason, 0, sizeof (reason));
2327   for (i=0; i < CIF_N_REASONS; i++)
2328     reason_freq[i] = 0;
2329   FOR_EACH_DEFINED_FUNCTION (node)
2330   {
2331     struct cgraph_edge *e;
2332     for (e = node->callees; e; e = e->next_callee)
2333       {
2334         if (e->inline_failed)
2335           {
2336             if (e->count.ipa ().initialized_p ())
2337               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2338             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2339             reason[(int) e->inline_failed][1] ++;
2340             if (DECL_VIRTUAL_P (e->callee->decl)
2341                 && e->count.ipa ().initialized_p ())
2342               {
2343                 if (e->indirect_inlining_edge)
2344                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2345                 else
2346                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2347               }
2348             else if (e->count.ipa ().initialized_p ())
2349               {
2350                 if (e->indirect_inlining_edge)
2351                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2352                 else
2353                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2354               }
2355           }
2356         else if (e->count.ipa ().initialized_p ())
2357           {
2358             if (e->speculative)
2359               {
2360                 if (DECL_VIRTUAL_P (e->callee->decl))
2361                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2362                 else
2363                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2364               }
2365             else if (DECL_VIRTUAL_P (e->callee->decl))
2366               {
2367                 if (e->indirect_inlining_edge)
2368                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2369                 else
2370                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2371               }
2372             else
2373               {
2374                 if (e->indirect_inlining_edge)
2375                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2376                 else
2377                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2378               }
2379           }
2380       }
2381     for (e = node->indirect_calls; e; e = e->next_callee)
2382       if (e->indirect_info->polymorphic
2383           & e->count.ipa ().initialized_p ())
2384         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2385       else if (e->count.ipa ().initialized_p ())
2386         indirect_cnt += e->count.ipa ().to_gcov_type ();
2387   }
2388   if (max_count.initialized_p ())
2389     {
2390       fprintf (dump_file,
2391                "Inlined %" PRId64 " + speculative "
2392                "%" PRId64 " + speculative polymorphic "
2393                "%" PRId64 " + previously indirect "
2394                "%" PRId64 " + virtual "
2395                "%" PRId64 " + virtual and previously indirect "
2396                "%" PRId64 "\n" "Not inlined "
2397                "%" PRId64 " + previously indirect "
2398                "%" PRId64 " + virtual "
2399                "%" PRId64 " + virtual and previously indirect "
2400                "%" PRId64 " + stil indirect "
2401                "%" PRId64 " + still indirect polymorphic "
2402                "%" PRId64 "\n", inlined_cnt,
2403                inlined_speculative, inlined_speculative_ply,
2404                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2405                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2406                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2407       fprintf (dump_file, "Removed speculations ");
2408       spec_rem.dump (dump_file);
2409       fprintf (dump_file, "\n");
2410     }
2411   dump_overall_stats ();
2412   fprintf (dump_file, "\nWhy inlining failed?\n");
2413   for (i = 0; i < CIF_N_REASONS; i++)
2414     if (reason[i][1])
2415       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2416                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2417                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2418 }
2419
2420 /* Called when node is removed.  */
2421
2422 static void
2423 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2424 {
2425   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2426     return;
2427
2428   hash_set<struct cgraph_node *> *removed
2429     = (hash_set<struct cgraph_node *> *) data;
2430   removed->add (node);
2431 }
2432
2433 /* Decide on the inlining.  We do so in the topological order to avoid
2434    expenses on updating data structures.  */
2435
2436 static unsigned int
2437 ipa_inline (void)
2438 {
2439   struct cgraph_node *node;
2440   int nnodes;
2441   struct cgraph_node **order;
2442   int i, j;
2443   int cold;
2444   bool remove_functions = false;
2445
2446   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2447
2448   if (dump_file)
2449     ipa_dump_fn_summaries (dump_file);
2450
2451   nnodes = ipa_reverse_postorder (order);
2452   spec_rem = profile_count::zero ();
2453
2454   FOR_EACH_FUNCTION (node)
2455     {
2456       node->aux = 0;
2457
2458       /* Recompute the default reasons for inlining because they may have
2459          changed during merging.  */
2460       if (in_lto_p)
2461         {
2462           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2463             {
2464               gcc_assert (e->inline_failed);
2465               initialize_inline_failed (e);
2466             }
2467           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2468             initialize_inline_failed (e);
2469         }
2470     }
2471
2472   if (dump_file)
2473     fprintf (dump_file, "\nFlattening functions:\n");
2474
2475   /* First shrink order array, so that it only contains nodes with
2476      flatten attribute.  */
2477   for (i = nnodes - 1, j = i; i >= 0; i--)
2478     {
2479       node = order[i];
2480       if (lookup_attribute ("flatten",
2481                             DECL_ATTRIBUTES (node->decl)) != NULL)
2482         order[j--] = order[i];
2483     }
2484
2485   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2486      nodes with flatten attribute.  If there is more than one such
2487      node, we need to register a node removal hook, as flatten_function
2488      could remove other nodes with flatten attribute.  See PR82801.  */
2489   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2490   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2491   if (j < nnodes - 2)
2492     {
2493       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2494       node_removal_hook_holder
2495         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2496                                            flatten_removed_nodes);
2497     }
2498
2499   /* In the first pass handle functions to be flattened.  Do this with
2500      a priority so none of our later choices will make this impossible.  */
2501   for (i = nnodes - 1; i > j; i--)
2502     {
2503       node = order[i];
2504       if (flatten_removed_nodes
2505           && flatten_removed_nodes->contains (node))
2506         continue;
2507
2508       /* Handle nodes to be flattened.
2509          Ideally when processing callees we stop inlining at the
2510          entry of cycles, possibly cloning that entry point and
2511          try to flatten itself turning it into a self-recursive
2512          function.  */
2513       if (dump_file)
2514         fprintf (dump_file, "Flattening %s\n", node->name ());
2515       flatten_function (node, false);
2516     }
2517
2518   if (j < nnodes - 2)
2519     {
2520       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2521       delete flatten_removed_nodes;
2522     }
2523   free (order);
2524
2525   if (dump_file)
2526     dump_overall_stats ();
2527
2528   inline_small_functions ();
2529
2530   gcc_assert (symtab->state == IPA_SSA);
2531   symtab->state = IPA_SSA_AFTER_INLINING;
2532   /* Do first after-inlining removal.  We want to remove all "stale" extern
2533      inline functions and virtual functions so we really know what is called
2534      once.  */
2535   symtab->remove_unreachable_nodes (dump_file);
2536
2537   /* Inline functions with a property that after inlining into all callers the
2538      code size will shrink because the out-of-line copy is eliminated.
2539      We do this regardless on the callee size as long as function growth limits
2540      are met.  */
2541   if (dump_file)
2542     fprintf (dump_file,
2543              "\nDeciding on functions to be inlined into all callers and "
2544              "removing useless speculations:\n");
2545
2546   /* Inlining one function called once has good chance of preventing
2547      inlining other function into the same callee.  Ideally we should
2548      work in priority order, but probably inlining hot functions first
2549      is good cut without the extra pain of maintaining the queue.
2550
2551      ??? this is not really fitting the bill perfectly: inlining function
2552      into callee often leads to better optimization of callee due to
2553      increased context for optimization.
2554      For example if main() function calls a function that outputs help
2555      and then function that does the main optmization, we should inline
2556      the second with priority even if both calls are cold by themselves.
2557
2558      We probably want to implement new predicate replacing our use of
2559      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2560      to be hot.  */
2561   for (cold = 0; cold <= 1; cold ++)
2562     {
2563       FOR_EACH_DEFINED_FUNCTION (node)
2564         {
2565           struct cgraph_edge *edge, *next;
2566           bool update=false;
2567
2568           if (!opt_for_fn (node->decl, optimize)
2569               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2570             continue;
2571
2572           for (edge = node->callees; edge; edge = next)
2573             {
2574               next = edge->next_callee;
2575               if (edge->speculative && !speculation_useful_p (edge, false))
2576                 {
2577                   if (edge->count.ipa ().initialized_p ())
2578                     spec_rem += edge->count.ipa ();
2579                   edge->resolve_speculation ();
2580                   update = true;
2581                   remove_functions = true;
2582                 }
2583             }
2584           if (update)
2585             {
2586               struct cgraph_node *where = node->global.inlined_to
2587                                           ? node->global.inlined_to : node;
2588               reset_edge_caches (where);
2589               ipa_update_overall_fn_summary (where);
2590             }
2591           if (want_inline_function_to_all_callers_p (node, cold))
2592             {
2593               int num_calls = 0;
2594               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2595                                                  true);
2596               while (node->call_for_symbol_and_aliases
2597                        (inline_to_all_callers, &num_calls, true))
2598                 ;
2599               remove_functions = true;
2600             }
2601         }
2602     }
2603
2604   /* Free ipa-prop structures if they are no longer needed.  */
2605   ipa_free_all_structures_after_iinln ();
2606
2607   if (dump_enabled_p ())
2608     dump_printf (MSG_NOTE,
2609                  "\nInlined %i calls, eliminated %i functions\n\n",
2610                  ncalls_inlined, nfunctions_inlined);
2611   if (dump_file)
2612     dump_inline_stats ();
2613
2614   if (dump_file)
2615     ipa_dump_fn_summaries (dump_file);
2616   return remove_functions ? TODO_remove_functions : 0;
2617 }
2618
2619 /* Inline always-inline function calls in NODE.  */
2620
2621 static bool
2622 inline_always_inline_functions (struct cgraph_node *node)
2623 {
2624   struct cgraph_edge *e;
2625   bool inlined = false;
2626
2627   for (e = node->callees; e; e = e->next_callee)
2628     {
2629       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2630       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2631         continue;
2632
2633       if (e->recursive_p ())
2634         {
2635           if (dump_enabled_p ())
2636             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2637                              "  Not inlining recursive call to %C.\n",
2638                              e->callee);
2639           e->inline_failed = CIF_RECURSIVE_INLINING;
2640           continue;
2641         }
2642
2643       if (!can_early_inline_edge_p (e))
2644         {
2645           /* Set inlined to true if the callee is marked "always_inline" but
2646              is not inlinable.  This will allow flagging an error later in
2647              expand_call_inline in tree-inline.c.  */
2648           if (lookup_attribute ("always_inline",
2649                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2650             inlined = true;
2651           continue;
2652         }
2653
2654       if (dump_enabled_p ())
2655         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2656                          "  Inlining %C into %C (always_inline).\n",
2657                          e->callee, e->caller);
2658       inline_call (e, true, NULL, NULL, false);
2659       inlined = true;
2660     }
2661   if (inlined)
2662     ipa_update_overall_fn_summary (node);
2663
2664   return inlined;
2665 }
2666
2667 /* Decide on the inlining.  We do so in the topological order to avoid
2668    expenses on updating data structures.  */
2669
2670 static bool
2671 early_inline_small_functions (struct cgraph_node *node)
2672 {
2673   struct cgraph_edge *e;
2674   bool inlined = false;
2675
2676   for (e = node->callees; e; e = e->next_callee)
2677     {
2678       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2679
2680       /* We can enounter not-yet-analyzed function during
2681          early inlining on callgraphs with strongly
2682          connected components.  */
2683       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2684       if (s == NULL || !s->inlinable || !e->inline_failed)
2685         continue;
2686
2687       /* Do not consider functions not declared inline.  */
2688       if (!DECL_DECLARED_INLINE_P (callee->decl)
2689           && !opt_for_fn (node->decl, flag_inline_small_functions)
2690           && !opt_for_fn (node->decl, flag_inline_functions))
2691         continue;
2692
2693       if (dump_enabled_p ())
2694         dump_printf_loc (MSG_NOTE, e->call_stmt,
2695                          "Considering inline candidate %C.\n",
2696                          callee);
2697
2698       if (!can_early_inline_edge_p (e))
2699         continue;
2700
2701       if (e->recursive_p ())
2702         {
2703           if (dump_enabled_p ())
2704             dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2705                              "  Not inlining: recursive call.\n");
2706           continue;
2707         }
2708
2709       if (!want_early_inline_function_p (e))
2710         continue;
2711
2712       if (dump_enabled_p ())
2713         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2714                          " Inlining %C into %C.\n",
2715                          callee, e->caller);
2716       inline_call (e, true, NULL, NULL, false);
2717       inlined = true;
2718     }
2719
2720   if (inlined)
2721     ipa_update_overall_fn_summary (node);
2722
2723   return inlined;
2724 }
2725
2726 unsigned int
2727 early_inliner (function *fun)
2728 {
2729   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2730   struct cgraph_edge *edge;
2731   unsigned int todo = 0;
2732   int iterations = 0;
2733   bool inlined = false;
2734
2735   if (seen_error ())
2736     return 0;
2737
2738   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2739      happens when some pass decides to construct new function and
2740      cgraph_add_new_function calls lowering passes and early optimization on
2741      it.  This may confuse ourself when early inliner decide to inline call to
2742      function clone, because function clones don't have parameter list in
2743      ipa-prop matching their signature.  */
2744   if (ipa_node_params_sum)
2745     return 0;
2746
2747   if (flag_checking)
2748     node->verify ();
2749   node->remove_all_references ();
2750
2751   /* Even when not optimizing or not inlining inline always-inline
2752      functions.  */
2753   inlined = inline_always_inline_functions (node);
2754
2755   if (!optimize
2756       || flag_no_inline
2757       || !flag_early_inlining
2758       /* Never inline regular functions into always-inline functions
2759          during incremental inlining.  This sucks as functions calling
2760          always inline functions will get less optimized, but at the
2761          same time inlining of functions calling always inline
2762          function into an always inline function might introduce
2763          cycles of edges to be always inlined in the callgraph.
2764
2765          We might want to be smarter and just avoid this type of inlining.  */
2766       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2767           && lookup_attribute ("always_inline",
2768                                DECL_ATTRIBUTES (node->decl))))
2769     ;
2770   else if (lookup_attribute ("flatten",
2771                              DECL_ATTRIBUTES (node->decl)) != NULL)
2772     {
2773       /* When the function is marked to be flattened, recursively inline
2774          all calls in it.  */
2775       if (dump_enabled_p ())
2776         dump_printf (MSG_OPTIMIZED_LOCATIONS,
2777                      "Flattening %C\n", node);
2778       flatten_function (node, true);
2779       inlined = true;
2780     }
2781   else
2782     {
2783       /* If some always_inline functions was inlined, apply the changes.
2784          This way we will not account always inline into growth limits and
2785          moreover we will inline calls from always inlines that we skipped
2786          previously because of conditional above.  */
2787       if (inlined)
2788         {
2789           timevar_push (TV_INTEGRATION);
2790           todo |= optimize_inline_calls (current_function_decl);
2791           /* optimize_inline_calls call above might have introduced new
2792              statements that don't have inline parameters computed.  */
2793           for (edge = node->callees; edge; edge = edge->next_callee)
2794             {
2795               /* We can enounter not-yet-analyzed function during
2796                  early inlining on callgraphs with strongly
2797                  connected components.  */
2798               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2799               es->call_stmt_size
2800                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2801               es->call_stmt_time
2802                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2803             }
2804           ipa_update_overall_fn_summary (node);
2805           inlined = false;
2806           timevar_pop (TV_INTEGRATION);
2807         }
2808       /* We iterate incremental inlining to get trivial cases of indirect
2809          inlining.  */
2810       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2811              && early_inline_small_functions (node))
2812         {
2813           timevar_push (TV_INTEGRATION);
2814           todo |= optimize_inline_calls (current_function_decl);
2815
2816           /* Technically we ought to recompute inline parameters so the new
2817              iteration of early inliner works as expected.  We however have
2818              values approximately right and thus we only need to update edge
2819              info that might be cleared out for newly discovered edges.  */
2820           for (edge = node->callees; edge; edge = edge->next_callee)
2821             {
2822               /* We have no summary for new bound store calls yet.  */
2823               ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2824               es->call_stmt_size
2825                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2826               es->call_stmt_time
2827                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2828
2829               if (edge->callee->decl
2830                   && !gimple_check_call_matching_types (
2831                       edge->call_stmt, edge->callee->decl, false))
2832                 {
2833                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2834                   edge->call_stmt_cannot_inline_p = true;
2835                 }
2836             }
2837           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2838             ipa_update_overall_fn_summary (node);
2839           timevar_pop (TV_INTEGRATION);
2840           iterations++;
2841           inlined = false;
2842         }
2843       if (dump_file)
2844         fprintf (dump_file, "Iterations: %i\n", iterations);
2845     }
2846
2847   if (inlined)
2848     {
2849       timevar_push (TV_INTEGRATION);
2850       todo |= optimize_inline_calls (current_function_decl);
2851       timevar_pop (TV_INTEGRATION);
2852     }
2853
2854   fun->always_inline_functions_inlined = true;
2855
2856   return todo;
2857 }
2858
2859 /* Do inlining of small functions.  Doing so early helps profiling and other
2860    passes to be somewhat more effective and avoids some code duplication in
2861    later real inlining pass for testcases with very many function calls.  */
2862
2863 namespace {
2864
2865 const pass_data pass_data_early_inline =
2866 {
2867   GIMPLE_PASS, /* type */
2868   "einline", /* name */
2869   OPTGROUP_INLINE, /* optinfo_flags */
2870   TV_EARLY_INLINING, /* tv_id */
2871   PROP_ssa, /* properties_required */
2872   0, /* properties_provided */
2873   0, /* properties_destroyed */
2874   0, /* todo_flags_start */
2875   0, /* todo_flags_finish */
2876 };
2877
2878 class pass_early_inline : public gimple_opt_pass
2879 {
2880 public:
2881   pass_early_inline (gcc::context *ctxt)
2882     : gimple_opt_pass (pass_data_early_inline, ctxt)
2883   {}
2884
2885   /* opt_pass methods: */
2886   virtual unsigned int execute (function *);
2887
2888 }; // class pass_early_inline
2889
2890 unsigned int
2891 pass_early_inline::execute (function *fun)
2892 {
2893   return early_inliner (fun);
2894 }
2895
2896 } // anon namespace
2897
2898 gimple_opt_pass *
2899 make_pass_early_inline (gcc::context *ctxt)
2900 {
2901   return new pass_early_inline (ctxt);
2902 }
2903
2904 namespace {
2905
2906 const pass_data pass_data_ipa_inline =
2907 {
2908   IPA_PASS, /* type */
2909   "inline", /* name */
2910   OPTGROUP_INLINE, /* optinfo_flags */
2911   TV_IPA_INLINING, /* tv_id */
2912   0, /* properties_required */
2913   0, /* properties_provided */
2914   0, /* properties_destroyed */
2915   0, /* todo_flags_start */
2916   ( TODO_dump_symtab ), /* todo_flags_finish */
2917 };
2918
2919 class pass_ipa_inline : public ipa_opt_pass_d
2920 {
2921 public:
2922   pass_ipa_inline (gcc::context *ctxt)
2923     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2924                       NULL, /* generate_summary */
2925                       NULL, /* write_summary */
2926                       NULL, /* read_summary */
2927                       NULL, /* write_optimization_summary */
2928                       NULL, /* read_optimization_summary */
2929                       NULL, /* stmt_fixup */
2930                       0, /* function_transform_todo_flags_start */
2931                       inline_transform, /* function_transform */
2932                       NULL) /* variable_transform */
2933   {}
2934
2935   /* opt_pass methods: */
2936   virtual unsigned int execute (function *) { return ipa_inline (); }
2937
2938 }; // class pass_ipa_inline
2939
2940 } // anon namespace
2941
2942 ipa_opt_pass_d *
2943 make_pass_ipa_inline (gcc::context *ctxt)
2944 {
2945   return new pass_ipa_inline (ctxt);
2946 }