1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23    The implementation of the inliner is organized as follows:
24
25 inlining heuristics limits
26
27      can_inline_edge_p allows checking that a particular inlining is allowed
28      by the limits specified by the user (allowed function growth, stack growth
29      and so on).
30
31      Functions are inlined when it is obvious the result is profitable (such
32      as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42        A simple local inlining pass that inlines callees into the current function.
43        This pass makes no use of whole unit analysis and thus it can make only
44        very simple decisions based on local properties.
45
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49        callees of the function seen by the early inliner have already been optimized,
50        and the results of early inlining add a lot of optimization opportunities
51        for local optimization.
52
53        The pass handles the obvious inlining decisions within the compilation
54        unit - inlining auto-inline functions, inlining for size and
55 flattening.
56
57        The main strength of the pass is the ability to eliminate the abstraction
58        penalty in C++ code (via a combination of inlining and early
59        optimization) and thus improve the quality of the analysis done by the
60        real IPA optimizers.
61
62        Because of the lack of whole unit knowledge, the pass cannot really make
63        good code size/performance tradeoffs.  It however does very simple
64        speculative inlining, allowing code size to grow by
65        EARLY_INLINING_INSNS when the callee is a leaf function.  In this case the
66 optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71        knowledge.  It performs the following steps:
72
73        1) Inlining of small functions.  This is implemented by a greedy
74        algorithm ordering all inlinable cgraph edges by their badness and
75        inlining them in this order as long as the inline limits allow doing so.
76
77        This heuristic is not very good at inlining recursive calls.  Recursive
78        calls can be inlined with results similar to loop unrolling.  To do so,
79        a special-purpose recursive inliner is executed on the function when a
80        recursive edge is met as a viable candidate.
81
82        2) Unreachable functions are removed from the callgraph.  Inlining leads
83        to devirtualization and other modifications of the callgraph, so functions
84        may become unreachable during the process.  Also, functions declared
85        extern inline and virtual functions are removed, since after inlining
86        we no longer need the offline bodies.
87
88        3) Functions called once and not exported from the unit are inlined.
89        This should almost always lead to a reduction of code size by eliminating
90        the need for an offline copy of the function.  */
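/* A minimal sketch (made-up translation unit, not taken from the sources)
   of the "functions called once" case described above:

     static int square (int x) { return x * x; }
     int user (int y) { return square (y) + 1; }

   square is local to the unit and has a single caller, so inlining it into
   user and removing the now unreachable offline body of square reduces the
   overall unit size.  */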
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "tm.h"
96 #include "alias.h"
97 #include "symtab.h"
98 #include "tree.h"
99 #include "fold-const.h"
100 #include "trans-mem.h"
101 #include "calls.h"
102 #include "tree-inline.h"
103 #include "langhooks.h"
104 #include "flags.h"
105 #include "diagnostic.h"
106 #include "gimple-pretty-print.h"
107 #include "params.h"
108 #include "intl.h"
109 #include "tree-pass.h"
110 #include "coverage.h"
111 #include "rtl.h"
112 #include "bitmap.h"
113 #include "profile.h"
114 #include "predict.h"
115 #include "hard-reg-set.h"
116 #include "function.h"
117 #include "basic-block.h"
118 #include "tree-ssa-alias.h"
119 #include "internal-fn.h"
120 #include "gimple-expr.h"
121 #include "gimple.h"
122 #include "gimple-ssa.h"
123 #include "plugin-api.h"
124 #include "ipa-ref.h"
125 #include "cgraph.h"
126 #include "alloc-pool.h"
127 #include "symbol-summary.h"
128 #include "ipa-prop.h"
129 #include "except.h"
130 #include "target.h"
131 #include "ipa-inline.h"
132 #include "ipa-utils.h"
133 #include "sreal.h"
134 #include "auto-profile.h"
135 #include "builtins.h"
136 #include "fibonacci_heap.h"
137 #include "lto-streamer.h"
138
139 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
140 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
141
142 /* Statistics we collect about inlining algorithm. */
143 static int overall_size;
144 static gcov_type max_count;
145 static gcov_type spec_rem;
146
147 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100. */
148 static sreal cgraph_freq_base_rec, percent_rec;
149
150 /* Return false when inlining edge E would lead to violating
151 limits on function unit growth or stack usage growth.
152
153 The relative function body growth limit is present generally
154 to avoid problems with non-linear behavior of the compiler.
155    To allow inlining huge functions into a tiny wrapper, the limit
156    is always based on the bigger of the two functions considered.
157
158    For stack growth limits we always base the limit on the stack usage
159    of the callers.  We want to prevent applications from segfaulting
160    on stack overflow when functions with huge stack frames get
161    inlined.  */
162
163 static bool
164 caller_growth_limits (struct cgraph_edge *e)
165 {
166 struct cgraph_node *to = e->caller;
167 struct cgraph_node *what = e->callee->ultimate_alias_target ();
168 int newsize;
169 int limit = 0;
170 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
171 inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
172
173   /* Look for the function e->caller is inlined into.  While doing
174      so, work out the largest function body on the way.  As
175      described above, we want to base our function growth
176      limits on that; not on the self size of the
177      outer function, nor on the self size of the inline code
178      we immediately inline into.  This is the most relaxed
179      interpretation of the rule "do not grow large functions
180      too much in order to prevent the compiler from exploding".  */
181 while (true)
182 {
183 info = inline_summaries->get (to);
184 if (limit < info->self_size)
185 limit = info->self_size;
186 if (stack_size_limit < info->estimated_self_stack_size)
187 stack_size_limit = info->estimated_self_stack_size;
188 if (to->global.inlined_to)
189 to = to->callers->caller;
190 else
191 break;
192 }
193
194 what_info = inline_summaries->get (what);
195
196 if (limit < what_info->self_size)
197 limit = what_info->self_size;
198
199 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
200
201 /* Check the size after inlining against the function limits. But allow
202 the function to shrink if it went over the limits by forced inlining. */
203 newsize = estimate_size_after_inlining (to, e);
204 if (newsize >= info->size
205 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
206 && newsize > limit)
207 {
208 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
209 return false;
210 }
211
212 if (!what_info->estimated_stack_size)
213 return true;
214
215 /* FIXME: Stack size limit often prevents inlining in Fortran programs
216      due to large I/O data structures used by the Fortran front-end.
217 We ought to ignore this limit when we know that the edge is executed
218 on every invocation of the caller (i.e. its call statement dominates
219 exit block). We do not track this information, yet. */
220 stack_size_limit += ((gcov_type)stack_size_limit
221 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
222
223 inlined_stack = (outer_info->stack_frame_offset
224 + outer_info->estimated_self_stack_size
225 + what_info->estimated_stack_size);
226   /* Check the new stack consumption against the stack consumption at the place
227      where the stack is used.  */
228 if (inlined_stack > stack_size_limit
229 /* If function already has large stack usage from sibling
230 inline call, we can inline, too.
231          This bit overoptimistically assumes that we are good at stack
232 packing. */
233 && inlined_stack > info->estimated_stack_size
234 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
235 {
236 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
237 return false;
238 }
239 return true;
240 }
241
242 /* Dump info about why inlining has failed. */
243
244 static void
245 report_inline_failed_reason (struct cgraph_edge *e)
246 {
247 if (dump_file)
248 {
249 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
250 xstrdup_for_dump (e->caller->name ()), e->caller->order,
251 xstrdup_for_dump (e->callee->name ()), e->callee->order,
252 cgraph_inline_failed_string (e->inline_failed));
253 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
254 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
255 && e->caller->lto_file_data
256 && e->callee->function_symbol ()->lto_file_data)
257 {
258 fprintf (dump_file, " LTO objects: %s, %s\n",
259 e->caller->lto_file_data->file_name,
260 e->callee->function_symbol ()->lto_file_data->file_name);
261 }
262 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
263 cl_target_option_print_diff
264 (dump_file, 2, target_opts_for_fn (e->caller->decl),
265 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
266 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
267 cl_optimization_print_diff
268 (dump_file, 2, opts_for_fn (e->caller->decl),
269 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
270 }
271 }
272
273 /* Decide whether sanitizer-related attributes allow inlining. */
274
275 static bool
276 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
277 {
278   /* Don't care if the sanitizer is disabled.  */
279 if (!(flag_sanitize & SANITIZE_ADDRESS))
280 return true;
281
282 if (!caller || !callee)
283 return true;
284
285 return !!lookup_attribute ("no_sanitize_address",
286 DECL_ATTRIBUTES (caller)) ==
287 !!lookup_attribute ("no_sanitize_address",
288 DECL_ATTRIBUTES (callee));
289 }
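/* A hypothetical source-level example of the check above: when building
   with -fsanitize=address, a pair such as

     __attribute__ ((no_sanitize_address)) static void callee (void) { }
     void caller (void) { callee (); }

   does not match, because only one of the two functions carries the
   no_sanitize_address attribute, so the call is later rejected for
   inlining (CIF_ATTRIBUTE_MISMATCH).  */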
290
291 /* Used for flags where it is safe to inline when caller's value is
292    greater than callee's.  */
293 #define check_maybe_up(flag) \
294 (opts_for_fn (caller->decl)->x_##flag \
295 != opts_for_fn (callee->decl)->x_##flag \
296 && (!always_inline \
297 || opts_for_fn (caller->decl)->x_##flag \
298 < opts_for_fn (callee->decl)->x_##flag))
299 /* Used for flags where it is safe to inline when caller's value is
300 smaller than callee's. */
301 #define check_maybe_down(flag) \
302 (opts_for_fn (caller->decl)->x_##flag \
303 != opts_for_fn (callee->decl)->x_##flag \
304 && (!always_inline \
305 || opts_for_fn (caller->decl)->x_##flag \
306 > opts_for_fn (callee->decl)->x_##flag))
307 /* Used for flags where exact match is needed for correctness. */
308 #define check_match(flag) \
309 (opts_for_fn (caller->decl)->x_##flag \
310 != opts_for_fn (callee->decl)->x_##flag)
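/* For illustration, check_maybe_up (flag_trapping_math), as used in
   can_inline_edge_p below, expands roughly to

     (opts_for_fn (caller->decl)->x_flag_trapping_math
      != opts_for_fn (callee->decl)->x_flag_trapping_math
      && (!always_inline
          || opts_for_fn (caller->decl)->x_flag_trapping_math
             < opts_for_fn (callee->decl)->x_flag_trapping_math))

   i.e. the flag values differ and, unless the callee is always_inline,
   any difference blocks inlining; for always_inline callees only a caller
   value smaller than the callee's does.  */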
311
312 /* Decide if we can inline the edge and possibly update
313 inline_failed reason.
314 We check whether inlining is possible at all and whether
315 caller growth limits allow doing so.
316
317    If REPORT is true, output the reason to the dump file.
318
319    If DISREGARD_LIMITS is true, ignore size limits.  */
320
321 static bool
322 can_inline_edge_p (struct cgraph_edge *e, bool report,
323 bool disregard_limits = false, bool early = false)
324 {
325 gcc_checking_assert (e->inline_failed);
326
327 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
328 {
329 if (report)
330 report_inline_failed_reason (e);
331 return false;
332 }
333
334 bool inlinable = true;
335 enum availability avail;
336 cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
337 cgraph_node *caller = e->caller->global.inlined_to
338 ? e->caller->global.inlined_to : e->caller;
339 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
340 tree callee_tree
341 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
342
343 if (!callee->definition)
344 {
345 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
346 inlinable = false;
347 }
348 else if (callee->calls_comdat_local)
349 {
350 e->inline_failed = CIF_USES_COMDAT_LOCAL;
351 inlinable = false;
352 }
353 else if (avail <= AVAIL_INTERPOSABLE)
354 {
355 e->inline_failed = CIF_OVERWRITABLE;
356 inlinable = false;
357 }
358 else if (e->call_stmt_cannot_inline_p)
359 {
360 if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
361 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
362 inlinable = false;
363 }
364 /* Don't inline if the functions have different EH personalities. */
365 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
366 && DECL_FUNCTION_PERSONALITY (callee->decl)
367 && (DECL_FUNCTION_PERSONALITY (caller->decl)
368 != DECL_FUNCTION_PERSONALITY (callee->decl)))
369 {
370 e->inline_failed = CIF_EH_PERSONALITY;
371 inlinable = false;
372 }
373 /* TM pure functions should not be inlined into non-TM_pure
374 functions. */
375 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
376 {
377 e->inline_failed = CIF_UNSPECIFIED;
378 inlinable = false;
379 }
380 /* Check compatibility of target optimization options. */
381 else if (!targetm.target_option.can_inline_p (caller->decl,
382 callee->decl))
383 {
384 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
385 inlinable = false;
386 }
387 else if (!inline_summaries->get (callee)->inlinable)
388 {
389 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
390 inlinable = false;
391 }
392 else if (inline_summaries->get (caller)->contains_cilk_spawn)
393 {
394 e->inline_failed = CIF_CILK_SPAWN;
395 inlinable = false;
396 }
397 /* Don't inline a function with mismatched sanitization attributes. */
398 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
399 {
400 e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
401 inlinable = false;
402 }
403 /* Check if caller growth allows the inlining. */
404 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
405 && !disregard_limits
406 && !lookup_attribute ("flatten",
407 DECL_ATTRIBUTES (caller->decl))
408 && !caller_growth_limits (e))
409 inlinable = false;
410 /* Don't inline a function with a higher optimization level than the
411      caller.  FIXME: this is really just the tip of the iceberg of handling
412      the optimization attribute.  */
413 else if (caller_tree != callee_tree)
414 {
415 bool always_inline =
416 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
417 && lookup_attribute ("always_inline",
418 DECL_ATTRIBUTES (callee->decl)));
419
420       /* Until GCC 4.9 we did not check the semantics-altering flags
421          below and inlined across the optimization boundary.
422          Enabling the checks below breaks several packages by refusing
423          to inline library always_inline functions.  See PR65873.
424          Disable the check for early inlining for now until a better solution
425          is found.  */
426 if (always_inline && early)
427 ;
428 /* There are some options that change IL semantics which means
429          we cannot inline in these cases for correctness reasons.
430 Not even for always_inline declared functions. */
431 /* Strictly speaking only when the callee contains signed integer
432 math where overflow is undefined. */
433 else if ((check_maybe_up (flag_strict_overflow)
434                /* This flag is set by optimize.  Allow inlining across the
435                   optimize boundary.  */
436 && (!opt_for_fn (caller->decl, optimize)
437 == !opt_for_fn (callee->decl, optimize) || !always_inline))
438 || check_match (flag_wrapv)
439 || check_match (flag_trapv)
440 /* Strictly speaking only when the callee uses FP math. */
441 || check_maybe_up (flag_rounding_math)
442 || check_maybe_up (flag_trapping_math)
443 || check_maybe_down (flag_unsafe_math_optimizations)
444 || check_maybe_down (flag_finite_math_only)
445 || check_maybe_up (flag_signaling_nans)
446 || check_maybe_down (flag_cx_limited_range)
447 || check_maybe_up (flag_signed_zeros)
448 || check_maybe_down (flag_associative_math)
449 || check_maybe_down (flag_reciprocal_math)
450             /* We do not want code compiled with exceptions to be
451 brought into a non-EH function unless we know that the callee
452 does not throw.
453 This is tracked by DECL_FUNCTION_PERSONALITY. */
454 || (check_match (flag_non_call_exceptions)
455 /* TODO: We also may allow bringing !flag_non_call_exceptions
456 to flag_non_call_exceptions function, but that may need
457 extra work in tree-inline to add the extra EH edges. */
458 && (!opt_for_fn (callee->decl, flag_non_call_exceptions)
459 || DECL_FUNCTION_PERSONALITY (callee->decl)))
460 || (check_maybe_up (flag_exceptions)
461 && DECL_FUNCTION_PERSONALITY (callee->decl))
462 /* Strictly speaking only when the callee contains function
463 calls that may end up setting errno. */
464 || check_maybe_up (flag_errno_math)
465             /* When devirtualization is disabled for the callee, it is not safe
466                to inline it, as we may have mangled the type info.
467                Allow early inlining of always_inline functions.  */
468 || (!early && check_maybe_down (flag_devirtualize)))
469 {
470 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
471 inlinable = false;
472 }
473 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
474 else if (always_inline)
475 ;
476       /* When the user added an optimize attribute to the callee, honor it.  */
477 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
478 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
479 {
480 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
481 inlinable = false;
482 }
483       /* If explicit optimize attributes are not used, the mismatch is caused
484          by different command-line options used to build different units.
485          Do not care about COMDAT functions - those are intended to be
486          optimized with the optimization flags of the module they are used in.
487 Also do not care about mixing up size/speed optimization when
488 DECL_DISREGARD_INLINE_LIMITS is set. */
489 else if ((callee->merged
490 && !lookup_attribute ("optimize",
491 DECL_ATTRIBUTES (caller->decl)))
492 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
493 ;
494       /* If the mismatch is caused by merging two LTO units with different
495          optimization flags we want to be a bit nicer.  However, never inline
496          if one of the functions is not optimized at all.  */
497 else if (!opt_for_fn (callee->decl, optimize)
498 || !opt_for_fn (caller->decl, optimize))
499 {
500 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
501 inlinable = false;
502 }
503       /* If the callee is optimized for size and the caller is not, allow inlining if
504          the code shrinks or we are within the MAX_INLINE_INSNS_SINGLE limit and the
505          callee is inline (and thus likely a unified comdat).  This will allow the
506          caller to run faster.  */
507 else if (opt_for_fn (callee->decl, optimize_size)
508 > opt_for_fn (caller->decl, optimize_size))
509 {
510 int growth = estimate_edge_growth (e);
511 if (growth > 0
512 && (!DECL_DECLARED_INLINE_P (callee->decl)
513 && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
514 MAX_INLINE_INSNS_AUTO)))
515 {
516 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
517 inlinable = false;
518 }
519 }
520       /* If the callee is more aggressively optimized for performance than the caller,
521          we generally want to inline only cheap (runtime-wise) functions.  */
522 else if (opt_for_fn (callee->decl, optimize_size)
523 < opt_for_fn (caller->decl, optimize_size)
524 || (opt_for_fn (callee->decl, optimize)
525 > opt_for_fn (caller->decl, optimize)))
526 {
527 if (estimate_edge_time (e)
528 >= 20 + inline_edge_summary (e)->call_stmt_time)
529 {
530 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
531 inlinable = false;
532 }
533 }
534
535 }
536
537 if (!inlinable && report)
538 report_inline_failed_reason (e);
539 return inlinable;
540 }
541
542
543 /* Return true if the edge E is inlinable during early inlining. */
544
545 static bool
546 can_early_inline_edge_p (struct cgraph_edge *e)
547 {
548 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
549   /* The early inliner might get called at WPA stage when an IPA pass adds a new
550      function.  In this case we cannot really do any early inlining
551      because function bodies are missing.  */
552 if (!gimple_has_body_p (callee->decl))
553 {
554 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
555 return false;
556 }
557   /* In the early inliner some of the callees may not be in SSA form yet
558      (i.e. the callgraph is cyclic and we have not processed
559      the callee with the early inliner yet).  We don't have a CIF code for this
560      case; later we will re-do the decision in the real inliner.  */
561 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
562 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
563 {
564 if (dump_file)
565 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
566 return false;
567 }
568 if (!can_inline_edge_p (e, true, false, true))
569 return false;
570 return true;
571 }
572
573
574 /* Return number of calls in N. Ignore cheap builtins. */
575
576 static int
577 num_calls (struct cgraph_node *n)
578 {
579 struct cgraph_edge *e;
580 int num = 0;
581
582 for (e = n->callees; e; e = e->next_callee)
583 if (!is_inexpensive_builtin (e->callee->decl))
584 num++;
585 return num;
586 }
587
588
589 /* Return true if we are interested in inlining the small function.  */
590
591 static bool
592 want_early_inline_function_p (struct cgraph_edge *e)
593 {
594 bool want_inline = true;
595 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
596
597 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
598 ;
599   /* For AutoFDO, we need to make sure that before the profile summary, all
600      hot paths' IR look exactly the same as in the profiled binary.  As a result,
601      in the early inliner we will disregard the size limit and inline those callsites
602 that are:
603 * inlined in the profiled binary, and
604 * the cloned callee has enough samples to be considered "hot". */
605 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
606 ;
607 else if (!DECL_DECLARED_INLINE_P (callee->decl)
608 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
609 {
610 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
611 report_inline_failed_reason (e);
612 want_inline = false;
613 }
614 else
615 {
616 int growth = estimate_edge_growth (e);
617 int n;
618
619 if (growth <= 0)
620 ;
621 else if (!e->maybe_hot_p ()
622 && growth > 0)
623 {
624 if (dump_file)
625 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
626 "call is cold and code would grow by %i\n",
627 xstrdup_for_dump (e->caller->name ()),
628 e->caller->order,
629 xstrdup_for_dump (callee->name ()), callee->order,
630 growth);
631 want_inline = false;
632 }
633 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
634 {
635 if (dump_file)
636 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
637 "growth %i exceeds --param early-inlining-insns\n",
638 xstrdup_for_dump (e->caller->name ()),
639 e->caller->order,
640 xstrdup_for_dump (callee->name ()), callee->order,
641 growth);
642 want_inline = false;
643 }
644 else if ((n = num_calls (callee)) != 0
645 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
646 {
647 if (dump_file)
648 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
649 "growth %i exceeds --param early-inlining-insns "
650 "divided by number of calls\n",
651 xstrdup_for_dump (e->caller->name ()),
652 e->caller->order,
653 xstrdup_for_dump (callee->name ()), callee->order,
654 growth);
655 want_inline = false;
656 }
657 }
658 return want_inline;
659 }
660
661 /* Compute time of the edge->caller + edge->callee execution when inlining
662 does not happen. */
663
664 inline sreal
665 compute_uninlined_call_time (struct inline_summary *callee_info,
666 struct cgraph_edge *edge)
667 {
668 sreal uninlined_call_time = (sreal)callee_info->time;
669 cgraph_node *caller = (edge->caller->global.inlined_to
670 ? edge->caller->global.inlined_to
671 : edge->caller);
672
673 if (edge->count && caller->count)
674 uninlined_call_time *= (sreal)edge->count / caller->count;
675 if (edge->frequency)
676 uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
677 else
678 uninlined_call_time = uninlined_call_time >> 11;
679
680 int caller_time = inline_summaries->get (caller)->time;
681 return uninlined_call_time + caller_time;
682 }
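/* A worked example with made-up numbers: with no profile counts, a callee
   time of 200, edge->frequency equal to CGRAPH_FREQ_BASE / 2 and a caller
   time of 300, the function above estimates

     200 * (CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE + 300 = 100 + 300 = 400

   i.e. the callee time scaled by how often the call runs per invocation of
   the caller, plus the caller's own time.  */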
683
684 /* Same as compute_uninlined_call_time but compute the time when inlining
685    does happen.  */
686
687 inline sreal
688 compute_inlined_call_time (struct cgraph_edge *edge,
689 int edge_time)
690 {
691 cgraph_node *caller = (edge->caller->global.inlined_to
692 ? edge->caller->global.inlined_to
693 : edge->caller);
694 int caller_time = inline_summaries->get (caller)->time;
695 sreal time = edge_time;
696
697 if (edge->count && caller->count)
698 time *= (sreal)edge->count / caller->count;
699 if (edge->frequency)
700 time *= cgraph_freq_base_rec * edge->frequency;
701 else
702 time = time >> 11;
703
704 /* This calculation should match one in ipa-inline-analysis.
705 FIXME: Once ipa-inline-analysis is converted to sreal this can be
706 simplified. */
707 time -= (sreal) ((gcov_type) edge->frequency
708 * inline_edge_summary (edge)->call_stmt_time
709 * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
710 time += caller_time;
711 if (time <= 0)
712 time = ((sreal) 1) >> 8;
713 gcc_checking_assert (time >= 0);
714 return time;
715 }
716
717 /* Return true if the speedup for inlining E is bigger than
718 PARAM_MAX_INLINE_MIN_SPEEDUP. */
719
720 static bool
721 big_speedup_p (struct cgraph_edge *e)
722 {
723 sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
724 e);
725 sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
726
727 if (time - inlined_time
728 > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
729 * percent_rec)
730 return true;
731 return false;
732 }
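/* A worked example with made-up numbers: if --param inline-min-speedup is
   10 and the combined uninlined time computed above is 200, the edge counts
   as a big speedup only when the inlined estimate drops below
   200 - 200 * 10 / 100 = 180.  */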
733
734 /* Return true if we are interested in inlining the small function.
735    When REPORT is true, report the reason to the dump file.  */
736
737 static bool
738 want_inline_small_function_p (struct cgraph_edge *e, bool report)
739 {
740 bool want_inline = true;
741 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
742
743 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
744 ;
745 else if (!DECL_DECLARED_INLINE_P (callee->decl)
746 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
747 {
748 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
749 want_inline = false;
750 }
751   /* Do a fast and conservative check whether the function can be a good
752      inline candidate.  At the moment we allow inline hints to
753 promote non-inline functions to inline and we increase
754 MAX_INLINE_INSNS_SINGLE 16-fold for inline functions. */
755 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
756 && (!e->count || !e->maybe_hot_p ()))
757 && inline_summaries->get (callee)->min_size
758 - inline_edge_summary (e)->call_stmt_size
759 > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
760 {
761 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
762 want_inline = false;
763 }
764 else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
765 && inline_summaries->get (callee)->min_size
766 - inline_edge_summary (e)->call_stmt_size
767 > 16 * MAX_INLINE_INSNS_SINGLE)
768 {
769 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
770 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
771 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
772 want_inline = false;
773 }
774 else
775 {
776 int growth = estimate_edge_growth (e);
777 inline_hints hints = estimate_edge_hints (e);
778 bool big_speedup = big_speedup_p (e);
779
780 if (growth <= 0)
781 ;
782 /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when
783          hints suggest that inlining the given function is very profitable.  */
784 else if (DECL_DECLARED_INLINE_P (callee->decl)
785 && growth >= MAX_INLINE_INSNS_SINGLE
786 && ((!big_speedup
787 && !(hints & (INLINE_HINT_indirect_call
788 | INLINE_HINT_known_hot
789 | INLINE_HINT_loop_iterations
790 | INLINE_HINT_array_index
791 | INLINE_HINT_loop_stride)))
792 || growth >= MAX_INLINE_INSNS_SINGLE * 16))
793 {
794 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
795 want_inline = false;
796 }
797 else if (!DECL_DECLARED_INLINE_P (callee->decl)
798 && !opt_for_fn (e->caller->decl, flag_inline_functions))
799 {
800 /* growth_likely_positive is expensive, always test it last. */
801 if (growth >= MAX_INLINE_INSNS_SINGLE
802 || growth_likely_positive (callee, growth))
803 {
804 e->inline_failed = CIF_NOT_DECLARED_INLINED;
805 want_inline = false;
806 }
807 }
808       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
809          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
810          inlining the given function is very profitable.  */
811 else if (!DECL_DECLARED_INLINE_P (callee->decl)
812 && !big_speedup
813 && !(hints & INLINE_HINT_known_hot)
814 && growth >= ((hints & (INLINE_HINT_indirect_call
815 | INLINE_HINT_loop_iterations
816 | INLINE_HINT_array_index
817 | INLINE_HINT_loop_stride))
818 ? MAX (MAX_INLINE_INSNS_AUTO,
819 MAX_INLINE_INSNS_SINGLE)
820 : MAX_INLINE_INSNS_AUTO))
821 {
822 /* growth_likely_positive is expensive, always test it last. */
823 if (growth >= MAX_INLINE_INSNS_SINGLE
824 || growth_likely_positive (callee, growth))
825 {
826 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
827 want_inline = false;
828 }
829 }
830 /* If call is cold, do not inline when function body would grow. */
831 else if (!e->maybe_hot_p ()
832 && (growth >= MAX_INLINE_INSNS_SINGLE
833 || growth_likely_positive (callee, growth)))
834 {
835 e->inline_failed = CIF_UNLIKELY_CALL;
836 want_inline = false;
837 }
838 }
839 if (!want_inline && report)
840 report_inline_failed_reason (e);
841 return want_inline;
842 }
843
844 /* EDGE is a self-recursive edge.
845    We handle two cases - when function A is inlined into itself
846    or when function A is being inlined into another inlined copy of function
847    A within function B.
848
849    In the first case OUTER_NODE points to the toplevel copy of A, while
850    in the second case OUTER_NODE points to the outermost copy of A in B.
851
852    In both cases we want to be extra selective since
853    inlining the call will just cause new recursive calls to appear.  */
854
855 static bool
856 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
857 struct cgraph_node *outer_node,
858 bool peeling,
859 int depth)
860 {
861 char const *reason = NULL;
862 bool want_inline = true;
863 int caller_freq = CGRAPH_FREQ_BASE;
864 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
865
866 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
867 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
868
869 if (!edge->maybe_hot_p ())
870 {
871 reason = "recursive call is cold";
872 want_inline = false;
873 }
874 else if (max_count && !outer_node->count)
875 {
876 reason = "not executed in profile";
877 want_inline = false;
878 }
879 else if (depth > max_depth)
880 {
881 reason = "--param max-inline-recursive-depth exceeded.";
882 want_inline = false;
883 }
884
885 if (outer_node->global.inlined_to)
886 caller_freq = outer_node->callers->frequency;
887
888 if (!caller_freq)
889 {
890 reason = "function is inlined and unlikely";
891 want_inline = false;
892 }
893
894 if (!want_inline)
895 ;
896   /* Inlining a self-recursive function into a copy of itself within another
897      function is a transformation similar to loop peeling.
898
899      Peeling is profitable if we can inline enough copies to make the probability
900      of an actual call to the self-recursive function very small.  Be sure that
901      the probability of recursion is small.
902
903      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
904      This way the expected number of recursions is at most max_depth, since with
          recursion probability p the expected depth is bounded by 1/(1-p).  */
905 else if (peeling)
906 {
907 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
908 / max_depth);
909 int i;
910 for (i = 1; i < depth; i++)
911 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
912 if (max_count
913 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
914 >= max_prob))
915 {
916 reason = "profile of recursive call is too large";
917 want_inline = false;
918 }
919 if (!max_count
920 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
921 >= max_prob))
922 {
923 reason = "frequency of recursive call is too large";
924 want_inline = false;
925 }
926 }
927   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
928      recursion depth is large.  We reduce function call overhead and increase the
929      chances that things fit in the hardware return predictor.
930
931      Recursive inlining might however increase the cost of stack frame setup,
932      actually slowing down functions whose recursion tree is wide rather than
933      deep.
934
935      Deciding reliably on when to do recursive inlining without profile feedback
936      is tricky.  For now we disable recursive inlining when the probability of
937      self-recursion is low.
938
939      Recursive inlining of a self-recursive call within a loop also results in
940      large loop depths that generally optimize badly.  We may want to throttle
941      down inlining in those cases.  In particular this seems to happen in one of
942      the libstdc++ rb-tree methods.  */
943 else
944 {
945 if (max_count
946 && (edge->count * 100 / outer_node->count
947 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
948 {
949 reason = "profile of recursive call is too small";
950 want_inline = false;
951 }
952 else if (!max_count
953 && (edge->frequency * 100 / caller_freq
954 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
955 {
956 reason = "frequency of recursive call is too small";
957 want_inline = false;
958 }
959 }
960 if (!want_inline && dump_file)
961 fprintf (dump_file, " not inlining recursively: %s\n", reason);
962 return want_inline;
963 }
964
965 /* Return true when NODE has uninlinable caller;
966 set HAS_HOT_CALL if it has hot call.
967 Worker for cgraph_for_node_and_aliases. */
968
969 static bool
970 check_callers (struct cgraph_node *node, void *has_hot_call)
971 {
972 struct cgraph_edge *e;
973 for (e = node->callers; e; e = e->next_caller)
974 {
975 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
976 return true;
977 if (!can_inline_edge_p (e, true))
978 return true;
979 if (e->recursive_p ())
980 return true;
981 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
982 *(bool *)has_hot_call = true;
983 }
984 return false;
985 }
986
987 /* If NODE has a caller, return true. */
988
989 static bool
990 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
991 {
992 if (node->callers)
993 return true;
994 return false;
995 }
996
997 /* Decide if inlining NODE would reduce unit size by eliminating
998 the offline copy of function.
999 When COLD is true the cold calls are considered, too. */
1000
1001 static bool
1002 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1003 {
1004 bool has_hot_call = false;
1005
1006   /* Aliases get inlined along with the function they alias.  */
1007 if (node->alias)
1008 return false;
1009 /* Already inlined? */
1010 if (node->global.inlined_to)
1011 return false;
1012 /* Does it have callers? */
1013 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1014 return false;
1015 /* Inlining into all callers would increase size? */
1016 if (estimate_growth (node) > 0)
1017 return false;
1018 /* All inlines must be possible. */
1019 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1020 true))
1021 return false;
1022 if (!cold && !has_hot_call)
1023 return false;
1024 return true;
1025 }
1026
1027 /* A cost model driving the inlining heuristics in a way such that the edges
1028    with the smallest badness are inlined first.  After each inlining is
1029    performed the costs of all caller edges of the affected nodes are recomputed,
1030    so the metrics may accurately depend on values such as the number of
1031    inlinable callers of the function or the function body size.  */
1032
1033 static sreal
1034 edge_badness (struct cgraph_edge *edge, bool dump)
1035 {
1036 sreal badness;
1037 int growth, edge_time;
1038 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1039 struct inline_summary *callee_info = inline_summaries->get (callee);
1040 inline_hints hints;
1041 cgraph_node *caller = (edge->caller->global.inlined_to
1042 ? edge->caller->global.inlined_to
1043 : edge->caller);
1044
1045 growth = estimate_edge_growth (edge);
1046 edge_time = estimate_edge_time (edge);
1047 hints = estimate_edge_hints (edge);
1048 gcc_checking_assert (edge_time >= 0);
1049 gcc_checking_assert (edge_time <= callee_info->time);
1050 gcc_checking_assert (growth <= callee_info->size);
1051
1052 if (dump)
1053 {
1054 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
1055 xstrdup_for_dump (edge->caller->name ()),
1056 edge->caller->order,
1057 xstrdup_for_dump (callee->name ()),
1058 edge->callee->order);
1059 fprintf (dump_file, " size growth %i, time %i ",
1060 growth,
1061 edge_time);
1062 dump_inline_hints (dump_file, hints);
1063 if (big_speedup_p (edge))
1064 fprintf (dump_file, " big_speedup");
1065 fprintf (dump_file, "\n");
1066 }
1067
1068   /* Always prefer inlining that saves code size.  */
1069 if (growth <= 0)
1070 {
1071 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1072 if (dump)
1073 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1074 growth);
1075 }
1076 /* Inlining into EXTERNAL functions is not going to change anything unless
1077 they are themselves inlined. */
1078 else if (DECL_EXTERNAL (caller->decl))
1079 {
1080 if (dump)
1081 fprintf (dump_file, " max: function is external\n");
1082 return sreal::max ();
1083 }
1084   /* When a profile is available, compute badness as:
1085
1086 time_saved * caller_count
1087 goodness = -------------------------------------------------
1088 growth_of_caller * overall_growth * combined_size
1089
1090 badness = - goodness
1091
1092      Again, use a negative value to make calls with a profile appear hotter
1093      than calls without.
1094 */
1095 else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
1096 {
1097 sreal numerator, denominator;
1098 int overall_growth;
1099
1100 numerator = (compute_uninlined_call_time (callee_info, edge)
1101 - compute_inlined_call_time (edge, edge_time));
1102 if (numerator == 0)
1103 numerator = ((sreal) 1 >> 8);
1104 if (caller->count)
1105 numerator *= caller->count;
1106 else if (opt_for_fn (caller->decl, flag_branch_probabilities))
1107 numerator = numerator >> 11;
1108 denominator = growth;
1109
1110 overall_growth = callee_info->growth;
1111
1112 /* Look for inliner wrappers of the form:
1113
1114 inline_caller ()
1115 {
1116 do_fast_job...
1117 if (need_more_work)
1118 noninline_callee ();
1119 }
1120          Without penalizing this case, we usually inline noninline_callee
1121          into the inline_caller because overall_growth is small, preventing
1122          further inlining of inline_caller.
1123
1124 Penalize only callgraph edges to functions with small overall
1125 growth ...
1126 */
1127 if (growth > overall_growth
1128 /* ... and having only one caller which is not inlined ... */
1129 && callee_info->single_caller
1130 && !edge->caller->global.inlined_to
1131 /* ... and edges executed only conditionally ... */
1132 && edge->frequency < CGRAPH_FREQ_BASE
1133 /* ... consider case where callee is not inline but caller is ... */
1134 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1135 && DECL_DECLARED_INLINE_P (caller->decl))
1136 /* ... or when early optimizers decided to split and edge
1137 frequency still indicates splitting is a win ... */
1138 || (callee->split_part && !caller->split_part
1139 && edge->frequency
1140 < CGRAPH_FREQ_BASE
1141 * PARAM_VALUE
1142 (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
1143 /* ... and do not overwrite user specified hints. */
1144 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1145 || DECL_DECLARED_INLINE_P (caller->decl)))))
1146 {
1147 struct inline_summary *caller_info = inline_summaries->get (caller);
1148 int caller_growth = caller_info->growth;
1149
1150          /* Only apply the penalty when the caller looks like an inline candidate
1151             and it is not called once.  */
1152 if (!caller_info->single_caller && overall_growth < caller_growth
1153 && caller_info->inlinable
1154 && caller_info->size
1155 < (DECL_DECLARED_INLINE_P (caller->decl)
1156 ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1157 {
1158 if (dump)
1159 fprintf (dump_file,
1160 " Wrapper penalty. Increasing growth %i to %i\n",
1161 overall_growth, caller_growth);
1162 overall_growth = caller_growth;
1163 }
1164 }
1165 if (overall_growth > 0)
1166 {
1167          /* Strongly prefer functions with few callers that can be inlined
1168             fully.  Squaring the growth here leads to smaller binaries on average.
1169             Watch however for extreme cases and return to a linear function
1170             when growth is large.  */
1171 if (overall_growth < 256)
1172 overall_growth *= overall_growth;
1173 else
1174 overall_growth += 256 * 256 - 256;
1175 denominator *= overall_growth;
1176 }
1177 denominator *= inline_summaries->get (caller)->self_size + growth;
1178
1179 badness = - numerator / denominator;
1180
1181 if (dump)
1182 {
1183 fprintf (dump_file,
1184 " %f: guessed profile. frequency %f, count %" PRId64
1185 " caller count %" PRId64
1186 " time w/o inlining %f, time w inlining %f"
1187 " overall growth %i (current) %i (original)"
1188 " %i (compensated)\n",
1189 badness.to_double (),
1190 (double)edge->frequency / CGRAPH_FREQ_BASE,
1191 edge->count, caller->count,
1192 compute_uninlined_call_time (callee_info, edge).to_double (),
1193 compute_inlined_call_time (edge, edge_time).to_double (),
1194 estimate_growth (callee),
1195 callee_info->growth, overall_growth);
1196 }
1197 }
1198   /* When the function-local profile is not available or it does not give
1199      useful information (i.e. frequency is zero), base the cost on the
1200      loop nest and overall size growth, so we optimize for the overall number
1201      of functions fully inlined in the program.  */
1202 else
1203 {
1204 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1205 badness = growth;
1206
1207 /* Decrease badness if call is nested. */
1208 if (badness > 0)
1209 badness = badness >> nest;
1210 else
1211 badness = badness << nest;
1212 if (dump)
1213 fprintf (dump_file, " %f: no profile. nest %i\n",
1214 badness.to_double (), nest);
1215 }
1216 gcc_checking_assert (badness != 0);
1217
1218 if (edge->recursive_p ())
1219 badness = badness.shift (badness > 0 ? 4 : -4);
1220 if ((hints & (INLINE_HINT_indirect_call
1221 | INLINE_HINT_loop_iterations
1222 | INLINE_HINT_array_index
1223 | INLINE_HINT_loop_stride))
1224 || callee_info->growth <= 0)
1225 badness = badness.shift (badness > 0 ? -2 : 2);
1226 if (hints & (INLINE_HINT_same_scc))
1227 badness = badness.shift (badness > 0 ? 3 : -3);
1228 else if (hints & (INLINE_HINT_in_scc))
1229 badness = badness.shift (badness > 0 ? 2 : -2);
1230 else if (hints & (INLINE_HINT_cross_module))
1231 badness = badness.shift (badness > 0 ? 1 : -1);
1232 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1233 badness = badness.shift (badness > 0 ? -4 : 4);
1234 else if ((hints & INLINE_HINT_declared_inline))
1235 badness = badness.shift (badness > 0 ? -3 : 3);
1236 if (dump)
1237 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1238 return badness;
1239 }
1240
1241 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1242 static inline void
1243 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1244 {
1245 sreal badness = edge_badness (edge, false);
1246 if (edge->aux)
1247 {
1248 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1249 gcc_checking_assert (n->get_data () == edge);
1250
1251 /* fibonacci_heap::replace_key does busy updating of the
1252          heap that is unnecessarily expensive.
1253          We do lazy increases: after extracting the minimum, if the key
1254          turns out to be out of date, it is re-inserted into the heap
1255          with the correct value.  */
1256 if (badness < n->get_key ())
1257 {
1258 if (dump_file && (dump_flags & TDF_DETAILS))
1259 {
1260 fprintf (dump_file,
1261 " decreasing badness %s/%i -> %s/%i, %f"
1262 " to %f\n",
1263 xstrdup_for_dump (edge->caller->name ()),
1264 edge->caller->order,
1265 xstrdup_for_dump (edge->callee->name ()),
1266 edge->callee->order,
1267 n->get_key ().to_double (),
1268 badness.to_double ());
1269 }
1270 heap->decrease_key (n, badness);
1271 }
1272 }
1273 else
1274 {
1275 if (dump_file && (dump_flags & TDF_DETAILS))
1276 {
1277 fprintf (dump_file,
1278 " enqueuing call %s/%i -> %s/%i, badness %f\n",
1279 xstrdup_for_dump (edge->caller->name ()),
1280 edge->caller->order,
1281 xstrdup_for_dump (edge->callee->name ()),
1282 edge->callee->order,
1283 badness.to_double ());
1284 }
1285 edge->aux = heap->insert (badness, edge);
1286 }
1287 }
1288
1289
1290 /* NODE was inlined.
1291    All caller edges need to be reset because
1292    size estimates change.  Similarly callees need a reset
1293    because a better context may be known.  */
1294
1295 static void
1296 reset_edge_caches (struct cgraph_node *node)
1297 {
1298 struct cgraph_edge *edge;
1299 struct cgraph_edge *e = node->callees;
1300 struct cgraph_node *where = node;
1301 struct ipa_ref *ref;
1302
1303 if (where->global.inlined_to)
1304 where = where->global.inlined_to;
1305
1306 for (edge = where->callers; edge; edge = edge->next_caller)
1307 if (edge->inline_failed)
1308 reset_edge_growth_cache (edge);
1309
1310 FOR_EACH_ALIAS (where, ref)
1311 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1312
1313 if (!e)
1314 return;
1315
1316 while (true)
1317 if (!e->inline_failed && e->callee->callees)
1318 e = e->callee->callees;
1319 else
1320 {
1321 if (e->inline_failed)
1322 reset_edge_growth_cache (e);
1323 if (e->next_callee)
1324 e = e->next_callee;
1325 else
1326 {
1327 do
1328 {
1329 if (e->caller == node)
1330 return;
1331 e = e->caller->callers;
1332 }
1333 while (!e->next_callee);
1334 e = e->next_callee;
1335 }
1336 }
1337 }
1338
1339 /* Recompute HEAP nodes for each caller of NODE.
1340    UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1341    When CHECK_INLINABLITY_FOR is set, re-check whether the specified edge is
1342    inlinable.  Otherwise check all edges.  */
1343
1344 static void
1345 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1346 bitmap updated_nodes,
1347 struct cgraph_edge *check_inlinablity_for)
1348 {
1349 struct cgraph_edge *edge;
1350 struct ipa_ref *ref;
1351
1352 if ((!node->alias && !inline_summaries->get (node)->inlinable)
1353 || node->global.inlined_to)
1354 return;
1355 if (!bitmap_set_bit (updated_nodes, node->uid))
1356 return;
1357
1358 FOR_EACH_ALIAS (node, ref)
1359 {
1360 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1361 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1362 }
1363
1364 for (edge = node->callers; edge; edge = edge->next_caller)
1365 if (edge->inline_failed)
1366 {
1367 if (!check_inlinablity_for
1368 || check_inlinablity_for == edge)
1369 {
1370 if (can_inline_edge_p (edge, false)
1371 && want_inline_small_function_p (edge, false))
1372 update_edge_key (heap, edge);
1373 else if (edge->aux)
1374 {
1375 report_inline_failed_reason (edge);
1376 heap->delete_node ((edge_heap_node_t *) edge->aux);
1377 edge->aux = NULL;
1378 }
1379 }
1380 else if (edge->aux)
1381 update_edge_key (heap, edge);
1382 }
1383 }
1384
1385 /* Recompute HEAP nodes for each uninlined call in NODE.
1386    This is used when we know that edge badnesses are only going to increase
1387    (we introduced a new call site) and thus all we need to do is insert the
1388    newly created edges into the heap.  */
1389
1390 static void
1391 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1392 bitmap updated_nodes)
1393 {
1394 struct cgraph_edge *e = node->callees;
1395
1396 if (!e)
1397 return;
1398 while (true)
1399 if (!e->inline_failed && e->callee->callees)
1400 e = e->callee->callees;
1401 else
1402 {
1403 enum availability avail;
1404 struct cgraph_node *callee;
1405       /* We do not reset the callee growth cache here.  Since we added a new call,
1406          growth could only have increased and consequently the badness metric
1407          does not need updating.  */
1408 if (e->inline_failed
1409 && (callee = e->callee->ultimate_alias_target (&avail))
1410 && inline_summaries->get (callee)->inlinable
1411 && avail >= AVAIL_AVAILABLE
1412 && !bitmap_bit_p (updated_nodes, callee->uid))
1413 {
1414 if (can_inline_edge_p (e, false)
1415 && want_inline_small_function_p (e, false))
1416 update_edge_key (heap, e);
1417 else if (e->aux)
1418 {
1419 report_inline_failed_reason (e);
1420 heap->delete_node ((edge_heap_node_t *) e->aux);
1421 e->aux = NULL;
1422 }
1423 }
1424 if (e->next_callee)
1425 e = e->next_callee;
1426 else
1427 {
1428 do
1429 {
1430 if (e->caller == node)
1431 return;
1432 e = e->caller->callers;
1433 }
1434 while (!e->next_callee);
1435 e = e->next_callee;
1436 }
1437 }
1438 }
1439
1440 /* Enqueue all recursive calls from NODE into the priority queue, keyed by
1441    how much we want to recursively inline the call.  */
1442
1443 static void
1444 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1445 edge_heap_t *heap)
1446 {
1447 struct cgraph_edge *e;
1448 enum availability avail;
1449
1450 for (e = where->callees; e; e = e->next_callee)
1451 if (e->callee == node
1452 || (e->callee->ultimate_alias_target (&avail) == node
1453 && avail > AVAIL_INTERPOSABLE))
1454 {
1455 /* When profile feedback is available, prioritize by expected number
1456 of calls. */
1457 heap->insert (!max_count ? -e->frequency
1458 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1459 e);
1460 }
1461 for (e = where->callees; e; e = e->next_callee)
1462 if (!e->inline_failed)
1463 lookup_recursive_calls (node, e->callee, heap);
1464 }
1465
1466 /* Decide on recursive inlining: in the case the function has recursive calls,
1467    inline until the body size reaches the given limit.  If any new indirect edges
1468 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1469 is NULL. */
1470
1471 static bool
1472 recursive_inlining (struct cgraph_edge *edge,
1473 vec<cgraph_edge *> *new_edges)
1474 {
1475 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1476 edge_heap_t heap (sreal::min ());
1477 struct cgraph_node *node;
1478 struct cgraph_edge *e;
1479 struct cgraph_node *master_clone = NULL, *next;
1480 int depth = 0;
1481 int n = 0;
1482
1483 node = edge->caller;
1484 if (node->global.inlined_to)
1485 node = node->global.inlined_to;
1486
1487 if (DECL_DECLARED_INLINE_P (node->decl))
1488 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1489
1490 /* Make sure that function is small enough to be considered for inlining. */
1491 if (estimate_size_after_inlining (node, edge) >= limit)
1492 return false;
1493 lookup_recursive_calls (node, node, &heap);
1494 if (heap.empty ())
1495 return false;
1496
1497 if (dump_file)
1498 fprintf (dump_file,
1499 " Performing recursive inlining on %s\n",
1500 node->name ());
1501
1502   /* Do the inlining and update the list of recursive calls during the process.  */
1503 while (!heap.empty ())
1504 {
1505 struct cgraph_edge *curr = heap.extract_min ();
1506 struct cgraph_node *cnode, *dest = curr->callee;
1507
1508 if (!can_inline_edge_p (curr, true))
1509 continue;
1510
1511       /* MASTER_CLONE is produced in the case we have already started modifying
1512          the function.  Be sure to redirect the edge to the original body before
1513          estimating growth, otherwise we will be seeing growth after inlining
1514          into the already modified body.  */
1515 if (master_clone)
1516 {
1517 curr->redirect_callee (master_clone);
1518 reset_edge_growth_cache (curr);
1519 }
1520
1521 if (estimate_size_after_inlining (node, curr) > limit)
1522 {
1523 curr->redirect_callee (dest);
1524 reset_edge_growth_cache (curr);
1525 break;
1526 }
1527
1528 depth = 1;
1529 for (cnode = curr->caller;
1530 cnode->global.inlined_to; cnode = cnode->callers->caller)
1531 if (node->decl
1532 == curr->callee->ultimate_alias_target ()->decl)
1533 depth++;
1534
1535 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1536 {
1537 curr->redirect_callee (dest);
1538 reset_edge_growth_cache (curr);
1539 continue;
1540 }
1541
1542 if (dump_file)
1543 {
1544 fprintf (dump_file,
1545 " Inlining call of depth %i", depth);
1546 if (node->count)
1547 {
1548 fprintf (dump_file, " called approx. %.2f times per call",
1549 (double)curr->count / node->count);
1550 }
1551 fprintf (dump_file, "\n");
1552 }
1553 if (!master_clone)
1554 {
1555 /* We need original clone to copy around. */
1556 master_clone = node->create_clone (node->decl, node->count,
1557 CGRAPH_FREQ_BASE, false, vNULL,
1558 true, NULL, NULL);
1559 for (e = master_clone->callees; e; e = e->next_callee)
1560 if (!e->inline_failed)
1561 clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1562 curr->redirect_callee (master_clone);
1563 reset_edge_growth_cache (curr);
1564 }
1565
1566 inline_call (curr, false, new_edges, &overall_size, true);
1567 lookup_recursive_calls (node, curr->callee, &heap);
1568 n++;
1569 }
1570
1571 if (!heap.empty () && dump_file)
1572 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1573
1574 if (!master_clone)
1575 return false;
1576
1577 if (dump_file)
1578 fprintf (dump_file,
1579 "\n Inlined %i times, "
1580 "body grown from size %i to %i, time %i to %i\n", n,
1581 inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1582 inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1583
1584   /* Remove the master clone we used for inlining.  We rely on the fact that
1585      clones inlined into the master clone get queued just before the master
1586      clone, so we don't need recursion.  */
1587 for (node = symtab->first_function (); node != master_clone;
1588 node = next)
1589 {
1590 next = symtab->next_function (node);
1591 if (node->global.inlined_to == master_clone)
1592 node->remove ();
1593 }
1594 master_clone->remove ();
1595 return true;
1596 }
1597
1598
1599 /* Given the whole compilation unit size estimate INSNS, compute how large we can
1600 allow the unit to grow. */
1601
1602 static int
1603 compute_max_insns (int insns)
1604 {
1605 int max_insns = insns;
1606 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1607 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1608
1609 return ((int64_t) max_insns
1610 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1611 }
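/* A worked example with made-up numbers: for a unit whose size estimate of
   10000 insns is already above --param large-unit-insns, and with
   --param inline-unit-growth set to 20, the limit becomes
   10000 * (100 + 20) / 100 = 12000, i.e. small-function inlining may grow
   the unit by at most 20%.  */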
1612
1613
1614 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1615
1616 static void
1617 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1618 {
1619 while (new_edges.length () > 0)
1620 {
1621 struct cgraph_edge *edge = new_edges.pop ();
1622
1623 gcc_assert (!edge->aux);
1624 if (edge->inline_failed
1625 && can_inline_edge_p (edge, true)
1626 && want_inline_small_function_p (edge, true))
1627 edge->aux = heap->insert (edge_badness (edge, false), edge);
1628 }
1629 }
1630
1631 /* Remove EDGE from the fibheap. */
1632
1633 static void
1634 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1635 {
1636 if (e->aux)
1637 {
1638 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1639 e->aux = NULL;
1640 }
1641 }
1642
1643 /* Return true if speculation of edge E seems useful.
1644 If ANTICIPATE_INLINING is true, be conservative and hope that E
1645 may get inlined. */
1646
1647 bool
1648 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1649 {
1650 enum availability avail;
1651 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1652 struct cgraph_edge *direct, *indirect;
1653 struct ipa_ref *ref;
1654
1655 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1656
1657 if (!e->maybe_hot_p ())
1658 return false;
1659
1660 /* See if IP optimizations found something potentially useful about the
1661 function. For now we look only for CONST/PURE flags. Almost everything
1662 else we propagate is useless. */
1663 if (avail >= AVAIL_AVAILABLE)
1664 {
1665 int ecf_flags = flags_from_decl_or_type (target->decl);
1666 if (ecf_flags & ECF_CONST)
1667 {
1668 e->speculative_call_info (direct, indirect, ref);
1669 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1670 return true;
1671 }
1672 else if (ecf_flags & ECF_PURE)
1673 {
1674 e->speculative_call_info (direct, indirect, ref);
1675 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1676 return true;
1677 }
1678 }
1679 /* If we did not manage to inline the function or redirect
1680 to an ipa-cp clone (which is seen by the local flag being set),
1681 it is probably pointless to keep the speculation unless the hardware
1682 is missing an indirect call predictor. */
1683 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1684 return false;
1685 /* For overwritable targets there is not much to do. */
1686 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1687 return false;
1688 /* OK, speculation seems interesting. */
1689 return true;
1690 }
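
/* Illustrative sketch only (editor-added, hypothetical names): a speculative
   edge roughly corresponds to the source-level transformation below, applied
   to an indirect call whose likely target was guessed.  If likely_target is
   known to be const or pure while the original indirect call is not, keeping
   the speculation pays off even when the guarded direct call is never
   inlined, since later optimizers can CSE or hoist it.  */

static int
example_speculative_call (int (*fn) (int), int (*likely_target) (int), int x)
{
  if (fn == likely_target)
    return likely_target (x);  /* direct, possibly inlinable edge  */
  else
    return fn (x);             /* indirect edge kept as a fallback  */
}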
1691
1692 /* We know that EDGE is not going to be inlined.
1693 See if we can remove speculation. */
1694
1695 static void
1696 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1697 {
1698 if (edge->speculative && !speculation_useful_p (edge, false))
1699 {
1700 struct cgraph_node *node = edge->caller;
1701 struct cgraph_node *where = node->global.inlined_to
1702 ? node->global.inlined_to : node;
1703 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1704
1705 spec_rem += edge->count;
1706 edge->resolve_speculation ();
1707 reset_edge_caches (where);
1708 inline_update_overall_summary (where);
1709 update_caller_keys (edge_heap, where,
1710 updated_nodes, NULL);
1711 update_callee_keys (edge_heap, where,
1712 updated_nodes);
1713 BITMAP_FREE (updated_nodes);
1714 }
1715 }
1716
1717 /* Return true if NODE should be accounted in the overall size estimate.
1718 Skip all nodes optimized for size so we can measure the growth of the hot
1719 part of the program regardless of the padding. */
1720
1721 bool
1722 inline_account_function_p (struct cgraph_node *node)
1723 {
1724 return (!DECL_EXTERNAL (node->decl)
1725 && !opt_for_fn (node->decl, optimize_size)
1726 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1727 }
1728
1729 /* Count the number of callers of NODE and store it into DATA (which
1730 points to an int). Worker for cgraph_for_node_and_aliases. */
1731
1732 static bool
1733 sum_callers (struct cgraph_node *node, void *data)
1734 {
1735 struct cgraph_edge *e;
1736 int *num_calls = (int *)data;
1737
1738 for (e = node->callers; e; e = e->next_caller)
1739 (*num_calls)++;
1740 return false;
1741 }
1742
1743 /* We use a greedy algorithm for inlining of small functions:
1744 all inline candidates are put into a prioritized heap ordered by
1745 increasing badness.
1746
1747 The inlining of small functions is bounded by the unit growth parameters. */
1748
1749 static void
1750 inline_small_functions (void)
1751 {
1752 struct cgraph_node *node;
1753 struct cgraph_edge *edge;
1754 edge_heap_t edge_heap (sreal::min ());
1755 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1756 int min_size, max_size;
1757 auto_vec<cgraph_edge *> new_indirect_edges;
1758 int initial_size = 0;
1759 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1760 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1761 new_indirect_edges.create (8);
1762
1763 edge_removal_hook_holder
1764 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1765
1766 /* Compute overall unit size and other global parameters used by badness
1767 metrics. */
1768
1769 max_count = 0;
1770 ipa_reduced_postorder (order, true, true, NULL);
1771 free (order);
1772
1773 FOR_EACH_DEFINED_FUNCTION (node)
1774 if (!node->global.inlined_to)
1775 {
1776 if (!node->alias && node->analyzed
1777 && (node->has_gimple_body_p () || node->thunk.thunk_p))
1778 {
1779 struct inline_summary *info = inline_summaries->get (node);
1780 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1781
1782 /* Do not account external functions; they will be optimized out
1783 if not inlined. Also only count the non-cold portion of the program. */
1784 if (inline_account_function_p (node))
1785 initial_size += info->size;
1786 info->growth = estimate_growth (node);
1787
1788 int num_calls = 0;
1789 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1790 true);
1791 if (num_calls == 1)
1792 info->single_caller = true;
1793 if (dfs && dfs->next_cycle)
1794 {
1795 struct cgraph_node *n2;
1796 int id = dfs->scc_no + 1;
1797 for (n2 = node; n2;
1798 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1799 {
1800 struct inline_summary *info2 = inline_summaries->get (n2);
1801 if (info2->scc_no)
1802 break;
1803 info2->scc_no = id;
1804 }
1805 }
1806 }
1807
1808 for (edge = node->callers; edge; edge = edge->next_caller)
1809 if (max_count < edge->count)
1810 max_count = edge->count;
1811 }
1812 ipa_free_postorder_info ();
1813 initialize_growth_caches ();
1814
1815 if (dump_file)
1816 fprintf (dump_file,
1817 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1818 initial_size);
1819
1820 overall_size = initial_size;
1821 max_size = compute_max_insns (overall_size);
1822 min_size = overall_size;
1823
1824 /* Populate the heap with all edges we might inline. */
1825
1826 FOR_EACH_DEFINED_FUNCTION (node)
1827 {
1828 bool update = false;
1829 struct cgraph_edge *next = NULL;
1830 bool has_speculative = false;
1831
1832 if (dump_file)
1833 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1834 node->name (), node->order);
1835
1836 for (edge = node->callees; edge; edge = next)
1837 {
1838 next = edge->next_callee;
1839 if (edge->inline_failed
1840 && !edge->aux
1841 && can_inline_edge_p (edge, true)
1842 && want_inline_small_function_p (edge, true)
1843 && edge->inline_failed)
1844 {
1845 gcc_assert (!edge->aux);
1846 update_edge_key (&edge_heap, edge);
1847 }
1848 if (edge->speculative)
1849 has_speculative = true;
1850 }
1851 if (has_speculative)
1852 for (edge = node->callees; edge; edge = next)
1853 if (edge->speculative && !speculation_useful_p (edge,
1854 edge->aux != NULL))
1855 {
1856 edge->resolve_speculation ();
1857 update = true;
1858 }
1859 if (update)
1860 {
1861 struct cgraph_node *where = node->global.inlined_to
1862 ? node->global.inlined_to : node;
1863 inline_update_overall_summary (where);
1864 reset_edge_caches (where);
1865 update_caller_keys (&edge_heap, where,
1866 updated_nodes, NULL);
1867 update_callee_keys (&edge_heap, where,
1868 updated_nodes);
1869 bitmap_clear (updated_nodes);
1870 }
1871 }
1872
1873 gcc_assert (in_lto_p
1874 || !max_count
1875 || (profile_info && flag_branch_probabilities));
1876
1877 while (!edge_heap.empty ())
1878 {
1879 int old_size = overall_size;
1880 struct cgraph_node *where, *callee;
1881 sreal badness = edge_heap.min_key ();
1882 sreal current_badness;
1883 int growth;
1884
1885 edge = edge_heap.extract_min ();
1886 gcc_assert (edge->aux);
1887 edge->aux = NULL;
1888 if (!edge->inline_failed || !edge->callee->analyzed)
1889 continue;
1890
1891 #ifdef ENABLE_CHECKING
1892 /* Be sure that the caches are kept consistent. */
1893 sreal cached_badness = edge_badness (edge, false);
1894
1895 int old_size_est = estimate_edge_size (edge);
1896 int old_time_est = estimate_edge_time (edge);
1897 int old_hints_est = estimate_edge_hints (edge);
1898
1899 reset_edge_growth_cache (edge);
1900 gcc_assert (old_size_est == estimate_edge_size (edge));
1901 gcc_assert (old_time_est == estimate_edge_time (edge));
1902 /* FIXME:
1903
1904 gcc_assert (old_hints_est == estimate_edge_hints (edge));
1905
1906 fails with profile feedback because some hints depend on the
1907 maybe_hot_edge_p predicate and, because the callee gets inlined into other
1908 calls, the edge may become cold.
1909 This ought to be fixed by computing relative probabilities
1910 for the given invocation, but that will be better done once the whole
1911 code is converted to sreals. Disable for now and revert to the "wrong"
1912 value so the enable/disable checking paths agree. */
1913 edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1914
1915 /* When updating the edge costs, we only decrease badness in the keys.
1916 Increases of badness are handled lazily; when we see a key with an
1917 out-of-date value on it, we re-insert it now. */
1918 current_badness = edge_badness (edge, false);
1919 /* Disable checking for profile because roundoff errors may cause slight
1920 deviations in the order. */
1921 gcc_assert (max_count || cached_badness == current_badness);
1922 gcc_assert (current_badness >= badness);
1923 #else
1924 current_badness = edge_badness (edge, false);
1925 #endif
1926 if (current_badness != badness)
1927 {
1928 if (edge_heap.min () && current_badness > edge_heap.min_key ())
1929 {
1930 edge->aux = edge_heap.insert (current_badness, edge);
1931 continue;
1932 }
1933 else
1934 badness = current_badness;
1935 }
1936
1937 if (!can_inline_edge_p (edge, true))
1938 {
1939 resolve_noninline_speculation (&edge_heap, edge);
1940 continue;
1941 }
1942
1943 callee = edge->callee->ultimate_alias_target ();
1944 growth = estimate_edge_growth (edge);
1945 if (dump_file)
1946 {
1947 fprintf (dump_file,
1948 "\nConsidering %s/%i with %i size\n",
1949 callee->name (), callee->order,
1950 inline_summaries->get (callee)->size);
1951 fprintf (dump_file,
1952 " to be inlined into %s/%i in %s:%i\n"
1953 " Estimated badness is %f, frequency %.2f.\n",
1954 edge->caller->name (), edge->caller->order,
1955 edge->call_stmt
1956 && (LOCATION_LOCUS (gimple_location ((const_gimple)
1957 edge->call_stmt))
1958 > BUILTINS_LOCATION)
1959 ? gimple_filename ((const_gimple) edge->call_stmt)
1960 : "unknown",
1961 edge->call_stmt
1962 ? gimple_lineno ((const_gimple) edge->call_stmt)
1963 : -1,
1964 badness.to_double (),
1965 edge->frequency / (double)CGRAPH_FREQ_BASE);
1966 if (edge->count)
1967 fprintf (dump_file," Called %" PRId64"x\n",
1968 edge->count);
1969 if (dump_flags & TDF_DETAILS)
1970 edge_badness (edge, true);
1971 }
1972
1973 if (overall_size + growth > max_size
1974 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1975 {
1976 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1977 report_inline_failed_reason (edge);
1978 resolve_noninline_speculation (&edge_heap, edge);
1979 continue;
1980 }
1981
1982 if (!want_inline_small_function_p (edge, true))
1983 {
1984 resolve_noninline_speculation (&edge_heap, edge);
1985 continue;
1986 }
1987
1988 /* Heuristics for inlining small functions work poorly for
1989 recursive calls, where the effect is similar to loop unrolling.
1990 When inlining such an edge seems profitable, leave the decision to
1991 the special-purpose recursive inliner. */
1992 if (edge->recursive_p ())
1993 {
1994 where = edge->caller;
1995 if (where->global.inlined_to)
1996 where = where->global.inlined_to;
1997 if (!recursive_inlining (edge,
1998 opt_for_fn (edge->caller->decl,
1999 flag_indirect_inlining)
2000 ? &new_indirect_edges : NULL))
2001 {
2002 edge->inline_failed = CIF_RECURSIVE_INLINING;
2003 resolve_noninline_speculation (&edge_heap, edge);
2004 continue;
2005 }
2006 reset_edge_caches (where);
2007 /* The recursive inliner inlines all recursive calls of the function
2008 at once. Consequently we need to update all callee keys. */
2009 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2010 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2011 update_callee_keys (&edge_heap, where, updated_nodes);
2012 bitmap_clear (updated_nodes);
2013 }
2014 else
2015 {
2016 struct cgraph_node *outer_node = NULL;
2017 int depth = 0;
2018
2019 /* Consider the case where self-recursive function A is inlined
2020 into B. This is a desired optimization in some cases, since it
2021 leads to an effect similar to loop peeling and we might completely
2022 optimize out the recursive call. However we must be extra
2023 selective. */
2024
2025 where = edge->caller;
2026 while (where->global.inlined_to)
2027 {
2028 if (where->decl == callee->decl)
2029 outer_node = where, depth++;
2030 where = where->callers->caller;
2031 }
2032 if (outer_node
2033 && !want_inline_self_recursive_call_p (edge, outer_node,
2034 true, depth))
2035 {
2036 edge->inline_failed
2037 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2038 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2039 resolve_noninline_speculation (&edge_heap, edge);
2040 continue;
2041 }
2042 else if (depth && dump_file)
2043 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2044
2045 gcc_checking_assert (!callee->global.inlined_to);
2046 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2047 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2048
2049 reset_edge_caches (edge->callee->function_symbol ());
2050
2051 update_callee_keys (&edge_heap, where, updated_nodes);
2052 }
2053 where = edge->caller;
2054 if (where->global.inlined_to)
2055 where = where->global.inlined_to;
2056
2057 /* Our profitability metric can depend on local properties
2058 such as the number of inlinable calls and the size of the function body.
2059 After inlining these properties might change for the function we
2060 inlined into (since its body size changed) and for the functions
2061 called by the function we inlined (since the number of their inlinable
2062 callers might change). */
2063 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2064 /* The offline copy count has possibly changed; recompute if a profile
2065 is available. */
2066 if (max_count)
2067 {
2068 struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2069 if (n != edge->callee && n->analyzed)
2070 update_callee_keys (&edge_heap, n, updated_nodes);
2071 }
2072 bitmap_clear (updated_nodes);
2073
2074 if (dump_file)
2075 {
2076 fprintf (dump_file,
2077 " Inlined into %s which now has time %i and size %i,"
2078 "net change of %+i.\n",
2079 edge->caller->name (),
2080 inline_summaries->get (edge->caller)->time,
2081 inline_summaries->get (edge->caller)->size,
2082 overall_size - old_size);
2083 }
2084 if (min_size > overall_size)
2085 {
2086 min_size = overall_size;
2087 max_size = compute_max_insns (min_size);
2088
2089 if (dump_file)
2090 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2091 }
2092 }
2093
2094 free_growth_caches ();
2095 if (dump_file)
2096 fprintf (dump_file,
2097 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2098 initial_size, overall_size,
2099 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2100 BITMAP_FREE (updated_nodes);
2101 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2102 }
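
/* Minimal self-contained sketch (editor-added; assumed shapes, with plain
   ints standing in for sreal badness and the real heap) of the greedy loop
   above: repeatedly pick the candidate edge with the lowest badness, skip
   it when the unit growth limit would be exceeded, otherwise account its
   growth.  The real loop additionally re-checks badness on extraction and
   lazily re-inserts stale keys, and updates keys of affected nodes.  */

struct example_candidate
{
  int badness;   /* lower is better  */
  int growth;    /* unit size growth if inlined  */
  int valid;
};

static int
example_greedy_inline (struct example_candidate *cand, int n,
                       int overall_size, int max_size)
{
  for (;;)
    {
      int best = -1, i;
      for (i = 0; i < n; i++)   /* stand-in for the priority heap  */
        if (cand[i].valid
            && (best < 0 || cand[i].badness < cand[best].badness))
          best = i;
      if (best < 0)
        break;
      cand[best].valid = 0;
      if (overall_size + cand[best].growth > max_size)
        continue;               /* unit growth limit reached for this edge  */
      overall_size += cand[best].growth;  /* "inline" the edge  */
      /* The real code would now update keys of affected callers/callees.  */
    }
  return overall_size;
}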
2103
2104 /* Flatten NODE. Performed both during early inlining and
2105 at IPA inlining time. */
2106
2107 static void
2108 flatten_function (struct cgraph_node *node, bool early)
2109 {
2110 struct cgraph_edge *e;
2111
2112 /* We shouldn't be called recursively when we are being processed. */
2113 gcc_assert (node->aux == NULL);
2114
2115 node->aux = (void *) node;
2116
2117 for (e = node->callees; e; e = e->next_callee)
2118 {
2119 struct cgraph_node *orig_callee;
2120 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2121
2122 /* We've hit a cycle? It is time to give up. */
2123 if (callee->aux)
2124 {
2125 if (dump_file)
2126 fprintf (dump_file,
2127 "Not inlining %s into %s to avoid cycle.\n",
2128 xstrdup_for_dump (callee->name ()),
2129 xstrdup_for_dump (e->caller->name ()));
2130 e->inline_failed = CIF_RECURSIVE_INLINING;
2131 continue;
2132 }
2133
2134 /* When the edge is already inlined, we just need to recurse into
2135 it in order to fully flatten the leaves. */
2136 if (!e->inline_failed)
2137 {
2138 flatten_function (callee, early);
2139 continue;
2140 }
2141
2142 /* The flatten attribute needs to be processed during late inlining. For
2143 extra code quality, however, we also do flattening during early
2144 optimization. */
2145 if (!early
2146 ? !can_inline_edge_p (e, true)
2147 : !can_early_inline_edge_p (e))
2148 continue;
2149
2150 if (e->recursive_p ())
2151 {
2152 if (dump_file)
2153 fprintf (dump_file, "Not inlining: recursive call.\n");
2154 continue;
2155 }
2156
2157 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2158 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2159 {
2160 if (dump_file)
2161 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2162 continue;
2163 }
2164
2165 /* Inline the edge and flatten the inline clone. Avoid
2166 recursing through the original node if the node was cloned. */
2167 if (dump_file)
2168 fprintf (dump_file, " Inlining %s into %s.\n",
2169 xstrdup_for_dump (callee->name ()),
2170 xstrdup_for_dump (e->caller->name ()));
2171 orig_callee = callee;
2172 inline_call (e, true, NULL, NULL, false);
2173 if (e->callee != orig_callee)
2174 orig_callee->aux = (void *) node;
2175 flatten_function (e->callee, early);
2176 if (e->callee != orig_callee)
2177 orig_callee->aux = NULL;
2178 }
2179
2180 node->aux = NULL;
2181 if (!node->global.inlined_to)
2182 inline_update_overall_summary (node);
2183 }
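
/* Illustrative user-level example (editor-added, hypothetical code):
   flatten_function above implements the flatten attribute, which requests
   that every call inside the annotated body be inlined recursively,
   stopping only at cycles, uninlinable edges and SSA-form mismatches as
   checked above.  */

static int example_leaf (int x) { return x * x; }
static int example_mid (int x) { return example_leaf (x) + 1; }

static int __attribute__ ((flatten))
example_flattened (int x)
{
  /* Both example_mid and, transitively, example_leaf end up inlined here.  */
  return example_mid (x) + example_leaf (x);
}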
2184
2185 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2186 DATA points to the number of calls originally found, so we avoid
2187 infinite recursion. */
2188
2189 static bool
2190 inline_to_all_callers (struct cgraph_node *node, void *data)
2191 {
2192 int *num_calls = (int *)data;
2193 bool callee_removed = false;
2194
2195 while (node->callers && !node->global.inlined_to)
2196 {
2197 struct cgraph_node *caller = node->callers->caller;
2198
2199 if (!can_inline_edge_p (node->callers, true)
2200 || node->callers->recursive_p ())
2201 {
2202 if (dump_file)
2203 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2204 *num_calls = 0;
2205 return false;
2206 }
2207
2208 if (dump_file)
2209 {
2210 fprintf (dump_file,
2211 "\nInlining %s size %i.\n",
2212 node->name (),
2213 inline_summaries->get (node)->size);
2214 fprintf (dump_file,
2215 " Called once from %s %i insns.\n",
2216 node->callers->caller->name (),
2217 inline_summaries->get (node->callers->caller)->size);
2218 }
2219
2220 inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
2221 if (dump_file)
2222 fprintf (dump_file,
2223 " Inlined into %s which now has %i size\n",
2224 caller->name (),
2225 inline_summaries->get (caller)->size);
2226 if (!(*num_calls)--)
2227 {
2228 if (dump_file)
2229 fprintf (dump_file, "New calls found; giving up.\n");
2230 return callee_removed;
2231 }
2232 if (callee_removed)
2233 return true;
2234 }
2235 return false;
2236 }
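
/* Condensed sketch (editor-added) of how the worker above is driven; the
   real driver appears in ipa_inline below.  NUM_CALLS is seeded with the
   caller count found up front, so calls that only show up as a result of
   the inlining itself do not keep the loop spinning forever.  */

static void
example_drive_inline_to_all_callers (struct cgraph_node *node)
{
  int num_calls = 0;
  node->call_for_symbol_and_aliases (sum_callers, &num_calls, true);
  while (node->call_for_symbol_and_aliases (inline_to_all_callers,
                                            &num_calls, true))
    ;
}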
2237
2238 /* Output overall time estimate. */
2239 static void
2240 dump_overall_stats (void)
2241 {
2242 int64_t sum_weighted = 0, sum = 0;
2243 struct cgraph_node *node;
2244
2245 FOR_EACH_DEFINED_FUNCTION (node)
2246 if (!node->global.inlined_to
2247 && !node->alias)
2248 {
2249 int time = inline_summaries->get (node)->time;
2250 sum += time;
2251 sum_weighted += time * node->count;
2252 }
2253 fprintf (dump_file, "Overall time estimate: "
2254 "%" PRId64" weighted by profile: "
2255 "%" PRId64"\n", sum, sum_weighted);
2256 }
2257
2258 /* Output some useful stats about inlining. */
2259
2260 static void
2261 dump_inline_stats (void)
2262 {
2263 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2264 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2265 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2266 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2267 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2268 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2269 int64_t reason[CIF_N_REASONS][3];
2270 int i;
2271 struct cgraph_node *node;
2272
2273 memset (reason, 0, sizeof (reason));
2274 FOR_EACH_DEFINED_FUNCTION (node)
2275 {
2276 struct cgraph_edge *e;
2277 for (e = node->callees; e; e = e->next_callee)
2278 {
2279 if (e->inline_failed)
2280 {
2281 reason[(int) e->inline_failed][0] += e->count;
2282 reason[(int) e->inline_failed][1] += e->frequency;
2283 reason[(int) e->inline_failed][2] ++;
2284 if (DECL_VIRTUAL_P (e->callee->decl))
2285 {
2286 if (e->indirect_inlining_edge)
2287 noninlined_virt_indir_cnt += e->count;
2288 else
2289 noninlined_virt_cnt += e->count;
2290 }
2291 else
2292 {
2293 if (e->indirect_inlining_edge)
2294 noninlined_indir_cnt += e->count;
2295 else
2296 noninlined_cnt += e->count;
2297 }
2298 }
2299 else
2300 {
2301 if (e->speculative)
2302 {
2303 if (DECL_VIRTUAL_P (e->callee->decl))
2304 inlined_speculative_ply += e->count;
2305 else
2306 inlined_speculative += e->count;
2307 }
2308 else if (DECL_VIRTUAL_P (e->callee->decl))
2309 {
2310 if (e->indirect_inlining_edge)
2311 inlined_virt_indir_cnt += e->count;
2312 else
2313 inlined_virt_cnt += e->count;
2314 }
2315 else
2316 {
2317 if (e->indirect_inlining_edge)
2318 inlined_indir_cnt += e->count;
2319 else
2320 inlined_cnt += e->count;
2321 }
2322 }
2323 }
2324 for (e = node->indirect_calls; e; e = e->next_callee)
2325 if (e->indirect_info->polymorphic)
2326 indirect_poly_cnt += e->count;
2327 else
2328 indirect_cnt += e->count;
2329 }
2330 if (max_count)
2331 {
2332 fprintf (dump_file,
2333 "Inlined %" PRId64 " + speculative "
2334 "%" PRId64 " + speculative polymorphic "
2335 "%" PRId64 " + previously indirect "
2336 "%" PRId64 " + virtual "
2337 "%" PRId64 " + virtual and previously indirect "
2338 "%" PRId64 "\n" "Not inlined "
2339 "%" PRId64 " + previously indirect "
2340 "%" PRId64 " + virtual "
2341 "%" PRId64 " + virtual and previously indirect "
2342 "%" PRId64 " + still indirect "
2343 "%" PRId64 " + still indirect polymorphic "
2344 "%" PRId64 "\n", inlined_cnt,
2345 inlined_speculative, inlined_speculative_ply,
2346 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2347 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2348 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2349 fprintf (dump_file,
2350 "Removed speculations %" PRId64 "\n",
2351 spec_rem);
2352 }
2353 dump_overall_stats ();
2354 fprintf (dump_file, "\nWhy inlining failed?\n");
2355 for (i = 0; i < CIF_N_REASONS; i++)
2356 if (reason[i][2])
2357 fprintf (dump_file, "%-50s: %8i calls, %8i freq, %" PRId64" count\n",
2358 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2359 (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2360 }
2361
2362 /* Decide on the inlining. We do so in topological order to avoid
2363 the expense of updating data structures. */
2364
2365 static unsigned int
2366 ipa_inline (void)
2367 {
2368 struct cgraph_node *node;
2369 int nnodes;
2370 struct cgraph_node **order;
2371 int i;
2372 int cold;
2373 bool remove_functions = false;
2374
2375 if (!optimize)
2376 return 0;
2377
2378 cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2379 percent_rec = (sreal) 1 / (sreal) 100;
2380
2381 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2382
2383 if (in_lto_p && optimize)
2384 ipa_update_after_lto_read ();
2385
2386 if (dump_file)
2387 dump_inline_summaries (dump_file);
2388
2389 nnodes = ipa_reverse_postorder (order);
2390
2391 FOR_EACH_FUNCTION (node)
2392 {
2393 node->aux = 0;
2394
2395 /* Recompute the default reasons for inlining because they may have
2396 changed during merging. */
2397 if (in_lto_p)
2398 {
2399 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2400 {
2401 gcc_assert (e->inline_failed);
2402 initialize_inline_failed (e);
2403 }
2404 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2405 initialize_inline_failed (e);
2406 }
2407 }
2408
2409 if (dump_file)
2410 fprintf (dump_file, "\nFlattening functions:\n");
2411
2412 /* In the first pass handle functions to be flattened. Do this with
2413 a priority so none of our later choices will make this impossible. */
2414 for (i = nnodes - 1; i >= 0; i--)
2415 {
2416 node = order[i];
2417
2418 /* Handle nodes to be flattened.
2419 Ideally, when processing callees we would stop inlining at the
2420 entry of cycles, possibly cloning that entry point and
2421 trying to flatten it, turning it into a self-recursive
2422 function. */
2423 if (lookup_attribute ("flatten",
2424 DECL_ATTRIBUTES (node->decl)) != NULL)
2425 {
2426 if (dump_file)
2427 fprintf (dump_file,
2428 "Flattening %s\n", node->name ());
2429 flatten_function (node, false);
2430 }
2431 }
2432 if (dump_file)
2433 dump_overall_stats ();
2434
2435 inline_small_functions ();
2436
2437 gcc_assert (symtab->state == IPA_SSA);
2438 symtab->state = IPA_SSA_AFTER_INLINING;
2439 /* Do the first after-inlining removal. We want to remove all "stale"
2440 extern inline functions and virtual functions so we really know what
2441 is called once. */
2442 symtab->remove_unreachable_nodes (dump_file);
2443 free (order);
2444
2445 /* Inline functions with the property that after inlining into all callers
2446 the code size will shrink because the out-of-line copy is eliminated.
2447 We do this regardless of the callee size as long as the function growth
2448 limits are met. */
2449 if (dump_file)
2450 fprintf (dump_file,
2451 "\nDeciding on functions to be inlined into all callers and "
2452 "removing useless speculations:\n");
2453
2454 /* Inlining one function called once has a good chance of preventing
2455 inlining of another function into the same callee. Ideally we should
2456 work in priority order, but probably inlining hot functions first
2457 is a good cut without the extra pain of maintaining the queue.
2458
2459 ??? This is not really fitting the bill perfectly: inlining a function
2460 into a callee often leads to better optimization of the callee due to
2461 increased context for optimization.
2462 For example, if main() calls a function that outputs help
2463 and then a function that does the main optimization, we should inline
2464 the second with priority even if both calls are cold by themselves.
2465
2466 We probably want to implement a new predicate replacing our use of
2467 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2468 to be hot. */
2469 for (cold = 0; cold <= 1; cold ++)
2470 {
2471 FOR_EACH_DEFINED_FUNCTION (node)
2472 {
2473 struct cgraph_edge *edge, *next;
2474 bool update=false;
2475
2476 for (edge = node->callees; edge; edge = next)
2477 {
2478 next = edge->next_callee;
2479 if (edge->speculative && !speculation_useful_p (edge, false))
2480 {
2481 edge->resolve_speculation ();
2482 spec_rem += edge->count;
2483 update = true;
2484 remove_functions = true;
2485 }
2486 }
2487 if (update)
2488 {
2489 struct cgraph_node *where = node->global.inlined_to
2490 ? node->global.inlined_to : node;
2491 reset_edge_caches (where);
2492 inline_update_overall_summary (where);
2493 }
2494 if (want_inline_function_to_all_callers_p (node, cold))
2495 {
2496 int num_calls = 0;
2497 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2498 true);
2499 while (node->call_for_symbol_and_aliases
2500 (inline_to_all_callers, &num_calls, true))
2501 ;
2502 remove_functions = true;
2503 }
2504 }
2505 }
2506
2507 /* Free ipa-prop structures if they are no longer needed. */
2508 if (optimize)
2509 ipa_free_all_structures_after_iinln ();
2510
2511 if (dump_file)
2512 {
2513 fprintf (dump_file,
2514 "\nInlined %i calls, eliminated %i functions\n\n",
2515 ncalls_inlined, nfunctions_inlined);
2516 dump_inline_stats ();
2517 }
2518
2519 if (dump_file)
2520 dump_inline_summaries (dump_file);
2521 /* In WPA we use inline summaries for the partitioning process. */
2522 if (!flag_wpa)
2523 inline_free_summary ();
2524 return remove_functions ? TODO_remove_functions : 0;
2525 }
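
/* Illustrative user-level example (editor-added, hypothetical code): the
   "inline into all callers" step of ipa_inline above targets functions like
   example_helper below, a local function with a single call site.  After
   inlining, the out-of-line copy can be removed, so overall code size
   shrinks no matter how large the helper is, as long as the growth limits
   for the caller are met.  */

static int
example_helper (const int *a, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    s += a[i];
  return s;
}

int
example_sole_caller (const int *a, int n)
{
  /* Sole call to example_helper; its body is merged here and the
     standalone copy eliminated.  */
  return example_helper (a, n) * 2;
}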
2526
2527 /* Inline always-inline function calls in NODE. */
2528
2529 static bool
2530 inline_always_inline_functions (struct cgraph_node *node)
2531 {
2532 struct cgraph_edge *e;
2533 bool inlined = false;
2534
2535 for (e = node->callees; e; e = e->next_callee)
2536 {
2537 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2538 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2539 continue;
2540
2541 if (e->recursive_p ())
2542 {
2543 if (dump_file)
2544 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2545 e->callee->name ());
2546 e->inline_failed = CIF_RECURSIVE_INLINING;
2547 continue;
2548 }
2549
2550 if (!can_early_inline_edge_p (e))
2551 {
2552 /* Set inlined to true if the callee is marked "always_inline" but
2553 is not inlinable. This will allow flagging an error later in
2554 expand_call_inline in tree-inline.c. */
2555 if (lookup_attribute ("always_inline",
2556 DECL_ATTRIBUTES (callee->decl)) != NULL)
2557 inlined = true;
2558 continue;
2559 }
2560
2561 if (dump_file)
2562 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2563 xstrdup_for_dump (e->callee->name ()),
2564 xstrdup_for_dump (e->caller->name ()));
2565 inline_call (e, true, NULL, NULL, false);
2566 inlined = true;
2567 }
2568 if (inlined)
2569 inline_update_overall_summary (node);
2570
2571 return inlined;
2572 }
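
/* Illustrative user-level example (editor-added, hypothetical code): the
   function above handles callees carrying the always_inline attribute,
   which are inlined regardless of optimization level and size limits; when
   such a callee cannot be early-inlined, the failure is recorded so that
   expand_call_inline can report an error later.  */

static inline int __attribute__ ((always_inline))
example_always_add (int a, int b)
{
  return a + b;
}

int
example_always_user (int x)
{
  return example_always_add (x, 1);  /* always inlined here  */
}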
2573
2574 /* Decide on the inlining. We do so in topological order to avoid
2575 the expense of updating data structures. */
2576
2577 static bool
2578 early_inline_small_functions (struct cgraph_node *node)
2579 {
2580 struct cgraph_edge *e;
2581 bool inlined = false;
2582
2583 for (e = node->callees; e; e = e->next_callee)
2584 {
2585 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2586 if (!inline_summaries->get (callee)->inlinable
2587 || !e->inline_failed)
2588 continue;
2589
2590 /* Do not consider functions not declared inline unless inlining of them was requested by -finline-functions or -finline-small-functions. */
2591 if (!DECL_DECLARED_INLINE_P (callee->decl)
2592 && !opt_for_fn (node->decl, flag_inline_small_functions)
2593 && !opt_for_fn (node->decl, flag_inline_functions))
2594 continue;
2595
2596 if (dump_file)
2597 fprintf (dump_file, "Considering inline candidate %s.\n",
2598 callee->name ());
2599
2600 if (!can_early_inline_edge_p (e))
2601 continue;
2602
2603 if (e->recursive_p ())
2604 {
2605 if (dump_file)
2606 fprintf (dump_file, " Not inlining: recursive call.\n");
2607 continue;
2608 }
2609
2610 if (!want_early_inline_function_p (e))
2611 continue;
2612
2613 if (dump_file)
2614 fprintf (dump_file, " Inlining %s into %s.\n",
2615 xstrdup_for_dump (callee->name ()),
2616 xstrdup_for_dump (e->caller->name ()));
2617 inline_call (e, true, NULL, NULL, true);
2618 inlined = true;
2619 }
2620
2621 return inlined;
2622 }
2623
2624 unsigned int
2625 early_inliner (function *fun)
2626 {
2627 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2628 struct cgraph_edge *edge;
2629 unsigned int todo = 0;
2630 int iterations = 0;
2631 bool inlined = false;
2632
2633 if (seen_error ())
2634 return 0;
2635
2636 /* Do nothing if the data structures for the ipa-inliner are already
2637 computed. This happens when some pass decides to construct a new
2638 function and cgraph_add_new_function calls lowering passes and early
2639 optimization on it. This may confuse us when the early inliner decides
2640 to inline a call to a function clone, because function clones don't
2641 have a parameter list in ipa-prop matching their signature. */
2642 if (ipa_node_params_sum)
2643 return 0;
2644
2645 #ifdef ENABLE_CHECKING
2646 node->verify ();
2647 #endif
2648 node->remove_all_references ();
2649
2650 /* Rebuild this reference because it doesn't depend on the
2651 function's body and it is required to pass cgraph_node
2652 verification. */
2653 if (node->instrumented_version
2654 && !node->instrumentation_clone)
2655 node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2656
2657 /* Even when not optimizing or not inlining, inline always-inline
2658 functions. */
2659 inlined = inline_always_inline_functions (node);
2660
2661 if (!optimize
2662 || flag_no_inline
2663 || !flag_early_inlining
2664 /* Never inline regular functions into always-inline functions
2665 during incremental inlining. This sucks as functions calling
2666 always-inline functions will get less optimized, but at the
2667 same time inlining functions that call an always-inline
2668 function into an always-inline function might introduce
2669 cycles of always-inlined edges in the callgraph.
2670
2671 We might want to be smarter and just avoid this type of inlining. */
2672 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2673 && lookup_attribute ("always_inline",
2674 DECL_ATTRIBUTES (node->decl))))
2675 ;
2676 else if (lookup_attribute ("flatten",
2677 DECL_ATTRIBUTES (node->decl)) != NULL)
2678 {
2679 /* When the function is marked to be flattened, recursively inline
2680 all calls in it. */
2681 if (dump_file)
2682 fprintf (dump_file,
2683 "Flattening %s\n", node->name ());
2684 flatten_function (node, true);
2685 inlined = true;
2686 }
2687 else
2688 {
2689 /* If some always_inline functions were inlined, apply the changes.
2690 This way we will not account always-inline functions in the growth
2691 limits and moreover we will inline calls from always-inline functions
2692 that we skipped previously because of the conditional above. */
2693 if (inlined)
2694 {
2695 timevar_push (TV_INTEGRATION);
2696 todo |= optimize_inline_calls (current_function_decl);
2697 /* optimize_inline_calls call above might have introduced new
2698 statements that don't have inline parameters computed. */
2699 for (edge = node->callees; edge; edge = edge->next_callee)
2700 {
2701 if (inline_edge_summary_vec.length () > (unsigned) edge->uid)
2702 {
2703 struct inline_edge_summary *es = inline_edge_summary (edge);
2704 es->call_stmt_size
2705 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2706 es->call_stmt_time
2707 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2708 }
2709 }
2710 inline_update_overall_summary (node);
2711 inlined = false;
2712 timevar_pop (TV_INTEGRATION);
2713 }
2714 /* We iterate incremental inlining to get trivial cases of indirect
2715 inlining. */
2716 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2717 && early_inline_small_functions (node))
2718 {
2719 timevar_push (TV_INTEGRATION);
2720 todo |= optimize_inline_calls (current_function_decl);
2721
2722 /* Technically we ought to recompute inline parameters so the new
2723 iteration of the early inliner works as expected. We, however, have
2724 approximately correct values and thus only need to update the edge
2725 info that might be cleared out for newly discovered edges. */
2726 for (edge = node->callees; edge; edge = edge->next_callee)
2727 {
2728 /* We have no summary for new bound store calls yet. */
2729 if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2730 {
2731 struct inline_edge_summary *es = inline_edge_summary (edge);
2732 es->call_stmt_size
2733 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2734 es->call_stmt_time
2735 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2736 }
2737 if (edge->callee->decl
2738 && !gimple_check_call_matching_types (
2739 edge->call_stmt, edge->callee->decl, false))
2740 edge->call_stmt_cannot_inline_p = true;
2741 }
2742 if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2743 inline_update_overall_summary (node);
2744 timevar_pop (TV_INTEGRATION);
2745 iterations++;
2746 inlined = false;
2747 }
2748 if (dump_file)
2749 fprintf (dump_file, "Iterations: %i\n", iterations);
2750 }
2751
2752 if (inlined)
2753 {
2754 timevar_push (TV_INTEGRATION);
2755 todo |= optimize_inline_calls (current_function_decl);
2756 timevar_pop (TV_INTEGRATION);
2757 }
2758
2759 fun->always_inline_functions_inlined = true;
2760
2761 return todo;
2762 }
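
/* Illustrative user-level example (editor-added, hypothetical code) of why
   early_inliner above iterates: in the first iteration example_apply is
   inlined into example_caller, turning the call through FN into a direct
   call to example_inc; only the following iteration can inline that newly
   exposed direct call.  */

static int
example_inc (int x)
{
  return x + 1;
}

static int
example_apply (int (*fn) (int), int x)
{
  return fn (x);
}

int
example_caller (int x)
{
  return example_apply (example_inc, x);
}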
2763
2764 /* Do inlining of small functions. Doing so early helps profiling and other
2765 passes to be somewhat more effective and avoids some code duplication in
2766 the later real inlining pass for testcases with very many function calls. */
2767
2768 namespace {
2769
2770 const pass_data pass_data_early_inline =
2771 {
2772 GIMPLE_PASS, /* type */
2773 "einline", /* name */
2774 OPTGROUP_INLINE, /* optinfo_flags */
2775 TV_EARLY_INLINING, /* tv_id */
2776 PROP_ssa, /* properties_required */
2777 0, /* properties_provided */
2778 0, /* properties_destroyed */
2779 0, /* todo_flags_start */
2780 0, /* todo_flags_finish */
2781 };
2782
2783 class pass_early_inline : public gimple_opt_pass
2784 {
2785 public:
2786 pass_early_inline (gcc::context *ctxt)
2787 : gimple_opt_pass (pass_data_early_inline, ctxt)
2788 {}
2789
2790 /* opt_pass methods: */
2791 virtual unsigned int execute (function *);
2792
2793 }; // class pass_early_inline
2794
2795 unsigned int
2796 pass_early_inline::execute (function *fun)
2797 {
2798 return early_inliner (fun);
2799 }
2800
2801 } // anon namespace
2802
2803 gimple_opt_pass *
2804 make_pass_early_inline (gcc::context *ctxt)
2805 {
2806 return new pass_early_inline (ctxt);
2807 }
2808
2809 namespace {
2810
2811 const pass_data pass_data_ipa_inline =
2812 {
2813 IPA_PASS, /* type */
2814 "inline", /* name */
2815 OPTGROUP_INLINE, /* optinfo_flags */
2816 TV_IPA_INLINING, /* tv_id */
2817 0, /* properties_required */
2818 0, /* properties_provided */
2819 0, /* properties_destroyed */
2820 0, /* todo_flags_start */
2821 ( TODO_dump_symtab ), /* todo_flags_finish */
2822 };
2823
2824 class pass_ipa_inline : public ipa_opt_pass_d
2825 {
2826 public:
2827 pass_ipa_inline (gcc::context *ctxt)
2828 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2829 inline_generate_summary, /* generate_summary */
2830 inline_write_summary, /* write_summary */
2831 inline_read_summary, /* read_summary */
2832 NULL, /* write_optimization_summary */
2833 NULL, /* read_optimization_summary */
2834 NULL, /* stmt_fixup */
2835 0, /* function_transform_todo_flags_start */
2836 inline_transform, /* function_transform */
2837 NULL) /* variable_transform */
2838 {}
2839
2840 /* opt_pass methods: */
2841 virtual unsigned int execute (function *) { return ipa_inline (); }
2842
2843 }; // class pass_ipa_inline
2844
2845 } // anon namespace
2846
2847 ipa_opt_pass_d *
2848 make_pass_ipa_inline (gcc::context *ctxt)
2849 {
2850 return new pass_ipa_inline (ctxt);
2851 }