]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-inline.c
Add option for whether ceil etc. can raise "inexact", adjust x86 conditions.
[thirdparty/gcc.git] / gcc / ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23 The implementation of inliner is organized as follows:
24
25 inlining heuristics limits
26
can_inline_edge_p checks that a particular inlining is allowed
by the limits specified by the user (allowed function growth, stack
growth and so on).
30
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduce code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42 Simple local inlining pass inlining callees into current function.
43 This pass makes no use of whole unit analysis and thus it can do only
44 very simple decisions based on local properties.
45
The strength of the pass is that it is run in topological order
(reverse postorder) on the callgraph. Functions are converted into SSA
form just before this pass and optimized subsequently. As a result, the
callees of the function seen by the early inliner were already optimized,
and the results of early inlining add a lot of optimization opportunities
for the local optimizers.
52
53 The pass handle the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
56
57 main strength of the pass is the ability to eliminate abstraction
58 penalty in C++ code (via combination of inlining and early
59 optimization) and thus improve quality of analysis done by real IPA
60 optimizers.
61
62 Because of lack of whole unit knowledge, the pass can not really make
63 good code size/performance tradeoffs. It however does very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when callee is leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs following steps:
72
73 1) inlining of small functions. This is implemented by greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as inline limits allows doing so.
76
This heuristic is not very good at inlining recursive calls. Recursive
calls can be inlined with results similar to loop unrolling. To do so,
a special purpose recursive inliner is executed on a function when
a recursive edge is met as a viable candidate.
81
82 2) Unreachable functions are removed from callgraph. Inlining leads
83 to devirtualization and other modification of callgraph so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to reduction of code size by eliminating
90 the need for offline copy of the function. */
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "params.h"
109 #include "profile.h"
110 #include "symbol-summary.h"
111 #include "ipa-prop.h"
112 #include "ipa-inline.h"
113 #include "ipa-utils.h"
114 #include "sreal.h"
115 #include "auto-profile.h"
116 #include "builtins.h"
117 #include "fibonacci_heap.h"
118
119 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
120 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
121
122 /* Statistics we collect about inlining algorithm. */
123 static int overall_size;
124 static gcov_type max_count;
125 static gcov_type spec_rem;
126
127 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100. */
128 static sreal cgraph_freq_base_rec, percent_rec;
129
130 /* Return false when inlining edge E would lead to violating
131 limits on function unit growth or stack usage growth.
132
133 The relative function body growth limit is present generally
134 to avoid problems with non-linear behavior of the compiler.
135 To allow inlining huge functions into tiny wrapper, the limit
136 is always based on the bigger of the two functions considered.
137
138 For stack growth limits we always base the growth in stack usage
139 of the callers. We want to prevent applications from segfaulting
140 on stack overflow when functions with huge stack frames gets
141 inlined. */
142
143 static bool
144 caller_growth_limits (struct cgraph_edge *e)
145 {
146 struct cgraph_node *to = e->caller;
147 struct cgraph_node *what = e->callee->ultimate_alias_target ();
148 int newsize;
149 int limit = 0;
150 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
151 inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
152
153 /* Look for function e->caller is inlined to. While doing
154 so work out the largest function body on the way. As
155 described above, we want to base our function growth
156 limits based on that. Not on the self size of the
157 outer function, not on the self size of inline code
158 we immediately inline to. This is the most relaxed
159 interpretation of the rule "do not grow large functions
160 too much in order to prevent compiler from exploding". */
161 while (true)
162 {
163 info = inline_summaries->get (to);
164 if (limit < info->self_size)
165 limit = info->self_size;
166 if (stack_size_limit < info->estimated_self_stack_size)
167 stack_size_limit = info->estimated_self_stack_size;
168 if (to->global.inlined_to)
169 to = to->callers->caller;
170 else
171 break;
172 }
173
174 what_info = inline_summaries->get (what);
175
176 if (limit < what_info->self_size)
177 limit = what_info->self_size;
178
179 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
180
181 /* Check the size after inlining against the function limits. But allow
182 the function to shrink if it went over the limits by forced inlining. */
183 newsize = estimate_size_after_inlining (to, e);
184 if (newsize >= info->size
185 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
186 && newsize > limit)
187 {
188 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
189 return false;
190 }
191
192 if (!what_info->estimated_stack_size)
193 return true;
194
195 /* FIXME: Stack size limit often prevents inlining in Fortran programs
196 due to large i/o datastructures used by the Fortran front-end.
197 We ought to ignore this limit when we know that the edge is executed
198 on every invocation of the caller (i.e. its call statement dominates
199 exit block). We do not track this information, yet. */
200 stack_size_limit += ((gcov_type)stack_size_limit
201 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
202
203 inlined_stack = (outer_info->stack_frame_offset
204 + outer_info->estimated_self_stack_size
205 + what_info->estimated_stack_size);
206 /* Check new stack consumption with stack consumption at the place
207 stack is used. */
208 if (inlined_stack > stack_size_limit
209 /* If function already has large stack usage from sibling
210 inline call, we can inline, too.
211 This bit overoptimistically assume that we are good at stack
212 packing. */
213 && inlined_stack > info->estimated_stack_size
214 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
215 {
216 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
217 return false;
218 }
219 return true;
220 }
221
222 /* Dump info about why inlining has failed. */
223
224 static void
225 report_inline_failed_reason (struct cgraph_edge *e)
226 {
227 if (dump_file)
228 {
229 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
230 xstrdup_for_dump (e->caller->name ()), e->caller->order,
231 xstrdup_for_dump (e->callee->name ()), e->callee->order,
232 cgraph_inline_failed_string (e->inline_failed));
233 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
234 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
235 && e->caller->lto_file_data
236 && e->callee->ultimate_alias_target ()->lto_file_data)
237 {
238 fprintf (dump_file, " LTO objects: %s, %s\n",
239 e->caller->lto_file_data->file_name,
240 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
241 }
242 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
243 cl_target_option_print_diff
244 (dump_file, 2, target_opts_for_fn (e->caller->decl),
245 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
246 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
247 cl_optimization_print_diff
248 (dump_file, 2, opts_for_fn (e->caller->decl),
249 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
250 }
251 }
252
253 /* Decide whether sanitizer-related attributes allow inlining. */
254
255 static bool
256 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
257 {
258 /* Don't care if sanitizer is disabled */
259 if (!(flag_sanitize & SANITIZE_ADDRESS))
260 return true;
261
262 if (!caller || !callee)
263 return true;
264
265 return !!lookup_attribute ("no_sanitize_address",
266 DECL_ATTRIBUTES (caller)) ==
267 !!lookup_attribute ("no_sanitize_address",
268 DECL_ATTRIBUTES (callee));
269 }
270
/* The three tests below expand in a scope that provides the nodes
   `caller' and `callee' and, for the first two, the boolean
   `always_inline'.  Each evaluates to true when option FLAG makes the
   two functions incompatible.  */

/* For flags where it is safe to inline when the caller's value is
   greater than the callee's.  Any difference is a mismatch unless
   always_inline is set, in which case only a smaller caller value
   is.  */
#define check_maybe_up(flag) \
  (always_inline \
   ? (opts_for_fn (caller->decl)->x_##flag \
      < opts_for_fn (callee->decl)->x_##flag) \
   : (opts_for_fn (caller->decl)->x_##flag \
      != opts_for_fn (callee->decl)->x_##flag))

/* For flags where it is safe to inline when the caller's value is
   smaller than the callee's.  Any difference is a mismatch unless
   always_inline is set, in which case only a greater caller value
   is.  */
#define check_maybe_down(flag) \
  (always_inline \
   ? (opts_for_fn (caller->decl)->x_##flag \
      > opts_for_fn (callee->decl)->x_##flag) \
   : (opts_for_fn (caller->decl)->x_##flag \
      != opts_for_fn (callee->decl)->x_##flag))

/* For flags where an exact match is needed for correctness.  */
#define check_match(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag)
291
292 /* Decide if we can inline the edge and possibly update
293 inline_failed reason.
294 We check whether inlining is possible at all and whether
295 caller growth limits allow doing so.
296
297 if REPORT is true, output reason to the dump file.
298
299 if DISREGARD_LIMITS is true, ignore size limits.*/
300
301 static bool
302 can_inline_edge_p (struct cgraph_edge *e, bool report,
303 bool disregard_limits = false, bool early = false)
304 {
305 gcc_checking_assert (e->inline_failed);
306
307 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
308 {
309 if (report)
310 report_inline_failed_reason (e);
311 return false;
312 }
313
314 bool inlinable = true;
315 enum availability avail;
316 cgraph_node *caller = e->caller->global.inlined_to
317 ? e->caller->global.inlined_to : e->caller;
318 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
319 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
320 tree callee_tree
321 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
322
323 if (!callee->definition)
324 {
325 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
326 inlinable = false;
327 }
328 else if (callee->calls_comdat_local)
329 {
330 e->inline_failed = CIF_USES_COMDAT_LOCAL;
331 inlinable = false;
332 }
333 else if (avail <= AVAIL_INTERPOSABLE)
334 {
335 e->inline_failed = CIF_OVERWRITABLE;
336 inlinable = false;
337 }
338 /* All edges with call_stmt_cannot_inline_p should have inline_failed
339 initialized to one of FINAL_ERROR reasons. */
340 else if (e->call_stmt_cannot_inline_p)
341 gcc_unreachable ();
342 /* Don't inline if the functions have different EH personalities. */
343 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
344 && DECL_FUNCTION_PERSONALITY (callee->decl)
345 && (DECL_FUNCTION_PERSONALITY (caller->decl)
346 != DECL_FUNCTION_PERSONALITY (callee->decl)))
347 {
348 e->inline_failed = CIF_EH_PERSONALITY;
349 inlinable = false;
350 }
351 /* TM pure functions should not be inlined into non-TM_pure
352 functions. */
353 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
354 {
355 e->inline_failed = CIF_UNSPECIFIED;
356 inlinable = false;
357 }
358 /* Check compatibility of target optimization options. */
359 else if (!targetm.target_option.can_inline_p (caller->decl,
360 callee->decl))
361 {
362 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
363 inlinable = false;
364 }
365 else if (!inline_summaries->get (callee)->inlinable)
366 {
367 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
368 inlinable = false;
369 }
370 else if (inline_summaries->get (caller)->contains_cilk_spawn)
371 {
372 e->inline_failed = CIF_CILK_SPAWN;
373 inlinable = false;
374 }
375 /* Don't inline a function with mismatched sanitization attributes. */
376 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
377 {
378 e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
379 inlinable = false;
380 }
381 /* Check if caller growth allows the inlining. */
382 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
383 && !disregard_limits
384 && !lookup_attribute ("flatten",
385 DECL_ATTRIBUTES (caller->decl))
386 && !caller_growth_limits (e))
387 inlinable = false;
388 /* Don't inline a function with a higher optimization level than the
389 caller. FIXME: this is really just tip of iceberg of handling
390 optimization attribute. */
391 else if (caller_tree != callee_tree)
392 {
393 bool always_inline =
394 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
395 && lookup_attribute ("always_inline",
396 DECL_ATTRIBUTES (callee->decl)));
397 inline_summary *caller_info = inline_summaries->get (caller);
398 inline_summary *callee_info = inline_summaries->get (callee);
399
400 /* Until GCC 4.9 we did not check the semantics alterning flags
401 bellow and inline across optimization boundry.
402 Enabling checks bellow breaks several packages by refusing
403 to inline library always_inline functions. See PR65873.
404 Disable the check for early inlining for now until better solution
405 is found. */
406 if (always_inline && early)
407 ;
408 /* There are some options that change IL semantics which means
409 we cannot inline in these cases for correctness reason.
410 Not even for always_inline declared functions. */
411 /* Strictly speaking only when the callee contains signed integer
412 math where overflow is undefined. */
413 else if ((check_maybe_up (flag_strict_overflow)
414 /* this flag is set by optimize. Allow inlining across
415 optimize boundary. */
416 && (!opt_for_fn (caller->decl, optimize)
417 == !opt_for_fn (callee->decl, optimize) || !always_inline))
418 || check_match (flag_wrapv)
419 || check_match (flag_trapv)
420 /* When caller or callee does FP math, be sure FP codegen flags
421 compatible. */
422 || ((caller_info->fp_expressions && callee_info->fp_expressions)
423 && (check_maybe_up (flag_rounding_math)
424 || check_maybe_up (flag_trapping_math)
425 || check_maybe_down (flag_unsafe_math_optimizations)
426 || check_maybe_down (flag_finite_math_only)
427 || check_maybe_up (flag_signaling_nans)
428 || check_maybe_down (flag_cx_limited_range)
429 || check_maybe_up (flag_signed_zeros)
430 || check_maybe_down (flag_associative_math)
431 || check_maybe_down (flag_reciprocal_math)
432 || check_maybe_down (flag_fp_int_builtin_inexact)
433 /* Strictly speaking only when the callee contains function
434 calls that may end up setting errno. */
435 || check_maybe_up (flag_errno_math)))
436 /* We do not want to make code compiled with exceptions to be
437 brought into a non-EH function unless we know that the callee
438 does not throw.
439 This is tracked by DECL_FUNCTION_PERSONALITY. */
440 || (check_maybe_up (flag_non_call_exceptions)
441 && DECL_FUNCTION_PERSONALITY (callee->decl))
442 || (check_maybe_up (flag_exceptions)
443 && DECL_FUNCTION_PERSONALITY (callee->decl))
444 /* When devirtualization is diabled for callee, it is not safe
445 to inline it as we possibly mangled the type info.
446 Allow early inlining of always inlines. */
447 || (!early && check_maybe_down (flag_devirtualize)))
448 {
449 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
450 inlinable = false;
451 }
452 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
453 else if (always_inline)
454 ;
455 /* When user added an attribute to the callee honor it. */
456 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
457 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
458 {
459 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
460 inlinable = false;
461 }
462 /* If explicit optimize attribute are not used, the mismatch is caused
463 by different command line options used to build different units.
464 Do not care about COMDAT functions - those are intended to be
465 optimized with the optimization flags of module they are used in.
466 Also do not care about mixing up size/speed optimization when
467 DECL_DISREGARD_INLINE_LIMITS is set. */
468 else if ((callee->merged_comdat
469 && !lookup_attribute ("optimize",
470 DECL_ATTRIBUTES (caller->decl)))
471 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
472 ;
473 /* If mismatch is caused by merging two LTO units with different
474 optimizationflags we want to be bit nicer. However never inline
475 if one of functions is not optimized at all. */
476 else if (!opt_for_fn (callee->decl, optimize)
477 || !opt_for_fn (caller->decl, optimize))
478 {
479 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
480 inlinable = false;
481 }
482 /* If callee is optimized for size and caller is not, allow inlining if
483 code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
484 is inline (and thus likely an unified comdat). This will allow caller
485 to run faster. */
486 else if (opt_for_fn (callee->decl, optimize_size)
487 > opt_for_fn (caller->decl, optimize_size))
488 {
489 int growth = estimate_edge_growth (e);
490 if (growth > 0
491 && (!DECL_DECLARED_INLINE_P (callee->decl)
492 && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
493 MAX_INLINE_INSNS_AUTO)))
494 {
495 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
496 inlinable = false;
497 }
498 }
499 /* If callee is more aggressively optimized for performance than caller,
500 we generally want to inline only cheap (runtime wise) functions. */
501 else if (opt_for_fn (callee->decl, optimize_size)
502 < opt_for_fn (caller->decl, optimize_size)
503 || (opt_for_fn (callee->decl, optimize)
504 > opt_for_fn (caller->decl, optimize)))
505 {
506 if (estimate_edge_time (e)
507 >= 20 + inline_edge_summary (e)->call_stmt_time)
508 {
509 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
510 inlinable = false;
511 }
512 }
513
514 }
515
516 if (!inlinable && report)
517 report_inline_failed_reason (e);
518 return inlinable;
519 }
520
521
522 /* Return true if the edge E is inlinable during early inlining. */
523
524 static bool
525 can_early_inline_edge_p (struct cgraph_edge *e)
526 {
527 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
528 /* Early inliner might get called at WPA stage when IPA pass adds new
529 function. In this case we can not really do any of early inlining
530 because function bodies are missing. */
531 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
532 return false;
533 if (!gimple_has_body_p (callee->decl))
534 {
535 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
536 return false;
537 }
538 /* In early inliner some of callees may not be in SSA form yet
539 (i.e. the callgraph is cyclic and we did not process
540 the callee by early inliner, yet). We don't have CIF code for this
541 case; later we will re-do the decision in the real inliner. */
542 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
543 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
544 {
545 if (dump_file)
546 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
547 return false;
548 }
549 if (!can_inline_edge_p (e, true, false, true))
550 return false;
551 return true;
552 }
553
554
555 /* Return number of calls in N. Ignore cheap builtins. */
556
557 static int
558 num_calls (struct cgraph_node *n)
559 {
560 struct cgraph_edge *e;
561 int num = 0;
562
563 for (e = n->callees; e; e = e->next_callee)
564 if (!is_inexpensive_builtin (e->callee->decl))
565 num++;
566 return num;
567 }
568
569
570 /* Return true if we are interested in inlining small function. */
571
572 static bool
573 want_early_inline_function_p (struct cgraph_edge *e)
574 {
575 bool want_inline = true;
576 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
577
578 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
579 ;
580 /* For AutoFDO, we need to make sure that before profile summary, all
581 hot paths' IR look exactly the same as profiled binary. As a result,
582 in einliner, we will disregard size limit and inline those callsites
583 that are:
584 * inlined in the profiled binary, and
585 * the cloned callee has enough samples to be considered "hot". */
586 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
587 ;
588 else if (!DECL_DECLARED_INLINE_P (callee->decl)
589 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
590 {
591 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
592 report_inline_failed_reason (e);
593 want_inline = false;
594 }
595 else
596 {
597 int growth = estimate_edge_growth (e);
598 int n;
599
600 if (growth <= 0)
601 ;
602 else if (!e->maybe_hot_p ()
603 && growth > 0)
604 {
605 if (dump_file)
606 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
607 "call is cold and code would grow by %i\n",
608 xstrdup_for_dump (e->caller->name ()),
609 e->caller->order,
610 xstrdup_for_dump (callee->name ()), callee->order,
611 growth);
612 want_inline = false;
613 }
614 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
615 {
616 if (dump_file)
617 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
618 "growth %i exceeds --param early-inlining-insns\n",
619 xstrdup_for_dump (e->caller->name ()),
620 e->caller->order,
621 xstrdup_for_dump (callee->name ()), callee->order,
622 growth);
623 want_inline = false;
624 }
625 else if ((n = num_calls (callee)) != 0
626 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
627 {
628 if (dump_file)
629 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
630 "growth %i exceeds --param early-inlining-insns "
631 "divided by number of calls\n",
632 xstrdup_for_dump (e->caller->name ()),
633 e->caller->order,
634 xstrdup_for_dump (callee->name ()), callee->order,
635 growth);
636 want_inline = false;
637 }
638 }
639 return want_inline;
640 }
641
642 /* Compute time of the edge->caller + edge->callee execution when inlining
643 does not happen. */
644
645 inline sreal
646 compute_uninlined_call_time (struct inline_summary *callee_info,
647 struct cgraph_edge *edge)
648 {
649 sreal uninlined_call_time = (sreal)callee_info->time;
650 cgraph_node *caller = (edge->caller->global.inlined_to
651 ? edge->caller->global.inlined_to
652 : edge->caller);
653
654 if (edge->count && caller->count)
655 uninlined_call_time *= (sreal)edge->count / caller->count;
656 if (edge->frequency)
657 uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
658 else
659 uninlined_call_time = uninlined_call_time >> 11;
660
661 int caller_time = inline_summaries->get (caller)->time;
662 return uninlined_call_time + caller_time;
663 }
664
665 /* Same as compute_uinlined_call_time but compute time when inlining
666 does happen. */
667
668 inline sreal
669 compute_inlined_call_time (struct cgraph_edge *edge,
670 int edge_time)
671 {
672 cgraph_node *caller = (edge->caller->global.inlined_to
673 ? edge->caller->global.inlined_to
674 : edge->caller);
675 int caller_time = inline_summaries->get (caller)->time;
676 sreal time = edge_time;
677
678 if (edge->count && caller->count)
679 time *= (sreal)edge->count / caller->count;
680 if (edge->frequency)
681 time *= cgraph_freq_base_rec * edge->frequency;
682 else
683 time = time >> 11;
684
685 /* This calculation should match one in ipa-inline-analysis.
686 FIXME: Once ipa-inline-analysis is converted to sreal this can be
687 simplified. */
688 time -= (sreal) ((gcov_type) edge->frequency
689 * inline_edge_summary (edge)->call_stmt_time
690 * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
691 time += caller_time;
692 if (time <= 0)
693 time = ((sreal) 1) >> 8;
694 gcc_checking_assert (time >= 0);
695 return time;
696 }
697
698 /* Return true if the speedup for inlining E is bigger than
699 PARAM_MAX_INLINE_MIN_SPEEDUP. */
700
701 static bool
702 big_speedup_p (struct cgraph_edge *e)
703 {
704 sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
705 e);
706 sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
707
708 if (time - inlined_time
709 > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
710 * percent_rec)
711 return true;
712 return false;
713 }
714
715 /* Return true if we are interested in inlining small function.
716 When REPORT is true, report reason to dump file. */
717
718 static bool
719 want_inline_small_function_p (struct cgraph_edge *e, bool report)
720 {
721 bool want_inline = true;
722 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
723
724 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
725 ;
726 else if (!DECL_DECLARED_INLINE_P (callee->decl)
727 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
728 {
729 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
730 want_inline = false;
731 }
732 /* Do fast and conservative check if the function can be good
733 inline candidate. At the moment we allow inline hints to
734 promote non-inline functions to inline and we increase
735 MAX_INLINE_INSNS_SINGLE 16-fold for inline functions. */
736 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
737 && (!e->count || !e->maybe_hot_p ()))
738 && inline_summaries->get (callee)->min_size
739 - inline_edge_summary (e)->call_stmt_size
740 > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
741 {
742 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
743 want_inline = false;
744 }
745 else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
746 && inline_summaries->get (callee)->min_size
747 - inline_edge_summary (e)->call_stmt_size
748 > 16 * MAX_INLINE_INSNS_SINGLE)
749 {
750 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
751 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
752 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
753 want_inline = false;
754 }
755 else
756 {
757 int growth = estimate_edge_growth (e);
758 inline_hints hints = estimate_edge_hints (e);
759 bool big_speedup = big_speedup_p (e);
760
761 if (growth <= 0)
762 ;
763 /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when
764 hints suggests that inlining given function is very profitable. */
765 else if (DECL_DECLARED_INLINE_P (callee->decl)
766 && growth >= MAX_INLINE_INSNS_SINGLE
767 && ((!big_speedup
768 && !(hints & (INLINE_HINT_indirect_call
769 | INLINE_HINT_known_hot
770 | INLINE_HINT_loop_iterations
771 | INLINE_HINT_array_index
772 | INLINE_HINT_loop_stride)))
773 || growth >= MAX_INLINE_INSNS_SINGLE * 16))
774 {
775 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
776 want_inline = false;
777 }
778 else if (!DECL_DECLARED_INLINE_P (callee->decl)
779 && !opt_for_fn (e->caller->decl, flag_inline_functions))
780 {
781 /* growth_likely_positive is expensive, always test it last. */
782 if (growth >= MAX_INLINE_INSNS_SINGLE
783 || growth_likely_positive (callee, growth))
784 {
785 e->inline_failed = CIF_NOT_DECLARED_INLINED;
786 want_inline = false;
787 }
788 }
789 /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
790 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
791 inlining given function is very profitable. */
792 else if (!DECL_DECLARED_INLINE_P (callee->decl)
793 && !big_speedup
794 && !(hints & INLINE_HINT_known_hot)
795 && growth >= ((hints & (INLINE_HINT_indirect_call
796 | INLINE_HINT_loop_iterations
797 | INLINE_HINT_array_index
798 | INLINE_HINT_loop_stride))
799 ? MAX (MAX_INLINE_INSNS_AUTO,
800 MAX_INLINE_INSNS_SINGLE)
801 : MAX_INLINE_INSNS_AUTO))
802 {
803 /* growth_likely_positive is expensive, always test it last. */
804 if (growth >= MAX_INLINE_INSNS_SINGLE
805 || growth_likely_positive (callee, growth))
806 {
807 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
808 want_inline = false;
809 }
810 }
811 /* If call is cold, do not inline when function body would grow. */
812 else if (!e->maybe_hot_p ()
813 && (growth >= MAX_INLINE_INSNS_SINGLE
814 || growth_likely_positive (callee, growth)))
815 {
816 e->inline_failed = CIF_UNLIKELY_CALL;
817 want_inline = false;
818 }
819 }
820 if (!want_inline && report)
821 report_inline_failed_reason (e);
822 return want_inline;
823 }
824
/* EDGE is a self recursive edge.
We handle two cases - when function A is being inlined into itself
or when function A is being inlined into another inlined copy of function
A within function B.

In the first case OUTER_NODE points to the toplevel copy of A, while
in the second case OUTER_NODE points to the outermost copy of A in B.

In both cases we want to be extra selective since
inlining the call will just cause new recursive calls to appear. */
835
836 static bool
837 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
838 struct cgraph_node *outer_node,
839 bool peeling,
840 int depth)
841 {
842 char const *reason = NULL;
843 bool want_inline = true;
844 int caller_freq = CGRAPH_FREQ_BASE;
845 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
846
847 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
848 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
849
850 if (!edge->maybe_hot_p ())
851 {
852 reason = "recursive call is cold";
853 want_inline = false;
854 }
855 else if (max_count && !outer_node->count)
856 {
857 reason = "not executed in profile";
858 want_inline = false;
859 }
860 else if (depth > max_depth)
861 {
862 reason = "--param max-inline-recursive-depth exceeded.";
863 want_inline = false;
864 }
865
866 if (outer_node->global.inlined_to)
867 caller_freq = outer_node->callers->frequency;
868
869 if (!caller_freq)
870 {
871 reason = "function is inlined and unlikely";
872 want_inline = false;
873 }
874
875 if (!want_inline)
876 ;
877 /* Inlining of self recursive function into copy of itself within other function
878 is transformation similar to loop peeling.
879
880 Peeling is profitable if we can inline enough copies to make probability
881 of actual call to the self recursive function very small. Be sure that
882 the probability of recursion is small.
883
884 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
885 This way the expected number of recision is at most max_depth. */
886 else if (peeling)
887 {
888 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
889 / max_depth);
890 int i;
891 for (i = 1; i < depth; i++)
892 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
893 if (max_count
894 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
895 >= max_prob))
896 {
897 reason = "profile of recursive call is too large";
898 want_inline = false;
899 }
900 if (!max_count
901 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
902 >= max_prob))
903 {
904 reason = "frequency of recursive call is too large";
905 want_inline = false;
906 }
907 }
908 /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
909 depth is large. We reduce function call overhead and increase chances that
910 things fit in hardware return predictor.
911
912 Recursive inlining might however increase cost of stack frame setup
913 actually slowing down functions whose recursion tree is wide rather than
914 deep.
915
916 Deciding reliably on when to do recursive inlining without profile feedback
917 is tricky. For now we disable recursive inlining when probability of self
918 recursion is low.
919
920 Recursive inlining of self recursive call within loop also results in large loop
921 depths that generally optimize badly. We may want to throttle down inlining
922 in those cases. In particular this seems to happen in one of libstdc++ rb tree
923 methods. */
924 else
925 {
926 if (max_count
927 && (edge->count * 100 / outer_node->count
928 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
929 {
930 reason = "profile of recursive call is too small";
931 want_inline = false;
932 }
933 else if (!max_count
934 && (edge->frequency * 100 / caller_freq
935 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
936 {
937 reason = "frequency of recursive call is too small";
938 want_inline = false;
939 }
940 }
941 if (!want_inline && dump_file)
942 fprintf (dump_file, " not inlining recursively: %s\n", reason);
943 return want_inline;
944 }
945
946 /* Return true when NODE has uninlinable caller;
947 set HAS_HOT_CALL if it has hot call.
948 Worker for cgraph_for_node_and_aliases. */
949
950 static bool
951 check_callers (struct cgraph_node *node, void *has_hot_call)
952 {
953 struct cgraph_edge *e;
954 for (e = node->callers; e; e = e->next_caller)
955 {
956 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
957 return true;
958 if (!can_inline_edge_p (e, true))
959 return true;
960 if (e->recursive_p ())
961 return true;
962 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
963 *(bool *)has_hot_call = true;
964 }
965 return false;
966 }
967
968 /* If NODE has a caller, return true. */
969
970 static bool
971 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
972 {
973 if (node->callers)
974 return true;
975 return false;
976 }
977
978 /* Decide if inlining NODE would reduce unit size by eliminating
979 the offline copy of function.
980 When COLD is true the cold calls are considered, too. */
981
982 static bool
983 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
984 {
985 bool has_hot_call = false;
986
987 /* Aliases gets inlined along with the function they alias. */
988 if (node->alias)
989 return false;
990 /* Already inlined? */
991 if (node->global.inlined_to)
992 return false;
993 /* Does it have callers? */
994 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
995 return false;
996 /* Inlining into all callers would increase size? */
997 if (estimate_growth (node) > 0)
998 return false;
999 /* All inlines must be possible. */
1000 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1001 true))
1002 return false;
1003 if (!cold && !has_hot_call)
1004 return false;
1005 return true;
1006 }
1007
1008 /* A cost model driving the inlining heuristics in a way so the edges with
1009 smallest badness are inlined first. After each inlining is performed
1010 the costs of all caller edges of nodes affected are recomputed so the
1011 metrics may accurately depend on values such as number of inlinable callers
1012 of the function or function body size. */
1013
1014 static sreal
1015 edge_badness (struct cgraph_edge *edge, bool dump)
1016 {
1017 sreal badness;
1018 int growth, edge_time;
1019 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1020 struct inline_summary *callee_info = inline_summaries->get (callee);
1021 inline_hints hints;
1022 cgraph_node *caller = (edge->caller->global.inlined_to
1023 ? edge->caller->global.inlined_to
1024 : edge->caller);
1025
1026 growth = estimate_edge_growth (edge);
1027 edge_time = estimate_edge_time (edge);
1028 hints = estimate_edge_hints (edge);
1029 gcc_checking_assert (edge_time >= 0);
1030 gcc_checking_assert (edge_time <= callee_info->time);
1031 gcc_checking_assert (growth <= callee_info->size);
1032
1033 if (dump)
1034 {
1035 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
1036 xstrdup_for_dump (edge->caller->name ()),
1037 edge->caller->order,
1038 xstrdup_for_dump (callee->name ()),
1039 edge->callee->order);
1040 fprintf (dump_file, " size growth %i, time %i ",
1041 growth,
1042 edge_time);
1043 dump_inline_hints (dump_file, hints);
1044 if (big_speedup_p (edge))
1045 fprintf (dump_file, " big_speedup");
1046 fprintf (dump_file, "\n");
1047 }
1048
1049 /* Always prefer inlining saving code size. */
1050 if (growth <= 0)
1051 {
1052 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1053 if (dump)
1054 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1055 growth);
1056 }
1057 /* Inlining into EXTERNAL functions is not going to change anything unless
1058 they are themselves inlined. */
1059 else if (DECL_EXTERNAL (caller->decl))
1060 {
1061 if (dump)
1062 fprintf (dump_file, " max: function is external\n");
1063 return sreal::max ();
1064 }
1065 /* When profile is available. Compute badness as:
1066
1067 time_saved * caller_count
1068 goodness = -------------------------------------------------
1069 growth_of_caller * overall_growth * combined_size
1070
1071 badness = - goodness
1072
1073 Again use negative value to make calls with profile appear hotter
1074 then calls without.
1075 */
1076 else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
1077 {
1078 sreal numerator, denominator;
1079 int overall_growth;
1080
1081 numerator = (compute_uninlined_call_time (callee_info, edge)
1082 - compute_inlined_call_time (edge, edge_time));
1083 if (numerator == 0)
1084 numerator = ((sreal) 1 >> 8);
1085 if (caller->count)
1086 numerator *= caller->count;
1087 else if (opt_for_fn (caller->decl, flag_branch_probabilities))
1088 numerator = numerator >> 11;
1089 denominator = growth;
1090
1091 overall_growth = callee_info->growth;
1092
1093 /* Look for inliner wrappers of the form:
1094
1095 inline_caller ()
1096 {
1097 do_fast_job...
1098 if (need_more_work)
1099 noninline_callee ();
1100 }
1101 Withhout panilizing this case, we usually inline noninline_callee
1102 into the inline_caller because overall_growth is small preventing
1103 further inlining of inline_caller.
1104
1105 Penalize only callgraph edges to functions with small overall
1106 growth ...
1107 */
1108 if (growth > overall_growth
1109 /* ... and having only one caller which is not inlined ... */
1110 && callee_info->single_caller
1111 && !edge->caller->global.inlined_to
1112 /* ... and edges executed only conditionally ... */
1113 && edge->frequency < CGRAPH_FREQ_BASE
1114 /* ... consider case where callee is not inline but caller is ... */
1115 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1116 && DECL_DECLARED_INLINE_P (caller->decl))
1117 /* ... or when early optimizers decided to split and edge
1118 frequency still indicates splitting is a win ... */
1119 || (callee->split_part && !caller->split_part
1120 && edge->frequency
1121 < CGRAPH_FREQ_BASE
1122 * PARAM_VALUE
1123 (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
1124 /* ... and do not overwrite user specified hints. */
1125 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1126 || DECL_DECLARED_INLINE_P (caller->decl)))))
1127 {
1128 struct inline_summary *caller_info = inline_summaries->get (caller);
1129 int caller_growth = caller_info->growth;
1130
1131 /* Only apply the penalty when caller looks like inline candidate,
1132 and it is not called once and. */
1133 if (!caller_info->single_caller && overall_growth < caller_growth
1134 && caller_info->inlinable
1135 && caller_info->size
1136 < (DECL_DECLARED_INLINE_P (caller->decl)
1137 ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1138 {
1139 if (dump)
1140 fprintf (dump_file,
1141 " Wrapper penalty. Increasing growth %i to %i\n",
1142 overall_growth, caller_growth);
1143 overall_growth = caller_growth;
1144 }
1145 }
1146 if (overall_growth > 0)
1147 {
1148 /* Strongly preffer functions with few callers that can be inlined
1149 fully. The square root here leads to smaller binaries at average.
1150 Watch however for extreme cases and return to linear function
1151 when growth is large. */
1152 if (overall_growth < 256)
1153 overall_growth *= overall_growth;
1154 else
1155 overall_growth += 256 * 256 - 256;
1156 denominator *= overall_growth;
1157 }
1158 denominator *= inline_summaries->get (caller)->self_size + growth;
1159
1160 badness = - numerator / denominator;
1161
1162 if (dump)
1163 {
1164 fprintf (dump_file,
1165 " %f: guessed profile. frequency %f, count %" PRId64
1166 " caller count %" PRId64
1167 " time w/o inlining %f, time w/ inlining %f"
1168 " overall growth %i (current) %i (original)"
1169 " %i (compensated)\n",
1170 badness.to_double (),
1171 (double)edge->frequency / CGRAPH_FREQ_BASE,
1172 edge->count, caller->count,
1173 compute_uninlined_call_time (callee_info, edge).to_double (),
1174 compute_inlined_call_time (edge, edge_time).to_double (),
1175 estimate_growth (callee),
1176 callee_info->growth, overall_growth);
1177 }
1178 }
1179 /* When function local profile is not available or it does not give
1180 useful information (ie frequency is zero), base the cost on
1181 loop nest and overall size growth, so we optimize for overall number
1182 of functions fully inlined in program. */
1183 else
1184 {
1185 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1186 badness = growth;
1187
1188 /* Decrease badness if call is nested. */
1189 if (badness > 0)
1190 badness = badness >> nest;
1191 else
1192 badness = badness << nest;
1193 if (dump)
1194 fprintf (dump_file, " %f: no profile. nest %i\n",
1195 badness.to_double (), nest);
1196 }
1197 gcc_checking_assert (badness != 0);
1198
1199 if (edge->recursive_p ())
1200 badness = badness.shift (badness > 0 ? 4 : -4);
1201 if ((hints & (INLINE_HINT_indirect_call
1202 | INLINE_HINT_loop_iterations
1203 | INLINE_HINT_array_index
1204 | INLINE_HINT_loop_stride))
1205 || callee_info->growth <= 0)
1206 badness = badness.shift (badness > 0 ? -2 : 2);
1207 if (hints & (INLINE_HINT_same_scc))
1208 badness = badness.shift (badness > 0 ? 3 : -3);
1209 else if (hints & (INLINE_HINT_in_scc))
1210 badness = badness.shift (badness > 0 ? 2 : -2);
1211 else if (hints & (INLINE_HINT_cross_module))
1212 badness = badness.shift (badness > 0 ? 1 : -1);
1213 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1214 badness = badness.shift (badness > 0 ? -4 : 4);
1215 else if ((hints & INLINE_HINT_declared_inline))
1216 badness = badness.shift (badness > 0 ? -3 : 3);
1217 if (dump)
1218 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1219 return badness;
1220 }
1221
1222 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1223 static inline void
1224 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1225 {
1226 sreal badness = edge_badness (edge, false);
1227 if (edge->aux)
1228 {
1229 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1230 gcc_checking_assert (n->get_data () == edge);
1231
1232 /* fibonacci_heap::replace_key does busy updating of the
1233 heap that is unnecesarily expensive.
1234 We do lazy increases: after extracting minimum if the key
1235 turns out to be out of date, it is re-inserted into heap
1236 with correct value. */
1237 if (badness < n->get_key ())
1238 {
1239 if (dump_file && (dump_flags & TDF_DETAILS))
1240 {
1241 fprintf (dump_file,
1242 " decreasing badness %s/%i -> %s/%i, %f"
1243 " to %f\n",
1244 xstrdup_for_dump (edge->caller->name ()),
1245 edge->caller->order,
1246 xstrdup_for_dump (edge->callee->name ()),
1247 edge->callee->order,
1248 n->get_key ().to_double (),
1249 badness.to_double ());
1250 }
1251 heap->decrease_key (n, badness);
1252 }
1253 }
1254 else
1255 {
1256 if (dump_file && (dump_flags & TDF_DETAILS))
1257 {
1258 fprintf (dump_file,
1259 " enqueuing call %s/%i -> %s/%i, badness %f\n",
1260 xstrdup_for_dump (edge->caller->name ()),
1261 edge->caller->order,
1262 xstrdup_for_dump (edge->callee->name ()),
1263 edge->callee->order,
1264 badness.to_double ());
1265 }
1266 edge->aux = heap->insert (badness, edge);
1267 }
1268 }
1269
1270
1271 /* NODE was inlined.
1272 All caller edges needs to be resetted because
1273 size estimates change. Similarly callees needs reset
1274 because better context may be known. */
1275
1276 static void
1277 reset_edge_caches (struct cgraph_node *node)
1278 {
1279 struct cgraph_edge *edge;
1280 struct cgraph_edge *e = node->callees;
1281 struct cgraph_node *where = node;
1282 struct ipa_ref *ref;
1283
1284 if (where->global.inlined_to)
1285 where = where->global.inlined_to;
1286
1287 for (edge = where->callers; edge; edge = edge->next_caller)
1288 if (edge->inline_failed)
1289 reset_edge_growth_cache (edge);
1290
1291 FOR_EACH_ALIAS (where, ref)
1292 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1293
1294 if (!e)
1295 return;
1296
1297 while (true)
1298 if (!e->inline_failed && e->callee->callees)
1299 e = e->callee->callees;
1300 else
1301 {
1302 if (e->inline_failed)
1303 reset_edge_growth_cache (e);
1304 if (e->next_callee)
1305 e = e->next_callee;
1306 else
1307 {
1308 do
1309 {
1310 if (e->caller == node)
1311 return;
1312 e = e->caller->callers;
1313 }
1314 while (!e->next_callee);
1315 e = e->next_callee;
1316 }
1317 }
1318 }
1319
1320 /* Recompute HEAP nodes for each of caller of NODE.
1321 UPDATED_NODES track nodes we already visited, to avoid redundant work.
1322 When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1323 it is inlinable. Otherwise check all edges. */
1324
1325 static void
1326 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1327 bitmap updated_nodes,
1328 struct cgraph_edge *check_inlinablity_for)
1329 {
1330 struct cgraph_edge *edge;
1331 struct ipa_ref *ref;
1332
1333 if ((!node->alias && !inline_summaries->get (node)->inlinable)
1334 || node->global.inlined_to)
1335 return;
1336 if (!bitmap_set_bit (updated_nodes, node->uid))
1337 return;
1338
1339 FOR_EACH_ALIAS (node, ref)
1340 {
1341 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1342 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1343 }
1344
1345 for (edge = node->callers; edge; edge = edge->next_caller)
1346 if (edge->inline_failed)
1347 {
1348 if (!check_inlinablity_for
1349 || check_inlinablity_for == edge)
1350 {
1351 if (can_inline_edge_p (edge, false)
1352 && want_inline_small_function_p (edge, false))
1353 update_edge_key (heap, edge);
1354 else if (edge->aux)
1355 {
1356 report_inline_failed_reason (edge);
1357 heap->delete_node ((edge_heap_node_t *) edge->aux);
1358 edge->aux = NULL;
1359 }
1360 }
1361 else if (edge->aux)
1362 update_edge_key (heap, edge);
1363 }
1364 }
1365
1366 /* Recompute HEAP nodes for each uninlined call in NODE.
1367 This is used when we know that edge badnesses are going only to increase
1368 (we introduced new call site) and thus all we need is to insert newly
1369 created edges into heap. */
1370
1371 static void
1372 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1373 bitmap updated_nodes)
1374 {
1375 struct cgraph_edge *e = node->callees;
1376
1377 if (!e)
1378 return;
1379 while (true)
1380 if (!e->inline_failed && e->callee->callees)
1381 e = e->callee->callees;
1382 else
1383 {
1384 enum availability avail;
1385 struct cgraph_node *callee;
1386 /* We do not reset callee growth cache here. Since we added a new call,
1387 growth chould have just increased and consequentely badness metric
1388 don't need updating. */
1389 if (e->inline_failed
1390 && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1391 && inline_summaries->get (callee)->inlinable
1392 && avail >= AVAIL_AVAILABLE
1393 && !bitmap_bit_p (updated_nodes, callee->uid))
1394 {
1395 if (can_inline_edge_p (e, false)
1396 && want_inline_small_function_p (e, false))
1397 update_edge_key (heap, e);
1398 else if (e->aux)
1399 {
1400 report_inline_failed_reason (e);
1401 heap->delete_node ((edge_heap_node_t *) e->aux);
1402 e->aux = NULL;
1403 }
1404 }
1405 if (e->next_callee)
1406 e = e->next_callee;
1407 else
1408 {
1409 do
1410 {
1411 if (e->caller == node)
1412 return;
1413 e = e->caller->callers;
1414 }
1415 while (!e->next_callee);
1416 e = e->next_callee;
1417 }
1418 }
1419 }
1420
1421 /* Enqueue all recursive calls from NODE into priority queue depending on
1422 how likely we want to recursively inline the call. */
1423
1424 static void
1425 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1426 edge_heap_t *heap)
1427 {
1428 struct cgraph_edge *e;
1429 enum availability avail;
1430
1431 for (e = where->callees; e; e = e->next_callee)
1432 if (e->callee == node
1433 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1434 && avail > AVAIL_INTERPOSABLE))
1435 {
1436 /* When profile feedback is available, prioritize by expected number
1437 of calls. */
1438 heap->insert (!max_count ? -e->frequency
1439 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1440 e);
1441 }
1442 for (e = where->callees; e; e = e->next_callee)
1443 if (!e->inline_failed)
1444 lookup_recursive_calls (node, e->callee, heap);
1445 }
1446
1447 /* Decide on recursive inlining: in the case function has recursive calls,
1448 inline until body size reaches given argument. If any new indirect edges
1449 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1450 is NULL. */
1451
1452 static bool
1453 recursive_inlining (struct cgraph_edge *edge,
1454 vec<cgraph_edge *> *new_edges)
1455 {
1456 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1457 edge_heap_t heap (sreal::min ());
1458 struct cgraph_node *node;
1459 struct cgraph_edge *e;
1460 struct cgraph_node *master_clone = NULL, *next;
1461 int depth = 0;
1462 int n = 0;
1463
1464 node = edge->caller;
1465 if (node->global.inlined_to)
1466 node = node->global.inlined_to;
1467
1468 if (DECL_DECLARED_INLINE_P (node->decl))
1469 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1470
1471 /* Make sure that function is small enough to be considered for inlining. */
1472 if (estimate_size_after_inlining (node, edge) >= limit)
1473 return false;
1474 lookup_recursive_calls (node, node, &heap);
1475 if (heap.empty ())
1476 return false;
1477
1478 if (dump_file)
1479 fprintf (dump_file,
1480 " Performing recursive inlining on %s\n",
1481 node->name ());
1482
1483 /* Do the inlining and update list of recursive call during process. */
1484 while (!heap.empty ())
1485 {
1486 struct cgraph_edge *curr = heap.extract_min ();
1487 struct cgraph_node *cnode, *dest = curr->callee;
1488
1489 if (!can_inline_edge_p (curr, true))
1490 continue;
1491
1492 /* MASTER_CLONE is produced in the case we already started modified
1493 the function. Be sure to redirect edge to the original body before
1494 estimating growths otherwise we will be seeing growths after inlining
1495 the already modified body. */
1496 if (master_clone)
1497 {
1498 curr->redirect_callee (master_clone);
1499 reset_edge_growth_cache (curr);
1500 }
1501
1502 if (estimate_size_after_inlining (node, curr) > limit)
1503 {
1504 curr->redirect_callee (dest);
1505 reset_edge_growth_cache (curr);
1506 break;
1507 }
1508
1509 depth = 1;
1510 for (cnode = curr->caller;
1511 cnode->global.inlined_to; cnode = cnode->callers->caller)
1512 if (node->decl
1513 == curr->callee->ultimate_alias_target ()->decl)
1514 depth++;
1515
1516 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1517 {
1518 curr->redirect_callee (dest);
1519 reset_edge_growth_cache (curr);
1520 continue;
1521 }
1522
1523 if (dump_file)
1524 {
1525 fprintf (dump_file,
1526 " Inlining call of depth %i", depth);
1527 if (node->count)
1528 {
1529 fprintf (dump_file, " called approx. %.2f times per call",
1530 (double)curr->count / node->count);
1531 }
1532 fprintf (dump_file, "\n");
1533 }
1534 if (!master_clone)
1535 {
1536 /* We need original clone to copy around. */
1537 master_clone = node->create_clone (node->decl, node->count,
1538 CGRAPH_FREQ_BASE, false, vNULL,
1539 true, NULL, NULL);
1540 for (e = master_clone->callees; e; e = e->next_callee)
1541 if (!e->inline_failed)
1542 clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1543 curr->redirect_callee (master_clone);
1544 reset_edge_growth_cache (curr);
1545 }
1546
1547 inline_call (curr, false, new_edges, &overall_size, true);
1548 lookup_recursive_calls (node, curr->callee, &heap);
1549 n++;
1550 }
1551
1552 if (!heap.empty () && dump_file)
1553 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1554
1555 if (!master_clone)
1556 return false;
1557
1558 if (dump_file)
1559 fprintf (dump_file,
1560 "\n Inlined %i times, "
1561 "body grown from size %i to %i, time %i to %i\n", n,
1562 inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1563 inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1564
1565 /* Remove master clone we used for inlining. We rely that clones inlined
1566 into master clone gets queued just before master clone so we don't
1567 need recursion. */
1568 for (node = symtab->first_function (); node != master_clone;
1569 node = next)
1570 {
1571 next = symtab->next_function (node);
1572 if (node->global.inlined_to == master_clone)
1573 node->remove ();
1574 }
1575 master_clone->remove ();
1576 return true;
1577 }
1578
1579
1580 /* Given whole compilation unit estimate of INSNS, compute how large we can
1581 allow the unit to grow. */
1582
1583 static int
1584 compute_max_insns (int insns)
1585 {
1586 int max_insns = insns;
1587 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1588 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1589
1590 return ((int64_t) max_insns
1591 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1592 }
1593
1594
1595 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1596
1597 static void
1598 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1599 {
1600 while (new_edges.length () > 0)
1601 {
1602 struct cgraph_edge *edge = new_edges.pop ();
1603
1604 gcc_assert (!edge->aux);
1605 if (edge->inline_failed
1606 && can_inline_edge_p (edge, true)
1607 && want_inline_small_function_p (edge, true))
1608 edge->aux = heap->insert (edge_badness (edge, false), edge);
1609 }
1610 }
1611
1612 /* Remove EDGE from the fibheap. */
1613
1614 static void
1615 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1616 {
1617 if (e->aux)
1618 {
1619 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1620 e->aux = NULL;
1621 }
1622 }
1623
1624 /* Return true if speculation of edge E seems useful.
1625 If ANTICIPATE_INLINING is true, be conservative and hope that E
1626 may get inlined. */
1627
1628 bool
1629 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1630 {
1631 enum availability avail;
1632 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1633 e->caller);
1634 struct cgraph_edge *direct, *indirect;
1635 struct ipa_ref *ref;
1636
1637 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1638
1639 if (!e->maybe_hot_p ())
1640 return false;
1641
1642 /* See if IP optimizations found something potentially useful about the
1643 function. For now we look only for CONST/PURE flags. Almost everything
1644 else we propagate is useless. */
1645 if (avail >= AVAIL_AVAILABLE)
1646 {
1647 int ecf_flags = flags_from_decl_or_type (target->decl);
1648 if (ecf_flags & ECF_CONST)
1649 {
1650 e->speculative_call_info (direct, indirect, ref);
1651 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1652 return true;
1653 }
1654 else if (ecf_flags & ECF_PURE)
1655 {
1656 e->speculative_call_info (direct, indirect, ref);
1657 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1658 return true;
1659 }
1660 }
1661 /* If we did not managed to inline the function nor redirect
1662 to an ipa-cp clone (that are seen by having local flag set),
1663 it is probably pointless to inline it unless hardware is missing
1664 indirect call predictor. */
1665 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1666 return false;
1667 /* For overwritable targets there is not much to do. */
1668 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1669 return false;
1670 /* OK, speculation seems interesting. */
1671 return true;
1672 }
1673
1674 /* We know that EDGE is not going to be inlined.
1675 See if we can remove speculation. */
1676
1677 static void
1678 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1679 {
1680 if (edge->speculative && !speculation_useful_p (edge, false))
1681 {
1682 struct cgraph_node *node = edge->caller;
1683 struct cgraph_node *where = node->global.inlined_to
1684 ? node->global.inlined_to : node;
1685 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1686
1687 spec_rem += edge->count;
1688 edge->resolve_speculation ();
1689 reset_edge_caches (where);
1690 inline_update_overall_summary (where);
1691 update_caller_keys (edge_heap, where,
1692 updated_nodes, NULL);
1693 update_callee_keys (edge_heap, where,
1694 updated_nodes);
1695 BITMAP_FREE (updated_nodes);
1696 }
1697 }
1698
1699 /* Return true if NODE should be accounted for overall size estimate.
1700 Skip all nodes optimized for size so we can measure the growth of hot
1701 part of program no matter of the padding. */
1702
1703 bool
1704 inline_account_function_p (struct cgraph_node *node)
1705 {
1706 return (!DECL_EXTERNAL (node->decl)
1707 && !opt_for_fn (node->decl, optimize_size)
1708 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1709 }
1710
1711 /* Count number of callers of NODE and store it into DATA (that
1712 points to int. Worker for cgraph_for_node_and_aliases. */
1713
1714 static bool
1715 sum_callers (struct cgraph_node *node, void *data)
1716 {
1717 struct cgraph_edge *e;
1718 int *num_calls = (int *)data;
1719
1720 for (e = node->callers; e; e = e->next_caller)
1721 (*num_calls)++;
1722 return false;
1723 }
1724
1725 /* We use greedy algorithm for inlining of small functions:
1726 All inline candidates are put into prioritized heap ordered in
1727 increasing badness.
1728
1729 The inlining of small functions is bounded by unit growth parameters. */
1730
1731 static void
1732 inline_small_functions (void)
1733 {
1734 struct cgraph_node *node;
1735 struct cgraph_edge *edge;
1736 edge_heap_t edge_heap (sreal::min ());
1737 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1738 int min_size, max_size;
1739 auto_vec<cgraph_edge *> new_indirect_edges;
1740 int initial_size = 0;
1741 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1742 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1743 new_indirect_edges.create (8);
1744
1745 edge_removal_hook_holder
1746 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1747
1748 /* Compute overall unit size and other global parameters used by badness
1749 metrics. */
1750
1751 max_count = 0;
1752 ipa_reduced_postorder (order, true, true, NULL);
1753 free (order);
1754
1755 FOR_EACH_DEFINED_FUNCTION (node)
1756 if (!node->global.inlined_to)
1757 {
1758 if (!node->alias && node->analyzed
1759 && (node->has_gimple_body_p () || node->thunk.thunk_p))
1760 {
1761 struct inline_summary *info = inline_summaries->get (node);
1762 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1763
1764 /* Do not account external functions, they will be optimized out
1765 if not inlined. Also only count the non-cold portion of program. */
1766 if (inline_account_function_p (node))
1767 initial_size += info->size;
1768 info->growth = estimate_growth (node);
1769
1770 int num_calls = 0;
1771 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1772 true);
1773 if (num_calls == 1)
1774 info->single_caller = true;
1775 if (dfs && dfs->next_cycle)
1776 {
1777 struct cgraph_node *n2;
1778 int id = dfs->scc_no + 1;
1779 for (n2 = node; n2;
1780 n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1781 {
1782 struct inline_summary *info2 = inline_summaries->get (n2);
1783 if (info2->scc_no)
1784 break;
1785 info2->scc_no = id;
1786 }
1787 }
1788 }
1789
1790 for (edge = node->callers; edge; edge = edge->next_caller)
1791 if (max_count < edge->count)
1792 max_count = edge->count;
1793 }
1794 ipa_free_postorder_info ();
1795 initialize_growth_caches ();
1796
1797 if (dump_file)
1798 fprintf (dump_file,
1799 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1800 initial_size);
1801
1802 overall_size = initial_size;
1803 max_size = compute_max_insns (overall_size);
1804 min_size = overall_size;
1805
1806 /* Populate the heap with all edges we might inline. */
1807
1808 FOR_EACH_DEFINED_FUNCTION (node)
1809 {
1810 bool update = false;
1811 struct cgraph_edge *next = NULL;
1812 bool has_speculative = false;
1813
1814 if (dump_file)
1815 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1816 node->name (), node->order);
1817
1818 for (edge = node->callees; edge; edge = next)
1819 {
1820 next = edge->next_callee;
1821 if (edge->inline_failed
1822 && !edge->aux
1823 && can_inline_edge_p (edge, true)
1824 && want_inline_small_function_p (edge, true)
1825 && edge->inline_failed)
1826 {
1827 gcc_assert (!edge->aux);
1828 update_edge_key (&edge_heap, edge);
1829 }
1830 if (edge->speculative)
1831 has_speculative = true;
1832 }
1833 if (has_speculative)
1834 for (edge = node->callees; edge; edge = next)
1835 if (edge->speculative && !speculation_useful_p (edge,
1836 edge->aux != NULL))
1837 {
1838 edge->resolve_speculation ();
1839 update = true;
1840 }
1841 if (update)
1842 {
1843 struct cgraph_node *where = node->global.inlined_to
1844 ? node->global.inlined_to : node;
1845 inline_update_overall_summary (where);
1846 reset_edge_caches (where);
1847 update_caller_keys (&edge_heap, where,
1848 updated_nodes, NULL);
1849 update_callee_keys (&edge_heap, where,
1850 updated_nodes);
1851 bitmap_clear (updated_nodes);
1852 }
1853 }
1854
1855 gcc_assert (in_lto_p
1856 || !max_count
1857 || (profile_info && flag_branch_probabilities));
1858
1859 while (!edge_heap.empty ())
1860 {
1861 int old_size = overall_size;
1862 struct cgraph_node *where, *callee;
1863 sreal badness = edge_heap.min_key ();
1864 sreal current_badness;
1865 int growth;
1866
1867 edge = edge_heap.extract_min ();
1868 gcc_assert (edge->aux);
1869 edge->aux = NULL;
1870 if (!edge->inline_failed || !edge->callee->analyzed)
1871 continue;
1872
1873 #if CHECKING_P
1874 /* Be sure that caches are maintained consistent. */
1875 sreal cached_badness = edge_badness (edge, false);
1876
1877 int old_size_est = estimate_edge_size (edge);
1878 int old_time_est = estimate_edge_time (edge);
1879 int old_hints_est = estimate_edge_hints (edge);
1880
1881 reset_edge_growth_cache (edge);
1882 gcc_assert (old_size_est == estimate_edge_size (edge));
1883 gcc_assert (old_time_est == estimate_edge_time (edge));
1884 /* FIXME:
1885
1886 gcc_assert (old_hints_est == estimate_edge_hints (edge));
1887
1888 fails with profile feedback because some hints depends on
1889 maybe_hot_edge_p predicate and because callee gets inlined to other
1890 calls, the edge may become cold.
1891 This ought to be fixed by computing relative probabilities
1892 for given invocation but that will be better done once whole
1893 code is converted to sreals. Disable for now and revert to "wrong"
1894 value so enable/disable checking paths agree. */
1895 edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1896
1897 /* When updating the edge costs, we only decrease badness in the keys.
1898 Increases of badness are handled lazilly; when we see key with out
1899 of date value on it, we re-insert it now. */
1900 current_badness = edge_badness (edge, false);
1901 /* Disable checking for profile because roundoff errors may cause slight
1902 deviations in the order. */
1903 gcc_assert (max_count || cached_badness == current_badness);
1904 gcc_assert (current_badness >= badness);
1905 #else
1906 current_badness = edge_badness (edge, false);
1907 #endif
1908 if (current_badness != badness)
1909 {
1910 if (edge_heap.min () && current_badness > edge_heap.min_key ())
1911 {
1912 edge->aux = edge_heap.insert (current_badness, edge);
1913 continue;
1914 }
1915 else
1916 badness = current_badness;
1917 }
1918
1919 if (!can_inline_edge_p (edge, true))
1920 {
1921 resolve_noninline_speculation (&edge_heap, edge);
1922 continue;
1923 }
1924
1925 callee = edge->callee->ultimate_alias_target ();
1926 growth = estimate_edge_growth (edge);
1927 if (dump_file)
1928 {
1929 fprintf (dump_file,
1930 "\nConsidering %s/%i with %i size\n",
1931 callee->name (), callee->order,
1932 inline_summaries->get (callee)->size);
1933 fprintf (dump_file,
1934 " to be inlined into %s/%i in %s:%i\n"
1935 " Estimated badness is %f, frequency %.2f.\n",
1936 edge->caller->name (), edge->caller->order,
1937 edge->call_stmt
1938 && (LOCATION_LOCUS (gimple_location ((const gimple *)
1939 edge->call_stmt))
1940 > BUILTINS_LOCATION)
1941 ? gimple_filename ((const gimple *) edge->call_stmt)
1942 : "unknown",
1943 edge->call_stmt
1944 ? gimple_lineno ((const gimple *) edge->call_stmt)
1945 : -1,
1946 badness.to_double (),
1947 edge->frequency / (double)CGRAPH_FREQ_BASE);
1948 if (edge->count)
1949 fprintf (dump_file," Called %" PRId64"x\n",
1950 edge->count);
1951 if (dump_flags & TDF_DETAILS)
1952 edge_badness (edge, true);
1953 }
1954
1955 if (overall_size + growth > max_size
1956 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1957 {
1958 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1959 report_inline_failed_reason (edge);
1960 resolve_noninline_speculation (&edge_heap, edge);
1961 continue;
1962 }
1963
1964 if (!want_inline_small_function_p (edge, true))
1965 {
1966 resolve_noninline_speculation (&edge_heap, edge);
1967 continue;
1968 }
1969
1970 /* Heuristics for inlining small functions work poorly for
1971 recursive calls where we do effects similar to loop unrolling.
1972 When inlining such edge seems profitable, leave decision on
1973 specific inliner. */
1974 if (edge->recursive_p ())
1975 {
1976 where = edge->caller;
1977 if (where->global.inlined_to)
1978 where = where->global.inlined_to;
1979 if (!recursive_inlining (edge,
1980 opt_for_fn (edge->caller->decl,
1981 flag_indirect_inlining)
1982 ? &new_indirect_edges : NULL))
1983 {
1984 edge->inline_failed = CIF_RECURSIVE_INLINING;
1985 resolve_noninline_speculation (&edge_heap, edge);
1986 continue;
1987 }
1988 reset_edge_caches (where);
1989 /* Recursive inliner inlines all recursive calls of the function
1990 at once. Consequently we need to update all callee keys. */
1991 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1992 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1993 update_callee_keys (&edge_heap, where, updated_nodes);
1994 bitmap_clear (updated_nodes);
1995 }
1996 else
1997 {
1998 struct cgraph_node *outer_node = NULL;
1999 int depth = 0;
2000
2001 /* Consider the case where self recursive function A is inlined
2002 into B. This is desired optimization in some cases, since it
2003 leads to effect similar of loop peeling and we might completely
2004 optimize out the recursive call. However we must be extra
2005 selective. */
2006
2007 where = edge->caller;
2008 while (where->global.inlined_to)
2009 {
2010 if (where->decl == callee->decl)
2011 outer_node = where, depth++;
2012 where = where->callers->caller;
2013 }
2014 if (outer_node
2015 && !want_inline_self_recursive_call_p (edge, outer_node,
2016 true, depth))
2017 {
2018 edge->inline_failed
2019 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2020 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2021 resolve_noninline_speculation (&edge_heap, edge);
2022 continue;
2023 }
2024 else if (depth && dump_file)
2025 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2026
2027 gcc_checking_assert (!callee->global.inlined_to);
2028 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2029 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2030
2031 reset_edge_caches (edge->callee);
2032
2033 update_callee_keys (&edge_heap, where, updated_nodes);
2034 }
2035 where = edge->caller;
2036 if (where->global.inlined_to)
2037 where = where->global.inlined_to;
2038
2039 /* Our profitability metric can depend on local properties
2040 such as number of inlinable calls and size of the function body.
2041 After inlining these properties might change for the function we
2042 inlined into (since it's body size changed) and for the functions
2043 called by function we inlined (since number of it inlinable callers
2044 might change). */
2045 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2046 /* Offline copy count has possibly changed, recompute if profile is
2047 available. */
2048 if (max_count)
2049 {
2050 struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2051 if (n != edge->callee && n->analyzed)
2052 update_callee_keys (&edge_heap, n, updated_nodes);
2053 }
2054 bitmap_clear (updated_nodes);
2055
2056 if (dump_file)
2057 {
2058 fprintf (dump_file,
2059 " Inlined into %s which now has time %i and size %i,"
2060 "net change of %+i.\n",
2061 edge->caller->name (),
2062 inline_summaries->get (edge->caller)->time,
2063 inline_summaries->get (edge->caller)->size,
2064 overall_size - old_size);
2065 }
2066 if (min_size > overall_size)
2067 {
2068 min_size = overall_size;
2069 max_size = compute_max_insns (min_size);
2070
2071 if (dump_file)
2072 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2073 }
2074 }
2075
2076 free_growth_caches ();
2077 if (dump_file)
2078 fprintf (dump_file,
2079 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2080 initial_size, overall_size,
2081 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2082 BITMAP_FREE (updated_nodes);
2083 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2084 }
2085
2086 /* Flatten NODE. Performed both during early inlining and
2087 at IPA inlining time. */
2088
2089 static void
2090 flatten_function (struct cgraph_node *node, bool early)
2091 {
2092 struct cgraph_edge *e;
2093
2094 /* We shouldn't be called recursively when we are being processed. */
2095 gcc_assert (node->aux == NULL);
2096
2097 node->aux = (void *) node;
2098
2099 for (e = node->callees; e; e = e->next_callee)
2100 {
2101 struct cgraph_node *orig_callee;
2102 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2103
2104 /* We've hit cycle? It is time to give up. */
2105 if (callee->aux)
2106 {
2107 if (dump_file)
2108 fprintf (dump_file,
2109 "Not inlining %s into %s to avoid cycle.\n",
2110 xstrdup_for_dump (callee->name ()),
2111 xstrdup_for_dump (e->caller->name ()));
2112 e->inline_failed = CIF_RECURSIVE_INLINING;
2113 continue;
2114 }
2115
2116 /* When the edge is already inlined, we just need to recurse into
2117 it in order to fully flatten the leaves. */
2118 if (!e->inline_failed)
2119 {
2120 flatten_function (callee, early);
2121 continue;
2122 }
2123
2124 /* Flatten attribute needs to be processed during late inlining. For
2125 extra code quality we however do flattening during early optimization,
2126 too. */
2127 if (!early
2128 ? !can_inline_edge_p (e, true)
2129 : !can_early_inline_edge_p (e))
2130 continue;
2131
2132 if (e->recursive_p ())
2133 {
2134 if (dump_file)
2135 fprintf (dump_file, "Not inlining: recursive call.\n");
2136 continue;
2137 }
2138
2139 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2140 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2141 {
2142 if (dump_file)
2143 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2144 continue;
2145 }
2146
2147 /* Inline the edge and flatten the inline clone. Avoid
2148 recursing through the original node if the node was cloned. */
2149 if (dump_file)
2150 fprintf (dump_file, " Inlining %s into %s.\n",
2151 xstrdup_for_dump (callee->name ()),
2152 xstrdup_for_dump (e->caller->name ()));
2153 orig_callee = callee;
2154 inline_call (e, true, NULL, NULL, false);
2155 if (e->callee != orig_callee)
2156 orig_callee->aux = (void *) node;
2157 flatten_function (e->callee, early);
2158 if (e->callee != orig_callee)
2159 orig_callee->aux = NULL;
2160 }
2161
2162 node->aux = NULL;
2163 if (!node->global.inlined_to)
2164 inline_update_overall_summary (node);
2165 }
2166
2167 /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
2168 DATA points to number of calls originally found so we avoid infinite
2169 recursion. */
2170
2171 static bool
2172 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2173 hash_set<cgraph_node *> *callers)
2174 {
2175 int *num_calls = (int *)data;
2176 bool callee_removed = false;
2177
2178 while (node->callers && !node->global.inlined_to)
2179 {
2180 struct cgraph_node *caller = node->callers->caller;
2181
2182 if (!can_inline_edge_p (node->callers, true)
2183 || node->callers->recursive_p ())
2184 {
2185 if (dump_file)
2186 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2187 *num_calls = 0;
2188 return false;
2189 }
2190
2191 if (dump_file)
2192 {
2193 fprintf (dump_file,
2194 "\nInlining %s size %i.\n",
2195 node->name (),
2196 inline_summaries->get (node)->size);
2197 fprintf (dump_file,
2198 " Called once from %s %i insns.\n",
2199 node->callers->caller->name (),
2200 inline_summaries->get (node->callers->caller)->size);
2201 }
2202
2203 /* Remember which callers we inlined to, delaying updating the
2204 overall summary. */
2205 callers->add (node->callers->caller);
2206 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2207 if (dump_file)
2208 fprintf (dump_file,
2209 " Inlined into %s which now has %i size\n",
2210 caller->name (),
2211 inline_summaries->get (caller)->size);
2212 if (!(*num_calls)--)
2213 {
2214 if (dump_file)
2215 fprintf (dump_file, "New calls found; giving up.\n");
2216 return callee_removed;
2217 }
2218 if (callee_removed)
2219 return true;
2220 }
2221 return false;
2222 }
2223
2224 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2225 update. */
2226
2227 static bool
2228 inline_to_all_callers (struct cgraph_node *node, void *data)
2229 {
2230 hash_set<cgraph_node *> callers;
2231 bool res = inline_to_all_callers_1 (node, data, &callers);
2232 /* Perform the delayed update of the overall summary of all callers
2233 processed. This avoids quadratic behavior in the cases where
2234 we have a lot of calls to the same function. */
2235 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2236 i != callers.end (); ++i)
2237 inline_update_overall_summary (*i);
2238 return res;
2239 }
2240
2241 /* Output overall time estimate. */
2242 static void
2243 dump_overall_stats (void)
2244 {
2245 int64_t sum_weighted = 0, sum = 0;
2246 struct cgraph_node *node;
2247
2248 FOR_EACH_DEFINED_FUNCTION (node)
2249 if (!node->global.inlined_to
2250 && !node->alias)
2251 {
2252 int time = inline_summaries->get (node)->time;
2253 sum += time;
2254 sum_weighted += time * node->count;
2255 }
2256 fprintf (dump_file, "Overall time estimate: "
2257 "%" PRId64" weighted by profile: "
2258 "%" PRId64"\n", sum, sum_weighted);
2259 }
2260
2261 /* Output some useful stats about inlining. */
2262
2263 static void
2264 dump_inline_stats (void)
2265 {
2266 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2267 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2268 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2269 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2270 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2271 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2272 int64_t reason[CIF_N_REASONS][3];
2273 int i;
2274 struct cgraph_node *node;
2275
2276 memset (reason, 0, sizeof (reason));
2277 FOR_EACH_DEFINED_FUNCTION (node)
2278 {
2279 struct cgraph_edge *e;
2280 for (e = node->callees; e; e = e->next_callee)
2281 {
2282 if (e->inline_failed)
2283 {
2284 reason[(int) e->inline_failed][0] += e->count;
2285 reason[(int) e->inline_failed][1] += e->frequency;
2286 reason[(int) e->inline_failed][2] ++;
2287 if (DECL_VIRTUAL_P (e->callee->decl))
2288 {
2289 if (e->indirect_inlining_edge)
2290 noninlined_virt_indir_cnt += e->count;
2291 else
2292 noninlined_virt_cnt += e->count;
2293 }
2294 else
2295 {
2296 if (e->indirect_inlining_edge)
2297 noninlined_indir_cnt += e->count;
2298 else
2299 noninlined_cnt += e->count;
2300 }
2301 }
2302 else
2303 {
2304 if (e->speculative)
2305 {
2306 if (DECL_VIRTUAL_P (e->callee->decl))
2307 inlined_speculative_ply += e->count;
2308 else
2309 inlined_speculative += e->count;
2310 }
2311 else if (DECL_VIRTUAL_P (e->callee->decl))
2312 {
2313 if (e->indirect_inlining_edge)
2314 inlined_virt_indir_cnt += e->count;
2315 else
2316 inlined_virt_cnt += e->count;
2317 }
2318 else
2319 {
2320 if (e->indirect_inlining_edge)
2321 inlined_indir_cnt += e->count;
2322 else
2323 inlined_cnt += e->count;
2324 }
2325 }
2326 }
2327 for (e = node->indirect_calls; e; e = e->next_callee)
2328 if (e->indirect_info->polymorphic)
2329 indirect_poly_cnt += e->count;
2330 else
2331 indirect_cnt += e->count;
2332 }
2333 if (max_count)
2334 {
2335 fprintf (dump_file,
2336 "Inlined %" PRId64 " + speculative "
2337 "%" PRId64 " + speculative polymorphic "
2338 "%" PRId64 " + previously indirect "
2339 "%" PRId64 " + virtual "
2340 "%" PRId64 " + virtual and previously indirect "
2341 "%" PRId64 "\n" "Not inlined "
2342 "%" PRId64 " + previously indirect "
2343 "%" PRId64 " + virtual "
2344 "%" PRId64 " + virtual and previously indirect "
2345 "%" PRId64 " + stil indirect "
2346 "%" PRId64 " + still indirect polymorphic "
2347 "%" PRId64 "\n", inlined_cnt,
2348 inlined_speculative, inlined_speculative_ply,
2349 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2350 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2351 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2352 fprintf (dump_file,
2353 "Removed speculations %" PRId64 "\n",
2354 spec_rem);
2355 }
2356 dump_overall_stats ();
2357 fprintf (dump_file, "\nWhy inlining failed?\n");
2358 for (i = 0; i < CIF_N_REASONS; i++)
2359 if (reason[i][2])
2360 fprintf (dump_file, "%-50s: %8i calls, %8i freq, %" PRId64" count\n",
2361 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2362 (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2363 }
2364
2365 /* Decide on the inlining. We do so in the topological order to avoid
2366 expenses on updating data structures. */
2367
2368 static unsigned int
2369 ipa_inline (void)
2370 {
2371 struct cgraph_node *node;
2372 int nnodes;
2373 struct cgraph_node **order;
2374 int i;
2375 int cold;
2376 bool remove_functions = false;
2377
2378 if (!optimize)
2379 return 0;
2380
2381 cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2382 percent_rec = (sreal) 1 / (sreal) 100;
2383
2384 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2385
2386 if (in_lto_p && optimize)
2387 ipa_update_after_lto_read ();
2388
2389 if (dump_file)
2390 dump_inline_summaries (dump_file);
2391
2392 nnodes = ipa_reverse_postorder (order);
2393
2394 FOR_EACH_FUNCTION (node)
2395 {
2396 node->aux = 0;
2397
2398 /* Recompute the default reasons for inlining because they may have
2399 changed during merging. */
2400 if (in_lto_p)
2401 {
2402 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2403 {
2404 gcc_assert (e->inline_failed);
2405 initialize_inline_failed (e);
2406 }
2407 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2408 initialize_inline_failed (e);
2409 }
2410 }
2411
2412 if (dump_file)
2413 fprintf (dump_file, "\nFlattening functions:\n");
2414
2415 /* In the first pass handle functions to be flattened. Do this with
2416 a priority so none of our later choices will make this impossible. */
2417 for (i = nnodes - 1; i >= 0; i--)
2418 {
2419 node = order[i];
2420
2421 /* Handle nodes to be flattened.
2422 Ideally when processing callees we stop inlining at the
2423 entry of cycles, possibly cloning that entry point and
2424 try to flatten itself turning it into a self-recursive
2425 function. */
2426 if (lookup_attribute ("flatten",
2427 DECL_ATTRIBUTES (node->decl)) != NULL)
2428 {
2429 if (dump_file)
2430 fprintf (dump_file,
2431 "Flattening %s\n", node->name ());
2432 flatten_function (node, false);
2433 }
2434 }
2435 if (dump_file)
2436 dump_overall_stats ();
2437
2438 inline_small_functions ();
2439
2440 gcc_assert (symtab->state == IPA_SSA);
2441 symtab->state = IPA_SSA_AFTER_INLINING;
2442 /* Do first after-inlining removal. We want to remove all "stale" extern
2443 inline functions and virtual functions so we really know what is called
2444 once. */
2445 symtab->remove_unreachable_nodes (dump_file);
2446 free (order);
2447
2448 /* Inline functions with a property that after inlining into all callers the
2449 code size will shrink because the out-of-line copy is eliminated.
2450 We do this regardless on the callee size as long as function growth limits
2451 are met. */
2452 if (dump_file)
2453 fprintf (dump_file,
2454 "\nDeciding on functions to be inlined into all callers and "
2455 "removing useless speculations:\n");
2456
2457 /* Inlining one function called once has good chance of preventing
2458 inlining other function into the same callee. Ideally we should
2459 work in priority order, but probably inlining hot functions first
2460 is good cut without the extra pain of maintaining the queue.
2461
2462 ??? this is not really fitting the bill perfectly: inlining function
2463 into callee often leads to better optimization of callee due to
2464 increased context for optimization.
2465 For example if main() function calls a function that outputs help
2466 and then function that does the main optmization, we should inline
2467 the second with priority even if both calls are cold by themselves.
2468
2469 We probably want to implement new predicate replacing our use of
2470 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2471 to be hot. */
2472 for (cold = 0; cold <= 1; cold ++)
2473 {
2474 FOR_EACH_DEFINED_FUNCTION (node)
2475 {
2476 struct cgraph_edge *edge, *next;
2477 bool update=false;
2478
2479 for (edge = node->callees; edge; edge = next)
2480 {
2481 next = edge->next_callee;
2482 if (edge->speculative && !speculation_useful_p (edge, false))
2483 {
2484 edge->resolve_speculation ();
2485 spec_rem += edge->count;
2486 update = true;
2487 remove_functions = true;
2488 }
2489 }
2490 if (update)
2491 {
2492 struct cgraph_node *where = node->global.inlined_to
2493 ? node->global.inlined_to : node;
2494 reset_edge_caches (where);
2495 inline_update_overall_summary (where);
2496 }
2497 if (want_inline_function_to_all_callers_p (node, cold))
2498 {
2499 int num_calls = 0;
2500 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2501 true);
2502 while (node->call_for_symbol_and_aliases
2503 (inline_to_all_callers, &num_calls, true))
2504 ;
2505 remove_functions = true;
2506 }
2507 }
2508 }
2509
2510 /* Free ipa-prop structures if they are no longer needed. */
2511 if (optimize)
2512 ipa_free_all_structures_after_iinln ();
2513
2514 if (dump_file)
2515 {
2516 fprintf (dump_file,
2517 "\nInlined %i calls, eliminated %i functions\n\n",
2518 ncalls_inlined, nfunctions_inlined);
2519 dump_inline_stats ();
2520 }
2521
2522 if (dump_file)
2523 dump_inline_summaries (dump_file);
2524 /* In WPA we use inline summaries for partitioning process. */
2525 if (!flag_wpa)
2526 inline_free_summary ();
2527 return remove_functions ? TODO_remove_functions : 0;
2528 }
2529
2530 /* Inline always-inline function calls in NODE. */
2531
2532 static bool
2533 inline_always_inline_functions (struct cgraph_node *node)
2534 {
2535 struct cgraph_edge *e;
2536 bool inlined = false;
2537
2538 for (e = node->callees; e; e = e->next_callee)
2539 {
2540 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2541 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2542 continue;
2543
2544 if (e->recursive_p ())
2545 {
2546 if (dump_file)
2547 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2548 e->callee->name ());
2549 e->inline_failed = CIF_RECURSIVE_INLINING;
2550 continue;
2551 }
2552
2553 if (!can_early_inline_edge_p (e))
2554 {
2555 /* Set inlined to true if the callee is marked "always_inline" but
2556 is not inlinable. This will allow flagging an error later in
2557 expand_call_inline in tree-inline.c. */
2558 if (lookup_attribute ("always_inline",
2559 DECL_ATTRIBUTES (callee->decl)) != NULL)
2560 inlined = true;
2561 continue;
2562 }
2563
2564 if (dump_file)
2565 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2566 xstrdup_for_dump (e->callee->name ()),
2567 xstrdup_for_dump (e->caller->name ()));
2568 inline_call (e, true, NULL, NULL, false);
2569 inlined = true;
2570 }
2571 if (inlined)
2572 inline_update_overall_summary (node);
2573
2574 return inlined;
2575 }
2576
2577 /* Decide on the inlining. We do so in the topological order to avoid
2578 expenses on updating data structures. */
2579
2580 static bool
2581 early_inline_small_functions (struct cgraph_node *node)
2582 {
2583 struct cgraph_edge *e;
2584 bool inlined = false;
2585
2586 for (e = node->callees; e; e = e->next_callee)
2587 {
2588 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2589 if (!inline_summaries->get (callee)->inlinable
2590 || !e->inline_failed)
2591 continue;
2592
2593 /* Do not consider functions not declared inline. */
2594 if (!DECL_DECLARED_INLINE_P (callee->decl)
2595 && !opt_for_fn (node->decl, flag_inline_small_functions)
2596 && !opt_for_fn (node->decl, flag_inline_functions))
2597 continue;
2598
2599 if (dump_file)
2600 fprintf (dump_file, "Considering inline candidate %s.\n",
2601 callee->name ());
2602
2603 if (!can_early_inline_edge_p (e))
2604 continue;
2605
2606 if (e->recursive_p ())
2607 {
2608 if (dump_file)
2609 fprintf (dump_file, " Not inlining: recursive call.\n");
2610 continue;
2611 }
2612
2613 if (!want_early_inline_function_p (e))
2614 continue;
2615
2616 if (dump_file)
2617 fprintf (dump_file, " Inlining %s into %s.\n",
2618 xstrdup_for_dump (callee->name ()),
2619 xstrdup_for_dump (e->caller->name ()));
2620 inline_call (e, true, NULL, NULL, false);
2621 inlined = true;
2622 }
2623
2624 if (inlined)
2625 inline_update_overall_summary (node);
2626
2627 return inlined;
2628 }
2629
2630 unsigned int
2631 early_inliner (function *fun)
2632 {
2633 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2634 struct cgraph_edge *edge;
2635 unsigned int todo = 0;
2636 int iterations = 0;
2637 bool inlined = false;
2638
2639 if (seen_error ())
2640 return 0;
2641
2642 /* Do nothing if datastructures for ipa-inliner are already computed. This
2643 happens when some pass decides to construct new function and
2644 cgraph_add_new_function calls lowering passes and early optimization on
2645 it. This may confuse ourself when early inliner decide to inline call to
2646 function clone, because function clones don't have parameter list in
2647 ipa-prop matching their signature. */
2648 if (ipa_node_params_sum)
2649 return 0;
2650
2651 if (flag_checking)
2652 node->verify ();
2653 node->remove_all_references ();
2654
2655 /* Rebuild this reference because it dosn't depend on
2656 function's body and it's required to pass cgraph_node
2657 verification. */
2658 if (node->instrumented_version
2659 && !node->instrumentation_clone)
2660 node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2661
2662 /* Even when not optimizing or not inlining inline always-inline
2663 functions. */
2664 inlined = inline_always_inline_functions (node);
2665
2666 if (!optimize
2667 || flag_no_inline
2668 || !flag_early_inlining
2669 /* Never inline regular functions into always-inline functions
2670 during incremental inlining. This sucks as functions calling
2671 always inline functions will get less optimized, but at the
2672 same time inlining of functions calling always inline
2673 function into an always inline function might introduce
2674 cycles of edges to be always inlined in the callgraph.
2675
2676 We might want to be smarter and just avoid this type of inlining. */
2677 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2678 && lookup_attribute ("always_inline",
2679 DECL_ATTRIBUTES (node->decl))))
2680 ;
2681 else if (lookup_attribute ("flatten",
2682 DECL_ATTRIBUTES (node->decl)) != NULL)
2683 {
2684 /* When the function is marked to be flattened, recursively inline
2685 all calls in it. */
2686 if (dump_file)
2687 fprintf (dump_file,
2688 "Flattening %s\n", node->name ());
2689 flatten_function (node, true);
2690 inlined = true;
2691 }
2692 else
2693 {
2694 /* If some always_inline functions was inlined, apply the changes.
2695 This way we will not account always inline into growth limits and
2696 moreover we will inline calls from always inlines that we skipped
2697 previously because of conditional above. */
2698 if (inlined)
2699 {
2700 timevar_push (TV_INTEGRATION);
2701 todo |= optimize_inline_calls (current_function_decl);
2702 /* optimize_inline_calls call above might have introduced new
2703 statements that don't have inline parameters computed. */
2704 for (edge = node->callees; edge; edge = edge->next_callee)
2705 {
2706 if (inline_edge_summary_vec.length () > (unsigned) edge->uid)
2707 {
2708 struct inline_edge_summary *es = inline_edge_summary (edge);
2709 es->call_stmt_size
2710 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2711 es->call_stmt_time
2712 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2713 }
2714 }
2715 inline_update_overall_summary (node);
2716 inlined = false;
2717 timevar_pop (TV_INTEGRATION);
2718 }
2719 /* We iterate incremental inlining to get trivial cases of indirect
2720 inlining. */
2721 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2722 && early_inline_small_functions (node))
2723 {
2724 timevar_push (TV_INTEGRATION);
2725 todo |= optimize_inline_calls (current_function_decl);
2726
2727 /* Technically we ought to recompute inline parameters so the new
2728 iteration of early inliner works as expected. We however have
2729 values approximately right and thus we only need to update edge
2730 info that might be cleared out for newly discovered edges. */
2731 for (edge = node->callees; edge; edge = edge->next_callee)
2732 {
2733 /* We have no summary for new bound store calls yet. */
2734 if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2735 {
2736 struct inline_edge_summary *es = inline_edge_summary (edge);
2737 es->call_stmt_size
2738 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2739 es->call_stmt_time
2740 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2741 }
2742 if (edge->callee->decl
2743 && !gimple_check_call_matching_types (
2744 edge->call_stmt, edge->callee->decl, false))
2745 {
2746 edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2747 edge->call_stmt_cannot_inline_p = true;
2748 }
2749 }
2750 if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2751 inline_update_overall_summary (node);
2752 timevar_pop (TV_INTEGRATION);
2753 iterations++;
2754 inlined = false;
2755 }
2756 if (dump_file)
2757 fprintf (dump_file, "Iterations: %i\n", iterations);
2758 }
2759
2760 if (inlined)
2761 {
2762 timevar_push (TV_INTEGRATION);
2763 todo |= optimize_inline_calls (current_function_decl);
2764 timevar_pop (TV_INTEGRATION);
2765 }
2766
2767 fun->always_inline_functions_inlined = true;
2768
2769 return todo;
2770 }
2771
2772 /* Do inlining of small functions. Doing so early helps profiling and other
2773 passes to be somewhat more effective and avoids some code duplication in
2774 later real inlining pass for testcases with very many function calls. */
2775
2776 namespace {
2777
2778 const pass_data pass_data_early_inline =
2779 {
2780 GIMPLE_PASS, /* type */
2781 "einline", /* name */
2782 OPTGROUP_INLINE, /* optinfo_flags */
2783 TV_EARLY_INLINING, /* tv_id */
2784 PROP_ssa, /* properties_required */
2785 0, /* properties_provided */
2786 0, /* properties_destroyed */
2787 0, /* todo_flags_start */
2788 0, /* todo_flags_finish */
2789 };
2790
2791 class pass_early_inline : public gimple_opt_pass
2792 {
2793 public:
2794 pass_early_inline (gcc::context *ctxt)
2795 : gimple_opt_pass (pass_data_early_inline, ctxt)
2796 {}
2797
2798 /* opt_pass methods: */
2799 virtual unsigned int execute (function *);
2800
2801 }; // class pass_early_inline
2802
2803 unsigned int
2804 pass_early_inline::execute (function *fun)
2805 {
2806 return early_inliner (fun);
2807 }
2808
2809 } // anon namespace
2810
2811 gimple_opt_pass *
2812 make_pass_early_inline (gcc::context *ctxt)
2813 {
2814 return new pass_early_inline (ctxt);
2815 }
2816
2817 namespace {
2818
2819 const pass_data pass_data_ipa_inline =
2820 {
2821 IPA_PASS, /* type */
2822 "inline", /* name */
2823 OPTGROUP_INLINE, /* optinfo_flags */
2824 TV_IPA_INLINING, /* tv_id */
2825 0, /* properties_required */
2826 0, /* properties_provided */
2827 0, /* properties_destroyed */
2828 0, /* todo_flags_start */
2829 ( TODO_dump_symtab ), /* todo_flags_finish */
2830 };
2831
2832 class pass_ipa_inline : public ipa_opt_pass_d
2833 {
2834 public:
2835 pass_ipa_inline (gcc::context *ctxt)
2836 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2837 inline_generate_summary, /* generate_summary */
2838 inline_write_summary, /* write_summary */
2839 inline_read_summary, /* read_summary */
2840 NULL, /* write_optimization_summary */
2841 NULL, /* read_optimization_summary */
2842 NULL, /* stmt_fixup */
2843 0, /* function_transform_todo_flags_start */
2844 inline_transform, /* function_transform */
2845 NULL) /* variable_transform */
2846 {}
2847
2848 /* opt_pass methods: */
2849 virtual unsigned int execute (function *) { return ipa_inline (); }
2850
2851 }; // class pass_ipa_inline
2852
2853 } // anon namespace
2854
2855 ipa_opt_pass_d *
2856 make_pass_ipa_inline (gcc::context *ctxt)
2857 {
2858 return new pass_ipa_inline (ctxt);
2859 }