gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Inlining decision heuristics
22
23 The implementation of the inliner is organized as follows:
24
25 inlining heuristics limits
26
27 can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by the user (allowed function growth, stack
29 growth and so on).
30
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
35
36 inlining heuristics
37
38 The inliner itself is split into two passes:
39
40 pass_early_inlining
41
42 Simple local inlining pass inlining callees into the current function.
43 This pass makes no use of whole unit analysis and thus it can make only
44 very simple decisions based on local properties.
45
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner were already optimized
50 and the results of early inlining add a lot of optimization opportunities
51 for the local optimizations.
52
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto inline functions, inlining for size and
55 flattening.
56
57 The main strength of the pass is the ability to eliminate the abstraction
58 penalty in C++ code (via a combination of inlining and early
59 optimization) and thus improve the quality of analysis done by the real
60 IPA optimizers.
61
62 Because of the lack of whole unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It however does very simple
64 speculative inlining allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
67
68 pass_ipa_inline
69
70 This is the real inliner able to handle inlining with whole program
71 knowledge. It performs the following steps:
72
73 1) Inlining of small functions (sketched below). This is implemented by a
74 greedy algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as the inline limits allow doing so.
76
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so, a
79 special purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
81
82 2) Unreachable functions are removed from the callgraph. Inlining leads
83 to devirtualization and other modifications of the callgraph, so functions
84 may become unreachable during the process. Also functions declared as
85 extern inline or virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
87
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction of code size by eliminating
90 the need for an offline copy of the function. */
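/* Illustrative only: a rough sketch of the greedy small function inlining
   loop described in step 1, simplified from inline_small_functions below
   (caches, speculation handling and recursive inlining omitted):

     heap = empty fibheap keyed by edge_badness;
     for each call edge E with can_inline_edge_p (E)
                            && want_inline_small_function_p (E):
       insert E into heap;
     while (heap is not empty)
       {
         E = extract edge with minimal badness;
         if (overall_size + estimate_edge_growth (E) <= compute_max_insns ())
           {
             inline_call (E);
             recompute keys of caller and callee edges affected by E;
           }
       }  */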
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "tm.h"
96 #include "tree.h"
97 #include "tree-inline.h"
98 #include "langhooks.h"
99 #include "flags.h"
100 #include "diagnostic.h"
101 #include "gimple-pretty-print.h"
102 #include "params.h"
103 #include "fibheap.h"
104 #include "intl.h"
105 #include "tree-pass.h"
106 #include "coverage.h"
107 #include "ggc.h"
108 #include "rtl.h"
109 #include "bitmap.h"
110 #include "gimple.h"
111 #include "gimple-ssa.h"
112 #include "ipa-prop.h"
113 #include "except.h"
114 #include "target.h"
115 #include "ipa-inline.h"
116 #include "ipa-utils.h"
117 #include "sreal.h"
118
119 /* Statistics we collect about inlining algorithm. */
120 static int overall_size;
121 static gcov_type max_count;
122 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
123
124 /* Return false when inlining edge E would lead to violating
125 limits on function unit growth or stack usage growth.
126
127 The relative function body growth limit is present generally
128 to avoid problems with the non-linear behavior of the compiler.
129 To allow inlining huge functions into a tiny wrapper, the limit
130 is always based on the bigger of the two functions considered.
131
132 For stack growth limits we always base the limit on the stack usage
133 of the caller. We want to prevent applications from segfaulting
134 on stack overflow when functions with huge stack frames get
135 inlined. */
136
137 static bool
138 caller_growth_limits (struct cgraph_edge *e)
139 {
140 struct cgraph_node *to = e->caller;
141 struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
142 int newsize;
143 int limit = 0;
144 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
145 struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
146
147 /* Look for the function e->caller is inlined into. While doing
148 so, work out the largest function body on the way. As
149 described above, we want to base our function growth
150 limits on that, not on the self size of the
151 outer function and not on the self size of the inline code
152 we immediately inline into. This is the most relaxed
153 interpretation of the rule "do not grow large functions
154 too much in order to prevent the compiler from exploding". */
155 while (true)
156 {
157 info = inline_summary (to);
158 if (limit < info->self_size)
159 limit = info->self_size;
160 if (stack_size_limit < info->estimated_self_stack_size)
161 stack_size_limit = info->estimated_self_stack_size;
162 if (to->global.inlined_to)
163 to = to->callers->caller;
164 else
165 break;
166 }
167
168 what_info = inline_summary (what);
169
170 if (limit < what_info->self_size)
171 limit = what_info->self_size;
172
173 limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
174
175 /* Check the size after inlining against the function limits. But allow
176 the function to shrink if it went over the limits by forced inlining. */
177 newsize = estimate_size_after_inlining (to, e);
178 if (newsize >= info->size
179 && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
180 && newsize > limit)
181 {
182 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
183 return false;
184 }
185
186 if (!what_info->estimated_stack_size)
187 return true;
188
189 /* FIXME: The stack size limit often prevents inlining in Fortran programs
190 due to large I/O data structures used by the Fortran front-end.
191 We ought to ignore this limit when we know that the edge is executed
192 on every invocation of the caller (i.e. its call statement dominates the
193 exit block). We do not track this information, yet. */
194 stack_size_limit += ((gcov_type)stack_size_limit
195 * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
196
197 inlined_stack = (outer_info->stack_frame_offset
198 + outer_info->estimated_self_stack_size
199 + what_info->estimated_stack_size);
200 /* Check new stack consumption with stack consumption at the place
201 stack is used. */
202 if (inlined_stack > stack_size_limit
203 /* If the function already has large stack usage from a sibling
204 inline call, we can inline, too.
205 This bit overoptimistically assumes that we are good at stack
206 packing. */
207 && inlined_stack > info->estimated_stack_size
208 && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
209 {
210 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
211 return false;
212 }
213 return true;
214 }
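/* Worked example of the limits above, using purely illustrative parameter
   values --param large-function-growth=100 and --param large-function-insns=2700
   (example settings only): if the largest self_size seen on the inline chain
   and the callee is 3000, then limit = 3000 + 3000 * 100 / 100 = 6000.  An
   inlining that would bring the caller's estimated size to 6500 insns
   (>= its current size, > 2700 and > 6000) is rejected with
   CIF_LARGE_FUNCTION_GROWTH_LIMIT, while one that stays at 5800 passes this
   particular check.  */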
215
216 /* Dump info about why inlining has failed. */
217
218 static void
219 report_inline_failed_reason (struct cgraph_edge *e)
220 {
221 if (dump_file)
222 {
223 fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n",
224 xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
225 xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
226 cgraph_inline_failed_string (e->inline_failed));
227 }
228 }
229
230 /* Decide if we can inline the edge and possibly update
231 inline_failed reason.
232 We check whether inlining is possible at all and whether
233 caller growth limits allow doing so.
234
235 If REPORT is true, output the reason to the dump file.
236
237 If DISREGARD_LIMITS is true, ignore size limits. */
238
239 static bool
240 can_inline_edge_p (struct cgraph_edge *e, bool report,
241 bool disregard_limits = false)
242 {
243 bool inlinable = true;
244 enum availability avail;
245 struct cgraph_node *callee
246 = cgraph_function_or_thunk_node (e->callee, &avail);
247 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->symbol.decl);
248 tree callee_tree
249 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->symbol.decl) : NULL;
250 struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->symbol.decl);
251 struct function *callee_cfun
252 = callee ? DECL_STRUCT_FUNCTION (callee->symbol.decl) : NULL;
253
254 if (!caller_cfun && e->caller->clone_of)
255 caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->symbol.decl);
256
257 if (!callee_cfun && callee && callee->clone_of)
258 callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->symbol.decl);
259
260 gcc_assert (e->inline_failed);
261
262 if (!callee || !callee->symbol.definition)
263 {
264 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
265 inlinable = false;
266 }
267 else if (!inline_summary (callee)->inlinable)
268 {
269 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
270 inlinable = false;
271 }
272 else if (avail <= AVAIL_OVERWRITABLE)
273 {
274 e->inline_failed = CIF_OVERWRITABLE;
275 inlinable = false;
276 }
277 else if (e->call_stmt_cannot_inline_p)
278 {
279 if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
280 e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
281 inlinable = false;
282 }
283 /* Don't inline if the functions have different EH personalities. */
284 else if (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
285 && DECL_FUNCTION_PERSONALITY (callee->symbol.decl)
286 && (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
287 != DECL_FUNCTION_PERSONALITY (callee->symbol.decl)))
288 {
289 e->inline_failed = CIF_EH_PERSONALITY;
290 inlinable = false;
291 }
292 /* TM pure functions should not be inlined into non-TM_pure
293 functions. */
294 else if (is_tm_pure (callee->symbol.decl)
295 && !is_tm_pure (e->caller->symbol.decl))
296 {
297 e->inline_failed = CIF_UNSPECIFIED;
298 inlinable = false;
299 }
300 /* Don't inline if the callee can throw non-call exceptions but the
301 caller cannot.
302 FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
303 Move the flag into cgraph node or mirror it in the inline summary. */
304 else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
305 && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
306 {
307 e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
308 inlinable = false;
309 }
310 /* Check compatibility of target optimization options. */
311 else if (!targetm.target_option.can_inline_p (e->caller->symbol.decl,
312 callee->symbol.decl))
313 {
314 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
315 inlinable = false;
316 }
317 /* Check if caller growth allows the inlining. */
318 else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
319 && !disregard_limits
320 && !lookup_attribute ("flatten",
321 DECL_ATTRIBUTES
322 (e->caller->global.inlined_to
323 ? e->caller->global.inlined_to->symbol.decl
324 : e->caller->symbol.decl))
325 && !caller_growth_limits (e))
326 inlinable = false;
327 /* Don't inline a function with a higher optimization level than the
328 caller. FIXME: this is really just the tip of the iceberg of handling
329 the optimization attribute. */
330 else if (caller_tree != callee_tree)
331 {
332 struct cl_optimization *caller_opt
333 = TREE_OPTIMIZATION ((caller_tree)
334 ? caller_tree
335 : optimization_default_node);
336
337 struct cl_optimization *callee_opt
338 = TREE_OPTIMIZATION ((callee_tree)
339 ? callee_tree
340 : optimization_default_node);
341
342 if (((caller_opt->x_optimize > callee_opt->x_optimize)
343 || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
344 /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */
345 && !DECL_DISREGARD_INLINE_LIMITS (e->callee->symbol.decl))
346 {
347 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
348 inlinable = false;
349 }
350 }
351
352 if (!inlinable && report)
353 report_inline_failed_reason (e);
354 return inlinable;
355 }
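/* Illustration of the optimization mismatch check above: inlining a callee
   compiled at -O1 into a caller compiled at -O2, or mixing -Os and non -Os
   code, fails with CIF_OPTIMIZATION_MISMATCH unless the callee is declared
   always_inline (DECL_DISREGARD_INLINE_LIMITS).  */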
356
357
358 /* Return true if the edge E is inlinable during early inlining. */
359
360 static bool
361 can_early_inline_edge_p (struct cgraph_edge *e)
362 {
363 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
364 NULL);
365 /* The early inliner might get called at WPA stage when an IPA pass adds a
366 new function. In this case we cannot really do any early inlining
367 because function bodies are missing. */
368 if (!gimple_has_body_p (callee->symbol.decl))
369 {
370 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
371 return false;
372 }
373 /* In the early inliner some of the callees may not be in SSA form yet
374 (i.e. the callgraph is cyclic and we have not processed
375 the callee with the early inliner yet). We don't have a CIF code for this
376 case; later we will re-do the decision in the real inliner. */
377 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->symbol.decl))
378 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
379 {
380 if (dump_file)
381 fprintf (dump_file, " edge not inlinable: not in SSA form\n");
382 return false;
383 }
384 if (!can_inline_edge_p (e, true))
385 return false;
386 return true;
387 }
388
389
390 /* Return number of calls in N. Ignore cheap builtins. */
391
392 static int
393 num_calls (struct cgraph_node *n)
394 {
395 struct cgraph_edge *e;
396 int num = 0;
397
398 for (e = n->callees; e; e = e->next_callee)
399 if (!is_inexpensive_builtin (e->callee->symbol.decl))
400 num++;
401 return num;
402 }
403
404
405 /* Return true if we are interested in inlining the small function early. */
406
407 static bool
408 want_early_inline_function_p (struct cgraph_edge *e)
409 {
410 bool want_inline = true;
411 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
412
413 if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
414 ;
415 else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
416 && !flag_inline_small_functions)
417 {
418 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
419 report_inline_failed_reason (e);
420 want_inline = false;
421 }
422 else
423 {
424 int growth = estimate_edge_growth (e);
425 int n;
426
427 if (growth <= 0)
428 ;
429 else if (!cgraph_maybe_hot_edge_p (e)
430 && growth > 0)
431 {
432 if (dump_file)
433 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
434 "call is cold and code would grow by %i\n",
435 xstrdup (cgraph_node_name (e->caller)),
436 e->caller->symbol.order,
437 xstrdup (cgraph_node_name (callee)), callee->symbol.order,
438 growth);
439 want_inline = false;
440 }
441 else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
442 {
443 if (dump_file)
444 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
445 "growth %i exceeds --param early-inlining-insns\n",
446 xstrdup (cgraph_node_name (e->caller)),
447 e->caller->symbol.order,
448 xstrdup (cgraph_node_name (callee)), callee->symbol.order,
449 growth);
450 want_inline = false;
451 }
452 else if ((n = num_calls (callee)) != 0
453 && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
454 {
455 if (dump_file)
456 fprintf (dump_file, " will not early inline: %s/%i->%s/%i, "
457 "growth %i exceeds --param early-inlining-insns "
458 "divided by number of calls\n",
459 xstrdup (cgraph_node_name (e->caller)),
460 e->caller->symbol.order,
461 xstrdup (cgraph_node_name (callee)), callee->symbol.order,
462 growth);
463 want_inline = false;
464 }
465 }
466 return want_inline;
467 }
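/* Worked example of the early inlining limits above, assuming for
   illustration --param early-inlining-insns=10: a hot callee whose inlining
   grows the caller by 4 insns and that itself contains two non-builtin calls
   is rejected, because 4 * (2 + 1) = 12 > 10; the same callee with a single
   remaining call is accepted, because 4 * (1 + 1) = 8 <= 10.  */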
468
469 /* Compute time of the edge->caller + edge->callee execution when inlining
470 does not happen. */
471
472 inline gcov_type
473 compute_uninlined_call_time (struct inline_summary *callee_info,
474 struct cgraph_edge *edge)
475 {
476 gcov_type uninlined_call_time =
477 RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
478 CGRAPH_FREQ_BASE);
479 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
480 ? edge->caller->global.inlined_to
481 : edge->caller)->time;
482 return uninlined_call_time + caller_time;
483 }
484
485 /* Same as compute_uninlined_call_time but compute the time when inlining
486 does happen. */
487
488 inline gcov_type
489 compute_inlined_call_time (struct cgraph_edge *edge,
490 int edge_time)
491 {
492 gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
493 ? edge->caller->global.inlined_to
494 : edge->caller)->time;
495 gcov_type time = (caller_time
496 + RDIV (((gcov_type) edge_time
497 - inline_edge_summary (edge)->call_stmt_time)
498 * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
499 /* Possibly one roundoff error, but watch for overflows. */
500 gcc_checking_assert (time >= INT_MIN / 2);
501 if (time < 0)
502 time = 0;
503 return time;
504 }
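/* In other words (illustrative numbers only, with CGRAPH_FREQ_BASE taken as
   1000 for the arithmetic): with caller time 1000, callee time 200, edge
   frequency equal to CGRAPH_FREQ_BASE (the call executes once per caller
   invocation), estimated edge time 150 and call statement time 10:
     uninlined: 1000 + 200 * 1000 / 1000        = 1200
     inlined:   1000 + (150 - 10) * 1000 / 1000 = 1140
   i.e. inlining is expected to save 60 time units per caller invocation.  */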
505
506 /* Return true if the speedup for inlining E is bigger than
507 PARAM_INLINE_MIN_SPEEDUP. */
508
509 static bool
510 big_speedup_p (struct cgraph_edge *e)
511 {
512 gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
513 e);
514 gcov_type inlined_time = compute_inlined_call_time (e,
515 estimate_edge_time (e));
516 if (time - inlined_time
517 > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
518 return true;
519 return false;
520 }
521
522 /* Return true if we are interested in inlining the small function.
523 When REPORT is true, report the reason to the dump file. */
524
525 static bool
526 want_inline_small_function_p (struct cgraph_edge *e, bool report)
527 {
528 bool want_inline = true;
529 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
530
531 if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
532 ;
533 else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
534 && !flag_inline_small_functions)
535 {
536 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
537 want_inline = false;
538 }
539 else
540 {
541 int growth = estimate_edge_growth (e);
542 inline_hints hints = estimate_edge_hints (e);
543 bool big_speedup = big_speedup_p (e);
544
545 if (growth <= 0)
546 ;
547 /* Apply the MAX_INLINE_INSNS_SINGLE limit. Do not do so when
548 hints suggest that inlining the given function is very profitable. */
549 else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
550 && growth >= MAX_INLINE_INSNS_SINGLE
551 && !big_speedup
552 && !(hints & (INLINE_HINT_indirect_call
553 | INLINE_HINT_loop_iterations
554 | INLINE_HINT_array_index
555 | INLINE_HINT_loop_stride)))
556 {
557 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
558 want_inline = false;
559 }
560 /* Before giving up based on the fact that the caller size will grow, allow
561 functions that are called a few times and where eliminating the offline
562 copy will lead to an overall code size reduction.
563 Not all of these will be handled by the subsequent inlining of functions
564 called once: in particular weak functions are not handled, nor are functions
565 that inline into multiple call sites but where most of the body is optimized out.
566 Finally we want to inline earlier to allow inlining of callbacks.
567
568 This is slightly wrong on the aggressive side: it is entirely possible
569 that a function is called many times in a context where inlining
570 reduces code size and a few times in a context where inlining increases
571 code size. The resulting growth estimate will be negative even if it
572 would make more sense to keep the offline copy and not inline into the
573 call sites that make the code size grow.
574
575 When badness orders the calls in a way that code reducing calls come
576 first, this situation is not a problem at all: after inlining all
577 "good" calls, we will realize that keeping the function around is
578 better. */
579 else if (growth <= MAX_INLINE_INSNS_SINGLE
580 /* Unlike for functions called once, we play unsafe with
581 COMDATs. We can allow that since we know the functions
582 in consideration are small (and thus the risk is small) and
583 moreover the growth estimates already account for the fact that COMDAT
584 functions may or may not disappear when eliminated from the
585 current unit. With good probability, making the aggressive
586 choice in all units is going to make the overall program
587 smaller.
588
589 Consequently we ask cgraph_can_remove_if_no_direct_calls_p
590 instead of
591 cgraph_will_be_removed_from_program_if_no_direct_calls. */
592 && !DECL_EXTERNAL (callee->symbol.decl)
593 && cgraph_can_remove_if_no_direct_calls_p (callee)
594 && estimate_growth (callee) <= 0)
595 ;
596 else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
597 && !flag_inline_functions)
598 {
599 e->inline_failed = CIF_NOT_DECLARED_INLINED;
600 want_inline = false;
601 }
602 /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
603 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
604 inlining the given function is very profitable. */
605 else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
606 && !big_speedup
607 && growth >= ((hints & (INLINE_HINT_indirect_call
608 | INLINE_HINT_loop_iterations
609 | INLINE_HINT_array_index
610 | INLINE_HINT_loop_stride))
611 ? MAX (MAX_INLINE_INSNS_AUTO,
612 MAX_INLINE_INSNS_SINGLE)
613 : MAX_INLINE_INSNS_AUTO))
614 {
615 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
616 want_inline = false;
617 }
618 /* If call is cold, do not inline when function body would grow. */
619 else if (!cgraph_maybe_hot_edge_p (e))
620 {
621 e->inline_failed = CIF_UNLIKELY_CALL;
622 want_inline = false;
623 }
624 }
625 if (!want_inline && report)
626 report_inline_failed_reason (e);
627 return want_inline;
628 }
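/* Worked example of the two size limits above, with illustrative values
   --param max-inline-insns-single=400 and --param max-inline-insns-auto=40
   (example settings only): an inline-declared callee with estimated growth
   450 is refused with CIF_MAX_INLINE_INSNS_SINGLE_LIMIT unless a big speedup
   or one of the loop/indirect-call hints applies; a non-inline-declared
   callee with growth 60 is refused with CIF_MAX_INLINE_INSNS_AUTO_LIMIT
   under the same conditions, because the hints upgrade its limit to
   max-inline-insns-single.  */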
629
630 /* EDGE is a self recursive edge.
631 We handle two cases - when function A is inlined into itself
632 or when function A is being inlined into another inlined copy of function
633 A within function B.
634
635 In the first case OUTER_NODE points to the toplevel copy of A, while
636 in the second case OUTER_NODE points to the outermost copy of A in B.
637
638 In both cases we want to be extra selective since
639 inlining the call will just cause new recursive calls to appear. */
640
641 static bool
642 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
643 struct cgraph_node *outer_node,
644 bool peeling,
645 int depth)
646 {
647 char const *reason = NULL;
648 bool want_inline = true;
649 int caller_freq = CGRAPH_FREQ_BASE;
650 int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
651
652 if (DECL_DECLARED_INLINE_P (edge->caller->symbol.decl))
653 max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
654
655 if (!cgraph_maybe_hot_edge_p (edge))
656 {
657 reason = "recursive call is cold";
658 want_inline = false;
659 }
660 else if (max_count && !outer_node->count)
661 {
662 reason = "not executed in profile";
663 want_inline = false;
664 }
665 else if (depth > max_depth)
666 {
667 reason = "--param max-inline-recursive-depth exceeded.";
668 want_inline = false;
669 }
670
671 if (outer_node->global.inlined_to)
672 caller_freq = outer_node->callers->frequency;
673
674 if (!want_inline)
675 ;
676 /* Inlining a self recursive function into a copy of itself within another
677 function is a transformation similar to loop peeling.
678
679 Peeling is profitable if we can inline enough copies to make the probability
680 of an actual call to the self recursive function very small. Be sure that
681 the probability of recursion is small.
682
683 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
684 This way the expected number of recursions is at most max_depth. */
685 else if (peeling)
686 {
687 int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
688 / max_depth);
689 int i;
690 for (i = 1; i < depth; i++)
691 max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
692 if (max_count
693 && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
694 >= max_prob))
695 {
696 reason = "profile of recursive call is too large";
697 want_inline = false;
698 }
699 if (!max_count
700 && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
701 >= max_prob))
702 {
703 reason = "frequency of recursive call is too large";
704 want_inline = false;
705 }
706 }
707 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
708 recursion depth is large. We reduce function call overhead and increase the
709 chance that things fit in the hardware return predictor.
710
711 Recursive inlining might however increase the cost of stack frame setup,
712 actually slowing down functions whose recursion tree is wide rather than
713 deep.
714
715 Deciding reliably on when to do recursive inlining without profile feedback
716 is tricky. For now we disable recursive inlining when the probability of self
717 recursion is low.
718
719 Recursive inlining of a self recursive call within a loop also results in large
720 loop depths that generally optimize badly. We may want to throttle down inlining
721 in those cases. In particular this seems to happen in one of the libstdc++ rb tree
722 methods. */
723 else
724 {
725 if (max_count
726 && (edge->count * 100 / outer_node->count
727 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
728 {
729 reason = "profile of recursive call is too small";
730 want_inline = false;
731 }
732 else if (!max_count
733 && (edge->frequency * 100 / caller_freq
734 <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
735 {
736 reason = "frequency of recursive call is too small";
737 want_inline = false;
738 }
739 }
740 if (!want_inline && dump_file)
741 fprintf (dump_file, " not inlining recursively: %s\n", reason);
742 return want_inline;
743 }
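/* Worked example of the peeling bound above, taking CGRAPH_FREQ_BASE as 1000
   and max_depth as 8 for illustration: at depth 1,
   max_prob = 1000 - (1000 + 8 - 1) / 8 = 1000 - 125 = 875, i.e. peeling is
   allowed while at most 87.5% of the caller's invocations recurse; at depth 2
   the bound tightens to 875 * 875 / 1000 = 765.  */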
744
745 /* Return true when NODE has an uninlinable caller;
746 set HAS_HOT_CALL if it has a hot call.
747 Worker for cgraph_for_node_and_aliases. */
748
749 static bool
750 check_callers (struct cgraph_node *node, void *has_hot_call)
751 {
752 struct cgraph_edge *e;
753 for (e = node->callers; e; e = e->next_caller)
754 {
755 if (!can_inline_edge_p (e, true))
756 return true;
757 if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
758 *(bool *)has_hot_call = true;
759 }
760 return false;
761 }
762
763 /* If NODE has a caller, return true. */
764
765 static bool
766 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
767 {
768 if (node->callers)
769 return true;
770 return false;
771 }
772
773 /* Decide if inlining NODE would reduce unit size by eliminating
774 the offline copy of the function.
775 When COLD is true, cold calls are considered, too. */
776
777 static bool
778 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
779 {
780 struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
781 bool has_hot_call = false;
782
783 /* Does it have callers? */
784 if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
785 return false;
786 /* Already inlined? */
787 if (function->global.inlined_to)
788 return false;
789 if (cgraph_function_or_thunk_node (node, NULL) != node)
790 return false;
791 /* Inlining into all callers would increase size? */
792 if (estimate_growth (node) > 0)
793 return false;
794 /* All inlines must be possible. */
795 if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
796 return false;
797 if (!cold && !has_hot_call)
798 return false;
799 return true;
800 }
801
802 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
803
804 /* Return relative time improvement for inlining EDGE in range
805 1...RELATIVE_TIME_BENEFIT_RANGE */
806
807 static inline int
808 relative_time_benefit (struct inline_summary *callee_info,
809 struct cgraph_edge *edge,
810 int edge_time)
811 {
812 gcov_type relbenefit;
813 gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
814 gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
815
816 /* Inlining into extern inline function is not a win. */
817 if (DECL_EXTERNAL (edge->caller->global.inlined_to
818 ? edge->caller->global.inlined_to->symbol.decl
819 : edge->caller->symbol.decl))
820 return 1;
821
822 /* Watch overflows. */
823 gcc_checking_assert (uninlined_call_time >= 0);
824 gcc_checking_assert (inlined_call_time >= 0);
825 gcc_checking_assert (uninlined_call_time >= inlined_call_time);
826
827 /* Compute the relative time benefit, i.e. how much the call becomes faster.
828 ??? Perhaps computing how much the caller+callee together become faster
829 would lead to more realistic results. */
830 if (!uninlined_call_time)
831 uninlined_call_time = 1;
832 relbenefit =
833 RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
834 uninlined_call_time);
835 relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
836 gcc_checking_assert (relbenefit >= 0);
837 relbenefit = MAX (relbenefit, 1);
838 return relbenefit;
839 }
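/* Continuing the illustrative numbers used after compute_inlined_call_time:
   with uninlined_call_time == 1200 and inlined_call_time == 1140,
   relbenefit = (1200 - 1140) * RELATIVE_TIME_BENEFIT_RANGE / 1200
              = RELATIVE_TIME_BENEFIT_RANGE / 20,
   i.e. a 5% relative improvement of the combined caller + call time.  */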
840
841
842 /* A cost model driving the inlining heuristics in such a way that the edges
843 with the smallest badness are inlined first. After each inlining is performed
844 the costs of all caller edges of the affected nodes are recomputed so the
845 metric may accurately depend on values such as the number of inlinable callers
846 of the function or the function body size. */
847
848 static int
849 edge_badness (struct cgraph_edge *edge, bool dump)
850 {
851 gcov_type badness;
852 int growth, edge_time;
853 struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
854 NULL);
855 struct inline_summary *callee_info = inline_summary (callee);
856 inline_hints hints;
857
858 if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
859 return INT_MIN;
860
861 growth = estimate_edge_growth (edge);
862 edge_time = estimate_edge_time (edge);
863 hints = estimate_edge_hints (edge);
864 gcc_checking_assert (edge_time >= 0);
865 gcc_checking_assert (edge_time <= callee_info->time);
866 gcc_checking_assert (growth <= callee_info->size);
867
868 if (dump)
869 {
870 fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n",
871 xstrdup (cgraph_node_name (edge->caller)),
872 edge->caller->symbol.order,
873 xstrdup (cgraph_node_name (callee)),
874 edge->callee->symbol.order);
875 fprintf (dump_file, " size growth %i, time %i ",
876 growth,
877 edge_time);
878 dump_inline_hints (dump_file, hints);
879 if (big_speedup_p (edge))
880 fprintf (dump_file, " big_speedup");
881 fprintf (dump_file, "\n");
882 }
883
884 /* Always prefer inlining that saves code size. */
885 if (growth <= 0)
886 {
887 badness = INT_MIN / 2 + growth;
888 if (dump)
889 fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness,
890 growth);
891 }
892
893 /* When profiling is available, compute badness as:
894
895 relative_edge_count * relative_time_benefit
896 goodness = -------------------------------------------
897 growth_f_caller
898 badness = -goodness
899
900 The fraction is upside down, because for edge counts and time benefits
901 the bounds are known. Edge growth is essentially unlimited. */
902
903 else if (max_count)
904 {
905 sreal tmp, relbenefit_real, growth_real;
906 int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
907 /* Capping edge->count to max_count. edge->count can be larger than
908 max_count if an inline adds new edges which increase max_count
909 after max_count is computed. */
910 int edge_count = edge->count > max_count ? max_count : edge->count;
911
912 sreal_init (&relbenefit_real, relbenefit, 0);
913 sreal_init (&growth_real, growth, 0);
914
915 /* relative_edge_count. */
916 sreal_init (&tmp, edge_count, 0);
917 sreal_div (&tmp, &tmp, &max_count_real);
918
919 /* relative_time_benefit. */
920 sreal_mul (&tmp, &tmp, &relbenefit_real);
921 sreal_div (&tmp, &tmp, &max_relbenefit_real);
922
923 /* growth_f_caller. */
924 sreal_mul (&tmp, &tmp, &half_int_min_real);
925 sreal_div (&tmp, &tmp, &growth_real);
926
927 badness = -1 * sreal_to_int (&tmp);
928
929 if (dump)
930 {
931 fprintf (dump_file,
932 " %i (relative %f): profile info. Relative count %f%s"
933 " * Relative benefit %f\n",
934 (int) badness, (double) badness / INT_MIN,
935 (double) edge_count / max_count,
936 edge->count > max_count ? " (capped to max_count)" : "",
937 relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
938 }
939 }
940
941 /* When a function local profile is available, compute badness as:
942
943 relative_time_benefit
944 goodness = ---------------------------------
945 growth_of_caller * overall_growth
946
947 badness = - goodness
948
949 compensated by the inline hints.
950 */
951 else if (flag_guess_branch_prob)
952 {
953 badness = (relative_time_benefit (callee_info, edge, edge_time)
954 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
955 badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
956 gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
957 if ((hints & (INLINE_HINT_indirect_call
958 | INLINE_HINT_loop_iterations
959 | INLINE_HINT_array_index
960 | INLINE_HINT_loop_stride))
961 || callee_info->growth <= 0)
962 badness *= 8;
963 if (hints & (INLINE_HINT_same_scc))
964 badness /= 16;
965 else if (hints & (INLINE_HINT_in_scc))
966 badness /= 8;
967 else if (hints & (INLINE_HINT_cross_module))
968 badness /= 2;
969 gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
970 if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
971 badness *= 16;
972 if (dump)
973 {
974 fprintf (dump_file,
975 " %i: guessed profile. frequency %f,"
976 " benefit %f%%, time w/o inlining %i, time w inlining %i"
977 " overall growth %i (current) %i (original)\n",
978 (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
979 relative_time_benefit (callee_info, edge, edge_time) * 100.0
980 / RELATIVE_TIME_BENEFIT_RANGE,
981 (int)compute_uninlined_call_time (callee_info, edge),
982 (int)compute_inlined_call_time (edge, edge_time),
983 estimate_growth (callee),
984 callee_info->growth);
985 }
986 }
987 /* When a function local profile is not available or it does not give
988 useful information (i.e. frequency is zero), base the cost on
989 loop nest and overall size growth, so we optimize for the overall number
990 of functions fully inlined in the program. */
991 else
992 {
993 int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
994 badness = growth * 256;
995
996 /* Decrease badness if call is nested. */
997 if (badness > 0)
998 badness >>= nest;
999 else
1000 {
1001 badness <<= nest;
1002 }
1003 if (dump)
1004 fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness,
1005 nest);
1006 }
1007
1008 /* Ensure that we did not overflow in all the fixed point math above. */
1009 gcc_assert (badness >= INT_MIN);
1010 gcc_assert (badness <= INT_MAX - 1);
1011 /* Make recursive inlining happen always after other inlining is done. */
1012 if (cgraph_edge_recursive_p (edge))
1013 return badness + 1;
1014 else
1015 return badness;
1016 }
1017
1018 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1019 static inline void
1020 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1021 {
1022 int badness = edge_badness (edge, false);
1023 if (edge->aux)
1024 {
1025 fibnode_t n = (fibnode_t) edge->aux;
1026 gcc_checking_assert (n->data == edge);
1027
1028 /* fibheap_replace_key only decreases the keys.
1029 When we increase the key we do not update the heap
1030 and instead re-insert the element once it becomes
1031 the minimum of the heap. */
1032 if (badness < n->key)
1033 {
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1035 {
1036 fprintf (dump_file,
1037 " decreasing badness %s/%i -> %s/%i, %i to %i\n",
1038 xstrdup (cgraph_node_name (edge->caller)),
1039 edge->caller->symbol.order,
1040 xstrdup (cgraph_node_name (edge->callee)),
1041 edge->callee->symbol.order,
1042 (int)n->key,
1043 badness);
1044 }
1045 fibheap_replace_key (heap, n, badness);
1046 gcc_checking_assert (n->key == badness);
1047 }
1048 }
1049 else
1050 {
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 {
1053 fprintf (dump_file,
1054 " enqueuing call %s/%i -> %s/%i, badness %i\n",
1055 xstrdup (cgraph_node_name (edge->caller)),
1056 edge->caller->symbol.order,
1057 xstrdup (cgraph_node_name (edge->callee)),
1058 edge->callee->symbol.order,
1059 badness);
1060 }
1061 edge->aux = fibheap_insert (heap, badness, edge);
1062 }
1063 }
1064
1065
1066 /* NODE was inlined.
1067 All caller edges need to be reset because
1068 size estimates change. Similarly callees need a reset
1069 because a better context may be known. */
1070
1071 static void
1072 reset_edge_caches (struct cgraph_node *node)
1073 {
1074 struct cgraph_edge *edge;
1075 struct cgraph_edge *e = node->callees;
1076 struct cgraph_node *where = node;
1077 int i;
1078 struct ipa_ref *ref;
1079
1080 if (where->global.inlined_to)
1081 where = where->global.inlined_to;
1082
1083 /* WHERE's body size has changed, so the cached growth is invalid. */
1084 reset_node_growth_cache (where);
1085
1086 for (edge = where->callers; edge; edge = edge->next_caller)
1087 if (edge->inline_failed)
1088 reset_edge_growth_cache (edge);
1089 for (i = 0; ipa_ref_list_referring_iterate (&where->symbol.ref_list,
1090 i, ref); i++)
1091 if (ref->use == IPA_REF_ALIAS)
1092 reset_edge_caches (ipa_ref_referring_node (ref));
1093
1094 if (!e)
1095 return;
1096
1097 while (true)
1098 if (!e->inline_failed && e->callee->callees)
1099 e = e->callee->callees;
1100 else
1101 {
1102 if (e->inline_failed)
1103 reset_edge_growth_cache (e);
1104 if (e->next_callee)
1105 e = e->next_callee;
1106 else
1107 {
1108 do
1109 {
1110 if (e->caller == node)
1111 return;
1112 e = e->caller->callers;
1113 }
1114 while (!e->next_callee);
1115 e = e->next_callee;
1116 }
1117 }
1118 }
1119
1120 /* Recompute HEAP nodes for each caller of NODE.
1121 UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1122 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1123 it is inlinable. Otherwise check all edges. */
1124
1125 static void
1126 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1127 bitmap updated_nodes,
1128 struct cgraph_edge *check_inlinablity_for)
1129 {
1130 struct cgraph_edge *edge;
1131 int i;
1132 struct ipa_ref *ref;
1133
1134 if ((!node->symbol.alias && !inline_summary (node)->inlinable)
1135 || node->global.inlined_to)
1136 return;
1137 if (!bitmap_set_bit (updated_nodes, node->uid))
1138 return;
1139
1140 for (i = 0; ipa_ref_list_referring_iterate (&node->symbol.ref_list,
1141 i, ref); i++)
1142 if (ref->use == IPA_REF_ALIAS)
1143 {
1144 struct cgraph_node *alias = ipa_ref_referring_node (ref);
1145 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1146 }
1147
1148 for (edge = node->callers; edge; edge = edge->next_caller)
1149 if (edge->inline_failed)
1150 {
1151 if (!check_inlinablity_for
1152 || check_inlinablity_for == edge)
1153 {
1154 if (can_inline_edge_p (edge, false)
1155 && want_inline_small_function_p (edge, false))
1156 update_edge_key (heap, edge);
1157 else if (edge->aux)
1158 {
1159 report_inline_failed_reason (edge);
1160 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1161 edge->aux = NULL;
1162 }
1163 }
1164 else if (edge->aux)
1165 update_edge_key (heap, edge);
1166 }
1167 }
1168
1169 /* Recompute HEAP nodes for each uninlined call in NODE.
1170 This is used when we know that edge badnesses are only going to increase
1171 (we introduced a new call site) and thus all we need is to insert the newly
1172 created edges into the heap. */
1173
1174 static void
1175 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1176 bitmap updated_nodes)
1177 {
1178 struct cgraph_edge *e = node->callees;
1179
1180 if (!e)
1181 return;
1182 while (true)
1183 if (!e->inline_failed && e->callee->callees)
1184 e = e->callee->callees;
1185 else
1186 {
1187 enum availability avail;
1188 struct cgraph_node *callee;
1189 /* We do not reset the callee growth cache here. Since we added a new call,
1190 growth could only have increased and consequently the badness metric
1191 doesn't need updating. */
1192 if (e->inline_failed
1193 && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1194 && inline_summary (callee)->inlinable
1195 && avail >= AVAIL_AVAILABLE
1196 && !bitmap_bit_p (updated_nodes, callee->uid))
1197 {
1198 if (can_inline_edge_p (e, false)
1199 && want_inline_small_function_p (e, false))
1200 update_edge_key (heap, e);
1201 else if (e->aux)
1202 {
1203 report_inline_failed_reason (e);
1204 fibheap_delete_node (heap, (fibnode_t) e->aux);
1205 e->aux = NULL;
1206 }
1207 }
1208 if (e->next_callee)
1209 e = e->next_callee;
1210 else
1211 {
1212 do
1213 {
1214 if (e->caller == node)
1215 return;
1216 e = e->caller->callers;
1217 }
1218 while (!e->next_callee);
1219 e = e->next_callee;
1220 }
1221 }
1222 }
1223
1224 /* Enqueue all recursive calls from NODE into priority queue depending on
1225 how likely we want to recursively inline the call. */
1226
1227 static void
1228 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1229 fibheap_t heap)
1230 {
1231 struct cgraph_edge *e;
1232 enum availability avail;
1233
1234 for (e = where->callees; e; e = e->next_callee)
1235 if (e->callee == node
1236 || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1237 && avail > AVAIL_OVERWRITABLE))
1238 {
1239 /* When profile feedback is available, prioritize by expected number
1240 of calls. */
1241 fibheap_insert (heap,
1242 !max_count ? -e->frequency
1243 : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1244 e);
1245 }
1246 for (e = where->callees; e; e = e->next_callee)
1247 if (!e->inline_failed)
1248 lookup_recursive_calls (node, e->callee, heap);
1249 }
1250
1251 /* Decide on recursive inlining: in the case the function has recursive calls,
1252 inline until the body size reaches the given limit. If any new indirect edges
1253 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1254 is NULL. */
1255
1256 static bool
1257 recursive_inlining (struct cgraph_edge *edge,
1258 vec<cgraph_edge_p> *new_edges)
1259 {
1260 int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1261 fibheap_t heap;
1262 struct cgraph_node *node;
1263 struct cgraph_edge *e;
1264 struct cgraph_node *master_clone = NULL, *next;
1265 int depth = 0;
1266 int n = 0;
1267
1268 node = edge->caller;
1269 if (node->global.inlined_to)
1270 node = node->global.inlined_to;
1271
1272 if (DECL_DECLARED_INLINE_P (node->symbol.decl))
1273 limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1274
1275 /* Make sure that the function is small enough to be considered for inlining. */
1276 if (estimate_size_after_inlining (node, edge) >= limit)
1277 return false;
1278 heap = fibheap_new ();
1279 lookup_recursive_calls (node, node, heap);
1280 if (fibheap_empty (heap))
1281 {
1282 fibheap_delete (heap);
1283 return false;
1284 }
1285
1286 if (dump_file)
1287 fprintf (dump_file,
1288 " Performing recursive inlining on %s\n",
1289 cgraph_node_name (node));
1290
1291 /* Do the inlining and update the list of recursive calls during the process. */
1292 while (!fibheap_empty (heap))
1293 {
1294 struct cgraph_edge *curr
1295 = (struct cgraph_edge *) fibheap_extract_min (heap);
1296 struct cgraph_node *cnode, *dest = curr->callee;
1297
1298 if (!can_inline_edge_p (curr, true))
1299 continue;
1300
1301 /* MASTER_CLONE is produced in the case we have already started modifying
1302 the function. Be sure to redirect the edge to the original body before
1303 estimating growth, otherwise we will be seeing growth after inlining
1304 the already modified body. */
1305 if (master_clone)
1306 {
1307 cgraph_redirect_edge_callee (curr, master_clone);
1308 reset_edge_growth_cache (curr);
1309 }
1310
1311 if (estimate_size_after_inlining (node, curr) > limit)
1312 {
1313 cgraph_redirect_edge_callee (curr, dest);
1314 reset_edge_growth_cache (curr);
1315 break;
1316 }
1317
1318 depth = 1;
1319 for (cnode = curr->caller;
1320 cnode->global.inlined_to; cnode = cnode->callers->caller)
1321 if (node->symbol.decl
1322 == cgraph_function_or_thunk_node (curr->callee, NULL)->symbol.decl)
1323 depth++;
1324
1325 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1326 {
1327 cgraph_redirect_edge_callee (curr, dest);
1328 reset_edge_growth_cache (curr);
1329 continue;
1330 }
1331
1332 if (dump_file)
1333 {
1334 fprintf (dump_file,
1335 " Inlining call of depth %i", depth);
1336 if (node->count)
1337 {
1338 fprintf (dump_file, " called approx. %.2f times per call",
1339 (double)curr->count / node->count);
1340 }
1341 fprintf (dump_file, "\n");
1342 }
1343 if (!master_clone)
1344 {
1345 /* We need the original clone to copy around. */
1346 master_clone = cgraph_clone_node (node, node->symbol.decl,
1347 node->count, CGRAPH_FREQ_BASE,
1348 false, vNULL, true, NULL);
1349 for (e = master_clone->callees; e; e = e->next_callee)
1350 if (!e->inline_failed)
1351 clone_inlined_nodes (e, true, false, NULL);
1352 cgraph_redirect_edge_callee (curr, master_clone);
1353 reset_edge_growth_cache (curr);
1354 }
1355
1356 inline_call (curr, false, new_edges, &overall_size, true);
1357 lookup_recursive_calls (node, curr->callee, heap);
1358 n++;
1359 }
1360
1361 if (!fibheap_empty (heap) && dump_file)
1362 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1363 fibheap_delete (heap);
1364
1365 if (!master_clone)
1366 return false;
1367
1368 if (dump_file)
1369 fprintf (dump_file,
1370 "\n Inlined %i times, "
1371 "body grown from size %i to %i, time %i to %i\n", n,
1372 inline_summary (master_clone)->size, inline_summary (node)->size,
1373 inline_summary (master_clone)->time, inline_summary (node)->time);
1374
1375 /* Remove the master clone we used for inlining. We rely on the fact that
1376 clones inlined into the master clone get queued just before the master
1377 clone so we don't need recursion. */
1378 for (node = cgraph_first_function (); node != master_clone;
1379 node = next)
1380 {
1381 next = cgraph_next_function (node);
1382 if (node->global.inlined_to == master_clone)
1383 cgraph_remove_node (node);
1384 }
1385 cgraph_remove_node (master_clone);
1386 return true;
1387 }
1388
1389
1390 /* Given whole compilation unit estimate of INSNS, compute how large we can
1391 allow the unit to grow. */
1392
1393 static int
1394 compute_max_insns (int insns)
1395 {
1396 int max_insns = insns;
1397 if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1398 max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1399
1400 return ((HOST_WIDEST_INT) max_insns
1401 * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1402 }
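/* Worked example, assuming for illustration --param inline-unit-growth=30
   and --param large-unit-insns=10000: a unit estimated at 40000 insns may
   grow to 40000 * (100 + 30) / 100 = 52000 insns, while a 2000 insn unit is
   first rounded up to the large-unit threshold and may grow to
   10000 * 130 / 100 = 13000 insns.  */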
1403
1404
1405 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1406
1407 static void
1408 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1409 {
1410 while (new_edges.length () > 0)
1411 {
1412 struct cgraph_edge *edge = new_edges.pop ();
1413
1414 gcc_assert (!edge->aux);
1415 if (edge->inline_failed
1416 && can_inline_edge_p (edge, true)
1417 && want_inline_small_function_p (edge, true))
1418 edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1419 }
1420 }
1421
1422 /* Remove EDGE from the fibheap. */
1423
1424 static void
1425 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1426 {
1427 if (e->callee)
1428 reset_node_growth_cache (e->callee);
1429 if (e->aux)
1430 {
1431 fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1432 e->aux = NULL;
1433 }
1434 }
1435
1436 /* Return true if speculation of edge E seems useful.
1437 If ANTICIPATE_INLINING is true, be conservative and hope that E
1438 may get inlined. */
1439
1440 bool
1441 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1442 {
1443 enum availability avail;
1444 struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1445 struct cgraph_edge *direct, *indirect;
1446 struct ipa_ref *ref;
1447
1448 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1449
1450 if (!cgraph_maybe_hot_edge_p (e))
1451 return false;
1452
1453 /* See if IP optimizations found something potentially useful about the
1454 function. For now we look only for CONST/PURE flags. Almost everything
1455 else we propagate is useless. */
1456 if (avail >= AVAIL_AVAILABLE)
1457 {
1458 int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
1459 if (ecf_flags & ECF_CONST)
1460 {
1461 cgraph_speculative_call_info (e, direct, indirect, ref);
1462 if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1463 return true;
1464 }
1465 else if (ecf_flags & ECF_PURE)
1466 {
1467 cgraph_speculative_call_info (e, direct, indirect, ref);
1468 if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1469 return true;
1470 }
1471 }
1472 /* If we did not manage to inline the function nor redirect
1473 the call to an ipa-cp clone (recognized by the local flag being set),
1474 the speculation is probably pointless unless the hardware is missing an
1475 indirect call predictor. */
1476 if (!anticipate_inlining && e->inline_failed && !target->local.local)
1477 return false;
1478 /* For overwritable targets there is not much to do. */
1479 if (e->inline_failed && !can_inline_edge_p (e, false, true))
1480 return false;
1481 /* OK, speculation seems interesting. */
1482 return true;
1483 }
1484
1485 /* We know that EDGE is not going to be inlined.
1486 See if we can remove speculation. */
1487
1488 static void
1489 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1490 {
1491 if (edge->speculative && !speculation_useful_p (edge, false))
1492 {
1493 struct cgraph_node *node = edge->caller;
1494 struct cgraph_node *where = node->global.inlined_to
1495 ? node->global.inlined_to : node;
1496 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1497
1498 cgraph_resolve_speculation (edge, NULL);
1499 reset_edge_caches (where);
1500 inline_update_overall_summary (where);
1501 update_caller_keys (edge_heap, where,
1502 updated_nodes, NULL);
1503 update_callee_keys (edge_heap, where,
1504 updated_nodes);
1505 BITMAP_FREE (updated_nodes);
1506 }
1507 }
1508
1509 /* We use a greedy algorithm for inlining small functions:
1510 all inline candidates are put into a prioritized heap ordered by
1511 increasing badness.
1512
1513 The inlining of small functions is bounded by the unit growth parameters. */
1514
1515 static void
1516 inline_small_functions (void)
1517 {
1518 struct cgraph_node *node;
1519 struct cgraph_edge *edge;
1520 fibheap_t edge_heap = fibheap_new ();
1521 bitmap updated_nodes = BITMAP_ALLOC (NULL);
1522 int min_size, max_size;
1523 vec<cgraph_edge_p> new_indirect_edges = vNULL;
1524 int initial_size = 0;
1525 struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1526 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1527
1528 if (flag_indirect_inlining)
1529 new_indirect_edges.create (8);
1530
1531 edge_removal_hook_holder
1532 = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1533
1534 /* Compute overall unit size and other global parameters used by badness
1535 metrics. */
1536
1537 max_count = 0;
1538 ipa_reduced_postorder (order, true, true, NULL);
1539 free (order);
1540
1541 FOR_EACH_DEFINED_FUNCTION (node)
1542 if (!node->global.inlined_to)
1543 {
1544 if (cgraph_function_with_gimple_body_p (node)
1545 || node->thunk.thunk_p)
1546 {
1547 struct inline_summary *info = inline_summary (node);
1548 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->symbol.aux;
1549
1550 if (!DECL_EXTERNAL (node->symbol.decl))
1551 initial_size += info->size;
1552 info->growth = estimate_growth (node);
1553 if (dfs && dfs->next_cycle)
1554 {
1555 struct cgraph_node *n2;
1556 int id = dfs->scc_no + 1;
1557 for (n2 = node; n2;
1558 n2 = ((struct ipa_dfs_info *) node->symbol.aux)->next_cycle)
1559 {
1560 struct inline_summary *info2 = inline_summary (n2);
1561 if (info2->scc_no)
1562 break;
1563 info2->scc_no = id;
1564 }
1565 }
1566 }
1567
1568 for (edge = node->callers; edge; edge = edge->next_caller)
1569 if (max_count < edge->count)
1570 max_count = edge->count;
1571 }
1572 sreal_init (&max_count_real, max_count, 0);
1573 sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1574 sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1575 ipa_free_postorder_info ();
1576 initialize_growth_caches ();
1577
1578 if (dump_file)
1579 fprintf (dump_file,
1580 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1581 initial_size);
1582
1583 overall_size = initial_size;
1584 max_size = compute_max_insns (overall_size);
1585 min_size = overall_size;
1586
1587 /* Populate the heap with all edges we might inline. */
1588
1589 FOR_EACH_DEFINED_FUNCTION (node)
1590 {
1591 bool update = false;
1592 struct cgraph_edge *next;
1593
1594 if (dump_file)
1595 fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1596 cgraph_node_name (node), node->symbol.order);
1597
1598 for (edge = node->callees; edge; edge = next)
1599 {
1600 next = edge->next_callee;
1601 if (edge->inline_failed
1602 && !edge->aux
1603 && can_inline_edge_p (edge, true)
1604 && want_inline_small_function_p (edge, true)
1605 && edge->inline_failed)
1606 {
1607 gcc_assert (!edge->aux);
1608 update_edge_key (edge_heap, edge);
1609 }
1610 if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1611 {
1612 cgraph_resolve_speculation (edge, NULL);
1613 update = true;
1614 }
1615 }
1616 if (update)
1617 {
1618 struct cgraph_node *where = node->global.inlined_to
1619 ? node->global.inlined_to : node;
1620 inline_update_overall_summary (where);
1621 reset_node_growth_cache (where);
1622 reset_edge_caches (where);
1623 update_caller_keys (edge_heap, where,
1624 updated_nodes, NULL);
1625 bitmap_clear (updated_nodes);
1626 }
1627 }
1628
1629 gcc_assert (in_lto_p
1630 || !max_count
1631 || (profile_info && flag_branch_probabilities));
1632
1633 while (!fibheap_empty (edge_heap))
1634 {
1635 int old_size = overall_size;
1636 struct cgraph_node *where, *callee;
1637 int badness = fibheap_min_key (edge_heap);
1638 int current_badness;
1639 int cached_badness;
1640 int growth;
1641
1642 edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1643 gcc_assert (edge->aux);
1644 edge->aux = NULL;
1645 if (!edge->inline_failed)
1646 continue;
1647
1648 /* Be sure that the caches are maintained consistently.
1649 We cannot make this ENABLE_CHECKING-only because it causes different
1650 updates of the fibheap queue. */
1651 cached_badness = edge_badness (edge, false);
1652 reset_edge_growth_cache (edge);
1653 reset_node_growth_cache (edge->callee);
1654
1655 /* When updating the edge costs, we only decrease badness in the keys.
1656 Increases of badness are handled lazily; when we see a key with an
1657 out-of-date value on it, we re-insert it now. */
1658 current_badness = edge_badness (edge, false);
1659 gcc_assert (cached_badness == current_badness);
1660 gcc_assert (current_badness >= badness);
1661 if (current_badness != badness)
1662 {
1663 edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1664 continue;
1665 }
1666
1667 if (!can_inline_edge_p (edge, true))
1668 {
1669 resolve_noninline_speculation (edge_heap, edge);
1670 continue;
1671 }
1672
1673 callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1674 growth = estimate_edge_growth (edge);
1675 if (dump_file)
1676 {
1677 fprintf (dump_file,
1678 "\nConsidering %s/%i with %i size\n",
1679 cgraph_node_name (callee), callee->symbol.order,
1680 inline_summary (callee)->size);
1681 fprintf (dump_file,
1682 " to be inlined into %s/%i in %s:%i\n"
1683 " Estimated growth after inlined into all is %+i insns.\n"
1684 " Estimated badness is %i, frequency %.2f.\n",
1685 cgraph_node_name (edge->caller), edge->caller->symbol.order,
1686 flag_wpa ? "unknown"
1687 : gimple_filename ((const_gimple) edge->call_stmt),
1688 flag_wpa ? -1
1689 : gimple_lineno ((const_gimple) edge->call_stmt),
1690 estimate_growth (callee),
1691 badness,
1692 edge->frequency / (double)CGRAPH_FREQ_BASE);
1693 if (edge->count)
1694 fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1695 edge->count);
1696 if (dump_flags & TDF_DETAILS)
1697 edge_badness (edge, true);
1698 }
1699
1700 if (overall_size + growth > max_size
1701 && !DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
1702 {
1703 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1704 report_inline_failed_reason (edge);
1705 resolve_noninline_speculation (edge_heap, edge);
1706 continue;
1707 }
1708
1709 if (!want_inline_small_function_p (edge, true))
1710 {
1711 resolve_noninline_speculation (edge_heap, edge);
1712 continue;
1713 }
1714
1715 /* The heuristics for inlining small functions work poorly for
1716 recursive calls, where the effect is similar to loop unrolling.
1717 When inlining such an edge seems profitable, leave the decision to
1718 the special-purpose recursive inliner. */
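/* Editorial illustration (hypothetical code, not from the original sources):
   a self-recursive callee such as

     static int
     sum (const int *a, int n)
     {
       return n ? a[0] + sum (a + 1, n - 1) : 0;
     }

   has its recursive edge inlined into itself a bounded number of times,
   which behaves much like unrolling the equivalent loop.  The greedy
   small-function heuristics below would score each copy in isolation,
   so the decision is deferred to recursive_inlining instead.  */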
1719 if (cgraph_edge_recursive_p (edge))
1720 {
1721 where = edge->caller;
1722 if (where->global.inlined_to)
1723 where = where->global.inlined_to;
1724 if (!recursive_inlining (edge,
1725 flag_indirect_inlining
1726 ? &new_indirect_edges : NULL))
1727 {
1728 edge->inline_failed = CIF_RECURSIVE_INLINING;
1729 resolve_noninline_speculation (edge_heap, edge);
1730 continue;
1731 }
1732 reset_edge_caches (where);
1733 /* Recursive inliner inlines all recursive calls of the function
1734 at once. Consequently we need to update all callee keys. */
1735 if (flag_indirect_inlining)
1736 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1737 update_callee_keys (edge_heap, where, updated_nodes);
1738 bitmap_clear (updated_nodes);
1739 }
1740 else
1741 {
1742 struct cgraph_node *outer_node = NULL;
1743 int depth = 0;
1744
1745 /* Consider the case where the self-recursive function A is inlined into B.
1746 This is a desired optimization in some cases, since it has an effect
1747 similar to loop peeling and we might completely optimize out the
1748 recursive call. However, we must be extra selective. */
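/* Editorial illustration (hypothetical code, not part of GCC): given

     static int fact (int n) { return n > 1 ? n * fact (n - 1) : 1; }
     int use (void) { return fact (3); }

   inlining fact into use, and then fact's recursive call into that inline
   copy, peels iterations of the recursion one by one.  The depth computed
   below counts how many inline copies of the same callee already sit on
   the inline stack, so want_inline_self_recursive_call_p can stop the
   peeling early.  */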
1749
1750 where = edge->caller;
1751 while (where->global.inlined_to)
1752 {
1753 if (where->symbol.decl == callee->symbol.decl)
1754 outer_node = where, depth++;
1755 where = where->callers->caller;
1756 }
1757 if (outer_node
1758 && !want_inline_self_recursive_call_p (edge, outer_node,
1759 true, depth))
1760 {
1761 edge->inline_failed
1762 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
1763 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1764 resolve_noninline_speculation (edge_heap, edge);
1765 continue;
1766 }
1767 else if (depth && dump_file)
1768 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1769
1770 gcc_checking_assert (!callee->global.inlined_to);
1771 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1772 if (flag_indirect_inlining)
1773 add_new_edges_to_heap (edge_heap, new_indirect_edges);
1774
1775 reset_edge_caches (edge->callee);
1776 reset_node_growth_cache (callee);
1777
1778 update_callee_keys (edge_heap, where, updated_nodes);
1779 }
1780 where = edge->caller;
1781 if (where->global.inlined_to)
1782 where = where->global.inlined_to;
1783
1784 /* Our profitability metric can depend on local properties
1785 such as the number of inlinable calls and the size of the function body.
1786 After inlining these properties might change for the function we
1787 inlined into (since its body size changed) and for the functions
1788 called by the function we inlined (since the number of their inlinable
1789 callers might change). */
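/* Editorial note with hypothetical names: once bar has been inlined into
   foo, foo's body grows, so the badness of foo's remaining call sites
   changes, and the functions bar used to call now have call sites
   attributed to foo; both sets of heap keys therefore need refreshing.  */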
1790 update_caller_keys (edge_heap, where, updated_nodes, NULL);
1791 bitmap_clear (updated_nodes);
1792
1793 if (dump_file)
1794 {
1795 fprintf (dump_file,
1796 " Inlined into %s which now has time %i and size %i,"
1797 "net change of %+i.\n",
1798 cgraph_node_name (edge->caller),
1799 inline_summary (edge->caller)->time,
1800 inline_summary (edge->caller)->size,
1801 overall_size - old_size);
1802 }
1803 if (min_size > overall_size)
1804 {
1805 min_size = overall_size;
1806 max_size = compute_max_insns (min_size);
1807
1808 if (dump_file)
1809 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1810 }
1811 }
1812
1813 free_growth_caches ();
1814 new_indirect_edges.release ();
1815 fibheap_delete (edge_heap);
1816 if (dump_file)
1817 fprintf (dump_file,
1818 "Unit growth for small function inlining: %i->%i (%i%%)\n",
1819 initial_size, overall_size,
1820 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1821 BITMAP_FREE (updated_nodes);
1822 cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1823 }
1824
1825 /* Flatten NODE. Performed both during early inlining and
1826 at IPA inlining time. */
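/* Editorial illustration (hypothetical user code): given

     __attribute__ ((flatten))
     int hot_path (int x)
     {
       return helper_a (x) + helper_b (x);
     }

   flatten_function tries to inline helper_a, helper_b, and everything they
   call into hot_path, stopping at recursion, at cycles in the callgraph,
   and at calls that cannot be inlined.  */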
1827
1828 static void
1829 flatten_function (struct cgraph_node *node, bool early)
1830 {
1831 struct cgraph_edge *e;
1832
1833 /* We shouldn't be called recursively when we are being processed. */
1834 gcc_assert (node->symbol.aux == NULL);
1835
1836 node->symbol.aux = (void *) node;
1837
1838 for (e = node->callees; e; e = e->next_callee)
1839 {
1840 struct cgraph_node *orig_callee;
1841 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1842
1843 /* Have we hit a cycle? Then it is time to give up. */
1844 if (callee->symbol.aux)
1845 {
1846 if (dump_file)
1847 fprintf (dump_file,
1848 "Not inlining %s into %s to avoid cycle.\n",
1849 xstrdup (cgraph_node_name (callee)),
1850 xstrdup (cgraph_node_name (e->caller)));
1851 e->inline_failed = CIF_RECURSIVE_INLINING;
1852 continue;
1853 }
1854
1855 /* When the edge is already inlined, we just need to recurse into
1856 it in order to fully flatten the leaves. */
1857 if (!e->inline_failed)
1858 {
1859 flatten_function (callee, early);
1860 continue;
1861 }
1862
1863 /* The flatten attribute needs to be processed during late inlining. For
1864 extra code quality, however, we do flattening during early optimization,
1865 too. */
1866 if (!early
1867 ? !can_inline_edge_p (e, true)
1868 : !can_early_inline_edge_p (e))
1869 continue;
1870
1871 if (cgraph_edge_recursive_p (e))
1872 {
1873 if (dump_file)
1874 fprintf (dump_file, "Not inlining: recursive call.\n");
1875 continue;
1876 }
1877
1878 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->symbol.decl))
1879 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
1880 {
1881 if (dump_file)
1882 fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1883 continue;
1884 }
1885
1886 /* Inline the edge and flatten the inline clone. Avoid
1887 recursing through the original node if the node was cloned. */
1888 if (dump_file)
1889 fprintf (dump_file, " Inlining %s into %s.\n",
1890 xstrdup (cgraph_node_name (callee)),
1891 xstrdup (cgraph_node_name (e->caller)));
1892 orig_callee = callee;
1893 inline_call (e, true, NULL, NULL, false);
1894 if (e->callee != orig_callee)
1895 orig_callee->symbol.aux = (void *) node;
1896 flatten_function (e->callee, early);
1897 if (e->callee != orig_callee)
1898 orig_callee->symbol.aux = NULL;
1899 }
1900
1901 node->symbol.aux = NULL;
1902 if (!node->global.inlined_to)
1903 inline_update_overall_summary (node);
1904 }
1905
1906 /* Count the number of callers of NODE and store it into DATA (which
1907 points to an int). Worker for cgraph_for_node_and_aliases. */
1908
1909 static bool
1910 sum_callers (struct cgraph_node *node, void *data)
1911 {
1912 struct cgraph_edge *e;
1913 int *num_calls = (int *)data;
1914
1915 for (e = node->callers; e; e = e->next_caller)
1916 (*num_calls)++;
1917 return false;
1918 }
1919
1920 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
1921 DATA points to the number of calls originally found, so we avoid infinite
1922 recursion. */
1923
1924 static bool
1925 inline_to_all_callers (struct cgraph_node *node, void *data)
1926 {
1927 int *num_calls = (int *)data;
1928 while (node->callers && !node->global.inlined_to)
1929 {
1930 struct cgraph_node *caller = node->callers->caller;
1931
1932 if (dump_file)
1933 {
1934 fprintf (dump_file,
1935 "\nInlining %s size %i.\n",
1936 cgraph_node_name (node),
1937 inline_summary (node)->size);
1938 fprintf (dump_file,
1939 " Called once from %s %i insns.\n",
1940 cgraph_node_name (node->callers->caller),
1941 inline_summary (node->callers->caller)->size);
1942 }
1943
1944 inline_call (node->callers, true, NULL, NULL, true);
1945 if (dump_file)
1946 fprintf (dump_file,
1947 " Inlined into %s which now has %i size\n",
1948 cgraph_node_name (caller),
1949 inline_summary (caller)->size);
1950 if (!(*num_calls)--)
1951 {
1952 if (dump_file)
1953 fprintf (dump_file, "New calls found; giving up.\n");
1954 return true;
1955 }
1956 }
1957 return false;
1958 }
1959
1960 /* Decide on the inlining. We do so in topological order to avoid
1961 the expense of updating data structures. */
1962
1963 static unsigned int
1964 ipa_inline (void)
1965 {
1966 struct cgraph_node *node;
1967 int nnodes;
1968 struct cgraph_node **order;
1969 int i;
1970 int cold;
1971 bool remove_functions = false;
1972
1973 if (!optimize)
1974 return 0;
1975
1976 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1977
1978 if (in_lto_p && optimize)
1979 ipa_update_after_lto_read ();
1980
1981 if (dump_file)
1982 dump_inline_summaries (dump_file);
1983
1984 nnodes = ipa_reverse_postorder (order);
1985
1986 FOR_EACH_FUNCTION (node)
1987 node->symbol.aux = 0;
1988
1989 if (dump_file)
1990 fprintf (dump_file, "\nFlattening functions:\n");
1991
1992 /* In the first pass handle functions to be flattened. Do this with
1993 priority so that none of our later choices will make this impossible. */
1994 for (i = nnodes - 1; i >= 0; i--)
1995 {
1996 node = order[i];
1997
1998 /* Handle nodes to be flattened.
1999 Ideally, when processing callees we would stop inlining at the
2000 entry of cycles, possibly cloning that entry point and
2001 trying to flatten it, turning it into a self-recursive
2002 function. */
2003 if (lookup_attribute ("flatten",
2004 DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
2005 {
2006 if (dump_file)
2007 fprintf (dump_file,
2008 "Flattening %s\n", cgraph_node_name (node));
2009 flatten_function (node, false);
2010 }
2011 }
2012
2013 inline_small_functions ();
2014
2015 /* Do the first after-inlining removal. We want to remove all "stale" extern
2016 inline functions and virtual functions so we really know what is called once. */
2017 symtab_remove_unreachable_nodes (false, dump_file);
2018 free (order);
2019
2020 /* Inline functions with the property that after inlining into all callers
2021 the code size will shrink, because the out-of-line copy is eliminated.
2022 We do this regardless of the callee size as long as function growth limits
2023 are met. */
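/* Editorial illustration (hypothetical code): a static function with a
   single caller, e.g.

     static int square (int x) { return x * x; }
     int area (int s) { return square (s); }

   can be inlined into area and its out-of-line copy removed, so the
   overall code size shrinks even if square itself is not tiny.  */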
2024 if (dump_file)
2025 fprintf (dump_file,
2026 "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2027
2028 /* Inlining one function called once has a good chance of preventing
2029 the inlining of another function into the same callee. Ideally we should
2030 work in priority order, but probably inlining hot functions first
2031 is a good cut without the extra pain of maintaining the queue.
2032
2033 ??? This does not really fit the bill perfectly: inlining a function
2034 into a callee often leads to better optimization of the callee due to
2035 the increased context for optimization.
2036 For example, if the main() function calls a function that outputs help
2037 and then a function that does the main optimization, we should inline
2038 the second with priority even if both calls are cold by themselves.
2039
2040 We probably want to implement a new predicate replacing our use of
2041 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2042 to be hot. */
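/* Editorial illustration of the example above (hypothetical code):

     int main (void)
     {
       if (show_help)
         print_help ();         <- cold, prints usage text
       return run_optimizer (); <- does the real work
     }

   Both calls may look cold in isolation, yet inlining run_optimizer first
   gives the optimizer more context; the two-pass cold/hot loop below only
   approximates that ordering.  */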
2043 for (cold = 0; cold <= 1; cold++)
2044 {
2045 FOR_EACH_DEFINED_FUNCTION (node)
2046 {
2047 struct cgraph_edge *edge, *next;
2048 bool update = false;
2049
2050 for (edge = node->callees; edge; edge = next)
2051 {
2052 next = edge->next_callee;
2053 if (edge->speculative && !speculation_useful_p (edge, false))
2054 {
2055 cgraph_resolve_speculation (edge, NULL);
2056 update = true;
2057 remove_functions = true;
2058 }
2059 }
2060 if (update)
2061 {
2062 struct cgraph_node *where = node->global.inlined_to
2063 ? node->global.inlined_to : node;
2064 reset_node_growth_cache (where);
2065 reset_edge_caches (where);
2066 inline_update_overall_summary (where);
2067 }
2068 if (flag_inline_functions_called_once
2069 && want_inline_function_to_all_callers_p (node, cold))
2070 {
2071 int num_calls = 0;
2072 cgraph_for_node_and_aliases (node, sum_callers,
2073 &num_calls, true);
2074 cgraph_for_node_and_aliases (node, inline_to_all_callers,
2075 &num_calls, true);
2076 remove_functions = true;
2077 }
2078 }
2079 }
2080
2081 /* Free ipa-prop structures if they are no longer needed. */
2082 if (optimize)
2083 ipa_free_all_structures_after_iinln ();
2084
2085 if (dump_file)
2086 fprintf (dump_file,
2087 "\nInlined %i calls, eliminated %i functions\n\n",
2088 ncalls_inlined, nfunctions_inlined);
2089
2090 if (dump_file)
2091 dump_inline_summaries (dump_file);
2092 /* In WPA we use the inline summaries for the partitioning process. */
2093 if (!flag_wpa)
2094 inline_free_summary ();
2095 return remove_functions ? TODO_remove_functions : 0;
2096 }
2097
2098 /* Inline always-inline function calls in NODE. */
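/* Editorial illustration (hypothetical user code): a callee such as

     static inline int __attribute__ ((always_inline))
     get_flag (const struct state *s)
     {
       return s->flag;
     }

   is inlined here even when not optimizing, because
   DECL_DISREGARD_INLINE_LIMITS is set for it; a failure to inline it is
   diagnosed later in expand_call_inline.  */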
2099
2100 static bool
2101 inline_always_inline_functions (struct cgraph_node *node)
2102 {
2103 struct cgraph_edge *e;
2104 bool inlined = false;
2105
2106 for (e = node->callees; e; e = e->next_callee)
2107 {
2108 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2109 if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
2110 continue;
2111
2112 if (cgraph_edge_recursive_p (e))
2113 {
2114 if (dump_file)
2115 fprintf (dump_file, " Not inlining recursive call to %s.\n",
2116 cgraph_node_name (e->callee));
2117 e->inline_failed = CIF_RECURSIVE_INLINING;
2118 continue;
2119 }
2120
2121 if (!can_early_inline_edge_p (e))
2122 {
2123 /* Set inlined to true if the callee is marked "always_inline" but
2124 is not inlinable. This will allow flagging an error later in
2125 expand_call_inline in tree-inline.c. */
2126 if (lookup_attribute ("always_inline",
2127 DECL_ATTRIBUTES (callee->symbol.decl)) != NULL)
2128 inlined = true;
2129 continue;
2130 }
2131
2132 if (dump_file)
2133 fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
2134 xstrdup (cgraph_node_name (e->callee)),
2135 xstrdup (cgraph_node_name (e->caller)));
2136 inline_call (e, true, NULL, NULL, false);
2137 inlined = true;
2138 }
2139 if (inlined)
2140 inline_update_overall_summary (node);
2141
2142 return inlined;
2143 }
2144
2145 /* Decide on early inlining of small functions called by NODE.
2146 Return true if anything was inlined. */
2147
2148 static bool
2149 early_inline_small_functions (struct cgraph_node *node)
2150 {
2151 struct cgraph_edge *e;
2152 bool inlined = false;
2153
2154 for (e = node->callees; e; e = e->next_callee)
2155 {
2156 struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2157 if (!inline_summary (callee)->inlinable
2158 || !e->inline_failed)
2159 continue;
2160
2161 /* Do not consider functions not declared inline. */
2162 if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
2163 && !flag_inline_small_functions
2164 && !flag_inline_functions)
2165 continue;
2166
2167 if (dump_file)
2168 fprintf (dump_file, "Considering inline candidate %s.\n",
2169 cgraph_node_name (callee));
2170
2171 if (!can_early_inline_edge_p (e))
2172 continue;
2173
2174 if (cgraph_edge_recursive_p (e))
2175 {
2176 if (dump_file)
2177 fprintf (dump_file, " Not inlining: recursive call.\n");
2178 continue;
2179 }
2180
2181 if (!want_early_inline_function_p (e))
2182 continue;
2183
2184 if (dump_file)
2185 fprintf (dump_file, " Inlining %s into %s.\n",
2186 xstrdup (cgraph_node_name (callee)),
2187 xstrdup (cgraph_node_name (e->caller)));
2188 inline_call (e, true, NULL, NULL, true);
2189 inlined = true;
2190 }
2191
2192 return inlined;
2193 }
2194
2195 /* Do inlining of small functions. Doing so early helps profiling and other
2196 passes to be somewhat more effective and avoids some code duplication in
2197 the later real inlining pass for testcases with very many function calls. */
2198 static unsigned int
2199 early_inliner (void)
2200 {
2201 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2202 struct cgraph_edge *edge;
2203 unsigned int todo = 0;
2204 int iterations = 0;
2205 bool inlined = false;
2206
2207 if (seen_error ())
2208 return 0;
2209
2210 /* Do nothing if the data structures for the ipa-inliner are already computed.
2211 This happens when some pass decides to construct a new function and
2212 cgraph_add_new_function calls lowering passes and early optimization on
2213 it. This may confuse us when the early inliner decides to inline a call to
2214 a function clone, because function clones don't have a parameter list in
2215 ipa-prop matching their signature. */
2216 if (ipa_node_params_vector.exists ())
2217 return 0;
2218
2219 #ifdef ENABLE_CHECKING
2220 verify_cgraph_node (node);
2221 #endif
2222 ipa_remove_all_references (&node->symbol.ref_list);
2223
2224 /* Even when not optimizing or not inlining, inline always-inline
2225 functions. */
2226 inlined = inline_always_inline_functions (node);
2227
2228 if (!optimize
2229 || flag_no_inline
2230 || !flag_early_inlining
2231 /* Never inline regular functions into always-inline functions
2232 during incremental inlining. This sucks, as functions calling
2233 always-inline functions will get less optimized, but at the
2234 same time inlining a function that calls an always-inline
2235 function into an always-inline function might introduce
2236 cycles of edges to be always inlined in the callgraph.
2237
2238 We might want to be smarter and just avoid this type of inlining. */
2239 || DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
2240 ;
2241 else if (lookup_attribute ("flatten",
2242 DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
2243 {
2244 /* When the function is marked to be flattened, recursively inline
2245 all calls in it. */
2246 if (dump_file)
2247 fprintf (dump_file,
2248 "Flattening %s\n", cgraph_node_name (node));
2249 flatten_function (node, true);
2250 inlined = true;
2251 }
2252 else
2253 {
2254 /* We iterate incremental inlining to get trivial cases of indirect
2255 inlining. */
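/* Editorial illustration (hypothetical code) of the indirect-inlining
   effect the iteration is after:

     static int add1 (int x) { return x + 1; }
     static int apply (int (*f) (int), int x) { return f (x); }
     int foo (int x) { return apply (add1, x); }

   Inlining apply into foo turns the indirect call f (x) into a direct
   call to add1, which the next iteration can then inline as well.  */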
2256 while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2257 && early_inline_small_functions (node))
2258 {
2259 timevar_push (TV_INTEGRATION);
2260 todo |= optimize_inline_calls (current_function_decl);
2261
2262 /* Technically we ought to recompute the inline parameters so that the new
2263 iteration of the early inliner works as expected. However, the values
2264 are approximately right, and thus we only need to update the edge
2265 info that might be cleared out for newly discovered edges. */
2266 for (edge = node->callees; edge; edge = edge->next_callee)
2267 {
2268 struct inline_edge_summary *es = inline_edge_summary (edge);
2269 es->call_stmt_size
2270 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2271 es->call_stmt_time
2272 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2273 if (edge->callee->symbol.decl
2274 && !gimple_check_call_matching_types (
2275 edge->call_stmt, edge->callee->symbol.decl, false))
2276 edge->call_stmt_cannot_inline_p = true;
2277 }
2278 timevar_pop (TV_INTEGRATION);
2279 iterations++;
2280 inlined = false;
2281 }
2282 if (dump_file)
2283 fprintf (dump_file, "Iterations: %i\n", iterations);
2284 }
2285
2286 if (inlined)
2287 {
2288 timevar_push (TV_INTEGRATION);
2289 todo |= optimize_inline_calls (current_function_decl);
2290 timevar_pop (TV_INTEGRATION);
2291 }
2292
2293 cfun->always_inline_functions_inlined = true;
2294
2295 return todo;
2296 }
2297
2298 namespace {
2299
2300 const pass_data pass_data_early_inline =
2301 {
2302 GIMPLE_PASS, /* type */
2303 "einline", /* name */
2304 OPTGROUP_INLINE, /* optinfo_flags */
2305 false, /* has_gate */
2306 true, /* has_execute */
2307 TV_EARLY_INLINING, /* tv_id */
2308 PROP_ssa, /* properties_required */
2309 0, /* properties_provided */
2310 0, /* properties_destroyed */
2311 0, /* todo_flags_start */
2312 0, /* todo_flags_finish */
2313 };
2314
2315 class pass_early_inline : public gimple_opt_pass
2316 {
2317 public:
2318 pass_early_inline (gcc::context *ctxt)
2319 : gimple_opt_pass (pass_data_early_inline, ctxt)
2320 {}
2321
2322 /* opt_pass methods: */
2323 unsigned int execute () { return early_inliner (); }
2324
2325 }; // class pass_early_inline
2326
2327 } // anon namespace
2328
2329 gimple_opt_pass *
2330 make_pass_early_inline (gcc::context *ctxt)
2331 {
2332 return new pass_early_inline (ctxt);
2333 }
2334
2335
2336 /* When to run IPA inlining. Inlining of always-inline functions
2337 happens during early inlining.
2338
2339 Enable inlining unconditionally, because callgraph redirection
2340 happens here. */
2341
2342 static bool
2343 gate_ipa_inline (void)
2344 {
2345 return true;
2346 }
2347
2348 namespace {
2349
2350 const pass_data pass_data_ipa_inline =
2351 {
2352 IPA_PASS, /* type */
2353 "inline", /* name */
2354 OPTGROUP_INLINE, /* optinfo_flags */
2355 true, /* has_gate */
2356 true, /* has_execute */
2357 TV_IPA_INLINING, /* tv_id */
2358 0, /* properties_required */
2359 0, /* properties_provided */
2360 0, /* properties_destroyed */
2361 TODO_remove_functions, /* todo_flags_start */
2362 ( TODO_dump_symtab ), /* todo_flags_finish */
2363 };
2364
2365 class pass_ipa_inline : public ipa_opt_pass_d
2366 {
2367 public:
2368 pass_ipa_inline (gcc::context *ctxt)
2369 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2370 inline_generate_summary, /* generate_summary */
2371 inline_write_summary, /* write_summary */
2372 inline_read_summary, /* read_summary */
2373 NULL, /* write_optimization_summary */
2374 NULL, /* read_optimization_summary */
2375 NULL, /* stmt_fixup */
2376 0, /* function_transform_todo_flags_start */
2377 inline_transform, /* function_transform */
2378 NULL) /* variable_transform */
2379 {}
2380
2381 /* opt_pass methods: */
2382 bool gate () { return gate_ipa_inline (); }
2383 unsigned int execute () { return ipa_inline (); }
2384
2385 }; // class pass_ipa_inline
2386
2387 } // anon namespace
2388
2389 ipa_opt_pass_d *
2390 make_pass_ipa_inline (gcc::context *ctxt)
2391 {
2392 return new pass_ipa_inline (ctxt);
2393 }