/* Inlining decision heuristics.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* Inlining decision heuristics

   We separate inlining decisions from the inliner itself and store the
   decisions inside the callgraph as a so-called inline plan.  Refer to
   the cgraph.c documentation about the representation of inline plans
   in the callgraph.

   There are three major parts of this file:

   cgraph_mark_inline implementation

     This function marks the given call inline and performs the
     necessary modifications of the cgraph (production of the clones and
     updating of the overall statistics).

   inlining heuristics limits

     These functions check that a particular inlining is allowed by the
     limits specified by the user (allowed function growth, overall unit
     growth and so on).

   inlining heuristics

     This is the implementation of the IPA pass that aims to get as much
     benefit from inlining as possible while obeying the limits checked
     above.

   The implementation of the particular heuristics is separated from the
   rest of the code to make it easier to replace it with a more
   complicated implementation in the future.  The rest of the inlining
   code acts as a library aimed at modifying the callgraph and verifying
   that the limits on code size growth are respected.

   To mark a given call inline, use the cgraph_mark_inline function; the
   verification is performed by cgraph_default_inline_p and
   cgraph_check_inline_limits.

   The heuristics implement a simple knapsack-style algorithm ordering
   all functions by their "profitability" (estimated by code size
   growth) and inlining them in priority order.

   cgraph_decide_inlining implements the heuristics taking the whole
   callgraph into account, while cgraph_decide_inlining_incrementally
   considers only one function at a time and is used in
   non-unit-at-a-time mode.  */
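
/* Illustrative sketch of the priority ordering (hypothetical numbers):
   given candidate edges whose callees have estimated unit growths of
   -5, +10 and +40 insns, the greedy pass extracts the -5 edge first
   (it shrinks the unit), then the +10 edge, and stops once the overall
   unit growth limit (--param inline-unit-growth) would be exceeded.  */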

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"

/* Statistics we collect about the inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int initial_insns;
static int overall_insns;
static int max_insns;
static gcov_type max_count;

/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
                                     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}
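
/* Worked example for the estimate above, with hypothetical numbers:
   assuming a call cost of 16 insns (PARAM_VALUE (PARAM_INLINE_CALL_COST)
   plus the per-argument move costs), a WHAT of 120 insns, a TO of 50
   insns and TIMES == 2, the estimate is (120 - 16) * 2 + 50 = 258
   insns.  */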

/* E is expected to be an edge being inlined.  Clone the destination node
   of the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating
   new clones or re-using the node originally representing the
   out-of-line function call.  */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate)
{
  struct cgraph_node *n;

  /* We may eliminate the need for the out-of-line copy to be output.
     In that case just go ahead and re-use it.  */
  if (!e->callee->callers->next_caller
      && (!e->callee->needed || DECL_EXTERNAL (e->callee->decl))
      && duplicate
      && flag_unit_at_a_time)
    {
      gcc_assert (!e->callee->global.inlined_to);
      if (!DECL_EXTERNAL (e->callee->decl))
        overall_insns -= e->callee->global.insns, nfunctions_inlined++;
      duplicate = 0;
    }
  else if (duplicate)
    {
      n = cgraph_clone_node (e->callee, e->count, e->loop_nest);
      cgraph_redirect_edge_callee (e, n);
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate);
}

/* Mark edge E as inlined and update the callgraph accordingly.  */

void
cgraph_mark_inline_edge (struct cgraph_edge *e)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined && flag_unit_at_a_time)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true);

  what = e->callee;

  /* Now update the size of the caller and of all functions the caller
     is inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
                                                       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;
}

/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;
  int times = 0;

  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
        {
          cgraph_mark_inline_edge (e);
          if (e == edge)
            edge = next;
          times++;
        }
    }
  gcc_assert (times);
  return edge;
}

/* Estimate the growth caused by inlining NODE into all callees.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;
  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
                 - e->caller->global.insns);

  /* ??? Wrong for self-recursive functions or cases where we decide not
     to inline for different reasons, but it is not a big deal as in that
     case we will keep the body around, but we will also avoid some
     inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl))
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}

/* Return false when inlining WHAT into TO is not a good idea
   as it would cause too large a growth of the function bodies.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
                            const char **reason)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  for (e = to->callees; e; e = e->next_callee)
    if (e->callee == what)
      times++;

  /* When inlining a large function body called once into a small
     function, take the inlined function as the base for limiting
     the growth.  */
  if (to->local.self_insns > what->local.self_insns)
    limit = to->local.self_insns;
  else
    limit = what->local.self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
        *reason = N_("--param large-function-growth limit reached");
      return false;
    }
  return true;
}
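
/* Worked example for the limit above, with hypothetical numbers: if the
   larger of the two bodies has 200 self insns and
   --param large-function-growth is 100, the limit is
   200 + 200 * 100 / 100 = 400 insns; the inlining is rejected only when
   the estimated new size exceeds both this limit and
   --param large-function-insns.  */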

/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n)
{
  if (!DECL_INLINE (n->decl) || !DECL_SAVED_TREE (n->decl))
    return false;
  if (DECL_DECLARED_INLINE_P (n->decl))
    return n->global.insns < MAX_INLINE_INSNS_SINGLE;
  else
    return n->global.insns < MAX_INLINE_INSNS_AUTO;
}

/* Return true when inlining WHAT would create recursive inlining.
   We call it recursive inlining whenever the same function appears more
   than once in a single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
                             struct cgraph_node *what,
                             const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking a recursive function inline has sane semantics, so we should
     not warn about it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
               ? N_("recursive inlining") : "");
  return recursive;
}

/* Return true if the call can be hot.  */
static bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
          <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  return true;
}

/* A cost model driving the inlining heuristics so that the edges with
   the smallest badness are inlined first.  After each inlining is
   performed, the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body size.

   For the moment we use the estimated growth caused by inlining the
   callee into all its callers to drive the inlining, but once we have
   loop depth or frequency information readily available we should do
   better.

   With profiling we use the number of executions of each edge to drive
   the cost.  We should also distinguish hot and cold calls, where cold
   calls are inlined only when the code size is improved overall.

   The value INT_MAX can be returned to prevent the function from being
   inlined.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  if (max_count)
    {
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;

      /* Always prefer inlining saving code size.  */
      if (growth <= 0)
        return INT_MIN - growth;
      return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
    }
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      int badness = cgraph_estimate_growth (edge->callee) * 256;

      badness >>= nest;

      /* Make recursive inlining happen always after other inlining is
         done.  */
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
        return badness + 1;
      else
        return badness;
    }
}
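
/* Worked example for the non-profile branch above, with hypothetical
   numbers: a callee with an estimated unit growth of +12 insns called
   from loop nest depth 2 gets badness (12 * 256) >> 2 = 768, while one
   shrinking the unit by 3 insns gets (-3 * 256) >> 2 = -192 and is
   therefore extracted from the heap first.  */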

/* Recompute heap nodes for each caller edge of NODE.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *edge;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        int badness = cgraph_edge_badness (edge);
        if (edge->aux)
          {
            fibnode_t n = edge->aux;
            gcc_assert (n->data == edge);
            if (n->key == badness)
              continue;

            /* fibheap_replace_key can only decrease keys; if the badness
               increased, delete the heap node and re-insert it below.  */
            if (fibheap_replace_key (heap, n, badness))
              continue;
            fibheap_delete_node (heap, edge->aux);
          }
        edge->aux = fibheap_insert (heap, badness, edge);
      }
}

/* Recompute heap nodes for each caller edge of each callee of NODE.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else
      update_callee_keys (heap, e->callee, updated_nodes);
}

/* Enqueue all recursive calls from NODE into the priority queue
   depending on how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
        /* FIXME: Once counts and frequencies are available we should
           drive the order by these.  For now force the order to be a
           simple queue, since we get an order dependent on recursion
           depth for free this way.  */
        fibheap_insert (heap, priority++, e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}

/* Find callgraph nodes closing a circle in the graph.  The
   resulting hashtab can be used to avoid walking the circles.
   Uses the cgraph nodes' ->aux field which needs to be zero
   before and will be zero after the operation.  */

static void
cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
{
  struct cgraph_edge *e;

  if (node->aux)
    {
      void **slot;
      slot = htab_find_slot (cycles, node, INSERT);
      if (!*slot)
        {
          if (dump_file)
            fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
          *slot = node;
        }
      return;
    }

  node->aux = node;
  for (e = node->callees; e; e = e->next_callee)
    cgraph_find_cycles (e->callee, cycles);
  node->aux = 0;
}

/* Leafify the cgraph node.  We have to be careful in recursing
   so as not to run endlessly in circles of the callgraph.
   We do so by using a hashtab of cycle entering nodes as generated
   by cgraph_find_cycles.  */

static void
cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
{
  struct cgraph_edge *e;

  for (e = node->callees; e; e = e->next_callee)
    {
      /* Inline call, if possible, and recurse.  Be sure we are not
         entering callgraph circles here.  */
      if (e->inline_failed
          && e->callee->local.inlinable
          && !cgraph_recursive_inlining_p (node, e->callee,
                                           &e->inline_failed)
          && !htab_find (cycles, e->callee))
        {
          if (dump_file)
            fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
          cgraph_mark_inline_edge (e);
          cgraph_flatten_node (e->callee, cycles);
        }
      else if (dump_file)
        fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
    }
}

/* Decide on recursive inlining: in the case the function has recursive
   calls, inline until the body size reaches the given limit.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone;
  int depth = 0;
  int n = 0;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             cgraph_node_name (node));

  /* We need the original clone to copy around.  */
  master_clone = cgraph_clone_node (node, 0, 1);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true);

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap)
         && cgraph_estimate_size_after_inlining (1, node, master_clone) <= limit)
    {
      struct cgraph_edge *curr = fibheap_extract_min (heap);
      /* Use a separate iteration variable; shadowing NODE here would
         make the lookup_recursive_calls call below pass NULL.  */
      struct cgraph_node *cnode;

      depth = 0;
      for (cnode = curr->caller;
           cnode; cnode = cnode->global.inlined_to)
        if (cnode->decl == curr->callee->decl)
          depth++;
      if (depth > max_depth)
        continue;

      if (dump_file)
        fprintf (dump_file,
                 "   Inlining call of depth %i\n", depth);
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, body grown from %i to %i insns\n", n,
             master_clone->global.insns, node->global.insns);

  /* Remove the master clone we used for inlining.  We rely on the fact
     that clones inlined into the master clone get queued just before the
     master clone, so we do not need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = node->next)
    if (node->global.inlined_to == master_clone)
      cgraph_remove_node (node);
  cgraph_remove_node (master_clone);
  return true;
}
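
/* Illustrative sketch of the loop above, with hypothetical numbers: for
   a self-recursive function of 40 insns compiled with
   --param max-inline-insns-recursive-auto=450 and
   --param max-inline-recursive-depth-auto=8, recursive edges keep being
   extracted and inlined into the master clone until the estimated body
   size would exceed 450 insns, while any call already more than 8
   recursion levels deep is skipped.  */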

/* Set inline_failed for all callers of a given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}

/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and then the
   estimates are updated after each inlining.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
          || node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file, "Considering inline candidate %s.\n",
                 cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node))
        {
          cgraph_set_inline_failed (node,
            N_("--param max-inline-insns-single limit reached"));
          continue;
        }

      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->inline_failed)
          {
            gcc_assert (!edge->aux);
            edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
          }
    }
  while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

      growth -= edge->caller->global.insns;

      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i insns to be inlined into %s\n"
                   " Estimated growth after inlined into all callees is %+i insns.\n"
                   " Estimated badness is %i.\n",
                   cgraph_node_name (edge->callee),
                   edge->callee->global.insns,
                   cgraph_node_name (edge->caller),
                   cgraph_estimate_growth (edge->callee),
                   cgraph_edge_badness (edge));
          if (edge->count)
            fprintf (dump_file, " Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
                     edge->count);
        }
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
        continue;

      /* Without profile info ready we do not weight in any way the
         position of a call in the procedure itself.  This means that if
         a call of function A from function B looks profitable to inline,
         the recursive call of function A in the inline copy of A in B
         will look profitable too, and we end up inlining until reaching
         the maximal function growth.  This is not a good idea, so we
         prohibit such recursive inlining here.

         ??? When the frequencies are taken into account we might not
         need this restriction.  */
      if (!max_count)
        {
          where = edge->caller;
          while (where->global.inlined_to)
            {
              if (where->decl == edge->callee->decl)
                break;
              where = where->callers->caller;
            }
          if (where->global.inlined_to)
            {
              edge->inline_failed
                = (edge->callee->local.disregard_inline_limits
                   ? N_("recursive inlining") : "");
              if (dump_file)
                fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
              continue;
            }
        }

      if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              edge->inline_failed =
                N_("call is unlikely");
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (!cgraph_default_inline_p (edge->callee))
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              edge->inline_failed =
                N_("--param max-inline-insns-single limit reached after inlining into the callee");
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                       &edge->inline_failed))
        {
          where = edge->caller;
          if (where->global.inlined_to)
            where = where->global.inlined_to;
          if (!cgraph_decide_recursive_inlining (where))
            continue;
          update_callee_keys (heap, where, updated_nodes);
        }
      else
        {
          if (!cgraph_check_inline_limits (edge->caller, edge->callee,
                                           &edge->inline_failed))
            {
              if (dump_file)
                fprintf (dump_file, " Not inlining into %s:%s.\n",
                         cgraph_node_name (edge->caller), edge->inline_failed);
              continue;
            }
          cgraph_mark_inline_edge (edge);
          update_callee_keys (heap, edge->callee, updated_nodes);
        }
      where = edge->caller;
      if (where->global.inlined_to)
        where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties such as
         the number of inlinable calls and the size of the function body.
         After inlining, these properties might change for the function
         we inlined into (since its body size changed) and for the
         functions called by the function we inlined (since the number of
         their inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
        fprintf (dump_file,
                 " Inlined into %s which now has %i insns.\n",
                 cgraph_node_name (edge->caller),
                 edge->caller->global.insns);
      if (dump_file)
        fprintf (dump_file,
                 " Inlined for a net change of %+i insns.\n",
                 overall_insns - old_insns);
    }
  while ((edge = fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
          && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                           &edge->inline_failed))
        edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}

/* Decide on the inlining.  We do so in the topological order to avoid
   the expense of updating the data structures.  */

static void
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
  int old_insns = 0;
  int i;

  timevar_push (TV_INLINE_HEURISTICS);
  max_count = 0;
  for (node = cgraph_nodes; node; node = node->next)
    {
      struct cgraph_edge *e;
      initial_insns += node->local.self_insns;
      for (e = node->callees; e; e = e->next_callee)
        if (max_count < e->count)
          max_count = e->count;
    }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  max_insns = ((HOST_WIDEST_INT) overall_insns
               * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
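
  /* For example, with hypothetical numbers: 10000 initial insns and
     --param inline-unit-growth=50 give
     max_insns = 10000 * (100 + 50) / 100 = 15000 insns for the unit.  */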

  nnodes = cgraph_postorder (order);

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining.  Starting with %i insns.\n",
             initial_insns);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nInlining always_inline functions:\n");

  /* In the first pass mark all always_inline edges.  Do this with a
     priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      struct cgraph_edge *e, *next;

      node = order[i];

      /* Handle nodes to be flattened, but don't update overall unit
         size.  */
      if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
        {
          int old_overall_insns = overall_insns;
          htab_t cycles;
          if (dump_file)
            fprintf (dump_file,
                     "Leafifying %s\n", cgraph_node_name (node));
          cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
          cgraph_find_cycles (node, cycles);
          cgraph_flatten_node (node, cycles);
          htab_delete (cycles);
          overall_insns = old_overall_insns;
          /* We don't need to consider always_inline functions inside
             the flattened function anymore.  */
          continue;
        }

      if (!node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file,
                 "\nConsidering %s %i insns (always inline)\n",
                 cgraph_node_name (node), node->global.insns);
      old_insns = overall_insns;
      for (e = node->callers; e; e = next)
        {
          next = e->next_caller;
          if (!e->inline_failed)
            continue;
          if (cgraph_recursive_inlining_p (e->caller, e->callee,
                                           &e->inline_failed))
            continue;
          cgraph_mark_inline_edge (e);
          if (dump_file)
            fprintf (dump_file,
                     " Inlined into %s which now has %i insns.\n",
                     cgraph_node_name (e->caller),
                     e->caller->global.insns);
        }
      if (dump_file)
        fprintf (dump_file,
                 " Inlined for a net change of %+i insns.\n",
                 overall_insns - old_insns);
    }

  if (!flag_really_no_inline)
    {
      cgraph_decide_inlining_of_small_functions ();

      if (dump_file)
        fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* And finally decide what functions are called once.  */

      for (i = nnodes - 1; i >= 0; i--)
        {
          node = order[i];

          if (node->callers && !node->callers->next_caller && !node->needed
              && node->local.inlinable && node->callers->inline_failed
              && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
            {
              bool ok = true;
              struct cgraph_node *node1;

              /* Verify that we won't duplicate the caller.  */
              for (node1 = node->callers->caller;
                   node1->callers && !node1->callers->inline_failed
                   && ok; node1 = node1->callers->caller)
                if (node1->callers->next_caller || node1->needed)
                  ok = false;
              if (ok)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "\nConsidering %s %i insns.\n"
                             " Called once from %s %i insns.\n",
                             cgraph_node_name (node), node->global.insns,
                             cgraph_node_name (node->callers->caller),
                             node->callers->caller->global.insns);

                  old_insns = overall_insns;

                  if (cgraph_check_inline_limits (node->callers->caller, node,
                                                  NULL))
                    {
                      cgraph_mark_inline (node->callers);
                      if (dump_file)
                        fprintf (dump_file,
                                 " Inlined into %s which now has %i insns"
                                 " for a net change of %+i insns.\n",
                                 cgraph_node_name (node->callers->caller),
                                 node->callers->caller->global.insns,
                                 overall_insns - old_insns);
                    }
                  else
                    {
                      if (dump_file)
                        fprintf (dump_file,
                                 " Inline limit reached, not inlined.\n");
                    }
                }
            }
        }
    }

  if (dump_file)
    fprintf (dump_file,
             "\nInlined %i calls, eliminated %i functions, "
             "%i insns turned to %i insns.\n\n",
             ncalls_inlined, nfunctions_inlined, initial_insns,
             overall_insns);
  free (order);
  timevar_pop (TV_INLINE_HEURISTICS);
}

/* Decide on the inlining of a single function NODE; used in
   non-unit-at-a-time mode and by early inlining.  When EARLY is set,
   the newly inlined calls are also expanded immediately.  */

bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
{
  struct cgraph_edge *e;
  bool inlined = false;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->local.disregard_inline_limits
        && e->inline_failed
        && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
        /* ??? It is possible that renaming variable removed the function
           body in duplicate_decls.  See gcc.c-torture/compile/20011119-2.c  */
        && DECL_SAVED_TREE (e->callee->decl))
      {
        if (dump_file && early)
          fprintf (dump_file, "  Early inlining %s into %s\n",
                   cgraph_node_name (e->callee), cgraph_node_name (node));
        cgraph_mark_inline (e);
        inlined = true;
      }

  /* Now do the automatic inlining.  */
  if (!flag_really_no_inline)
    for (e = node->callees; e; e = e->next_callee)
      if (e->callee->local.inlinable
          && e->inline_failed
          && !e->callee->local.disregard_inline_limits
          && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
          && (!early
              || (cgraph_estimate_size_after_inlining (1, e->caller, node)
                  <= e->caller->global.insns))
          && cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
          && DECL_SAVED_TREE (e->callee->decl))
        {
          if (cgraph_default_inline_p (e->callee))
            {
              if (dump_file && early)
                fprintf (dump_file, "  Early inlining %s into %s\n",
                         cgraph_node_name (e->callee), cgraph_node_name (node));
              cgraph_mark_inline (e);
              inlined = true;
            }
          else if (!early)
            e->inline_failed
              = N_("--param max-inline-insns-single limit reached");
        }
  if (early && inlined)
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      tree_register_cfg_hooks ();
      current_function_decl = node->decl;
      optimize_inline_calls (current_function_decl);
      node->local.self_insns = node->global.insns;
      current_function_decl = NULL;
      pop_cfun ();
      ggc_collect ();
    }
  return inlined;
}

/* When inlining shall be performed.  */
static bool
cgraph_gate_inlining (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_ipa_inline =
{
  "inline",                             /* name */
  cgraph_gate_inlining,                 /* gate */
  cgraph_decide_inlining,               /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INTEGRATION,                       /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
  0                                     /* letter */
};

/* Do inlining of small functions.  Doing so early helps profiling and
   other passes to be somewhat more effective and avoids some code
   duplication in the later real inlining pass for testcases with very
   many function calls.  */
static void
cgraph_early_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i;

  if (sorrycount || errorcount)
    return;

  /* Allocate the postorder buffer only after the early-exit check so we
     do not leak it when compilation has already failed.  */
  order = xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
#ifdef ENABLE_CHECKING
  for (node = cgraph_nodes; node; node = node->next)
    gcc_assert (!node->aux);
#endif

  nnodes = cgraph_postorder (order);
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];
      if (node->analyzed && node->local.inlinable
          && (node->needed || node->reachable)
          && node->callers)
        cgraph_decide_inlining_incrementally (node, true);
    }
  cgraph_remove_unreachable_nodes (true, dump_file);
#ifdef ENABLE_CHECKING
  for (node = cgraph_nodes; node; node = node->next)
    gcc_assert (!node->global.inlined_to);
#endif
  free (order);
}

/* When early inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
  return flag_inline_trees && flag_early_inlining;
}

struct tree_opt_pass pass_early_ipa_inline =
{
  "einline",                            /* name */
  cgraph_gate_early_inlining,           /* gate */
  cgraph_early_inlining,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INTEGRATION,                       /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
  0                                     /* letter */
};