]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-split.c
re PR c++/44535 (g++ -O[ 23] generates undefined symbol)
[thirdparty/gcc.git] / gcc / ipa-split.c
CommitLineData
3e485f62
JH
1/* Function splitting pass
2 Copyright (C) 2010
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka <jh@suse.cz>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22/* The purpose of this pass is to split function bodies to improve
23 inlining. I.e. for function of the form:
24
25 func (...)
26 {
27 if (cheap_test)
28 something_small
29 else
30 something_big
31 }
32
33 Produce:
34
35 func.part (...)
36 {
37 something_big
38 }
39
40 func (...)
41 {
42 if (cheap_test)
43 something_small
44 else
45 func.part (...);
46 }
47
48 When func becomes inlinable and when cheap_test is often true, inlining func,
49 but not fund.part leads to performance imrovement similar as inlining
50 original func while the code size growth is smaller.
51
52 The pass is organized in three stages:
53 1) Collect local info about basic block into BB_INFO structure and
54 compute function body estimated size and time.
55 2) Via DFS walk find all possible basic blocks where we can split
56 and chose best one.
57 3) If split point is found, split at the specified BB by creating a clone
58 and updating function to call it.
59
60 The decisions what functions to split are in execute_split_functions
61 and consider_split.
62
63 There are several possible future improvements for this pass including:
64
65 1) Splitting to break up large functions
66 2) Splitting to reduce stack frame usage
67 3) Allow split part of function to use values computed in the header part.
68 The values needs to be passed to split function, perhaps via same
69 interface as for nested functions or as argument.
70 4) Support for simple rematerialization. I.e. when split part use
71 value computed in header from function parameter in very cheap way, we
72 can just recompute it.
73 5) Support splitting of nested functions.
74 6) Support non-SSA arguments.
75 7) There is nothing preventing us from producing multiple parts of single function
76 when needed or splitting also the parts. */
77
78#include "config.h"
79#include "system.h"
80#include "coretypes.h"
81#include "tree.h"
82#include "target.h"
83#include "cgraph.h"
84#include "ipa-prop.h"
85#include "tree-flow.h"
86#include "tree-pass.h"
87#include "flags.h"
88#include "timevar.h"
89#include "diagnostic.h"
90#include "tree-dump.h"
91#include "tree-inline.h"
92#include "fibheap.h"
93#include "params.h"
94#include "gimple-pretty-print.h"
95
96/* Per basic block info. */
97
98typedef struct
99{
100 unsigned int size;
101 unsigned int time;
102} bb_info;
103DEF_VEC_O(bb_info);
104DEF_VEC_ALLOC_O(bb_info,heap);
105
106static VEC(bb_info, heap) *bb_info_vec;
107
108/* Description of split point. */
109
110struct split_point
111{
112 /* Size of the partitions. */
113 unsigned int header_time, header_size, split_time, split_size;
114
115 /* SSA names that need to be passed into spit funciton. */
116 bitmap ssa_names_to_pass;
117
118 /* Basic block where we split (that will become entry point of new function. */
119 basic_block entry_bb;
120
121 /* Basic blocks we are splitting away. */
122 bitmap split_bbs;
123};
124
125/* Best split point found. */
126
127struct split_point best_split_point;
128
129/* Callback for walk_stmt_load_store_addr_ops. If T is non-ssa automatic
130 variable, check it if it is present in bitmap passed via DATA. */
131
132static bool
133test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
134 void *data ATTRIBUTE_UNUSED)
135{
136 t = get_base_address (t);
137
138 if (t && !is_gimple_reg (t)
139 && ((TREE_CODE (t) == VAR_DECL
140 && auto_var_in_fn_p (t, current_function_decl))
141 || (TREE_CODE (t) == PARM_DECL)))
142 return bitmap_bit_p ((bitmap)data, DECL_UID (t));
143 return false;
144}
145
146/* Dump split point CURRENT. */
147
148static void
149dump_split_point (FILE * file, struct split_point *current)
150{
151 fprintf (file,
152 "Split point at BB %i header time:%i header size: %i"
153 " split time: %i split size: %i\n bbs: ",
154 current->entry_bb->index, current->header_time,
155 current->header_size, current->split_time, current->split_size);
156 dump_bitmap (file, current->split_bbs);
157 fprintf (file, " SSA names to pass: ");
158 dump_bitmap (file, current->ssa_names_to_pass);
159}
160
161/* We found an split_point CURRENT. NON_SSA_VARS is bitmap of all non ssa
162 variables used and RETURN_BB is return basic block.
163 See if we can split function here. */
164
165static void
166consider_split (struct split_point *current, bitmap non_ssa_vars,
167 basic_block return_bb)
168{
169 tree parm;
170 unsigned int num_args = 0;
171 unsigned int call_overhead;
172 edge e;
173 edge_iterator ei;
8b3057b3
JH
174 gimple_stmt_iterator bsi;
175 unsigned int i;
176 int incomming_freq = 0;
177
3e485f62
JH
178 if (dump_file && (dump_flags & TDF_DETAILS))
179 dump_split_point (dump_file, current);
180
8b3057b3
JH
181 FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
182 if (!bitmap_bit_p (current->split_bbs, e->src->index))
183 incomming_freq += EDGE_FREQUENCY (e);
184
3e485f62 185 /* Do not split when we would end up calling function anyway. */
8b3057b3 186 if (incomming_freq
3e485f62
JH
187 >= (ENTRY_BLOCK_PTR->frequency
188 * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
189 {
190 if (dump_file && (dump_flags & TDF_DETAILS))
191 fprintf (dump_file,
8b3057b3 192 " Refused: incomming frequency is too large.\n");
3e485f62
JH
193 return;
194 }
195
196 if (!current->header_size)
197 {
198 if (dump_file && (dump_flags & TDF_DETAILS))
199 fprintf (dump_file, " Refused: header empty\n");
200 gcc_unreachable ();
201 return;
202 }
203
8b3057b3
JH
204 /* Verify that PHI args on entry are either virutal or all their operands
205 incomming from header are the same. */
206 for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
3e485f62 207 {
8b3057b3
JH
208 gimple stmt = gsi_stmt (bsi);
209 tree val = NULL;
210
211 if (!is_gimple_reg (gimple_phi_result (stmt)))
212 continue;
213 for (i = 0; i < gimple_phi_num_args (stmt); i++)
214 {
215 edge e = gimple_phi_arg_edge (stmt, i);
216 if (!bitmap_bit_p (current->split_bbs, e->src->index))
217 {
218 tree edge_val = gimple_phi_arg_def (stmt, i);
219 if (val && edge_val != val)
220 {
221 if (dump_file && (dump_flags & TDF_DETAILS))
222 fprintf (dump_file,
223 " Refused: entry BB has PHI with multiple variants\n");
224 return;
225 }
226 val = edge_val;
227 }
228 }
3e485f62
JH
229 }
230
231
232 /* See what argument we will pass to the split function and compute
233 call overhead. */
234 call_overhead = eni_size_weights.call_cost;
235 for (parm = DECL_ARGUMENTS (current_function_decl); parm;
236 parm = TREE_CHAIN (parm))
237 {
238 if (!is_gimple_reg (parm))
239 {
240 if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
241 {
242 if (dump_file && (dump_flags & TDF_DETAILS))
243 fprintf (dump_file,
244 " Refused: need to pass non-ssa param values\n");
245 return;
246 }
247 }
248 else if (gimple_default_def (cfun, parm)
249 && bitmap_bit_p (current->ssa_names_to_pass,
250 SSA_NAME_VERSION (gimple_default_def
251 (cfun, parm))))
252 {
253 if (!VOID_TYPE_P (TREE_TYPE (parm)))
254 call_overhead += estimate_move_cost (TREE_TYPE (parm));
255 num_args++;
256 }
257 }
258 if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
259 call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));
260
261 if (current->split_size <= call_overhead)
262 {
263 if (dump_file && (dump_flags & TDF_DETAILS))
264 fprintf (dump_file,
265 " Refused: split size is smaller than call overhead\n");
266 return;
267 }
268 if (current->header_size + call_overhead
269 >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
270 ? MAX_INLINE_INSNS_SINGLE
271 : MAX_INLINE_INSNS_AUTO))
272 {
273 if (dump_file && (dump_flags & TDF_DETAILS))
274 fprintf (dump_file,
275 " Refused: header size is too large for inline candidate\n");
276 return;
277 }
278
279 /* FIXME: we currently can pass only SSA function parameters to the split
d402c33d 280 arguments. Once parm_adjustment infrastructure is supported by cloning,
3e485f62
JH
281 we can pass more than that. */
282 if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
283 {
8b3057b3 284
3e485f62
JH
285 if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file,
287 " Refused: need to pass non-param values\n");
288 return;
289 }
290
291 /* When there are non-ssa vars used in the split region, see if they
292 are used in the header region. If so, reject the split.
293 FIXME: we can use nested function support to access both. */
294 if (!bitmap_empty_p (non_ssa_vars))
295 {
296 basic_block bb;
297 FOR_EACH_BB (bb)
298 {
299 gimple_stmt_iterator bsi;
300 if (!bitmap_bit_p (current->split_bbs, bb->index))
301 continue;
302 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
303 {
304 if (is_gimple_debug (gsi_stmt (bsi)))
305 continue;
306 if (walk_stmt_load_store_addr_ops
307 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
308 test_nonssa_use, test_nonssa_use))
309 {
310 if (dump_file && (dump_flags & TDF_DETAILS))
311 fprintf (dump_file,
312 " Refused: split part has non-ssa uses\n");
313 return;
314 }
315 }
316 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
317 {
3e485f62
JH
318 if (walk_stmt_load_store_addr_ops
319 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
320 test_nonssa_use, test_nonssa_use))
321 {
322 if (dump_file && (dump_flags & TDF_DETAILS))
323 fprintf (dump_file,
324 " Refused: split part has non-ssa uses\n");
325 return;
326 }
327 }
328 FOR_EACH_EDGE (e, ei, bb->succs)
329 {
330 if (e->dest != return_bb)
331 continue;
332 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
333 gsi_next (&bsi))
334 {
335 gimple stmt = gsi_stmt (bsi);
336 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
337
338 if (!is_gimple_reg (gimple_phi_result (stmt)))
339 continue;
340 if (TREE_CODE (op) != SSA_NAME
341 && test_nonssa_use (stmt, op, non_ssa_vars))
342 {
343 if (dump_file && (dump_flags & TDF_DETAILS))
344 fprintf (dump_file,
345 " Refused: split part has non-ssa uses\n");
346 return;
347 }
348 }
349 }
350 }
351 return;
352 }
353 if (dump_file && (dump_flags & TDF_DETAILS))
354 fprintf (dump_file, " Accepted!\n");
355
356 /* At the moment chose split point with lowest frequency and that leaves
357 out smallest size of header.
358 In future we might re-consider this heuristics. */
359 if (!best_split_point.split_bbs
360 || best_split_point.entry_bb->frequency > current->entry_bb->frequency
361 || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
362 && best_split_point.split_size < current->split_size))
363
364 {
365 if (dump_file && (dump_flags & TDF_DETAILS))
366 fprintf (dump_file, " New best split point!\n");
367 if (best_split_point.ssa_names_to_pass)
368 {
369 BITMAP_FREE (best_split_point.ssa_names_to_pass);
370 BITMAP_FREE (best_split_point.split_bbs);
371 }
372 best_split_point = *current;
373 best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
374 bitmap_copy (best_split_point.ssa_names_to_pass,
375 current->ssa_names_to_pass);
376 best_split_point.split_bbs = BITMAP_ALLOC (NULL);
377 bitmap_copy (best_split_point.split_bbs, current->split_bbs);
378 }
379}
380
381/* Return basic block containing RETURN statement, or EXIT_BLOCK_PTR if none
382 found.
383 When there are multiple RETURN statement, chose one with return value,
384 since that one is more likely shared by multiple code paths.
385 TODO: We might support multiple return blocks. */
386
387static basic_block
388find_return_bb (void)
389{
390 edge e;
391 edge_iterator ei;
392 basic_block return_bb = EXIT_BLOCK_PTR;
393
394 if (EDGE_COUNT (EXIT_BLOCK_PTR->preds) == 1)
395 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
396 {
397 gimple_stmt_iterator bsi;
398 bool found_return = false;
399 tree retval = NULL_TREE;
400
401 for (bsi = gsi_start_bb (e->src); !gsi_end_p (bsi); gsi_next (&bsi))
402 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_RETURN
403 && gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL
404 && !is_gimple_debug (gsi_stmt (bsi)))
405 break;
406 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
407 {
408 found_return = true;
409 retval = gimple_return_retval (gsi_stmt (bsi));
410 }
411 if (gsi_end_p (bsi) && found_return)
412 {
413 if (retval)
414 return e->src;
415 else
416 return_bb = e->src;
417 }
418 }
419 return return_bb;
420}
421
422/* Callback for walk_stmt_load_store_addr_ops. If T is non-ssa automatic
423 variable, mark it as used in bitmap passed via DATA.
424 Return true when access to T prevents splitting the function. */
425
426static bool
427mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
428 void *data ATTRIBUTE_UNUSED)
429{
430 t = get_base_address (t);
431
432 if (!t || is_gimple_reg (t))
433 return false;
434
435 /* At present we can't pass non-SSA arguments to split function.
436 FIXME: this can be relaxed by passing references to arguments. */
437 if (TREE_CODE (t) == PARM_DECL)
438 {
439 if (dump_file && (dump_flags & TDF_DETAILS))
440 fprintf (dump_file, "Can not split use of non-ssa function parameter.\n");
441 return true;
442 }
443
444 if (TREE_CODE (t) == VAR_DECL && auto_var_in_fn_p (t, current_function_decl))
445 bitmap_set_bit ((bitmap)data, DECL_UID (t));
446 return false;
447}
448
449/* Compute local properties of basic block BB we collect when looking for
450 split points. We look for ssa defs and store them in SET_SSA_NAMES,
451 for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
452 vars stored in NON_SSA_VARS.
453
454 When BB has edge to RETURN_BB, collect uses in RETURN_BB too.
455
456 Return false when BB contains something that prevents it from being put into
457 split function. */
458
459static bool
460visit_bb (basic_block bb, basic_block return_bb,
461 bitmap set_ssa_names, bitmap used_ssa_names,
462 bitmap non_ssa_vars)
463{
464 gimple_stmt_iterator bsi;
465 edge e;
466 edge_iterator ei;
467 bool can_split = true;
468
469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
470 {
471 gimple stmt = gsi_stmt (bsi);
472 tree op;
473 ssa_op_iter iter;
474 tree decl;
475
476 if (is_gimple_debug (stmt))
477 continue;
478
479 /* FIXME: We can split regions containing EH. We can not however
480 split RESX, EH_DISPATCH and EH_POINTER referring to same region
481 into different partitions. This would require tracking of
482 EH regions and checking in consider_split_point if they
483 are not used elsewhere. */
484 if (gimple_code (stmt) == GIMPLE_RESX
485 && stmt_can_throw_external (stmt))
486 {
487 if (dump_file && (dump_flags & TDF_DETAILS))
488 fprintf (dump_file, "Can not split external resx.\n");
489 can_split = false;
490 }
491 if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
492 {
493 if (dump_file && (dump_flags & TDF_DETAILS))
494 fprintf (dump_file, "Can not split eh dispatch.\n");
495 can_split = false;
496 }
497
498 /* Check builtins that prevent splitting. */
499 if (gimple_code (stmt) == GIMPLE_CALL
500 && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
501 && DECL_BUILT_IN (decl)
502 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
503 switch (DECL_FUNCTION_CODE (decl))
504 {
505 /* FIXME: once we will allow passing non-parm values to split part,
506 we need to be sure to handle correct builtin_stack_save and
507 builtin_stack_restore. At the moment we are safe; there is no
508 way to store builtin_stack_save result in non-SSA variable
509 since all calls to those are compiler generated. */
510 case BUILT_IN_APPLY:
511 case BUILT_IN_VA_START:
512 if (dump_file && (dump_flags & TDF_DETAILS))
513 fprintf (dump_file, "Can not split builtin_apply and va_start.\n");
514 can_split = false;
515 break;
516 case BUILT_IN_EH_POINTER:
517 if (dump_file && (dump_flags & TDF_DETAILS))
518 fprintf (dump_file, "Can not split builtin_eh_pointer.\n");
519 can_split = false;
520 break;
521 default:
522 break;
523 }
524
525 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
526 bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
527 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
528 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
529 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
530 mark_nonssa_use,
531 mark_nonssa_use,
532 mark_nonssa_use);
533 }
534 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
535 {
536 gimple stmt = gsi_stmt (bsi);
8b3057b3 537 unsigned int i;
3e485f62
JH
538
539 if (is_gimple_debug (stmt))
540 continue;
541 if (!is_gimple_reg (gimple_phi_result (stmt)))
542 continue;
8b3057b3
JH
543 bitmap_set_bit (set_ssa_names,
544 SSA_NAME_VERSION (gimple_phi_result (stmt)));
545 for (i = 0; i < gimple_phi_num_args (stmt); i++)
546 {
547 tree op = gimple_phi_arg_def (stmt, i);
548 if (TREE_CODE (op) == SSA_NAME)
549 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
550 }
3e485f62
JH
551 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
552 mark_nonssa_use,
553 mark_nonssa_use,
554 mark_nonssa_use);
555 }
556 /* Record also uses comming from PHI operand in return BB. */
557 FOR_EACH_EDGE (e, ei, bb->succs)
558 if (e->dest == return_bb)
559 {
560 bool found_phi = false;
561 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
562 {
563 gimple stmt = gsi_stmt (bsi);
564 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
565
566 if (is_gimple_debug (stmt))
567 continue;
568 if (!is_gimple_reg (gimple_phi_result (stmt)))
569 continue;
570 found_phi = true;
571 if (TREE_CODE (op) == SSA_NAME)
572 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
573 else
574 can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
575 }
576 if (!gsi_end_p (gsi_last_bb (return_bb)))
577 {
578 ssa_op_iter iter;
579 gimple stmt = gsi_stmt (gsi_last_bb (return_bb));
580 tree op;
581 if (!found_phi)
582 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
583 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
584 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
585 mark_nonssa_use,
586 mark_nonssa_use,
587 mark_nonssa_use);
588 }
589 }
590 return can_split;
591}
592
593/* Stack entry for recursive DFS walk in find_split_point. */
594
595typedef struct
596{
597 /* Basic block we are examining. */
598 basic_block bb;
599
600 /* SSA names set and used by the BB and all BBs reachable
601 from it via DFS walk. */
602 bitmap set_ssa_names, used_ssa_names;
603 bitmap non_ssa_vars;
604
605 /* All BBS visited from this BB via DFS walk. */
606 bitmap bbs_visited;
607
608 /* Last examined edge in DFS walk. Since we walk unoriented graph,
609 the value is up to sum of incomming and outgoing edges of BB. */
610 unsigned int edge_num;
611
612 /* Stack entry index of earliest BB reachable from current BB
613 or any BB visited later in DFS valk. */
614 int earliest;
615
616 /* Overall time and size of all BBs reached from this BB in DFS walk. */
617 int overall_time, overall_size;
618
619 /* When false we can not split on this BB. */
620 bool can_split;
621} stack_entry;
622DEF_VEC_O(stack_entry);
623DEF_VEC_ALLOC_O(stack_entry,heap);
624
625
626/* Find all articulations and call consider_split on them.
627 OVERALL_TIME and OVERALL_SIZE is time and size of the function.
628
629 We perform basic algorithm for finding an articulation in a graph
630 created from CFG by considering it to be an unoriented graph.
631
632 The articulation is discovered via DFS walk. We collect earliest
633 basic block on stack that is reachable via backward edge. Articulation
634 is any basic block such that there is no backward edge bypassing it.
635 To reduce stack usage we maintain heap allocated stack in STACK vector.
636 AUX pointer of BB is set to index it appears in the stack or -1 once
637 it is visited and popped off the stack.
638
639 The algorithm finds articulation after visiting the whole component
640 reachable by it. This makes it convenient to collect information about
641 the component used by consider_split. */
642
643static void
644find_split_points (int overall_time, int overall_size)
645{
646 stack_entry first;
647 VEC(stack_entry, heap) *stack = NULL;
648 basic_block bb;
649 basic_block return_bb = find_return_bb ();
650 struct split_point current;
651
652 current.header_time = overall_time;
653 current.header_size = overall_size;
654 current.split_time = 0;
655 current.split_size = 0;
656 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
657
658 first.bb = ENTRY_BLOCK_PTR;
659 first.edge_num = 0;
660 first.overall_time = 0;
661 first.overall_size = 0;
662 first.earliest = INT_MAX;
663 first.set_ssa_names = 0;
664 first.used_ssa_names = 0;
665 first.bbs_visited = 0;
666 VEC_safe_push (stack_entry, heap, stack, &first);
667 ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;
668
669 while (!VEC_empty (stack_entry, stack))
670 {
671 stack_entry *entry = VEC_last (stack_entry, stack);
672
673 /* We are walking an acyclic graph, so edge_num counts
674 succ and pred edges together. However when considering
675 articulation, we want to have processed everything reachable
676 from articulation but nothing that reaches into it. */
677 if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
678 && entry->bb != ENTRY_BLOCK_PTR)
679 {
680 int pos = VEC_length (stack_entry, stack);
681 entry->can_split &= visit_bb (entry->bb, return_bb,
682 entry->set_ssa_names,
683 entry->used_ssa_names,
684 entry->non_ssa_vars);
685 if (pos <= entry->earliest && !entry->can_split
686 && dump_file && (dump_flags & TDF_DETAILS))
687 fprintf (dump_file,
688 "found articulation at bb %i but can not split\n",
689 entry->bb->index);
690 if (pos <= entry->earliest && entry->can_split)
691 {
692 if (dump_file && (dump_flags & TDF_DETAILS))
693 fprintf (dump_file, "found articulation at bb %i\n",
694 entry->bb->index);
695 current.entry_bb = entry->bb;
696 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
697 bitmap_and_compl (current.ssa_names_to_pass,
698 entry->used_ssa_names, entry->set_ssa_names);
699 current.header_time = overall_time - entry->overall_time;
700 current.header_size = overall_size - entry->overall_size;
701 current.split_time = entry->overall_time;
702 current.split_size = entry->overall_size;
703 current.split_bbs = entry->bbs_visited;
704 consider_split (&current, entry->non_ssa_vars, return_bb);
705 BITMAP_FREE (current.ssa_names_to_pass);
706 }
707 }
708 /* Do actual DFS walk. */
709 if (entry->edge_num
710 < (EDGE_COUNT (entry->bb->succs)
711 + EDGE_COUNT (entry->bb->preds)))
712 {
713 edge e;
714 basic_block dest;
715 if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
716 {
717 e = EDGE_SUCC (entry->bb, entry->edge_num);
718 dest = e->dest;
719 }
720 else
721 {
722 e = EDGE_PRED (entry->bb, entry->edge_num
723 - EDGE_COUNT (entry->bb->succs));
724 dest = e->src;
725 }
726
727 entry->edge_num++;
728
729 /* New BB to visit, push it to the stack. */
730 if (dest != return_bb && dest != EXIT_BLOCK_PTR
731 && !dest->aux)
732 {
733 stack_entry new_entry;
734
735 new_entry.bb = dest;
736 new_entry.edge_num = 0;
737 new_entry.overall_time
738 = VEC_index (bb_info, bb_info_vec, dest->index)->time;
739 new_entry.overall_size
740 = VEC_index (bb_info, bb_info_vec, dest->index)->size;
741 new_entry.earliest = INT_MAX;
742 new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
743 new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
744 new_entry.bbs_visited = BITMAP_ALLOC (NULL);
745 new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
746 new_entry.can_split = true;
747 bitmap_set_bit (new_entry.bbs_visited, dest->index);
748 VEC_safe_push (stack_entry, heap, stack, &new_entry);
749 dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
750 }
751 /* Back edge found, record the earliest point. */
752 else if ((intptr_t)dest->aux > 0
753 && (intptr_t)dest->aux < entry->earliest)
754 entry->earliest = (intptr_t)dest->aux;
755 }
756 /* We are done with examing the edges. pop off the value from stack and
757 merge stuff we cummulate during the walk. */
758 else if (entry->bb != ENTRY_BLOCK_PTR)
759 {
760 stack_entry *prev = VEC_index (stack_entry, stack,
761 VEC_length (stack_entry, stack) - 2);
762
763 entry->bb->aux = (void *)(intptr_t)-1;
764 prev->can_split &= entry->can_split;
765 if (prev->set_ssa_names)
766 {
767 bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
768 bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
769 bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
770 bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
771 }
772 if (prev->earliest > entry->earliest)
773 prev->earliest = entry->earliest;
774 prev->overall_time += entry->overall_time;
775 prev->overall_size += entry->overall_size;
776 BITMAP_FREE (entry->set_ssa_names);
777 BITMAP_FREE (entry->used_ssa_names);
778 BITMAP_FREE (entry->bbs_visited);
779 BITMAP_FREE (entry->non_ssa_vars);
780 VEC_pop (stack_entry, stack);
781 }
782 else
783 VEC_pop (stack_entry, stack);
784 }
785 ENTRY_BLOCK_PTR->aux = NULL;
786 FOR_EACH_BB (bb)
787 bb->aux = NULL;
788 BITMAP_FREE (current.ssa_names_to_pass);
789}
790
791/* Split function at SPLIT_POINT. */
792
793static void
794split_function (struct split_point *split_point)
795{
796 VEC (tree, heap) *args_to_pass = NULL;
797 bitmap args_to_skip = BITMAP_ALLOC (NULL);
798 tree parm;
799 int num = 0;
800 struct cgraph_node *node;
801 basic_block return_bb = find_return_bb ();
802 basic_block call_bb;
803 gimple_stmt_iterator gsi;
804 gimple call;
805 edge e;
806 edge_iterator ei;
807 tree retval = NULL, real_retval = NULL;
808 bool split_part_return_p = false;
809 gimple last_stmt = NULL;
810
811 if (dump_file)
812 {
813 fprintf (dump_file, "\n\nSplitting function at:\n");
814 dump_split_point (dump_file, split_point);
815 }
816
817 /* Collect the parameters of new function and args_to_skip bitmap. */
818 for (parm = DECL_ARGUMENTS (current_function_decl);
819 parm; parm = TREE_CHAIN (parm), num++)
820 if (!is_gimple_reg (parm)
821 || !gimple_default_def (cfun, parm)
822 || !bitmap_bit_p (split_point->ssa_names_to_pass,
823 SSA_NAME_VERSION (gimple_default_def (cfun, parm))))
824 bitmap_set_bit (args_to_skip, num);
825 else
826 VEC_safe_push (tree, heap, args_to_pass, gimple_default_def (cfun, parm));
827
828 /* See if the split function will return. */
829 FOR_EACH_EDGE (e, ei, return_bb->preds)
830 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
831 break;
832 if (e)
833 split_part_return_p = true;
834
835 /* If we return, we will need the return block. */
836 if (return_bb != EXIT_BLOCK_PTR && split_part_return_p)
837 bitmap_set_bit (split_point->split_bbs, return_bb->index);
838
839 /* Now create the actual clone. */
840 rebuild_cgraph_edges ();
841 node = cgraph_function_versioning (cgraph_node (current_function_decl),
842 NULL, NULL,
843 args_to_skip,
844 split_point->split_bbs,
845 split_point->entry_bb, "_part");
d402c33d
JH
846 /* For usual cloning it is enough to clear builtin only when signature
847 changes. For partial inlining we however can not expect the part
848 of builtin implementation to have same semantic as the whole. */
849 if (DECL_BUILT_IN (node->decl))
850 {
851 DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN;
852 DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0;
853 }
3e485f62
JH
854 cgraph_node_remove_callees (cgraph_node (current_function_decl));
855 if (!split_part_return_p)
856 TREE_THIS_VOLATILE (node->decl) = 1;
857 if (dump_file)
858 dump_function_to_file (node->decl, dump_file, dump_flags);
859
860 /* Create the basic block we place call into. It is the entry basic block
861 split after last label. */
862 call_bb = split_point->entry_bb;
863 for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
864 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
865 {
866 last_stmt = gsi_stmt (gsi);
867 gsi_next (&gsi);
868 }
869 else
870 break;
871 e = split_block (split_point->entry_bb, last_stmt);
872 remove_edge (e);
873
874 /* Produce the call statement. */
875 gsi = gsi_last_bb (call_bb);
876 call = gimple_build_call_vec (node->decl, args_to_pass);
877 gimple_set_block (call, DECL_INITIAL (current_function_decl));
878
879 /* Update return value. This is bit tricky. When we do not return,
880 do nothing. When we return we might need to update return_bb
881 or produce a new return statement. */
882 if (!split_part_return_p)
883 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
884 else
885 {
886 e = make_edge (call_bb, return_bb,
887 return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
888 e->count = call_bb->count;
889 e->probability = REG_BR_PROB_BASE;
890 if (return_bb != EXIT_BLOCK_PTR)
891 {
892 gimple return_stmt = gsi_stmt (gsi_last_bb (return_bb));
893 gcc_assert (gimple_code (return_stmt) == GIMPLE_RETURN);
894
895 if ((real_retval = retval = gimple_return_retval (return_stmt))
896 && !is_gimple_min_invariant (retval)
897 && (TREE_CODE (retval) != SSA_NAME
898 || !SSA_NAME_IS_DEFAULT_DEF (retval)))
899 {
900 gimple_stmt_iterator psi;
901
902 /* See if there is PHI definind return value. */
903 for (psi = gsi_start_phis (return_bb);
904 !gsi_end_p (psi); gsi_next (&psi))
905 if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))))
906 break;
907
908 /* When we have PHI, update PHI. When there is no PHI,
909 update the return statement itself. */
910 if (TREE_CODE (retval) == SSA_NAME)
911 {
912 retval = make_ssa_name (SSA_NAME_VAR (retval), call);
913 if (TREE_CODE (retval) == SSA_NAME
914 && !gsi_end_p (psi))
915 add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
916 else if (TREE_CODE (retval) == SSA_NAME)
917 {
918 gimple_return_set_retval (return_stmt, retval);
919 update_stmt (return_stmt);
920 }
921 }
922 gimple_call_set_lhs (call, retval);
923 }
924 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
925 }
926 else
927 {
928 gimple ret;
929 if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
930 {
931 retval
932 = create_tmp_var (TREE_TYPE (TREE_TYPE (current_function_decl)),
933 "RET");
934 if (is_gimple_reg (retval))
935 retval = make_ssa_name (retval, call);
936 gimple_call_set_lhs (call, retval);
937 }
938 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
939 ret = gimple_build_return (retval);
940 gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
941 }
942 }
943 free_dominance_info (CDI_DOMINATORS);
944 free_dominance_info (CDI_POST_DOMINATORS);
945 compute_inline_parameters (node);
946}
947
948/* Execute function splitting pass. */
949
950static unsigned int
951execute_split_functions (void)
952{
953 gimple_stmt_iterator bsi;
954 basic_block bb;
955 int overall_time = 0, overall_size = 0;
956 int todo = 0;
957 struct cgraph_node *node = cgraph_node (current_function_decl);
958
959 if (flags_from_decl_or_type (current_function_decl) & ECF_NORETURN)
960 {
961 if (dump_file)
962 fprintf (dump_file, "Not splitting: noreturn function.\n");
963 return 0;
964 }
965 if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
966 {
967 if (dump_file)
968 fprintf (dump_file, "Not splitting: main function.\n");
969 return 0;
970 }
971 /* This can be relaxed; function might become inlinable after splitting
972 away the uninlinable part. */
973 if (!node->local.inlinable)
974 {
975 if (dump_file)
976 fprintf (dump_file, "Not splitting: not inlinable.\n");
977 return 0;
978 }
979 if (node->local.disregard_inline_limits)
980 {
981 if (dump_file)
982 fprintf (dump_file, "Not splitting: disregading inline limits.\n");
983 return 0;
984 }
985 /* This can be relaxed; most of versioning tests actually prevents
986 a duplication. */
987 if (!tree_versionable_function_p (current_function_decl))
988 {
989 if (dump_file)
990 fprintf (dump_file, "Not splitting: not versionable.\n");
991 return 0;
992 }
993 /* FIXME: we could support this. */
994 if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
995 {
996 if (dump_file)
997 fprintf (dump_file, "Not splitting: nested function.\n");
998 return 0;
999 }
1000 /* FIXME: Should be easy to support. */
1001 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1002 {
1003 if (dump_file)
1004 fprintf (dump_file, "Not splitting: returns value by reference.\n");
1005 return 0;
1006 }
1007
1008 /* See if it makes sense to try to split.
1009 It makes sense to split if we inline, that is if we have direct calls to
1010 handle or direct calls are possibly going to appear as result of indirect
1011 inlining or LTO.
1012 Note that we are not completely conservative about disqualifying functions
1013 called once. It is possible that the caller is called more then once and
1014 then inlining would still benefit. */
1015 if ((!node->callers || !node->callers->next_caller)
1016 && !node->address_taken
1017 && ((!flag_lto && !flag_whopr) || !node->local.externally_visible))
1018 {
1019 if (dump_file)
1020 fprintf (dump_file, "Not splitting: not called directly "
1021 "or called once.\n");
1022 return 0;
1023 }
1024
1025 /* FIXME: We can actually split if splitting reduces call overhead. */
1026 if (!flag_inline_small_functions
1027 && !DECL_DECLARED_INLINE_P (current_function_decl))
1028 {
1029 if (dump_file)
1030 fprintf (dump_file, "Not splitting: not autoinlining and function"
1031 " is not inline.\n");
1032 return 0;
1033 }
1034
1035 /* Compute local info about basic blocks and determine function size/time. */
1036 VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
1037 memset (&best_split_point, 0, sizeof (best_split_point));
1038 FOR_EACH_BB (bb)
1039 {
1040 int time = 0;
1041 int size = 0;
1042 int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);
1043
1044 if (dump_file && (dump_flags & TDF_DETAILS))
1045 fprintf (dump_file, "Basic block %i\n", bb->index);
1046
1047 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1048 {
1049 int this_time, this_size;
1050 gimple stmt = gsi_stmt (bsi);
1051
1052 this_size = estimate_num_insns (stmt, &eni_size_weights);
1053 this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
1054 size += this_size;
1055 time += this_time;
1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 {
1059 fprintf (dump_file, " freq:%6i size:%3i time:%3i ",
1060 freq, this_size, this_time);
1061 print_gimple_stmt (dump_file, stmt, 0, 0);
1062 }
1063 }
1064 overall_time += time;
1065 overall_size += size;
1066 VEC_index (bb_info, bb_info_vec, bb->index)->time = time;
1067 VEC_index (bb_info, bb_info_vec, bb->index)->size = size;
1068 }
1069 find_split_points (overall_time, overall_size);
1070 if (best_split_point.split_bbs)
1071 {
1072 split_function (&best_split_point);
1073 BITMAP_FREE (best_split_point.ssa_names_to_pass);
1074 BITMAP_FREE (best_split_point.split_bbs);
1075 todo = TODO_update_ssa | TODO_cleanup_cfg;
1076 }
1077 VEC_free (bb_info, heap, bb_info_vec);
1078 bb_info_vec = NULL;
1079 return todo;
1080}
1081
1082static bool
1083gate_split_functions (void)
1084{
1085 return flag_partial_inlining;
1086}
1087
1088struct gimple_opt_pass pass_split_functions =
1089{
1090 {
1091 GIMPLE_PASS,
1092 "fnsplit", /* name */
1093 gate_split_functions, /* gate */
1094 execute_split_functions, /* execute */
1095 NULL, /* sub */
1096 NULL, /* next */
1097 0, /* static_pass_number */
1098 TV_IPA_FNSPLIT, /* tv_id */
1099 PROP_cfg, /* properties_required */
1100 0, /* properties_provided */
1101 0, /* properties_destroyed */
1102 0, /* todo_flags_start */
1103 TODO_dump_func /* todo_flags_finish */
1104 }
1105};