1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
3 2012 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This (greedy) algorithm constructs traces in several rounds.
22 The construction starts from "seeds". The seed for the first round
23 is the entry point of the function. When there is more than one seed,
24 the one with the lowest key in the heap is selected first
25 (see function bb_to_key). Then the algorithm repeatedly adds the most
26 probable successor to the end of a trace. Finally it connects the traces.
28 There are two parameters: Branch Threshold and Exec Threshold.
29 If the edge to a successor of the actual basic block is lower than
30 Branch Threshold or the frequency of the successor is lower than
31 Exec Threshold the successor will be the seed in one of the next rounds.
32 Each round has these parameters lower than the previous one.
33 The last round has to have these parameters set to zero
34 so that the remaining blocks are picked up.
36 The algorithm selects the most probable successor from all unvisited
37 successors and successors that have been added to this trace.
38 The other successors (that have not been "sent" to the next round) will be
39 other seeds for this round and the secondary traces will start in them.
40 If the successor has not been visited in this trace it is added to the trace
41 (however, there is some heuristic for simple branches).
42 If the successor has been visited in this trace the loop has been found.
43 If the loop has many iterations the loop is rotated so that the
44 source block of the most probable edge going out from the loop
45 is the last block of the trace.
46 If the loop has few iterations and there is no edge from the last block of
47 the loop going out of the loop, the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
56 The rest of traces are simply connected so there will be a jump to the
57 beginning of the rest of trace.
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
70 #include "coretypes.h"
84 #include "diagnostic-core.h"
85 #include "toplev.h" /* user_defined_section_attribute */
86 #include "tree-pass.h"
88 #include "bb-reorder.h"
91 /* The number of rounds. In most cases there will only be 4 rounds, but
92 when partitioning hot and cold basic blocks into separate sections of
93 the .o file there will be an extra round.*/
96 /* Stubs in case we don't have a return insn.
97 We have to check at runtime too, not only compiletime. */
100 #define HAVE_return 0
101 #define gen_return() NULL_RTX
105 struct target_bb_reorder default_target_bb_reorder
;
106 #if SWITCHABLE_TARGET
107 struct target_bb_reorder
*this_target_bb_reorder
= &default_target_bb_reorder
;
110 #define uncond_jump_length \
111 (this_target_bb_reorder->x_uncond_jump_length)
113 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
114 static int branch_threshold
[N_ROUNDS
] = {400, 200, 100, 0, 0};
116 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
117 static int exec_threshold
[N_ROUNDS
] = {500, 200, 50, 0, 0};
119 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
120 block the edge destination is not duplicated while connecting traces. */
121 #define DUPLICATION_THRESHOLD 100
123 /* Structure to hold needed information for each basic block. */
124 typedef struct bbro_basic_block_data_def
126 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
129 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
132 /* Which trace is the bb in? */
135 /* Which trace was this bb visited in? */
138 /* Which heap is BB in (if any)? */
141 /* Which heap node is BB in (if any)? */
143 } bbro_basic_block_data
;
145 /* The current size of the following dynamic array. */
146 static int array_size
;
148 /* The array which holds needed information for basic blocks. */
149 static bbro_basic_block_data
*bbd
;
151 /* To avoid frequent reallocation the size of arrays is greater than needed,
152 the number of elements is (not less than) 1.25 * size_wanted. */
153 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
155 /* Free the memory and set the pointer to NULL. */
156 #define FREE(P) (gcc_assert (P), free (P), P = 0)
158 /* Structure for holding information about a trace. */
161 /* First and last basic block of the trace. */
162 basic_block first
, last
;
164 /* The round of the STC creation which this trace was found in. */
167 /* The length (i.e. the number of basic blocks) of the trace. */
171 /* Maximum frequency and count of one of the entry blocks. */
172 static int max_entry_frequency
;
173 static gcov_type max_entry_count
;
175 /* Local function prototypes. */
176 static void find_traces (int *, struct trace
*);
177 static basic_block
rotate_loop (edge
, struct trace
*, int);
178 static void mark_bb_visited (basic_block
, int);
179 static void find_traces_1_round (int, int, gcov_type
, struct trace
*, int *,
180 int, fibheap_t
*, int);
181 static basic_block
copy_bb (basic_block
, edge
, basic_block
, int);
182 static fibheapkey_t
bb_to_key (basic_block
);
183 static bool better_edge_p (const_basic_block
, const_edge
, int, int, int, int, const_edge
);
184 static void connect_traces (int, struct trace
*);
185 static bool copy_bb_p (const_basic_block
, int);
186 static bool push_to_next_round_p (const_basic_block
, int, int, int, gcov_type
);
188 /* Return the trace number in which BB was visited. */
191 bb_visited_trace (const_basic_block bb
)
193 gcc_assert (bb
->index
< array_size
);
194 return bbd
[bb
->index
].visited
;
197 /* This function marks BB that it was visited in trace number TRACE. */
200 mark_bb_visited (basic_block bb
, int trace
)
202 bbd
[bb
->index
].visited
= trace
;
203 if (bbd
[bb
->index
].heap
)
205 fibheap_delete_node (bbd
[bb
->index
].heap
, bbd
[bb
->index
].node
);
206 bbd
[bb
->index
].heap
= NULL
;
207 bbd
[bb
->index
].node
= NULL
;
211 /* Check to see if bb should be pushed into the next round of trace
212 collections or not. Reasons for pushing the block forward are 1).
213 If the block is cold, we are doing partitioning, and there will be
214 another round (cold partition blocks are not supposed to be
215 collected into traces until the very last round); or 2). There will
216 be another round, and the basic block is not "hot enough" for the
217 current round of trace collection. */
220 push_to_next_round_p (const_basic_block bb
, int round
, int number_of_rounds
,
221 int exec_th
, gcov_type count_th
)
223 bool there_exists_another_round
;
224 bool block_not_hot_enough
;
226 there_exists_another_round
= round
< number_of_rounds
- 1;
228 block_not_hot_enough
= (bb
->frequency
< exec_th
229 || bb
->count
< count_th
230 || probably_never_executed_bb_p (bb
));
232 if (there_exists_another_round
233 && block_not_hot_enough
)
239 /* Find the traces for Software Trace Cache. Chain each trace through
240 RBI()->next. Store the number of traces to N_TRACES and description of
244 find_traces (int *n_traces
, struct trace
*traces
)
247 int number_of_rounds
;
252 /* Add one extra round of trace collection when partitioning hot/cold
253 basic blocks into separate sections. The last round is for all the
254 cold blocks (and ONLY the cold blocks). */
256 number_of_rounds
= N_ROUNDS
- 1;
258 /* Insert entry points of function into heap. */
259 heap
= fibheap_new ();
260 max_entry_frequency
= 0;
262 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
264 bbd
[e
->dest
->index
].heap
= heap
;
265 bbd
[e
->dest
->index
].node
= fibheap_insert (heap
, bb_to_key (e
->dest
),
267 if (e
->dest
->frequency
> max_entry_frequency
)
268 max_entry_frequency
= e
->dest
->frequency
;
269 if (e
->dest
->count
> max_entry_count
)
270 max_entry_count
= e
->dest
->count
;
273 /* Find the traces. */
274 for (i
= 0; i
< number_of_rounds
; i
++)
276 gcov_type count_threshold
;
279 fprintf (dump_file
, "STC - round %d\n", i
+ 1);
281 if (max_entry_count
< INT_MAX
/ 1000)
282 count_threshold
= max_entry_count
* exec_threshold
[i
] / 1000;
284 count_threshold
= max_entry_count
/ 1000 * exec_threshold
[i
];
286 find_traces_1_round (REG_BR_PROB_BASE
* branch_threshold
[i
] / 1000,
287 max_entry_frequency
* exec_threshold
[i
] / 1000,
288 count_threshold
, traces
, n_traces
, i
, &heap
,
291 fibheap_delete (heap
);
295 for (i
= 0; i
< *n_traces
; i
++)
298 fprintf (dump_file
, "Trace %d (round %d): ", i
+ 1,
299 traces
[i
].round
+ 1);
300 for (bb
= traces
[i
].first
; bb
!= traces
[i
].last
; bb
= (basic_block
) bb
->aux
)
301 fprintf (dump_file
, "%d [%d] ", bb
->index
, bb
->frequency
);
302 fprintf (dump_file
, "%d [%d]\n", bb
->index
, bb
->frequency
);
308 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
309 (with sequential number TRACE_N). */
312 rotate_loop (edge back_edge
, struct trace
*trace
, int trace_n
)
316 /* Information about the best end (end after rotation) of the loop. */
317 basic_block best_bb
= NULL
;
318 edge best_edge
= NULL
;
320 gcov_type best_count
= -1;
321 /* The best edge is preferred when its destination is not visited yet
322 or is a start block of some trace. */
323 bool is_preferred
= false;
325 /* Find the most frequent edge that goes out from current trace. */
326 bb
= back_edge
->dest
;
332 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
333 if (e
->dest
!= EXIT_BLOCK_PTR
334 && bb_visited_trace (e
->dest
) != trace_n
335 && (e
->flags
& EDGE_CAN_FALLTHRU
)
336 && !(e
->flags
& EDGE_COMPLEX
))
340 /* The best edge is preferred. */
341 if (!bb_visited_trace (e
->dest
)
342 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
344 /* The current edge E is also preferred. */
345 int freq
= EDGE_FREQUENCY (e
);
346 if (freq
> best_freq
|| e
->count
> best_count
)
349 best_count
= e
->count
;
357 if (!bb_visited_trace (e
->dest
)
358 || bbd
[e
->dest
->index
].start_of_trace
>= 0)
360 /* The current edge E is preferred. */
362 best_freq
= EDGE_FREQUENCY (e
);
363 best_count
= e
->count
;
369 int freq
= EDGE_FREQUENCY (e
);
370 if (!best_edge
|| freq
> best_freq
|| e
->count
> best_count
)
373 best_count
= e
->count
;
380 bb
= (basic_block
) bb
->aux
;
382 while (bb
!= back_edge
->dest
);
386 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
388 if (back_edge
->dest
== trace
->first
)
390 trace
->first
= (basic_block
) best_bb
->aux
;
396 for (prev_bb
= trace
->first
;
397 prev_bb
->aux
!= back_edge
->dest
;
398 prev_bb
= (basic_block
) prev_bb
->aux
)
400 prev_bb
->aux
= best_bb
->aux
;
402 /* Try to get rid of uncond jump to cond jump. */
403 if (single_succ_p (prev_bb
))
405 basic_block header
= single_succ (prev_bb
);
407 /* Duplicate HEADER if it is a small block containing cond jump
409 if (any_condjump_p (BB_END (header
)) && copy_bb_p (header
, 0)
410 && !find_reg_note (BB_END (header
), REG_CROSSING_JUMP
,
412 copy_bb (header
, single_succ_edge (prev_bb
), prev_bb
, trace_n
);
418 /* We have not found suitable loop tail so do no rotation. */
419 best_bb
= back_edge
->src
;
425 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
426 not include basic blocks whose probability is lower than BRANCH_TH or whose
427 frequency is lower than EXEC_TH into traces (or count is lower than
428 COUNT_TH). It stores the new traces into TRACES and modifies the number of
429 traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
430 expects that starting basic blocks are in *HEAP and at the end it deletes
431 *HEAP and stores starting points for the next round into new *HEAP. */
434 find_traces_1_round (int branch_th
, int exec_th
, gcov_type count_th
,
435 struct trace
*traces
, int *n_traces
, int round
,
436 fibheap_t
*heap
, int number_of_rounds
)
438 /* Heap for discarded basic blocks which are possible starting points for
440 fibheap_t new_heap
= fibheap_new ();
442 while (!fibheap_empty (*heap
))
450 bb
= (basic_block
) fibheap_extract_min (*heap
);
451 bbd
[bb
->index
].heap
= NULL
;
452 bbd
[bb
->index
].node
= NULL
;
455 fprintf (dump_file
, "Getting bb %d\n", bb
->index
);
457 /* If the BB's frequency is too low send BB to the next round. When
458 partitioning hot/cold blocks into separate sections, make sure all
459 the cold blocks (and ONLY the cold blocks) go into the (extra) final
462 if (push_to_next_round_p (bb
, round
, number_of_rounds
, exec_th
,
465 int key
= bb_to_key (bb
);
466 bbd
[bb
->index
].heap
= new_heap
;
467 bbd
[bb
->index
].node
= fibheap_insert (new_heap
, key
, bb
);
471 " Possible start point of next round: %d (key: %d)\n",
476 trace
= traces
+ *n_traces
;
478 trace
->round
= round
;
480 bbd
[bb
->index
].in_trace
= *n_traces
;
488 /* The probability and frequency of the best edge. */
489 int best_prob
= INT_MIN
/ 2;
490 int best_freq
= INT_MIN
/ 2;
493 mark_bb_visited (bb
, *n_traces
);
497 fprintf (dump_file
, "Basic block %d was visited in trace %d\n",
498 bb
->index
, *n_traces
- 1);
500 ends_in_call
= block_ends_with_call_p (bb
);
502 /* Select the successor that will be placed after BB. */
503 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
505 gcc_assert (!(e
->flags
& EDGE_FAKE
));
507 if (e
->dest
== EXIT_BLOCK_PTR
)
510 if (bb_visited_trace (e
->dest
)
511 && bb_visited_trace (e
->dest
) != *n_traces
)
514 if (BB_PARTITION (e
->dest
) != BB_PARTITION (bb
))
517 prob
= e
->probability
;
518 freq
= e
->dest
->frequency
;
520 /* The only sensible preference for a call instruction is the
521 fallthru edge. Don't bother selecting anything else. */
524 if (e
->flags
& EDGE_CAN_FALLTHRU
)
533 /* Edge that cannot be fallthru or improbable or infrequent
534 successor (i.e. it is unsuitable successor). */
535 if (!(e
->flags
& EDGE_CAN_FALLTHRU
) || (e
->flags
& EDGE_COMPLEX
)
536 || prob
< branch_th
|| EDGE_FREQUENCY (e
) < exec_th
537 || e
->count
< count_th
)
540 /* If partitioning hot/cold basic blocks, don't consider edges
541 that cross section boundaries. */
543 if (better_edge_p (bb
, e
, prob
, freq
, best_prob
, best_freq
,
552 /* If the best destination has multiple predecessors, and can be
553 duplicated cheaper than a jump, don't allow it to be added
554 to a trace. We'll duplicate it when connecting traces. */
555 if (best_edge
&& EDGE_COUNT (best_edge
->dest
->preds
) >= 2
556 && copy_bb_p (best_edge
->dest
, 0))
559 /* Add all non-selected successors to the heaps. */
560 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
563 || e
->dest
== EXIT_BLOCK_PTR
564 || bb_visited_trace (e
->dest
))
567 key
= bb_to_key (e
->dest
);
569 if (bbd
[e
->dest
->index
].heap
)
571 /* E->DEST is already in some heap. */
572 if (key
!= bbd
[e
->dest
->index
].node
->key
)
577 "Changing key for bb %d from %ld to %ld.\n",
579 (long) bbd
[e
->dest
->index
].node
->key
,
582 fibheap_replace_key (bbd
[e
->dest
->index
].heap
,
583 bbd
[e
->dest
->index
].node
, key
);
588 fibheap_t which_heap
= *heap
;
590 prob
= e
->probability
;
591 freq
= EDGE_FREQUENCY (e
);
593 if (!(e
->flags
& EDGE_CAN_FALLTHRU
)
594 || (e
->flags
& EDGE_COMPLEX
)
595 || prob
< branch_th
|| freq
< exec_th
596 || e
->count
< count_th
)
598 /* When partitioning hot/cold basic blocks, make sure
599 the cold blocks (and only the cold blocks) all get
600 pushed to the last round of trace collection. */
602 if (push_to_next_round_p (e
->dest
, round
,
605 which_heap
= new_heap
;
608 bbd
[e
->dest
->index
].heap
= which_heap
;
609 bbd
[e
->dest
->index
].node
= fibheap_insert (which_heap
,
615 " Possible start of %s round: %d (key: %ld)\n",
616 (which_heap
== new_heap
) ? "next" : "this",
617 e
->dest
->index
, (long) key
);
623 if (best_edge
) /* Suitable successor was found. */
625 if (bb_visited_trace (best_edge
->dest
) == *n_traces
)
627 /* We do nothing with one basic block loops. */
628 if (best_edge
->dest
!= bb
)
630 if (EDGE_FREQUENCY (best_edge
)
631 > 4 * best_edge
->dest
->frequency
/ 5)
633 /* The loop has at least 4 iterations. If the loop
634 header is not the first block of the function
635 we can rotate the loop. */
637 if (best_edge
->dest
!= ENTRY_BLOCK_PTR
->next_bb
)
642 "Rotating loop %d - %d\n",
643 best_edge
->dest
->index
, bb
->index
);
645 bb
->aux
= best_edge
->dest
;
646 bbd
[best_edge
->dest
->index
].in_trace
=
648 bb
= rotate_loop (best_edge
, trace
, *n_traces
);
653 /* The loop has less than 4 iterations. */
655 if (single_succ_p (bb
)
656 && copy_bb_p (best_edge
->dest
,
657 optimize_edge_for_speed_p (best_edge
)))
659 bb
= copy_bb (best_edge
->dest
, best_edge
, bb
,
666 /* Terminate the trace. */
671 /* Check for a situation
680 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
681 >= EDGE_FREQUENCY (AC).
682 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
683 Best ordering is then A B C.
685 This situation is created for example by:
692 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
694 && (e
->flags
& EDGE_CAN_FALLTHRU
)
695 && !(e
->flags
& EDGE_COMPLEX
)
696 && !bb_visited_trace (e
->dest
)
697 && single_pred_p (e
->dest
)
698 && !(e
->flags
& EDGE_CROSSING
)
699 && single_succ_p (e
->dest
)
700 && (single_succ_edge (e
->dest
)->flags
702 && !(single_succ_edge (e
->dest
)->flags
& EDGE_COMPLEX
)
703 && single_succ (e
->dest
) == best_edge
->dest
704 && 2 * e
->dest
->frequency
>= EDGE_FREQUENCY (best_edge
))
708 fprintf (dump_file
, "Selecting BB %d\n",
709 best_edge
->dest
->index
);
713 bb
->aux
= best_edge
->dest
;
714 bbd
[best_edge
->dest
->index
].in_trace
= (*n_traces
) - 1;
715 bb
= best_edge
->dest
;
721 bbd
[trace
->first
->index
].start_of_trace
= *n_traces
- 1;
722 bbd
[trace
->last
->index
].end_of_trace
= *n_traces
- 1;
724 /* The trace is terminated so we have to recount the keys in heap
725 (some block can have a lower key because now one of its predecessors
726 is an end of the trace). */
727 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
729 if (e
->dest
== EXIT_BLOCK_PTR
730 || bb_visited_trace (e
->dest
))
733 if (bbd
[e
->dest
->index
].heap
)
735 key
= bb_to_key (e
->dest
);
736 if (key
!= bbd
[e
->dest
->index
].node
->key
)
741 "Changing key for bb %d from %ld to %ld.\n",
743 (long) bbd
[e
->dest
->index
].node
->key
, key
);
745 fibheap_replace_key (bbd
[e
->dest
->index
].heap
,
746 bbd
[e
->dest
->index
].node
,
753 fibheap_delete (*heap
);
755 /* "Return" the new heap. */
759 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
760 it to trace after BB, mark OLD_BB visited and update pass' data structures
761 (TRACE is a number of trace which OLD_BB is duplicated to). */
764 copy_bb (basic_block old_bb
, edge e
, basic_block bb
, int trace
)
768 new_bb
= duplicate_block (old_bb
, e
, bb
);
769 BB_COPY_PARTITION (new_bb
, old_bb
);
771 gcc_assert (e
->dest
== new_bb
);
775 "Duplicated bb %d (created bb %d)\n",
776 old_bb
->index
, new_bb
->index
);
778 if (new_bb
->index
>= array_size
|| last_basic_block
> array_size
)
783 new_size
= MAX (last_basic_block
, new_bb
->index
+ 1);
784 new_size
= GET_ARRAY_SIZE (new_size
);
785 bbd
= XRESIZEVEC (bbro_basic_block_data
, bbd
, new_size
);
786 for (i
= array_size
; i
< new_size
; i
++)
788 bbd
[i
].start_of_trace
= -1;
789 bbd
[i
].end_of_trace
= -1;
790 bbd
[i
].in_trace
= -1;
795 array_size
= new_size
;
800 "Growing the dynamic array to %d elements.\n",
805 gcc_assert (!bb_visited_trace (e
->dest
));
806 mark_bb_visited (new_bb
, trace
);
807 new_bb
->aux
= bb
->aux
;
810 bbd
[new_bb
->index
].in_trace
= trace
;
815 /* Compute and return the key (for the heap) of the basic block BB. */
818 bb_to_key (basic_block bb
)
824 /* Do not start in probably never executed blocks. */
826 if (BB_PARTITION (bb
) == BB_COLD_PARTITION
827 || probably_never_executed_bb_p (bb
))
830 /* Prefer blocks whose predecessor is an end of some trace
831 or whose predecessor edge is EDGE_DFS_BACK. */
832 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
834 if ((e
->src
!= ENTRY_BLOCK_PTR
&& bbd
[e
->src
->index
].end_of_trace
>= 0)
835 || (e
->flags
& EDGE_DFS_BACK
))
837 int edge_freq
= EDGE_FREQUENCY (e
);
839 if (edge_freq
> priority
)
840 priority
= edge_freq
;
845 /* The block with priority should have significantly lower key. */
846 return -(100 * BB_FREQ_MAX
+ 100 * priority
+ bb
->frequency
);
847 return -bb
->frequency
;
850 /* Return true when the edge E from basic block BB is better than the temporary
851 best edge (details are in function). The probability of edge E is PROB. The
852 frequency of the successor is FREQ. The current best probability is
853 BEST_PROB, the best frequency is BEST_FREQ.
854 The edge is considered to be equivalent when PROB does not differ much from
855 BEST_PROB; similarly for frequency. */
858 better_edge_p (const_basic_block bb
, const_edge e
, int prob
, int freq
, int best_prob
,
859 int best_freq
, const_edge cur_best_edge
)
863 /* The BEST_* values do not have to be best, but can be a bit smaller than
865 int diff_prob
= best_prob
/ 10;
866 int diff_freq
= best_freq
/ 10;
868 if (prob
> best_prob
+ diff_prob
)
869 /* The edge has higher probability than the temporary best edge. */
870 is_better_edge
= true;
871 else if (prob
< best_prob
- diff_prob
)
872 /* The edge has lower probability than the temporary best edge. */
873 is_better_edge
= false;
874 else if (freq
< best_freq
- diff_freq
)
875 /* The edge and the temporary best edge have almost equivalent
876 probabilities. The higher frequency of a successor now means
877 that there is another edge going into that successor.
878 This successor has lower frequency so it is better. */
879 is_better_edge
= true;
880 else if (freq
> best_freq
+ diff_freq
)
881 /* This successor has higher frequency so it is worse. */
882 is_better_edge
= false;
883 else if (e
->dest
->prev_bb
== bb
)
884 /* The edges have equivalent probabilities and the successors
885 have equivalent frequencies. Select the previous successor. */
886 is_better_edge
= true;
888 is_better_edge
= false;
890 /* If we are doing hot/cold partitioning, make sure that we always favor
891 non-crossing edges over crossing edges. */
894 && flag_reorder_blocks_and_partition
896 && (cur_best_edge
->flags
& EDGE_CROSSING
)
897 && !(e
->flags
& EDGE_CROSSING
))
898 is_better_edge
= true;
900 return is_better_edge
;
903 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
906 connect_traces (int n_traces
, struct trace
*traces
)
913 int current_partition
;
915 gcov_type count_threshold
;
917 freq_threshold
= max_entry_frequency
* DUPLICATION_THRESHOLD
/ 1000;
918 if (max_entry_count
< INT_MAX
/ 1000)
919 count_threshold
= max_entry_count
* DUPLICATION_THRESHOLD
/ 1000;
921 count_threshold
= max_entry_count
/ 1000 * DUPLICATION_THRESHOLD
;
923 connected
= XCNEWVEC (bool, n_traces
);
926 current_partition
= BB_PARTITION (traces
[0].first
);
929 if (flag_reorder_blocks_and_partition
)
930 for (i
= 0; i
< n_traces
&& !two_passes
; i
++)
931 if (BB_PARTITION (traces
[0].first
)
932 != BB_PARTITION (traces
[i
].first
))
935 for (i
= 0; i
< n_traces
|| (two_passes
&& current_pass
== 1) ; i
++)
944 gcc_assert (two_passes
&& current_pass
== 1);
948 if (current_partition
== BB_HOT_PARTITION
)
949 current_partition
= BB_COLD_PARTITION
;
951 current_partition
= BB_HOT_PARTITION
;
958 && BB_PARTITION (traces
[t
].first
) != current_partition
)
963 /* Find the predecessor traces. */
964 for (t2
= t
; t2
> 0;)
969 FOR_EACH_EDGE (e
, ei
, traces
[t2
].first
->preds
)
971 int si
= e
->src
->index
;
973 if (e
->src
!= ENTRY_BLOCK_PTR
974 && (e
->flags
& EDGE_CAN_FALLTHRU
)
975 && !(e
->flags
& EDGE_COMPLEX
)
976 && bbd
[si
].end_of_trace
>= 0
977 && !connected
[bbd
[si
].end_of_trace
]
978 && (BB_PARTITION (e
->src
) == current_partition
)
980 || e
->probability
> best
->probability
981 || (e
->probability
== best
->probability
982 && traces
[bbd
[si
].end_of_trace
].length
> best_len
)))
985 best_len
= traces
[bbd
[si
].end_of_trace
].length
;
990 best
->src
->aux
= best
->dest
;
991 t2
= bbd
[best
->src
->index
].end_of_trace
;
992 connected
[t2
] = true;
996 fprintf (dump_file
, "Connection: %d %d\n",
997 best
->src
->index
, best
->dest
->index
);
1004 if (last_trace
>= 0)
1005 traces
[last_trace
].last
->aux
= traces
[t2
].first
;
1008 /* Find the successor traces. */
1011 /* Find the continuation of the chain. */
1015 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1017 int di
= e
->dest
->index
;
1019 if (e
->dest
!= EXIT_BLOCK_PTR
1020 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1021 && !(e
->flags
& EDGE_COMPLEX
)
1022 && bbd
[di
].start_of_trace
>= 0
1023 && !connected
[bbd
[di
].start_of_trace
]
1024 && (BB_PARTITION (e
->dest
) == current_partition
)
1026 || e
->probability
> best
->probability
1027 || (e
->probability
== best
->probability
1028 && traces
[bbd
[di
].start_of_trace
].length
> best_len
)))
1031 best_len
= traces
[bbd
[di
].start_of_trace
].length
;
1039 fprintf (dump_file
, "Connection: %d %d\n",
1040 best
->src
->index
, best
->dest
->index
);
1042 t
= bbd
[best
->dest
->index
].start_of_trace
;
1043 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1044 connected
[t
] = true;
1049 /* Try to connect the traces by duplication of 1 block. */
1051 basic_block next_bb
= NULL
;
1052 bool try_copy
= false;
1054 FOR_EACH_EDGE (e
, ei
, traces
[t
].last
->succs
)
1055 if (e
->dest
!= EXIT_BLOCK_PTR
1056 && (e
->flags
& EDGE_CAN_FALLTHRU
)
1057 && !(e
->flags
& EDGE_COMPLEX
)
1058 && (!best
|| e
->probability
> best
->probability
))
1064 /* If the destination is a start of a trace which is only
1065 one block long, then no need to search the successor
1066 blocks of the trace. Accept it. */
1067 if (bbd
[e
->dest
->index
].start_of_trace
>= 0
1068 && traces
[bbd
[e
->dest
->index
].start_of_trace
].length
1076 FOR_EACH_EDGE (e2
, ei
, e
->dest
->succs
)
1078 int di
= e2
->dest
->index
;
1080 if (e2
->dest
== EXIT_BLOCK_PTR
1081 || ((e2
->flags
& EDGE_CAN_FALLTHRU
)
1082 && !(e2
->flags
& EDGE_COMPLEX
)
1083 && bbd
[di
].start_of_trace
>= 0
1084 && !connected
[bbd
[di
].start_of_trace
]
1085 && (BB_PARTITION (e2
->dest
) == current_partition
)
1086 && (EDGE_FREQUENCY (e2
) >= freq_threshold
)
1087 && (e2
->count
>= count_threshold
)
1089 || e2
->probability
> best2
->probability
1090 || (e2
->probability
== best2
->probability
1091 && traces
[bbd
[di
].start_of_trace
].length
1096 if (e2
->dest
!= EXIT_BLOCK_PTR
)
1097 best2_len
= traces
[bbd
[di
].start_of_trace
].length
;
1099 best2_len
= INT_MAX
;
1106 if (flag_reorder_blocks_and_partition
)
1109 /* Copy tiny blocks always; copy larger blocks only when the
1110 edge is traversed frequently enough. */
1112 && copy_bb_p (best
->dest
,
1113 optimize_edge_for_speed_p (best
)
1114 && EDGE_FREQUENCY (best
) >= freq_threshold
1115 && best
->count
>= count_threshold
))
1121 fprintf (dump_file
, "Connection: %d %d ",
1122 traces
[t
].last
->index
, best
->dest
->index
);
1124 fputc ('\n', dump_file
);
1125 else if (next_bb
== EXIT_BLOCK_PTR
)
1126 fprintf (dump_file
, "exit\n");
1128 fprintf (dump_file
, "%d\n", next_bb
->index
);
1131 new_bb
= copy_bb (best
->dest
, best
, traces
[t
].last
, t
);
1132 traces
[t
].last
= new_bb
;
1133 if (next_bb
&& next_bb
!= EXIT_BLOCK_PTR
)
1135 t
= bbd
[next_bb
->index
].start_of_trace
;
1136 traces
[last_trace
].last
->aux
= traces
[t
].first
;
1137 connected
[t
] = true;
1141 break; /* Stop finding the successor traces. */
1144 break; /* Stop finding the successor traces. */
1153 fprintf (dump_file
, "Final order:\n");
1154 for (bb
= traces
[0].first
; bb
; bb
= (basic_block
) bb
->aux
)
1155 fprintf (dump_file
, "%d ", bb
->index
);
1156 fprintf (dump_file
, "\n");
1163 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1164 when code size is allowed to grow by duplication. */
1167 copy_bb_p (const_basic_block bb
, int code_may_grow
)
1170 int max_size
= uncond_jump_length
;
1175 if (EDGE_COUNT (bb
->preds
) < 2)
1177 if (!can_duplicate_block_p (bb
))
1180 /* Avoid duplicating blocks which have many successors (PR/13430). */
1181 if (EDGE_COUNT (bb
->succs
) > 8)
1184 if (code_may_grow
&& optimize_bb_for_speed_p (bb
))
1185 max_size
*= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS
);
1187 FOR_BB_INSNS (bb
, insn
)
1190 size
+= get_attr_min_length (insn
);
1193 if (size
<= max_size
)
1199 "Block %d can't be copied because its size = %d.\n",
1206 /* Return the length of unconditional jump instruction. */
1209 get_uncond_jump_length (void)
1214 label
= emit_label_before (gen_label_rtx (), get_insns ());
1215 jump
= emit_jump_insn (gen_jump (label
));
1217 length
= get_attr_min_length (jump
);
1220 delete_insn (label
);
1224 /* Emit a barrier into the footer of BB. */
1227 emit_barrier_after_bb (basic_block bb
)
1229 rtx barrier
= emit_barrier_after (BB_END (bb
));
1230 BB_FOOTER (bb
) = unlink_insn_chain (barrier
, barrier
);
1233 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1234 Duplicate the landing pad and split the edges so that no EH edge
1235 crosses partitions. */
1238 fix_up_crossing_landing_pad (eh_landing_pad old_lp
, basic_block old_bb
)
1240 eh_landing_pad new_lp
;
1241 basic_block new_bb
, last_bb
, post_bb
;
1242 rtx new_label
, jump
, post_label
;
1243 unsigned new_partition
;
1247 /* Generate the new landing-pad structure. */
1248 new_lp
= gen_eh_landing_pad (old_lp
->region
);
1249 new_lp
->post_landing_pad
= old_lp
->post_landing_pad
;
1250 new_lp
->landing_pad
= gen_label_rtx ();
1251 LABEL_PRESERVE_P (new_lp
->landing_pad
) = 1;
1253 /* Put appropriate instructions in new bb. */
1254 new_label
= emit_label (new_lp
->landing_pad
);
1256 expand_dw2_landing_pad_for_region (old_lp
->region
);
1258 post_bb
= BLOCK_FOR_INSN (old_lp
->landing_pad
);
1259 post_bb
= single_succ (post_bb
);
1260 post_label
= block_label (post_bb
);
1261 jump
= emit_jump_insn (gen_jump (post_label
));
1262 JUMP_LABEL (jump
) = post_label
;
1264 /* Create new basic block to be dest for lp. */
1265 last_bb
= EXIT_BLOCK_PTR
->prev_bb
;
1266 new_bb
= create_basic_block (new_label
, jump
, last_bb
);
1267 new_bb
->aux
= last_bb
->aux
;
1268 last_bb
->aux
= new_bb
;
1270 emit_barrier_after_bb (new_bb
);
1272 make_edge (new_bb
, post_bb
, 0);
1274 /* Make sure new bb is in the other partition. */
1275 new_partition
= BB_PARTITION (old_bb
);
1276 new_partition
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1277 BB_SET_PARTITION (new_bb
, new_partition
);
1279 /* Fix up the edges. */
1280 for (ei
= ei_start (old_bb
->preds
); (e
= ei_safe_edge (ei
)) != NULL
; )
1281 if (BB_PARTITION (e
->src
) == new_partition
)
1283 rtx insn
= BB_END (e
->src
);
1284 rtx note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
1286 gcc_assert (note
!= NULL
);
1287 gcc_checking_assert (INTVAL (XEXP (note
, 0)) == old_lp
->index
);
1288 XEXP (note
, 0) = GEN_INT (new_lp
->index
);
1290 /* Adjust the edge to the new destination. */
1291 redirect_edge_succ (e
, new_bb
);
1297 /* Find the basic blocks that are rarely executed and need to be moved to
1298 a separate section of the .o file (to cut down on paging and improve
1299 cache locality). Return a vector of all edges that cross. */
/* Assign every basic block to the hot or cold partition based on
   profile data, fix landing pads whose throwers ended up in the other
   partition, then flag and collect every section-crossing edge.
   Returns the vector of crossing edges (NULL if none were pushed).  */
1301 static VEC(edge
, heap
) *
1302 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1304 VEC(edge
, heap
) *crossing_edges
= NULL
;
1309 /* Mark which partition (hot/cold) each basic block belongs in. */
1312 if (probably_never_executed_bb_p (bb
))
1313 BB_SET_PARTITION (bb
, BB_COLD_PARTITION
);
1315 BB_SET_PARTITION (bb
, BB_HOT_PARTITION
);
1318 /* The format of .gcc_except_table does not allow landing pads to
1319 be in a different partition than the throw. Fix this by either
1320 moving or duplicating the landing pads. */
1321 if (cfun
->eh
->lp_array
)
1326 FOR_EACH_VEC_ELT (eh_landing_pad
, cfun
->eh
->lp_array
, i
, lp
)
1328 bool all_same
, all_diff
;
/* Skip deleted pads or pads without a materialized label.  */
1331 || lp
->landing_pad
== NULL_RTX
1332 || !LABEL_P (lp
->landing_pad
))
1335 all_same
= all_diff
= true;
1336 bb
= BLOCK_FOR_INSN (lp
->landing_pad
);
1337 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1339 gcc_assert (e
->flags
& EDGE_EH
);
1340 if (BB_PARTITION (bb
) == BB_PARTITION (e
->src
))
/* All throwers are in the other partition: just move the pad's block
   over by flipping its partition bit.  */
1350 int which
= BB_PARTITION (bb
);
1351 which
^= BB_HOT_PARTITION
| BB_COLD_PARTITION
;
1352 BB_SET_PARTITION (bb
, which
);
/* Mixed throwers: duplicate the pad into the other partition.  */
1355 fix_up_crossing_landing_pad (lp
, bb
);
1359 /* Mark every edge that crosses between sections. */
1362 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1364 unsigned int flags
= e
->flags
;
1366 /* We should never have EDGE_CROSSING set yet. */
1367 gcc_checking_assert ((flags
& EDGE_CROSSING
) == 0);
1369 if (e
->src
!= ENTRY_BLOCK_PTR
1370 && e
->dest
!= EXIT_BLOCK_PTR
1371 && BB_PARTITION (e
->src
) != BB_PARTITION (e
->dest
))
1373 VEC_safe_push (edge
, heap
, crossing_edges
, e
);
1374 flags
|= EDGE_CROSSING
;
1377 /* Now that we've split eh edges as appropriate, allow landing pads
1378 to be merged with the post-landing pads. */
1379 flags
&= ~EDGE_PRESERVE
;
1384 return crossing_edges
;
1387 /* If any destination of a crossing edge does not have a label, add label;
1388 Convert any easy fall-through crossing edges to unconditional jumps. */
/* Ensure every crossing edge's destination carries a label, and turn
   easy fall-through crossing edges (those whose source block does not
   end in a control-flow insn) into explicit unconditional jumps.  */
1391 add_labels_and_missing_jumps (VEC(edge
, heap
) *crossing_edges
)
1396 FOR_EACH_VEC_ELT (edge
, crossing_edges
, i
, e
)
1398 basic_block src
= e
->src
;
1399 basic_block dest
= e
->dest
;
1400 rtx label
, new_jump
;
1402 if (dest
== EXIT_BLOCK_PTR
)
1405 /* Make sure dest has a label. */
1406 label
= block_label (dest
);
1408 /* Nothing to do for non-fallthru edges. */
1409 if (src
== ENTRY_BLOCK_PTR
)
1411 if ((e
->flags
& EDGE_FALLTHRU
) == 0)
1414 /* If the block does not end with a control flow insn, then we
1415 can trivially add a jump to the end to fixup the crossing.
1416 Otherwise the jump will have to go in a new bb, which will
1417 be handled by fix_up_fall_thru_edges function. */
1418 if (control_flow_insn_p (BB_END (src
)))
1421 /* Make sure there's only one successor. */
1422 gcc_assert (single_succ_p (src
));
1424 new_jump
= emit_jump_insn_after (gen_jump (label
), BB_END (src
));
1425 BB_END (src
) = new_jump
;
1426 JUMP_LABEL (new_jump
) = label
;
1427 LABEL_NUSES (label
) += 1;
1429 emit_barrier_after_bb (src
);
1431 /* Mark edge as non-fallthru. */
1432 e
->flags
&= ~EDGE_FALLTHRU
;
1436 /* Find any bb's where the fall-through edge is a crossing edge (note that
1437 these bb's must also contain a conditional jump or end with a call
1438 instruction; we've already dealt with fall-through edges for blocks
1439 that didn't have a conditional jump or didn't end with call instruction
1440 in the call to add_labels_and_missing_jumps). Convert the fall-through
1441 edge to non-crossing edge by inserting a new bb to fall-through into.
1442 The new bb will contain an unconditional jump (crossing edge) to the
1443 original fall through destination. */
/* Rewrite crossing fall-through edges so no section boundary is ever
   crossed by falling through: either invert the block's conditional
   jump (so the non-crossing edge becomes the fall-through), or force
   the fall-through into a new same-partition block that holds an
   explicit jump to the other section.  */
1446 fix_up_fall_thru_edges (void)
1453 edge cond_jump
= NULL
;
1455 bool cond_jump_crosses
;
1458 rtx fall_thru_label
;
1460 FOR_EACH_BB (cur_bb
)
1463 if (EDGE_COUNT (cur_bb
->succs
) > 0)
1464 succ1
= EDGE_SUCC (cur_bb
, 0);
1468 if (EDGE_COUNT (cur_bb
->succs
) > 1)
1469 succ2
= EDGE_SUCC (cur_bb
, 1);
1473 /* Find the fall-through edge. */
1476 && (succ1
->flags
& EDGE_FALLTHRU
))
1482 && (succ2
->flags
& EDGE_FALLTHRU
))
1488 && (block_ends_with_call_p (cur_bb
)
1489 || can_throw_internal (BB_END (cur_bb
))))
1494 /* Find EDGE_CAN_FALLTHRU edge. */
1495 FOR_EACH_EDGE (e
, ei
, cur_bb
->succs
)
1496 if (e
->flags
& EDGE_CAN_FALLTHRU
)
1503 if (fall_thru
&& (fall_thru
->dest
!= EXIT_BLOCK_PTR
))
1505 /* Check to see if the fall-thru edge is a crossing edge. */
1507 if (fall_thru
->flags
& EDGE_CROSSING
)
1509 /* The fall_thru edge crosses; now check the cond jump edge, if
1512 cond_jump_crosses
= true;
1514 old_jump
= BB_END (cur_bb
);
1516 /* Find the jump instruction, if there is one. */
1520 if (!(cond_jump
->flags
& EDGE_CROSSING
))
1521 cond_jump_crosses
= false;
1523 /* We know the fall-thru edge crosses; if the cond
1524 jump edge does NOT cross, and its destination is the
1525 next block in the bb order, invert the jump
1526 (i.e. fix it so the fall through does not cross and
1527 the cond jump does). */
1529 if (!cond_jump_crosses
1530 && cur_bb
->aux
== cond_jump
->dest
)
1532 /* Find label in fall_thru block. We've already added
1533 any missing labels, so there must be one. */
1535 fall_thru_label
= block_label (fall_thru
->dest
);
1537 if (old_jump
&& JUMP_P (old_jump
) && fall_thru_label
)
1538 invert_worked
= invert_jump (old_jump
,
/* Inversion succeeded: swap the roles and crossing flags of the two
   out-edges so the fall-through stays within its section.  */
1542 fall_thru
->flags
&= ~EDGE_FALLTHRU
;
1543 cond_jump
->flags
|= EDGE_FALLTHRU
;
1544 update_br_prob_note (cur_bb
);
1546 fall_thru
= cond_jump
;
1548 cond_jump
->flags
|= EDGE_CROSSING
;
1549 fall_thru
->flags
&= ~EDGE_CROSSING
;
1554 if (cond_jump_crosses
|| !invert_worked
)
1556 /* This is the case where both edges out of the basic
1557 block are crossing edges. Here we will fix up the
1558 fall through edge. The jump edge will be taken care
1559 of later. The EDGE_CROSSING flag of fall_thru edge
1560 is unset before the call to force_nonfallthru
1561 function because if a new basic-block is created
1562 this edge remains in the current section boundary
1563 while the edge between new_bb and the fall_thru->dest
1564 becomes EDGE_CROSSING. */
1566 fall_thru
->flags
&= ~EDGE_CROSSING
;
1567 new_bb
= force_nonfallthru (fall_thru
);
/* Link the new block into the bb->aux reorder chain right after
   cur_bb so block order is preserved.  */
1571 new_bb
->aux
= cur_bb
->aux
;
1572 cur_bb
->aux
= new_bb
;
1574 /* Make sure new fall-through bb is in same
1575 partition as bb it's falling through from. */
1577 BB_COPY_PARTITION (new_bb
, cur_bb
);
1578 single_succ_edge (new_bb
)->flags
|= EDGE_CROSSING
;
1582 /* If a new basic-block was not created; restore
1583 the EDGE_CROSSING flag. */
1584 fall_thru
->flags
|= EDGE_CROSSING
;
1587 /* Add barrier after new jump */
1588 emit_barrier_after_bb (new_bb
? new_bb
: cur_bb
);
1595 /* This function checks the destination block of a "crossing jump" to
1596 see if it has any crossing predecessors that begin with a code label
1597 and end with an unconditional jump. If so, it returns that predecessor
1598 block. (This is to avoid creating lots of new basic blocks that all
1599 contain unconditional jumps to the same destination). */
/* Look for an existing crossing predecessor of JUMP_DEST that consists
   of just a label followed by an unconditional jump, so it can be
   reused instead of creating yet another trampoline block.
   NOTE(review): the return path and loop body are not fully visible in
   this fragment; SOURCE_BB starts as NULL and presumably receives the
   found block -- confirm against the full source.  */
1602 find_jump_block (basic_block jump_dest
)
1604 basic_block source_bb
= NULL
;
1609 FOR_EACH_EDGE (e
, ei
, jump_dest
->preds
)
1610 if (e
->flags
& EDGE_CROSSING
)
1612 basic_block src
= e
->src
;
1614 /* Check each predecessor to see if it has a label, and contains
1615 only one executable instruction, which is an unconditional jump.
1616 If so, we can use it. */
1618 if (LABEL_P (BB_HEAD (src
)))
1619 for (insn
= BB_HEAD (src
);
1620 !INSN_P (insn
) && insn
!= NEXT_INSN (BB_END (src
));
1621 insn
= NEXT_INSN (insn
))
1624 && insn
== BB_END (src
)
1626 && !any_condjump_p (insn
))
1640 /* Find all BB's with conditional jumps that are crossing edges;
1641 insert a new bb and make the conditional jump branch to the new
1642 bb instead (make the new bb same color so conditional branch won't
1643 be a 'crossing' edge). Insert an unconditional jump from the
1644 new bb to the original destination of the conditional jump. */
/* For every conditional branch whose taken edge crosses partitions,
   add a level of indirection: retarget the branch to a same-partition
   trampoline block containing an unconditional jump to the original
   (other-partition) destination.  Reuses an existing trampoline via
   find_jump_block when possible.  */
1647 fix_crossing_conditional_branches (void)
1658 rtx old_label
= NULL_RTX
;
1661 FOR_EACH_BB (cur_bb
)
1663 crossing_edge
= NULL
;
1664 if (EDGE_COUNT (cur_bb
->succs
) > 0)
1665 succ1
= EDGE_SUCC (cur_bb
, 0);
1669 if (EDGE_COUNT (cur_bb
->succs
) > 1)
1670 succ2
= EDGE_SUCC (cur_bb
, 1);
1674 /* We already took care of fall-through edges, so only one successor
1675 can be a crossing edge. */
1677 if (succ1
&& (succ1
->flags
& EDGE_CROSSING
))
1678 crossing_edge
= succ1
;
1679 else if (succ2
&& (succ2
->flags
& EDGE_CROSSING
))
1680 crossing_edge
= succ2
;
1684 old_jump
= BB_END (cur_bb
);
1686 /* Check to make sure the jump instruction is a
1687 conditional jump. */
1691 if (any_condjump_p (old_jump
))
/* Dig the IF_THEN_ELSE out of a SET or a PARALLEL pattern.  */
1693 if (GET_CODE (PATTERN (old_jump
)) == SET
)
1694 set_src
= SET_SRC (PATTERN (old_jump
));
1695 else if (GET_CODE (PATTERN (old_jump
)) == PARALLEL
)
1697 set_src
= XVECEXP (PATTERN (old_jump
), 0,0);
1698 if (GET_CODE (set_src
) == SET
)
1699 set_src
= SET_SRC (set_src
);
1705 if (set_src
&& (GET_CODE (set_src
) == IF_THEN_ELSE
))
/* The non-PC arm of the IF_THEN_ELSE is the branch target.  */
1707 if (GET_CODE (XEXP (set_src
, 1)) == PC
)
1708 old_label
= XEXP (set_src
, 2);
1709 else if (GET_CODE (XEXP (set_src
, 2)) == PC
)
1710 old_label
= XEXP (set_src
, 1);
1712 /* Check to see if new bb for jumping to that dest has
1713 already been created; if so, use it; if not, create
1716 new_bb
= find_jump_block (crossing_edge
->dest
);
1719 new_label
= block_label (new_bb
);
1722 basic_block last_bb
;
1725 /* Create new basic block to be dest for
1726 conditional jump. */
1728 /* Put appropriate instructions in new bb. */
1730 new_label
= gen_label_rtx ();
1731 emit_label (new_label
);
1733 gcc_assert (GET_CODE (old_label
) == LABEL_REF
);
1734 old_label
= JUMP_LABEL (old_jump
);
1735 new_jump
= emit_jump_insn (gen_jump (old_label
));
1736 JUMP_LABEL (new_jump
) = old_label
;
1738 last_bb
= EXIT_BLOCK_PTR
->prev_bb
;
1739 new_bb
= create_basic_block (new_label
, new_jump
, last_bb
);
1740 new_bb
->aux
= last_bb
->aux
;
1741 last_bb
->aux
= new_bb
;
1743 emit_barrier_after_bb (new_bb
);
1745 /* Make sure new bb is in same partition as source
1746 of conditional branch. */
1747 BB_COPY_PARTITION (new_bb
, cur_bb
);
1750 /* Make old jump branch to new bb. */
1752 redirect_jump (old_jump
, new_label
, 0);
1754 /* Remove crossing_edge as predecessor of 'dest'. */
1756 dest
= crossing_edge
->dest
;
1758 redirect_edge_succ (crossing_edge
, new_bb
);
1760 /* Make a new edge from new_bb to old dest; new edge
1761 will be a successor for new_bb and a predecessor
1764 if (EDGE_COUNT (new_bb
->succs
) == 0)
1765 new_edge
= make_edge (new_bb
, dest
, 0);
1767 new_edge
= EDGE_SUCC (new_bb
, 0);
/* The crossing now happens on the trampoline's outgoing edge.  */
1769 crossing_edge
->flags
&= ~EDGE_CROSSING
;
1770 new_edge
->flags
|= EDGE_CROSSING
;
1776 /* Find any unconditional branches that cross between hot and cold
1777 sections. Convert them into indirect jumps instead. */
/* Convert crossing unconditional jumps into indirect jumps through a
   fresh pseudo register, for targets whose direct branches cannot span
   all of memory.  Computed and table jumps are left alone.  */
1780 fix_crossing_unconditional_branches (void)
1786 rtx indirect_jump_sequence
;
1787 rtx jump_insn
= NULL_RTX
;
1792 FOR_EACH_BB (cur_bb
)
1794 last_insn
= BB_END (cur_bb
);
1796 if (EDGE_COUNT (cur_bb
->succs
) < 1)
1799 succ
= EDGE_SUCC (cur_bb
, 0);
1801 /* Check to see if bb ends in a crossing (unconditional) jump. At
1802 this point, no crossing jumps should be conditional. */
1804 if (JUMP_P (last_insn
)
1805 && (succ
->flags
& EDGE_CROSSING
))
1809 gcc_assert (!any_condjump_p (last_insn
));
1811 /* Make sure the jump is not already an indirect or table jump. */
1813 if (!computed_jump_p (last_insn
)
1814 && !tablejump_p (last_insn
, &label2
, &table
))
1816 /* We have found a "crossing" unconditional branch. Now
1817 we must convert it to an indirect jump. First create
1818 reference of label, as target for jump. */
1820 label
= JUMP_LABEL (last_insn
);
1821 label_addr
= gen_rtx_LABEL_REF (Pmode
, label
);
1822 LABEL_NUSES (label
) += 1;
1824 /* Get a register to use for the indirect jump. */
1826 new_reg
= gen_reg_rtx (Pmode
);
1828 /* Generate the indirect jump sequence. */
1831 emit_move_insn (new_reg
, label_addr
);
1832 emit_indirect_jump (new_reg
);
1833 indirect_jump_sequence
= get_insns ();
1836 /* Make sure every instruction in the new jump sequence has
1837 its basic block set to be cur_bb. */
1839 for (cur_insn
= indirect_jump_sequence
; cur_insn
;
1840 cur_insn
= NEXT_INSN (cur_insn
))
1842 if (!BARRIER_P (cur_insn
))
1843 BLOCK_FOR_INSN (cur_insn
) = cur_bb
;
1844 if (JUMP_P (cur_insn
))
1845 jump_insn
= cur_insn
;
1848 /* Insert the new (indirect) jump sequence immediately before
1849 the unconditional jump, then delete the unconditional jump. */
1851 emit_insn_before (indirect_jump_sequence
, last_insn
);
1852 delete_insn (last_insn
);
1854 /* Make BB_END for cur_bb be the jump instruction (NOT the
1855 barrier instruction at the end of the sequence...). */
1857 BB_END (cur_bb
) = jump_insn
;
1863 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
/* Attach a REG_CROSSING_JUMP note to every jump insn that ends the
   source block of a crossing edge, so later passes know not to undo
   the partition fixups.  */
1866 add_reg_crossing_jump_notes (void)
1873 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1874 if ((e
->flags
& EDGE_CROSSING
)
1875 && JUMP_P (BB_END (e
->src
)))
1876 add_reg_note (BB_END (e
->src
), REG_CROSSING_JUMP
, NULL_RTX
);
1879 /* Verify, in the basic block chain, that there is at most one switch
1880 between hot/cold partitions. This is modelled on
1881 rtl_verify_flow_info_1, but it cannot go inside that function
1882 because this condition will not be true until after
1883 reorder_basic_blocks is called. */
/* Walk the block chain and report an error if the hot/cold partition
   switches more than once -- after reordering all blocks of one
   partition must be contiguous.  */
1886 verify_hot_cold_block_grouping (void)
1890 bool switched_sections
= false;
1891 int current_partition
= 0;
1895 if (!current_partition
)
1896 current_partition
= BB_PARTITION (bb
);
1897 if (BB_PARTITION (bb
) != current_partition
)
/* A second partition change means the grouping is broken.  */
1899 if (switched_sections
)
1901 error ("multiple hot/cold transitions found (bb %i)",
1907 switched_sections
= true;
1908 current_partition
= BB_PARTITION (bb
);
1916 /* Reorder basic blocks. The main entry point to this file. FLAGS is
1917 the set of flags to pass to cfg_layout_initialize(). */
/* Main entry point: build traces with the greedy algorithm described at
   the top of the file, connect them, and relink the block chain.
   Requires cfglayout mode.  */
1920 reorder_basic_blocks (void)
1924 struct trace
*traces
;
1926 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT
);
/* Nothing to reorder with at most one real block.  */
1928 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1)
1931 set_edge_can_fallthru_flag ();
1932 mark_dfs_back_edges ();
1934 /* We are estimating the length of uncond jump insn only once since the code
1935 for getting the insn length always returns the minimal length now. */
1936 if (uncond_jump_length
== 0)
1937 uncond_jump_length
= get_uncond_jump_length ();
1939 /* We need to know some information for each basic block. */
1940 array_size
= GET_ARRAY_SIZE (last_basic_block
);
1941 bbd
= XNEWVEC (bbro_basic_block_data
, array_size
);
/* -1 marks every slot as belonging to no trace yet.  */
1942 for (i
= 0; i
< array_size
; i
++)
1944 bbd
[i
].start_of_trace
= -1;
1945 bbd
[i
].end_of_trace
= -1;
1946 bbd
[i
].in_trace
= -1;
1952 traces
= XNEWVEC (struct trace
, n_basic_blocks
);
1954 find_traces (&n_traces
, traces
);
1955 connect_traces (n_traces
, traces
);
1959 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
1962 dump_flow_info (dump_file
, dump_flags
);
1964 if (flag_reorder_blocks_and_partition
)
1965 verify_hot_cold_block_grouping ();
1968 /* Determine which partition the first basic block in the function
1969 belongs to, then find the first basic block in the current function
1970 that belongs to a different section, and insert a
1971 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
1972 instruction stream. When writing out the assembly code,
1973 encountering this note will make the compiler switch between the
1974 hot and cold text sections. */
/* Emit a NOTE_INSN_SWITCH_TEXT_SECTIONS before the first block whose
   partition differs from the function's first block; a no-op unless
   hot/cold partitioning is enabled.  */
1977 insert_section_boundary_note (void)
1981 int first_partition
= 0;
1983 if (!flag_reorder_blocks_and_partition
)
/* Remember the first block's partition; the first change marks the
   section boundary.  */
1988 if (!first_partition
)
1989 first_partition
= BB_PARTITION (bb
);
1990 if (BB_PARTITION (bb
) != first_partition
)
1992 new_note
= emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS
,
1994 /* ??? This kind of note always lives between basic blocks,
1995 but add_insn_before will set BLOCK_FOR_INSN anyway. */
1996 BLOCK_FOR_INSN (new_note
) = NULL
;
2002 /* Duplicate the blocks containing computed gotos. This basically unfactors
2003 computed gotos that were factored early on in the compilation process to
2004 speed up edge based data flow. We used to not unfactor them again,
2005 which can seriously pessimize code with many computed jumps in the source
2006 code, such as interpreters. See e.g. PR15242. */
/* Gate: unfactor computed gotos only when the target permits modifying
   jumps, optimization is on, expensive optimizations are requested, and
   we are not optimizing this function for size.  */
2009 gate_duplicate_computed_gotos (void)
2011 if (targetm
.cannot_modify_jumps_p ())
2013 return (optimize
> 0
2014 && flag_expensive_optimizations
2015 && ! optimize_function_for_size_p (cfun
));
/* Unfactor computed gotos (see comment above): find small duplicatable
   blocks ending in a computed jump, then copy each such block into its
   predecessors so the indirect jump is no longer shared.  */
2020 duplicate_computed_gotos (void)
2022 basic_block bb
, new_bb
;
2026 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1)
2030 cfg_layout_initialize (0);
2032 /* We are estimating the length of uncond jump insn only once
2033 since the code for getting the insn length always returns
2034 the minimal length now. */
2035 if (uncond_jump_length
== 0)
2036 uncond_jump_length
= get_uncond_jump_length ();
2038 max_size
= uncond_jump_length
* PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS
);
2039 candidates
= BITMAP_ALLOC (NULL
);
2041 /* Look for blocks that end in a computed jump, and see if such blocks
2042 are suitable for unfactoring. If a block is a candidate for unfactoring,
2043 mark it in the candidates. */
/* NOTE(review): the initialization of SIZE and ALL_FLAGS is not
   visible in this fragment -- confirm they start at zero in the full
   source.  */
2049 int size
, all_flags
;
2051 /* Build the reorder chain for the original order of blocks. */
2052 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
2053 bb
->aux
= bb
->next_bb
;
2055 /* Obviously the block has to end in a computed jump. */
2056 if (!computed_jump_p (BB_END (bb
)))
2059 /* Only consider blocks that can be duplicated. */
2060 if (find_reg_note (BB_END (bb
), REG_CROSSING_JUMP
, NULL_RTX
)
2061 || !can_duplicate_block_p (bb
))
2064 /* Make sure that the block is small enough. */
2066 FOR_BB_INSNS (bb
, insn
)
2069 size
+= get_attr_min_length (insn
);
2070 if (size
> max_size
)
2073 if (size
> max_size
)
2076 /* Final check: there must not be any incoming abnormal edges. */
2078 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2079 all_flags
|= e
->flags
;
2080 if (all_flags
& EDGE_COMPLEX
)
2083 bitmap_set_bit (candidates
, bb
->index
);
2086 /* Nothing to do if there is no computed jump here. */
2087 if (bitmap_empty_p (candidates
))
2090 /* Duplicate computed gotos. */
2093 if (bb
->flags
& BB_VISITED
)
2096 bb
->flags
|= BB_VISITED
;
2098 /* BB must have one outgoing edge. That edge must not lead to
2099 the exit block or the next block.
2100 The destination must have more than one predecessor. */
2101 if (!single_succ_p (bb
)
2102 || single_succ (bb
) == EXIT_BLOCK_PTR
2103 || single_succ (bb
) == bb
->next_bb
2104 || single_pred_p (single_succ (bb
)))
2107 /* The successor block has to be a duplication candidate. */
2108 if (!bitmap_bit_p (candidates
, single_succ (bb
)->index
))
2111 new_bb
= duplicate_block (single_succ (bb
), single_succ_edge (bb
), bb
);
2112 new_bb
->aux
= bb
->aux
;
2114 new_bb
->flags
|= BB_VISITED
;
2118 cfg_layout_finalize ();
2120 BITMAP_FREE (candidates
);
/* Pass descriptor for the computed-goto duplication pass ("compgotos").
   NOTE(review): several descriptor fields are missing from this
   extraction.  */
2124 struct rtl_opt_pass pass_duplicate_computed_gotos
=
2128 "compgotos", /* name */
2129 gate_duplicate_computed_gotos
, /* gate */
2130 duplicate_computed_gotos
, /* execute */
2133 0, /* static_pass_number */
2134 TV_REORDER_BLOCKS
, /* tv_id */
2135 0, /* properties_required */
2136 0, /* properties_provided */
2137 0, /* properties_destroyed */
2138 0, /* todo_flags_start */
2139 TODO_verify_rtl_sharing
,/* todo_flags_finish */
2144 /* This function is the main 'entrance' for the optimization that
2145 partitions hot and cold basic blocks into separate sections of the
2146 .o file (to improve performance and cache locality). Ideally it
2147 would be called after all optimizations that rearrange the CFG have
2148 been called. However part of this optimization may introduce new
2149 register usage, so it must be called before register allocation has
2150 occurred. This means that this optimization is actually called
2151 well before the optimization that reorders basic blocks (see
2154 This optimization checks the feedback information to determine
2155 which basic blocks are hot/cold, updates flags on the basic blocks
2156 to indicate which section they belong in. This information is
2157 later used for writing out sections in the .o file. Because hot
2158 and cold sections can be arbitrarily large (within the bounds of
2159 memory), far beyond the size of a single function, it is necessary
2160 to fix up all edges that cross section boundaries, to make sure the
2161 instructions used can actually span the required distance. The
2162 fixes are described below.
2164 Fall-through edges must be changed into jumps; it is not safe or
2165 legal to fall through across a section boundary. Whenever a
2166 fall-through edge crossing a section boundary is encountered, a new
2167 basic block is inserted (in the same section as the fall-through
2168 source), and the fall through edge is redirected to the new basic
2169 block. The new basic block contains an unconditional jump to the
2170 original fall-through target. (If the unconditional jump is
2171 insufficient to cross section boundaries, that is dealt with a
2172 little later, see below).
2174 In order to deal with architectures that have short conditional
2175 branches (which cannot span all of memory) we take any conditional
2176 jump that attempts to cross a section boundary and add a level of
2177 indirection: it becomes a conditional jump to a new basic block, in
2178 the same section. The new basic block contains an unconditional
2179 jump to the original target, in the other section.
2181 For those architectures whose unconditional branch is also
2182 incapable of reaching all of memory, those unconditional jumps are
2183 converted into indirect jumps, through a register.
2185 IMPORTANT NOTE: This optimization causes some messy interactions
2186 with the cfg cleanup optimizations; those optimizations want to
2187 merge blocks wherever possible, and to collapse indirect jump
2188 sequences (change "A jumps to B jumps to C" directly into "A jumps
2189 to C"). Those optimizations can undo the jump fixes that
2190 partitioning is required to make (see above), in order to ensure
2191 that jumps attempting to cross section boundaries are really able
2192 to cover whatever distance the jump requires (on many architectures
2193 conditional or unconditional jumps are not able to reach all of
2194 memory). Therefore tests have to be inserted into each such
2195 optimization to make sure that it does not undo stuff necessary to
2196 cross partition boundaries. This would be much less of a problem
2197 if we could perform this optimization later in the compilation, but
2198 unfortunately the fact that we may need to create indirect jumps
2199 (through registers) requires that this optimization be performed
2200 before register allocation.
2202 Hot and cold basic blocks are partitioned and put in separate
2203 sections of the .o file, to reduce paging and improve cache
2204 performance (hopefully). This can result in bits of code from the
2205 same function being widely separated in the .o file. However this
2206 is not obvious to the current bb structure. Therefore we must take
2207 care to ensure that: 1). There are no fall_thru edges that cross
2208 between sections; 2). For those architectures which have "short"
2209 conditional branches, all conditional branches that attempt to
2210 cross between sections are converted to unconditional branches;
2211 and, 3). For those architectures which have "short" unconditional
2212 branches, all unconditional branches that attempt to cross between
2213 sections are converted to indirect jumps.
2215 The code for fixing up fall_thru edges that cross between hot and
2216 cold basic blocks does so by creating new basic blocks containing
2217 unconditional branches to the appropriate label in the "other"
2218 section. The new basic block is then put in the same (hot or cold)
2219 section as the original conditional branch, and the fall_thru edge
2220 is modified to fall into the new basic block instead. By adding
2221 this level of indirection we end up with only unconditional branches
2222 crossing between hot and cold sections.
2224 Conditional branches are dealt with by adding a level of indirection.
2225 A new basic block is added in the same (hot/cold) section as the
2226 conditional branch, and the conditional branch is retargeted to the
2227 new basic block. The new basic block contains an unconditional branch
2228 to the original target of the conditional branch (in the other section).
2230 Unconditional branches are dealt with by converting them into
/* Driver for hot/cold partitioning (see the large comment above):
   classify blocks, then fix every crossing edge so each kind of jump
   can actually span the section boundary on this target.  */
2234 partition_hot_cold_basic_blocks (void)
2236 VEC(edge
, heap
) *crossing_edges
;
2238 if (n_basic_blocks
<= NUM_FIXED_BLOCKS
+ 1)
/* Batch DF updates; everything is rescanned or recomputed below.  */
2241 df_set_flags (DF_DEFER_INSN_RESCAN
);
2243 crossing_edges
= find_rarely_executed_basic_blocks_and_crossing_edges ();
2244 if (crossing_edges
== NULL
)
2247 /* Make sure the source of any crossing edge ends in a jump and the
2248 destination of any crossing edge has a label. */
2249 add_labels_and_missing_jumps (crossing_edges
);
2251 /* Convert all crossing fall_thru edges to non-crossing fall
2252 thrus to unconditional jumps (that jump to the original fall
2254 fix_up_fall_thru_edges ();
2256 /* If the architecture does not have conditional branches that can
2257 span all of memory, convert crossing conditional branches into
2258 crossing unconditional branches. */
2259 if (!HAS_LONG_COND_BRANCH
)
2260 fix_crossing_conditional_branches ();
2262 /* If the architecture does not have unconditional branches that
2263 can span all of memory, convert crossing unconditional branches
2264 into indirect jumps. Since adding an indirect jump also adds
2265 a new register usage, update the register usage information as
2267 if (!HAS_LONG_UNCOND_BRANCH
)
2268 fix_crossing_unconditional_branches ();
2270 add_reg_crossing_jump_notes ();
2272 /* Clear bb->aux fields that the above routines were using. */
2273 clear_aux_for_blocks ();
2275 VEC_free (edge
, heap
, crossing_edges
);
2277 /* ??? FIXME: DF generates the bb info for a block immediately.
2278 And by immediately, I mean *during* creation of the block.
2280 #0 df_bb_refs_collect
2281 #1 in df_bb_refs_record
2282 #2 in create_basic_block_structure
2284 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2285 will *always* fail, because no edges can have been added to the
2286 block yet. Which of course means we don't add the right
2287 artificial refs, which means we fail df_verify (much) later.
2289 Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2290 that we also shouldn't grab data from the new blocks those new
2291 insns are in either. In this way one can create the block, link
2292 it up properly, and have everything Just Work later, when deferred
2293 insns are processed.
2295 In the meantime, we have no other option but to throw away all
2296 of the DF data and recompute it all. */
2297 if (cfun
->eh
->lp_array
)
2299 df_finish_pass (true);
2300 df_scan_alloc (NULL
);
2302 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2303 data. We blindly generated all of them when creating the new
2304 landing pad. Delete those assignments we don't use. */
2305 df_set_flags (DF_LR_RUN_DCE
);
2309 return TODO_verify_flow
| TODO_verify_rtl_sharing
;
/* Gate: run block reordering only when the target allows modifying
   jumps, some reordering flag is enabled, and this function is being
   optimized for speed (see comment below about -Os).  */
2313 gate_handle_reorder_blocks (void)
2315 if (targetm
.cannot_modify_jumps_p ())
2317 /* Don't reorder blocks when optimizing for size because extra jump insns may
2318 be created; also barrier may create extra padding.
2320 More correctly we should have a block reordering mode that tried to
2321 minimize the combined size of all the jumps. This would more or less
2322 automatically remove extra jumps, but would also try to use more short
2323 jumps instead of long jumps. */
2324 if (!optimize_function_for_speed_p (cfun
))
2326 return (optimize
> 0
2327 && (flag_reorder_blocks
|| flag_reorder_blocks_and_partition
));
2331 /* Reorder basic blocks. */
/* Execute hook for the reordering pass: clean up the CFG, reorder the
   blocks, rebuild the bb->aux chain for cfglayout finalization, and
   insert section-boundary notes.  */
2333 rest_of_handle_reorder_blocks (void)
2337 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2338 splitting possibly introduced more crossjumping opportunities. */
2339 cfg_layout_initialize (CLEANUP_EXPENSIVE
);
2341 reorder_basic_blocks ();
2342 cleanup_cfg (CLEANUP_EXPENSIVE
);
/* Rebuild the reorder chain in current block order for finalization.  */
2345 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
2346 bb
->aux
= bb
->next_bb
;
2347 cfg_layout_finalize ();
2349 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */
2350 insert_section_boundary_note ();
/* Pass descriptor for the basic-block reordering pass.
   NOTE(review): several descriptor fields (including the name string)
   are missing from this extraction.  */
2354 struct rtl_opt_pass pass_reorder_blocks
=
2359 gate_handle_reorder_blocks
, /* gate */
2360 rest_of_handle_reorder_blocks
, /* execute */
2363 0, /* static_pass_number */
2364 TV_REORDER_BLOCKS
, /* tv_id */
2365 0, /* properties_required */
2366 0, /* properties_provided */
2367 0, /* properties_destroyed */
2368 0, /* todo_flags_start */
2369 TODO_verify_rtl_sharing
, /* todo_flags_finish */
/* Gate: partition hot/cold blocks only when the flag is enabled, we are
   optimizing for speed, and neither linkonce (DECL_ONE_ONLY) nor a
   user-defined section attribute would conflict.  */
2374 gate_handle_partition_blocks (void)
2376 /* The optimization to partition hot/cold basic blocks into separate
2377 sections of the .o file does not work well with linkonce or with
2378 user defined section attributes. Don't call it if either case
2380 return (flag_reorder_blocks_and_partition
2382 /* See gate_handle_reorder_blocks. We should not partition if
2383 we are going to omit the reordering. */
2384 && optimize_function_for_speed_p (cfun
)
2385 && !DECL_ONE_ONLY (current_function_decl
)
2386 && !user_defined_section_attribute
);
2389 struct rtl_opt_pass pass_partition_blocks
=
2393 "bbpart", /* name */
2394 gate_handle_partition_blocks
, /* gate */
2395 partition_hot_cold_basic_blocks
, /* execute */
2398 0, /* static_pass_number */
2399 TV_REORDER_BLOCKS
, /* tv_id */
2400 PROP_cfglayout
, /* properties_required */
2401 0, /* properties_provided */
2402 0, /* properties_destroyed */
2403 0, /* todo_flags_start */
2404 0 /* todo_flags_finish */