gcc/bb-reorder.c
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
3 2012 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This (greedy) algorithm constructs traces in several rounds.
22 The construction starts from "seeds". The seed for the first round
23 is the entry point of the function. When there is more than one seed,
24 the one with the lowest key in the heap is selected first
25 (see function bb_to_key). Then the algorithm repeatedly adds the most
26 probable successor to the end of a trace. Finally it connects the traces.
27
28 There are two parameters: Branch Threshold and Exec Threshold.
29 If the probability of the edge to a successor of the current basic block
30 is lower than Branch Threshold, or the frequency of the successor is lower
31 than Exec Threshold, the successor will become a seed in one of the next
32 rounds. Each round uses lower values of these parameters than the previous
33 one. The last round has to have these parameters set to zero
34 so that the remaining blocks are picked up.
35
36 The algorithm selects the most probable successor from all unvisited
37 successors and successors that have been added to this trace.
38 The other successors (that have not been "sent" to the next round) will be
39 other seeds for this round and the secondary traces will start in them.
40 If the successor has not been visited in this trace, it is added to the trace
41 (however, there is some heuristic for simple branches).
42 If the successor has been visited in this trace, a loop has been found.
43 If the loop has many iterations, the loop is rotated so that the
44 source block of the most probable edge going out of the loop
45 is the last block of the trace.
46 If the loop has few iterations and there is no edge from the last block of
47 the loop going out of the loop, the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
49
50 When connecting traces, it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces, it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
56 The remaining traces are simply concatenated, so there will be a jump to
57 the beginning of each such trace.
58
59
60 References:
61
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
65
66 */
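/* A small illustrative walk-through (hypothetical CFG and numbers, not
   taken from any real profile): consider

        ENTRY -> A -> B -> D -> EXIT
                  \-> C ---^

   where the edge A->B has probability 90% and A->C 10%.  The first
   round seeds a trace at A and greedily follows the most probable
   suitable successor, giving the trace A B D.  C falls below that
   round's Branch/Exec thresholds, so it is pushed to a later round and
   ends up as a one-block trace of its own.  connect_traces () then lays
   the traces out one after another; C is emitted after D and reaches D
   through an explicit jump.  */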
67
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "timevar.h"
76 #include "output.h"
77 #include "fibheap.h"
78 #include "target.h"
79 #include "function.h"
80 #include "tm_p.h"
81 #include "obstack.h"
82 #include "expr.h"
83 #include "params.h"
84 #include "diagnostic-core.h"
85 #include "toplev.h" /* user_defined_section_attribute */
86 #include "tree-pass.h"
87 #include "df.h"
88 #include "bb-reorder.h"
89 #include "except.h"
90
91 /* The number of rounds. In most cases there will only be 4 rounds, but
92 when partitioning hot and cold basic blocks into separate sections of
93 the .o file there will be an extra round.  */
94 #define N_ROUNDS 5
95
96 /* Stubs in case we don't have a return insn.
97 We have to check at run time too, not only at compile time. */
98
99 #ifndef HAVE_return
100 #define HAVE_return 0
101 #define gen_return() NULL_RTX
102 #endif
103
104
105 struct target_bb_reorder default_target_bb_reorder;
106 #if SWITCHABLE_TARGET
107 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
108 #endif
109
110 #define uncond_jump_length \
111 (this_target_bb_reorder->x_uncond_jump_length)
112
113 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
114 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
115
116 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
117 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
118
119 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of the
120 entry block's frequency, the edge destination is not duplicated while connecting traces. */
121 #define DUPLICATION_THRESHOLD 100
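/* A worked example of the thresholds above (illustration only, the
   percentages follow from the tables): in the first round an edge is
   considered only if its probability is at least 400/1000 of
   REG_BR_PROB_BASE, i.e. 40%, and a successor only if its frequency is
   at least 500/1000 = 50% of the hottest entry block's frequency; both
   thresholds drop to zero in the last rounds so every remaining block
   is eventually picked up.  Likewise, while connecting traces a larger
   block is duplicated only along edges whose frequency is at least
   100 per mille (10%) of that same maximum entry frequency.  */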
122
123 /* Structure to hold needed information for each basic block. */
124 typedef struct bbro_basic_block_data_def
125 {
126 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
127 int start_of_trace;
128
129 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
130 int end_of_trace;
131
132 /* Which trace is the bb in? */
133 int in_trace;
134
135 /* Which trace was this bb visited in? */
136 int visited;
137
138 /* Which heap is BB in (if any)? */
139 fibheap_t heap;
140
141 /* Which heap node is BB in (if any)? */
142 fibnode_t node;
143 } bbro_basic_block_data;
144
145 /* The current size of the following dynamic array. */
146 static int array_size;
147
148 /* The array which holds needed information for basic blocks. */
149 static bbro_basic_block_data *bbd;
150
151 /* To avoid frequent reallocation the size of the array is greater than
152 needed; the number of elements is (not less than) 1.25 * size_wanted. */
153 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
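/* A quick check of the arithmetic (illustration only): GET_ARRAY_SIZE (10)
   expands to ((10 / 4) + 1) * 5 = 15 and GET_ARRAY_SIZE (16) to
   ((16 / 4) + 1) * 5 = 25, so the result is always a multiple of 5 and
   at least 1.25 times the requested size, leaving headroom for blocks
   created later, e.g. by copy_bb ().  */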
154
155 /* Free the memory and set the pointer to NULL. */
156 #define FREE(P) (gcc_assert (P), free (P), P = 0)
157
158 /* Structure for holding information about a trace. */
159 struct trace
160 {
161 /* First and last basic block of the trace. */
162 basic_block first, last;
163
164 /* The round of the STC creation which this trace was found in. */
165 int round;
166
167 /* The length (i.e. the number of basic blocks) of the trace. */
168 int length;
169 };
170
171 /* Maximum frequency and count of one of the entry blocks. */
172 static int max_entry_frequency;
173 static gcov_type max_entry_count;
174
175 /* Local function prototypes. */
176 static void find_traces (int *, struct trace *);
177 static basic_block rotate_loop (edge, struct trace *, int);
178 static void mark_bb_visited (basic_block, int);
179 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
180 int, fibheap_t *, int);
181 static basic_block copy_bb (basic_block, edge, basic_block, int);
182 static fibheapkey_t bb_to_key (basic_block);
183 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge);
184 static void connect_traces (int, struct trace *);
185 static bool copy_bb_p (const_basic_block, int);
186 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
187 \f
188 /* Return the trace number in which BB was visited. */
189
190 static int
191 bb_visited_trace (const_basic_block bb)
192 {
193 gcc_assert (bb->index < array_size);
194 return bbd[bb->index].visited;
195 }
196
197 /* Mark BB as visited in trace number TRACE. */
198
199 static void
200 mark_bb_visited (basic_block bb, int trace)
201 {
202 bbd[bb->index].visited = trace;
203 if (bbd[bb->index].heap)
204 {
205 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
206 bbd[bb->index].heap = NULL;
207 bbd[bb->index].node = NULL;
208 }
209 }
210
211 /* Check to see if bb should be pushed into the next round of trace
212 collection or not. Reasons for pushing the block forward are: 1) the
213 block is cold, we are doing partitioning, and there will be
214 another round (cold partition blocks are not supposed to be
215 collected into traces until the very last round); or 2) there will
216 be another round, and the basic block is not "hot enough" for the
217 current round of trace collection. */
218
219 static bool
220 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
221 int exec_th, gcov_type count_th)
222 {
223 bool there_exists_another_round;
224 bool block_not_hot_enough;
225
226 there_exists_another_round = round < number_of_rounds - 1;
227
228 block_not_hot_enough = (bb->frequency < exec_th
229 || bb->count < count_th
230 || probably_never_executed_bb_p (bb));
231
232 if (there_exists_another_round
233 && block_not_hot_enough)
234 return true;
235 else
236 return false;
237 }
238
239 /* Find the traces for Software Trace Cache. Chain each trace through
240 the basic blocks' aux fields. Store the number of traces in *N_TRACES and
241 the descriptions of the traces in TRACES. */
242
243 static void
244 find_traces (int *n_traces, struct trace *traces)
245 {
246 int i;
247 int number_of_rounds;
248 edge e;
249 edge_iterator ei;
250 fibheap_t heap;
251
252 /* Add one extra round of trace collection when partitioning hot/cold
253 basic blocks into separate sections. The last round is for all the
254 cold blocks (and ONLY the cold blocks). */
255
256 number_of_rounds = N_ROUNDS - 1;
257
258 /* Insert entry points of function into heap. */
259 heap = fibheap_new ();
260 max_entry_frequency = 0;
261 max_entry_count = 0;
262 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
263 {
264 bbd[e->dest->index].heap = heap;
265 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
266 e->dest);
267 if (e->dest->frequency > max_entry_frequency)
268 max_entry_frequency = e->dest->frequency;
269 if (e->dest->count > max_entry_count)
270 max_entry_count = e->dest->count;
271 }
272
273 /* Find the traces. */
274 for (i = 0; i < number_of_rounds; i++)
275 {
276 gcov_type count_threshold;
277
278 if (dump_file)
279 fprintf (dump_file, "STC - round %d\n", i + 1);
280
281 if (max_entry_count < INT_MAX / 1000)
282 count_threshold = max_entry_count * exec_threshold[i] / 1000;
283 else
284 count_threshold = max_entry_count / 1000 * exec_threshold[i];
285
286 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
287 max_entry_frequency * exec_threshold[i] / 1000,
288 count_threshold, traces, n_traces, i, &heap,
289 number_of_rounds);
290 }
291 fibheap_delete (heap);
292
293 if (dump_file)
294 {
295 for (i = 0; i < *n_traces; i++)
296 {
297 basic_block bb;
298 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
299 traces[i].round + 1);
300 for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
301 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
302 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
303 }
304 fflush (dump_file);
305 }
306 }
307
308 /* Rotate the loop whose back edge is BACK_EDGE, at the tail of trace TRACE
309 (whose sequential number is TRACE_N). Return the new last block of the trace. */
310
311 static basic_block
312 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
313 {
314 basic_block bb;
315
316 /* Information about the best end (end after rotation) of the loop. */
317 basic_block best_bb = NULL;
318 edge best_edge = NULL;
319 int best_freq = -1;
320 gcov_type best_count = -1;
321 /* An edge is "preferred" when its destination has not been visited yet
322 or is the start block of some trace. */
323 bool is_preferred = false;
324
325 /* Find the most frequent edge that goes out of the current trace. */
326 bb = back_edge->dest;
327 do
328 {
329 edge e;
330 edge_iterator ei;
331
332 FOR_EACH_EDGE (e, ei, bb->succs)
333 if (e->dest != EXIT_BLOCK_PTR
334 && bb_visited_trace (e->dest) != trace_n
335 && (e->flags & EDGE_CAN_FALLTHRU)
336 && !(e->flags & EDGE_COMPLEX))
337 {
338 if (is_preferred)
339 {
340 /* The best edge is preferred. */
341 if (!bb_visited_trace (e->dest)
342 || bbd[e->dest->index].start_of_trace >= 0)
343 {
344 /* The current edge E is also preferred. */
345 int freq = EDGE_FREQUENCY (e);
346 if (freq > best_freq || e->count > best_count)
347 {
348 best_freq = freq;
349 best_count = e->count;
350 best_edge = e;
351 best_bb = bb;
352 }
353 }
354 }
355 else
356 {
357 if (!bb_visited_trace (e->dest)
358 || bbd[e->dest->index].start_of_trace >= 0)
359 {
360 /* The current edge E is preferred. */
361 is_preferred = true;
362 best_freq = EDGE_FREQUENCY (e);
363 best_count = e->count;
364 best_edge = e;
365 best_bb = bb;
366 }
367 else
368 {
369 int freq = EDGE_FREQUENCY (e);
370 if (!best_edge || freq > best_freq || e->count > best_count)
371 {
372 best_freq = freq;
373 best_count = e->count;
374 best_edge = e;
375 best_bb = bb;
376 }
377 }
378 }
379 }
380 bb = (basic_block) bb->aux;
381 }
382 while (bb != back_edge->dest);
383
384 if (best_bb)
385 {
386 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
387 the trace. */
388 if (back_edge->dest == trace->first)
389 {
390 trace->first = (basic_block) best_bb->aux;
391 }
392 else
393 {
394 basic_block prev_bb;
395
396 for (prev_bb = trace->first;
397 prev_bb->aux != back_edge->dest;
398 prev_bb = (basic_block) prev_bb->aux)
399 ;
400 prev_bb->aux = best_bb->aux;
401
402 /* Try to get rid of uncond jump to cond jump. */
403 if (single_succ_p (prev_bb))
404 {
405 basic_block header = single_succ (prev_bb);
406
407 /* Duplicate HEADER if it is a small block containing a cond jump
408 at the end. */
409 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
410 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
411 NULL_RTX))
412 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
413 }
414 }
415 }
416 else
417 {
418 /* We have not found a suitable loop tail, so do not rotate. */
419 best_bb = back_edge->src;
420 }
421 best_bb->aux = NULL;
422 return best_bb;
423 }
424
425 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH, i.e. do
426 not include into traces basic blocks whose probability is lower than BRANCH_TH
427 or whose frequency is lower than EXEC_TH (or whose count is lower than
428 COUNT_TH). Store the new traces into TRACES and modify the number of traces
429 *N_TRACES. Set the round (which the trace belongs to) to ROUND. The function
430 expects the starting basic blocks to be in *HEAP; at the end it deletes *HEAP
431 and stores the starting points for the next round into a new *HEAP. */
432
433 static void
434 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
435 struct trace *traces, int *n_traces, int round,
436 fibheap_t *heap, int number_of_rounds)
437 {
438 /* Heap for discarded basic blocks which are possible starting points for
439 the next round. */
440 fibheap_t new_heap = fibheap_new ();
441
442 while (!fibheap_empty (*heap))
443 {
444 basic_block bb;
445 struct trace *trace;
446 edge best_edge, e;
447 fibheapkey_t key;
448 edge_iterator ei;
449
450 bb = (basic_block) fibheap_extract_min (*heap);
451 bbd[bb->index].heap = NULL;
452 bbd[bb->index].node = NULL;
453
454 if (dump_file)
455 fprintf (dump_file, "Getting bb %d\n", bb->index);
456
457 /* If the BB's frequency is too low send BB to the next round. When
458 partitioning hot/cold blocks into separate sections, make sure all
459 the cold blocks (and ONLY the cold blocks) go into the (extra) final
460 round. */
461
462 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
463 count_th))
464 {
465 int key = bb_to_key (bb);
466 bbd[bb->index].heap = new_heap;
467 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
468
469 if (dump_file)
470 fprintf (dump_file,
471 " Possible start point of next round: %d (key: %d)\n",
472 bb->index, key);
473 continue;
474 }
475
476 trace = traces + *n_traces;
477 trace->first = bb;
478 trace->round = round;
479 trace->length = 0;
480 bbd[bb->index].in_trace = *n_traces;
481 (*n_traces)++;
482
483 do
484 {
485 int prob, freq;
486 bool ends_in_call;
487
488 /* The probability and frequency of the best edge. */
489 int best_prob = INT_MIN / 2;
490 int best_freq = INT_MIN / 2;
491
492 best_edge = NULL;
493 mark_bb_visited (bb, *n_traces);
494 trace->length++;
495
496 if (dump_file)
497 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
498 bb->index, *n_traces - 1);
499
500 ends_in_call = block_ends_with_call_p (bb);
501
502 /* Select the successor that will be placed after BB. */
503 FOR_EACH_EDGE (e, ei, bb->succs)
504 {
505 gcc_assert (!(e->flags & EDGE_FAKE));
506
507 if (e->dest == EXIT_BLOCK_PTR)
508 continue;
509
510 if (bb_visited_trace (e->dest)
511 && bb_visited_trace (e->dest) != *n_traces)
512 continue;
513
514 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
515 continue;
516
517 prob = e->probability;
518 freq = e->dest->frequency;
519
520 /* The only sensible preference for a call instruction is the
521 fallthru edge. Don't bother selecting anything else. */
522 if (ends_in_call)
523 {
524 if (e->flags & EDGE_CAN_FALLTHRU)
525 {
526 best_edge = e;
527 best_prob = prob;
528 best_freq = freq;
529 }
530 continue;
531 }
532
533 /* Skip an edge that cannot be a fallthru edge, or that leads to an
534 improbable or infrequent successor (i.e. to an unsuitable successor). */
535 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
536 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
537 || e->count < count_th)
538 continue;
539
540 /* If partitioning hot/cold basic blocks, don't consider edges
541 that cross section boundaries. */
542
543 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
544 best_edge))
545 {
546 best_edge = e;
547 best_prob = prob;
548 best_freq = freq;
549 }
550 }
551
552 /* If the best destination has multiple predecessors, and can be
553 duplicated cheaper than a jump, don't allow it to be added
554 to a trace. We'll duplicate it when connecting traces. */
555 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
556 && copy_bb_p (best_edge->dest, 0))
557 best_edge = NULL;
558
559 /* Add all non-selected successors to the heaps. */
560 FOR_EACH_EDGE (e, ei, bb->succs)
561 {
562 if (e == best_edge
563 || e->dest == EXIT_BLOCK_PTR
564 || bb_visited_trace (e->dest))
565 continue;
566
567 key = bb_to_key (e->dest);
568
569 if (bbd[e->dest->index].heap)
570 {
571 /* E->DEST is already in some heap. */
572 if (key != bbd[e->dest->index].node->key)
573 {
574 if (dump_file)
575 {
576 fprintf (dump_file,
577 "Changing key for bb %d from %ld to %ld.\n",
578 e->dest->index,
579 (long) bbd[e->dest->index].node->key,
580 key);
581 }
582 fibheap_replace_key (bbd[e->dest->index].heap,
583 bbd[e->dest->index].node, key);
584 }
585 }
586 else
587 {
588 fibheap_t which_heap = *heap;
589
590 prob = e->probability;
591 freq = EDGE_FREQUENCY (e);
592
593 if (!(e->flags & EDGE_CAN_FALLTHRU)
594 || (e->flags & EDGE_COMPLEX)
595 || prob < branch_th || freq < exec_th
596 || e->count < count_th)
597 {
598 /* When partitioning hot/cold basic blocks, make sure
599 the cold blocks (and only the cold blocks) all get
600 pushed to the last round of trace collection. */
601
602 if (push_to_next_round_p (e->dest, round,
603 number_of_rounds,
604 exec_th, count_th))
605 which_heap = new_heap;
606 }
607
608 bbd[e->dest->index].heap = which_heap;
609 bbd[e->dest->index].node = fibheap_insert (which_heap,
610 key, e->dest);
611
612 if (dump_file)
613 {
614 fprintf (dump_file,
615 " Possible start of %s round: %d (key: %ld)\n",
616 (which_heap == new_heap) ? "next" : "this",
617 e->dest->index, (long) key);
618 }
619
620 }
621 }
622
623 if (best_edge) /* Suitable successor was found. */
624 {
625 if (bb_visited_trace (best_edge->dest) == *n_traces)
626 {
627 /* We do nothing with one-basic-block loops. */
628 if (best_edge->dest != bb)
629 {
630 if (EDGE_FREQUENCY (best_edge)
631 > 4 * best_edge->dest->frequency / 5)
632 {
633 /* The loop has at least 4 iterations. If the loop
634 header is not the first block of the function
635 we can rotate the loop. */
636
637 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
638 {
639 if (dump_file)
640 {
641 fprintf (dump_file,
642 "Rotating loop %d - %d\n",
643 best_edge->dest->index, bb->index);
644 }
645 bb->aux = best_edge->dest;
646 bbd[best_edge->dest->index].in_trace =
647 (*n_traces) - 1;
648 bb = rotate_loop (best_edge, trace, *n_traces);
649 }
650 }
651 else
652 {
653 /* The loop has less than 4 iterations. */
654
655 if (single_succ_p (bb)
656 && copy_bb_p (best_edge->dest,
657 optimize_edge_for_speed_p (best_edge)))
658 {
659 bb = copy_bb (best_edge->dest, best_edge, bb,
660 *n_traces);
661 trace->length++;
662 }
663 }
664 }
665
666 /* Terminate the trace. */
667 break;
668 }
669 else
670 {
671 /* Check for a situation
672
673 A
674 /|
675 B |
676 \|
677 C
678
679 where
680 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
681 >= EDGE_FREQUENCY (AC).
682 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
683 Best ordering is then A B C.
684
685 This situation is created for example by:
686
687 if (A) B;
688 C;
689
690 */
691
692 FOR_EACH_EDGE (e, ei, bb->succs)
693 if (e != best_edge
694 && (e->flags & EDGE_CAN_FALLTHRU)
695 && !(e->flags & EDGE_COMPLEX)
696 && !bb_visited_trace (e->dest)
697 && single_pred_p (e->dest)
698 && !(e->flags & EDGE_CROSSING)
699 && single_succ_p (e->dest)
700 && (single_succ_edge (e->dest)->flags
701 & EDGE_CAN_FALLTHRU)
702 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
703 && single_succ (e->dest) == best_edge->dest
704 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
705 {
706 best_edge = e;
707 if (dump_file)
708 fprintf (dump_file, "Selecting BB %d\n",
709 best_edge->dest->index);
710 break;
711 }
712
713 bb->aux = best_edge->dest;
714 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
715 bb = best_edge->dest;
716 }
717 }
718 }
719 while (best_edge);
720 trace->last = bb;
721 bbd[trace->first->index].start_of_trace = *n_traces - 1;
722 bbd[trace->last->index].end_of_trace = *n_traces - 1;
723
724 /* The trace is terminated, so we have to recompute the keys in the heap
725 (some blocks may now have a lower key because one of their predecessors
726 has become the end of a trace). */
727 FOR_EACH_EDGE (e, ei, bb->succs)
728 {
729 if (e->dest == EXIT_BLOCK_PTR
730 || bb_visited_trace (e->dest))
731 continue;
732
733 if (bbd[e->dest->index].heap)
734 {
735 key = bb_to_key (e->dest);
736 if (key != bbd[e->dest->index].node->key)
737 {
738 if (dump_file)
739 {
740 fprintf (dump_file,
741 "Changing key for bb %d from %ld to %ld.\n",
742 e->dest->index,
743 (long) bbd[e->dest->index].node->key, key);
744 }
745 fibheap_replace_key (bbd[e->dest->index].heap,
746 bbd[e->dest->index].node,
747 key);
748 }
749 }
750 }
751 }
752
753 fibheap_delete (*heap);
754
755 /* "Return" the new heap. */
756 *heap = new_heap;
757 }
758
759 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it; add
760 the duplicate to the trace after BB, mark the duplicate visited and update the
761 pass's data structures (TRACE is the number of the trace OLD_BB is duplicated into). */
762
763 static basic_block
764 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
765 {
766 basic_block new_bb;
767
768 new_bb = duplicate_block (old_bb, e, bb);
769 BB_COPY_PARTITION (new_bb, old_bb);
770
771 gcc_assert (e->dest == new_bb);
772
773 if (dump_file)
774 fprintf (dump_file,
775 "Duplicated bb %d (created bb %d)\n",
776 old_bb->index, new_bb->index);
777
778 if (new_bb->index >= array_size || last_basic_block > array_size)
779 {
780 int i;
781 int new_size;
782
783 new_size = MAX (last_basic_block, new_bb->index + 1);
784 new_size = GET_ARRAY_SIZE (new_size);
785 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
786 for (i = array_size; i < new_size; i++)
787 {
788 bbd[i].start_of_trace = -1;
789 bbd[i].end_of_trace = -1;
790 bbd[i].in_trace = -1;
791 bbd[i].visited = 0;
792 bbd[i].heap = NULL;
793 bbd[i].node = NULL;
794 }
795 array_size = new_size;
796
797 if (dump_file)
798 {
799 fprintf (dump_file,
800 "Growing the dynamic array to %d elements.\n",
801 array_size);
802 }
803 }
804
805 gcc_assert (!bb_visited_trace (e->dest));
806 mark_bb_visited (new_bb, trace);
807 new_bb->aux = bb->aux;
808 bb->aux = new_bb;
809
810 bbd[new_bb->index].in_trace = trace;
811
812 return new_bb;
813 }
814
815 /* Compute and return the key (for the heap) of the basic block BB. */
816
817 static fibheapkey_t
818 bb_to_key (basic_block bb)
819 {
820 edge e;
821 edge_iterator ei;
822 int priority = 0;
823
824 /* Do not start in probably never executed blocks. */
825
826 if (BB_PARTITION (bb) == BB_COLD_PARTITION
827 || probably_never_executed_bb_p (bb))
828 return BB_FREQ_MAX;
829
830 /* Prefer blocks whose predecessor is an end of some trace
831 or whose predecessor edge is EDGE_DFS_BACK. */
832 FOR_EACH_EDGE (e, ei, bb->preds)
833 {
834 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
835 || (e->flags & EDGE_DFS_BACK))
836 {
837 int edge_freq = EDGE_FREQUENCY (e);
838
839 if (edge_freq > priority)
840 priority = edge_freq;
841 }
842 }
843
844 if (priority)
845 /* A block with priority should have a significantly lower key. */
846 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
847 return -bb->frequency;
848 }
849
850 /* Return true when the edge E from basic block BB is better than the temporary
851 best edge (details are in the function). The probability of edge E is PROB.
852 The frequency of the successor is FREQ. The current best probability is
853 BEST_PROB, the best frequency is BEST_FREQ.
854 The edge is considered equivalent to the current best when PROB does not
855 differ much from BEST_PROB; similarly for frequency. */
856
857 static bool
858 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob,
859 int best_freq, const_edge cur_best_edge)
860 {
861 bool is_better_edge;
862
863 /* The BEST_* values do not have to be best, but can be a bit smaller than
864 maximum values. */
865 int diff_prob = best_prob / 10;
866 int diff_freq = best_freq / 10;
867
868 if (prob > best_prob + diff_prob)
869 /* The edge has higher probability than the temporary best edge. */
870 is_better_edge = true;
871 else if (prob < best_prob - diff_prob)
872 /* The edge has lower probability than the temporary best edge. */
873 is_better_edge = false;
874 else if (freq < best_freq - diff_freq)
875 /* The edge and the temporary best edge have almost equivalent
876 probabilities. The higher frequency of a successor now means
877 that there is another edge going into that successor.
878 This successor has lower frequency so it is better. */
879 is_better_edge = true;
880 else if (freq > best_freq + diff_freq)
881 /* This successor has higher frequency so it is worse. */
882 is_better_edge = false;
883 else if (e->dest->prev_bb == bb)
884 /* The edges have equivalent probabilities and the successors
885 have equivalent frequencies. Select the previous successor. */
886 is_better_edge = true;
887 else
888 is_better_edge = false;
889
890 /* If we are doing hot/cold partitioning, make sure that we always favor
891 non-crossing edges over crossing edges. */
892
893 if (!is_better_edge
894 && flag_reorder_blocks_and_partition
895 && cur_best_edge
896 && (cur_best_edge->flags & EDGE_CROSSING)
897 && !(e->flags & EDGE_CROSSING))
898 is_better_edge = true;
899
900 return is_better_edge;
901 }
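/* Rough illustration of the "equivalence band" above (made-up numbers):
   if the current best edge has probability 60%, DIFF_PROB is 6%, so a new
   edge with probability 64% is neither clearly better nor clearly worse;
   the decision then falls through to the frequency comparison (a lower
   successor frequency wins) and finally to adjacency in the current block
   order.  */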
902
903 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
904
905 static void
906 connect_traces (int n_traces, struct trace *traces)
907 {
908 int i;
909 bool *connected;
910 bool two_passes;
911 int last_trace;
912 int current_pass;
913 int current_partition;
914 int freq_threshold;
915 gcov_type count_threshold;
916
917 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
918 if (max_entry_count < INT_MAX / 1000)
919 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
920 else
921 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
922
923 connected = XCNEWVEC (bool, n_traces);
924 last_trace = -1;
925 current_pass = 1;
926 current_partition = BB_PARTITION (traces[0].first);
927 two_passes = false;
928
929 if (flag_reorder_blocks_and_partition)
930 for (i = 0; i < n_traces && !two_passes; i++)
931 if (BB_PARTITION (traces[0].first)
932 != BB_PARTITION (traces[i].first))
933 two_passes = true;
934
935 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
936 {
937 int t = i;
938 int t2;
939 edge e, best;
940 int best_len;
941
942 if (i >= n_traces)
943 {
944 gcc_assert (two_passes && current_pass == 1);
945 i = 0;
946 t = i;
947 current_pass = 2;
948 if (current_partition == BB_HOT_PARTITION)
949 current_partition = BB_COLD_PARTITION;
950 else
951 current_partition = BB_HOT_PARTITION;
952 }
953
954 if (connected[t])
955 continue;
956
957 if (two_passes
958 && BB_PARTITION (traces[t].first) != current_partition)
959 continue;
960
961 connected[t] = true;
962
963 /* Find the predecessor traces. */
964 for (t2 = t; t2 > 0;)
965 {
966 edge_iterator ei;
967 best = NULL;
968 best_len = 0;
969 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
970 {
971 int si = e->src->index;
972
973 if (e->src != ENTRY_BLOCK_PTR
974 && (e->flags & EDGE_CAN_FALLTHRU)
975 && !(e->flags & EDGE_COMPLEX)
976 && bbd[si].end_of_trace >= 0
977 && !connected[bbd[si].end_of_trace]
978 && (BB_PARTITION (e->src) == current_partition)
979 && (!best
980 || e->probability > best->probability
981 || (e->probability == best->probability
982 && traces[bbd[si].end_of_trace].length > best_len)))
983 {
984 best = e;
985 best_len = traces[bbd[si].end_of_trace].length;
986 }
987 }
988 if (best)
989 {
990 best->src->aux = best->dest;
991 t2 = bbd[best->src->index].end_of_trace;
992 connected[t2] = true;
993
994 if (dump_file)
995 {
996 fprintf (dump_file, "Connection: %d %d\n",
997 best->src->index, best->dest->index);
998 }
999 }
1000 else
1001 break;
1002 }
1003
1004 if (last_trace >= 0)
1005 traces[last_trace].last->aux = traces[t2].first;
1006 last_trace = t;
1007
1008 /* Find the successor traces. */
1009 while (1)
1010 {
1011 /* Find the continuation of the chain. */
1012 edge_iterator ei;
1013 best = NULL;
1014 best_len = 0;
1015 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1016 {
1017 int di = e->dest->index;
1018
1019 if (e->dest != EXIT_BLOCK_PTR
1020 && (e->flags & EDGE_CAN_FALLTHRU)
1021 && !(e->flags & EDGE_COMPLEX)
1022 && bbd[di].start_of_trace >= 0
1023 && !connected[bbd[di].start_of_trace]
1024 && (BB_PARTITION (e->dest) == current_partition)
1025 && (!best
1026 || e->probability > best->probability
1027 || (e->probability == best->probability
1028 && traces[bbd[di].start_of_trace].length > best_len)))
1029 {
1030 best = e;
1031 best_len = traces[bbd[di].start_of_trace].length;
1032 }
1033 }
1034
1035 if (best)
1036 {
1037 if (dump_file)
1038 {
1039 fprintf (dump_file, "Connection: %d %d\n",
1040 best->src->index, best->dest->index);
1041 }
1042 t = bbd[best->dest->index].start_of_trace;
1043 traces[last_trace].last->aux = traces[t].first;
1044 connected[t] = true;
1045 last_trace = t;
1046 }
1047 else
1048 {
1049 /* Try to connect the traces by duplication of 1 block. */
1050 edge e2;
1051 basic_block next_bb = NULL;
1052 bool try_copy = false;
1053
1054 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1055 if (e->dest != EXIT_BLOCK_PTR
1056 && (e->flags & EDGE_CAN_FALLTHRU)
1057 && !(e->flags & EDGE_COMPLEX)
1058 && (!best || e->probability > best->probability))
1059 {
1060 edge_iterator ei;
1061 edge best2 = NULL;
1062 int best2_len = 0;
1063
1064 /* If the destination is a start of a trace which is only
1065 one block long, then no need to search the successor
1066 blocks of the trace. Accept it. */
1067 if (bbd[e->dest->index].start_of_trace >= 0
1068 && traces[bbd[e->dest->index].start_of_trace].length
1069 == 1)
1070 {
1071 best = e;
1072 try_copy = true;
1073 continue;
1074 }
1075
1076 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1077 {
1078 int di = e2->dest->index;
1079
1080 if (e2->dest == EXIT_BLOCK_PTR
1081 || ((e2->flags & EDGE_CAN_FALLTHRU)
1082 && !(e2->flags & EDGE_COMPLEX)
1083 && bbd[di].start_of_trace >= 0
1084 && !connected[bbd[di].start_of_trace]
1085 && (BB_PARTITION (e2->dest) == current_partition)
1086 && (EDGE_FREQUENCY (e2) >= freq_threshold)
1087 && (e2->count >= count_threshold)
1088 && (!best2
1089 || e2->probability > best2->probability
1090 || (e2->probability == best2->probability
1091 && traces[bbd[di].start_of_trace].length
1092 > best2_len))))
1093 {
1094 best = e;
1095 best2 = e2;
1096 if (e2->dest != EXIT_BLOCK_PTR)
1097 best2_len = traces[bbd[di].start_of_trace].length;
1098 else
1099 best2_len = INT_MAX;
1100 next_bb = e2->dest;
1101 try_copy = true;
1102 }
1103 }
1104 }
1105
1106 if (flag_reorder_blocks_and_partition)
1107 try_copy = false;
1108
1109 /* Copy tiny blocks always; copy larger blocks only when the
1110 edge is traversed frequently enough. */
1111 if (try_copy
1112 && copy_bb_p (best->dest,
1113 optimize_edge_for_speed_p (best)
1114 && EDGE_FREQUENCY (best) >= freq_threshold
1115 && best->count >= count_threshold))
1116 {
1117 basic_block new_bb;
1118
1119 if (dump_file)
1120 {
1121 fprintf (dump_file, "Connection: %d %d ",
1122 traces[t].last->index, best->dest->index);
1123 if (!next_bb)
1124 fputc ('\n', dump_file);
1125 else if (next_bb == EXIT_BLOCK_PTR)
1126 fprintf (dump_file, "exit\n");
1127 else
1128 fprintf (dump_file, "%d\n", next_bb->index);
1129 }
1130
1131 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1132 traces[t].last = new_bb;
1133 if (next_bb && next_bb != EXIT_BLOCK_PTR)
1134 {
1135 t = bbd[next_bb->index].start_of_trace;
1136 traces[last_trace].last->aux = traces[t].first;
1137 connected[t] = true;
1138 last_trace = t;
1139 }
1140 else
1141 break; /* Stop finding the successor traces. */
1142 }
1143 else
1144 break; /* Stop finding the successor traces. */
1145 }
1146 }
1147 }
1148
1149 if (dump_file)
1150 {
1151 basic_block bb;
1152
1153 fprintf (dump_file, "Final order:\n");
1154 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1155 fprintf (dump_file, "%d ", bb->index);
1156 fprintf (dump_file, "\n");
1157 fflush (dump_file);
1158 }
1159
1160 FREE (connected);
1161 }
1162
1163 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1164 when code size is allowed to grow by duplication. */
1165
1166 static bool
1167 copy_bb_p (const_basic_block bb, int code_may_grow)
1168 {
1169 int size = 0;
1170 int max_size = uncond_jump_length;
1171 rtx insn;
1172
1173 if (!bb->frequency)
1174 return false;
1175 if (EDGE_COUNT (bb->preds) < 2)
1176 return false;
1177 if (!can_duplicate_block_p (bb))
1178 return false;
1179
1180 /* Avoid duplicating blocks which have many successors (PR/13430). */
1181 if (EDGE_COUNT (bb->succs) > 8)
1182 return false;
1183
1184 if (code_may_grow && optimize_bb_for_speed_p (bb))
1185 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1186
1187 FOR_BB_INSNS (bb, insn)
1188 {
1189 if (INSN_P (insn))
1190 size += get_attr_min_length (insn);
1191 }
1192
1193 if (size <= max_size)
1194 return true;
1195
1196 if (dump_file)
1197 {
1198 fprintf (dump_file,
1199 "Block %d can't be copied because its size = %d.\n",
1200 bb->index, size);
1201 }
1202
1203 return false;
1204 }
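/* A sketch of the size limit (assumed parameter value; the real default of
   PARAM_MAX_GROW_COPY_BB_INSNS lives in params.def): without CODE_MAY_GROW
   a block may be copied only if its estimated length does not exceed one
   unconditional jump, i.e. the copy is roughly size-neutral; with
   CODE_MAY_GROW and a parameter value of 8, a block of up to 8
   jump-lengths of instructions may still be duplicated when the block is
   optimized for speed.  */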
1205
1206 /* Return the length of an unconditional jump instruction. */
1207
1208 int
1209 get_uncond_jump_length (void)
1210 {
1211 rtx label, jump;
1212 int length;
1213
1214 label = emit_label_before (gen_label_rtx (), get_insns ());
1215 jump = emit_jump_insn (gen_jump (label));
1216
1217 length = get_attr_min_length (jump);
1218
1219 delete_insn (jump);
1220 delete_insn (label);
1221 return length;
1222 }
1223
1224 /* Emit a barrier into the footer of BB. */
1225
1226 static void
1227 emit_barrier_after_bb (basic_block bb)
1228 {
1229 rtx barrier = emit_barrier_after (BB_END (bb));
1230 BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier);
1231 }
1232
1233 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1234 Duplicate the landing pad and split the edges so that no EH edge
1235 crosses partitions. */
1236
1237 static void
1238 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1239 {
1240 eh_landing_pad new_lp;
1241 basic_block new_bb, last_bb, post_bb;
1242 rtx new_label, jump, post_label;
1243 unsigned new_partition;
1244 edge_iterator ei;
1245 edge e;
1246
1247 /* Generate the new landing-pad structure. */
1248 new_lp = gen_eh_landing_pad (old_lp->region);
1249 new_lp->post_landing_pad = old_lp->post_landing_pad;
1250 new_lp->landing_pad = gen_label_rtx ();
1251 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1252
1253 /* Put appropriate instructions in new bb. */
1254 new_label = emit_label (new_lp->landing_pad);
1255
1256 expand_dw2_landing_pad_for_region (old_lp->region);
1257
1258 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1259 post_bb = single_succ (post_bb);
1260 post_label = block_label (post_bb);
1261 jump = emit_jump_insn (gen_jump (post_label));
1262 JUMP_LABEL (jump) = post_label;
1263
1264 /* Create new basic block to be dest for lp. */
1265 last_bb = EXIT_BLOCK_PTR->prev_bb;
1266 new_bb = create_basic_block (new_label, jump, last_bb);
1267 new_bb->aux = last_bb->aux;
1268 last_bb->aux = new_bb;
1269
1270 emit_barrier_after_bb (new_bb);
1271
1272 make_edge (new_bb, post_bb, 0);
1273
1274 /* Make sure new bb is in the other partition. */
1275 new_partition = BB_PARTITION (old_bb);
1276 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1277 BB_SET_PARTITION (new_bb, new_partition);
1278
1279 /* Fix up the edges. */
1280 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1281 if (BB_PARTITION (e->src) == new_partition)
1282 {
1283 rtx insn = BB_END (e->src);
1284 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1285
1286 gcc_assert (note != NULL);
1287 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1288 XEXP (note, 0) = GEN_INT (new_lp->index);
1289
1290 /* Adjust the edge to the new destination. */
1291 redirect_edge_succ (e, new_bb);
1292 }
1293 else
1294 ei_next (&ei);
1295 }
1296
1297 /* Find the basic blocks that are rarely executed and need to be moved to
1298 a separate section of the .o file (to cut down on paging and improve
1299 cache locality). Return a vector of all edges that cross between sections. */
1300
1301 static VEC(edge, heap) *
1302 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1303 {
1304 VEC(edge, heap) *crossing_edges = NULL;
1305 basic_block bb;
1306 edge e;
1307 edge_iterator ei;
1308
1309 /* Mark which partition (hot/cold) each basic block belongs in. */
1310 FOR_EACH_BB (bb)
1311 {
1312 if (probably_never_executed_bb_p (bb))
1313 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1314 else
1315 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1316 }
1317
1318 /* The format of .gcc_except_table does not allow landing pads to
1319 be in a different partition than the throw. Fix this by either
1320 moving or duplicating the landing pads. */
1321 if (cfun->eh->lp_array)
1322 {
1323 unsigned i;
1324 eh_landing_pad lp;
1325
1326 FOR_EACH_VEC_ELT (eh_landing_pad, cfun->eh->lp_array, i, lp)
1327 {
1328 bool all_same, all_diff;
1329
1330 if (lp == NULL
1331 || lp->landing_pad == NULL_RTX
1332 || !LABEL_P (lp->landing_pad))
1333 continue;
1334
1335 all_same = all_diff = true;
1336 bb = BLOCK_FOR_INSN (lp->landing_pad);
1337 FOR_EACH_EDGE (e, ei, bb->preds)
1338 {
1339 gcc_assert (e->flags & EDGE_EH);
1340 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1341 all_diff = false;
1342 else
1343 all_same = false;
1344 }
1345
1346 if (all_same)
1347 ;
1348 else if (all_diff)
1349 {
1350 int which = BB_PARTITION (bb);
1351 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1352 BB_SET_PARTITION (bb, which);
1353 }
1354 else
1355 fix_up_crossing_landing_pad (lp, bb);
1356 }
1357 }
1358
1359 /* Mark every edge that crosses between sections. */
1360
1361 FOR_EACH_BB (bb)
1362 FOR_EACH_EDGE (e, ei, bb->succs)
1363 {
1364 unsigned int flags = e->flags;
1365
1366 /* We should never have EDGE_CROSSING set yet. */
1367 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1368
1369 if (e->src != ENTRY_BLOCK_PTR
1370 && e->dest != EXIT_BLOCK_PTR
1371 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1372 {
1373 VEC_safe_push (edge, heap, crossing_edges, e);
1374 flags |= EDGE_CROSSING;
1375 }
1376
1377 /* Now that we've split eh edges as appropriate, allow landing pads
1378 to be merged with the post-landing pads. */
1379 flags &= ~EDGE_PRESERVE;
1380
1381 e->flags = flags;
1382 }
1383
1384 return crossing_edges;
1385 }
1386
1387 /* If any destination of a crossing edge does not have a label, add a label;
1388 convert any easy fall-through crossing edges to unconditional jumps. */
1389
1390 static void
1391 add_labels_and_missing_jumps (VEC(edge, heap) *crossing_edges)
1392 {
1393 size_t i;
1394 edge e;
1395
1396 FOR_EACH_VEC_ELT (edge, crossing_edges, i, e)
1397 {
1398 basic_block src = e->src;
1399 basic_block dest = e->dest;
1400 rtx label, new_jump;
1401
1402 if (dest == EXIT_BLOCK_PTR)
1403 continue;
1404
1405 /* Make sure dest has a label. */
1406 label = block_label (dest);
1407
1408 /* Nothing to do for non-fallthru edges. */
1409 if (src == ENTRY_BLOCK_PTR)
1410 continue;
1411 if ((e->flags & EDGE_FALLTHRU) == 0)
1412 continue;
1413
1414 /* If the block does not end with a control flow insn, then we
1415 can trivially add a jump to the end to fixup the crossing.
1416 Otherwise the jump will have to go in a new bb, which will
1417 be handled by fix_up_fall_thru_edges function. */
1418 if (control_flow_insn_p (BB_END (src)))
1419 continue;
1420
1421 /* Make sure there's only one successor. */
1422 gcc_assert (single_succ_p (src));
1423
1424 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1425 BB_END (src) = new_jump;
1426 JUMP_LABEL (new_jump) = label;
1427 LABEL_NUSES (label) += 1;
1428
1429 emit_barrier_after_bb (src);
1430
1431 /* Mark edge as non-fallthru. */
1432 e->flags &= ~EDGE_FALLTHRU;
1433 }
1434 }
1435
1436 /* Find any bb's where the fall-through edge is a crossing edge (note that
1437 these bb's must also contain a conditional jump or end with a call
1438 instruction; we've already dealt with fall-through edges for blocks
1439 that didn't have a conditional jump or didn't end with a call instruction
1440 in the call to add_labels_and_missing_jumps). Convert the fall-through
1441 edge to a non-crossing edge by inserting a new bb to fall through into.
1442 The new bb will contain an unconditional jump (crossing edge) to the
1443 original fall-through destination. */
1444
1445 static void
1446 fix_up_fall_thru_edges (void)
1447 {
1448 basic_block cur_bb;
1449 basic_block new_bb;
1450 edge succ1;
1451 edge succ2;
1452 edge fall_thru;
1453 edge cond_jump = NULL;
1454 edge e;
1455 bool cond_jump_crosses;
1456 int invert_worked;
1457 rtx old_jump;
1458 rtx fall_thru_label;
1459
1460 FOR_EACH_BB (cur_bb)
1461 {
1462 fall_thru = NULL;
1463 if (EDGE_COUNT (cur_bb->succs) > 0)
1464 succ1 = EDGE_SUCC (cur_bb, 0);
1465 else
1466 succ1 = NULL;
1467
1468 if (EDGE_COUNT (cur_bb->succs) > 1)
1469 succ2 = EDGE_SUCC (cur_bb, 1);
1470 else
1471 succ2 = NULL;
1472
1473 /* Find the fall-through edge. */
1474
1475 if (succ1
1476 && (succ1->flags & EDGE_FALLTHRU))
1477 {
1478 fall_thru = succ1;
1479 cond_jump = succ2;
1480 }
1481 else if (succ2
1482 && (succ2->flags & EDGE_FALLTHRU))
1483 {
1484 fall_thru = succ2;
1485 cond_jump = succ1;
1486 }
1487 else if (succ1
1488 && (block_ends_with_call_p (cur_bb)
1489 || can_throw_internal (BB_END (cur_bb))))
1490 {
1491 edge e;
1492 edge_iterator ei;
1493
1494 /* Find EDGE_CAN_FALLTHRU edge. */
1495 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1496 if (e->flags & EDGE_CAN_FALLTHRU)
1497 {
1498 fall_thru = e;
1499 break;
1500 }
1501 }
1502
1503 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1504 {
1505 /* Check to see if the fall-thru edge is a crossing edge. */
1506
1507 if (fall_thru->flags & EDGE_CROSSING)
1508 {
1509 /* The fall_thru edge crosses; now check the cond jump edge, if
1510 it exists. */
1511
1512 cond_jump_crosses = true;
1513 invert_worked = 0;
1514 old_jump = BB_END (cur_bb);
1515
1516 /* Find the jump instruction, if there is one. */
1517
1518 if (cond_jump)
1519 {
1520 if (!(cond_jump->flags & EDGE_CROSSING))
1521 cond_jump_crosses = false;
1522
1523 /* We know the fall-thru edge crosses; if the cond
1524 jump edge does NOT cross, and its destination is the
1525 next block in the bb order, invert the jump
1526 (i.e. fix it so the fall through does not cross and
1527 the cond jump does). */
1528
1529 if (!cond_jump_crosses
1530 && cur_bb->aux == cond_jump->dest)
1531 {
1532 /* Find label in fall_thru block. We've already added
1533 any missing labels, so there must be one. */
1534
1535 fall_thru_label = block_label (fall_thru->dest);
1536
1537 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1538 invert_worked = invert_jump (old_jump,
1539 fall_thru_label,0);
1540 if (invert_worked)
1541 {
1542 fall_thru->flags &= ~EDGE_FALLTHRU;
1543 cond_jump->flags |= EDGE_FALLTHRU;
1544 update_br_prob_note (cur_bb);
1545 e = fall_thru;
1546 fall_thru = cond_jump;
1547 cond_jump = e;
1548 cond_jump->flags |= EDGE_CROSSING;
1549 fall_thru->flags &= ~EDGE_CROSSING;
1550 }
1551 }
1552 }
1553
1554 if (cond_jump_crosses || !invert_worked)
1555 {
1556 /* This is the case where both edges out of the basic
1557 block are crossing edges. Here we will fix up the
1558 fall through edge. The jump edge will be taken care
1559 of later. The EDGE_CROSSING flag of fall_thru edge
1560 is unset before the call to force_nonfallthru
1561 function because if a new basic-block is created
1562 this edge remains in the current section boundary
1563 while the edge between new_bb and the fall_thru->dest
1564 becomes EDGE_CROSSING. */
1565
1566 fall_thru->flags &= ~EDGE_CROSSING;
1567 new_bb = force_nonfallthru (fall_thru);
1568
1569 if (new_bb)
1570 {
1571 new_bb->aux = cur_bb->aux;
1572 cur_bb->aux = new_bb;
1573
1574 /* Make sure new fall-through bb is in same
1575 partition as bb it's falling through from. */
1576
1577 BB_COPY_PARTITION (new_bb, cur_bb);
1578 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1579 }
1580 else
1581 {
1582 /* If a new basic-block was not created; restore
1583 the EDGE_CROSSING flag. */
1584 fall_thru->flags |= EDGE_CROSSING;
1585 }
1586
1587 /* Add barrier after new jump */
1588 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1589 }
1590 }
1591 }
1592 }
1593 }
1594
1595 /* This function checks the destination block of a "crossing jump" to
1596 see if it has any crossing predecessors that begin with a code label
1597 and end with an unconditional jump. If so, it returns that predecessor
1598 block. (This is to avoid creating lots of new basic blocks that all
1599 contain unconditional jumps to the same destination). */
1600
1601 static basic_block
1602 find_jump_block (basic_block jump_dest)
1603 {
1604 basic_block source_bb = NULL;
1605 edge e;
1606 rtx insn;
1607 edge_iterator ei;
1608
1609 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1610 if (e->flags & EDGE_CROSSING)
1611 {
1612 basic_block src = e->src;
1613
1614 /* Check each predecessor to see if it has a label, and contains
1615 only one executable instruction, which is an unconditional jump.
1616 If so, we can use it. */
1617
1618 if (LABEL_P (BB_HEAD (src)))
1619 for (insn = BB_HEAD (src);
1620 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1621 insn = NEXT_INSN (insn))
1622 {
1623 if (INSN_P (insn)
1624 && insn == BB_END (src)
1625 && JUMP_P (insn)
1626 && !any_condjump_p (insn))
1627 {
1628 source_bb = src;
1629 break;
1630 }
1631 }
1632
1633 if (source_bb)
1634 break;
1635 }
1636
1637 return source_bb;
1638 }
1639
1640 /* Find all BB's with conditional jumps that are crossing edges;
1641 insert a new bb and make the conditional jump branch to the new
1642 bb instead (make the new bb the same color so the conditional branch
1643 won't be a 'crossing' edge). Insert an unconditional jump from the
1644 new bb to the original destination of the conditional jump. */
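/* Schematically (an illustration, not additional code in the pass):

       hot:  ...; if (cond) goto COLD_LABEL;        (crossing edge)

   becomes

       hot:  ...; if (cond) goto NEW_LABEL;         (stays within the hot section)
             NEW_LABEL: goto COLD_LABEL;            (only this jump crosses)

   so the conditional branch itself never crosses sections; the new
   unconditional jump that does cross is dealt with afterwards by
   fix_crossing_unconditional_branches.  */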
1645
1646 static void
1647 fix_crossing_conditional_branches (void)
1648 {
1649 basic_block cur_bb;
1650 basic_block new_bb;
1651 basic_block dest;
1652 edge succ1;
1653 edge succ2;
1654 edge crossing_edge;
1655 edge new_edge;
1656 rtx old_jump;
1657 rtx set_src;
1658 rtx old_label = NULL_RTX;
1659 rtx new_label;
1660
1661 FOR_EACH_BB (cur_bb)
1662 {
1663 crossing_edge = NULL;
1664 if (EDGE_COUNT (cur_bb->succs) > 0)
1665 succ1 = EDGE_SUCC (cur_bb, 0);
1666 else
1667 succ1 = NULL;
1668
1669 if (EDGE_COUNT (cur_bb->succs) > 1)
1670 succ2 = EDGE_SUCC (cur_bb, 1);
1671 else
1672 succ2 = NULL;
1673
1674 /* We already took care of fall-through edges, so only one successor
1675 can be a crossing edge. */
1676
1677 if (succ1 && (succ1->flags & EDGE_CROSSING))
1678 crossing_edge = succ1;
1679 else if (succ2 && (succ2->flags & EDGE_CROSSING))
1680 crossing_edge = succ2;
1681
1682 if (crossing_edge)
1683 {
1684 old_jump = BB_END (cur_bb);
1685
1686 /* Check to make sure the jump instruction is a
1687 conditional jump. */
1688
1689 set_src = NULL_RTX;
1690
1691 if (any_condjump_p (old_jump))
1692 {
1693 if (GET_CODE (PATTERN (old_jump)) == SET)
1694 set_src = SET_SRC (PATTERN (old_jump));
1695 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1696 {
1697 set_src = XVECEXP (PATTERN (old_jump), 0,0);
1698 if (GET_CODE (set_src) == SET)
1699 set_src = SET_SRC (set_src);
1700 else
1701 set_src = NULL_RTX;
1702 }
1703 }
1704
1705 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1706 {
1707 if (GET_CODE (XEXP (set_src, 1)) == PC)
1708 old_label = XEXP (set_src, 2);
1709 else if (GET_CODE (XEXP (set_src, 2)) == PC)
1710 old_label = XEXP (set_src, 1);
1711
1712 /* Check to see if new bb for jumping to that dest has
1713 already been created; if so, use it; if not, create
1714 a new one. */
1715
1716 new_bb = find_jump_block (crossing_edge->dest);
1717
1718 if (new_bb)
1719 new_label = block_label (new_bb);
1720 else
1721 {
1722 basic_block last_bb;
1723 rtx new_jump;
1724
1725 /* Create new basic block to be dest for
1726 conditional jump. */
1727
1728 /* Put appropriate instructions in new bb. */
1729
1730 new_label = gen_label_rtx ();
1731 emit_label (new_label);
1732
1733 gcc_assert (GET_CODE (old_label) == LABEL_REF);
1734 old_label = JUMP_LABEL (old_jump);
1735 new_jump = emit_jump_insn (gen_jump (old_label));
1736 JUMP_LABEL (new_jump) = old_label;
1737
1738 last_bb = EXIT_BLOCK_PTR->prev_bb;
1739 new_bb = create_basic_block (new_label, new_jump, last_bb);
1740 new_bb->aux = last_bb->aux;
1741 last_bb->aux = new_bb;
1742
1743 emit_barrier_after_bb (new_bb);
1744
1745 /* Make sure new bb is in same partition as source
1746 of conditional branch. */
1747 BB_COPY_PARTITION (new_bb, cur_bb);
1748 }
1749
1750 /* Make old jump branch to new bb. */
1751
1752 redirect_jump (old_jump, new_label, 0);
1753
1754 /* Remove crossing_edge as predecessor of 'dest'. */
1755
1756 dest = crossing_edge->dest;
1757
1758 redirect_edge_succ (crossing_edge, new_bb);
1759
1760 /* Make a new edge from new_bb to old dest; new edge
1761 will be a successor for new_bb and a predecessor
1762 for 'dest'. */
1763
1764 if (EDGE_COUNT (new_bb->succs) == 0)
1765 new_edge = make_edge (new_bb, dest, 0);
1766 else
1767 new_edge = EDGE_SUCC (new_bb, 0);
1768
1769 crossing_edge->flags &= ~EDGE_CROSSING;
1770 new_edge->flags |= EDGE_CROSSING;
1771 }
1772 }
1773 }
1774 }
1775
1776 /* Find any unconditional branches that cross between hot and cold
1777 sections. Convert them into indirect jumps instead. */
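/* Schematically (illustration only): a crossing "jmp .L_cold" at the end
   of a hot block is rewritten as roughly

       reg = &.L_cold;      (an emit_move_insn of a LABEL_REF)
       goto *reg;           (emit_indirect_jump)

   since the distance between the hot and cold sections is not known at
   compile time, a short direct branch might not reach its target, whereas
   an indirect jump through a register always can.  */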
1778
1779 static void
1780 fix_crossing_unconditional_branches (void)
1781 {
1782 basic_block cur_bb;
1783 rtx last_insn;
1784 rtx label;
1785 rtx label_addr;
1786 rtx indirect_jump_sequence;
1787 rtx jump_insn = NULL_RTX;
1788 rtx new_reg;
1789 rtx cur_insn;
1790 edge succ;
1791
1792 FOR_EACH_BB (cur_bb)
1793 {
1794 last_insn = BB_END (cur_bb);
1795
1796 if (EDGE_COUNT (cur_bb->succs) < 1)
1797 continue;
1798
1799 succ = EDGE_SUCC (cur_bb, 0);
1800
1801 /* Check to see if bb ends in a crossing (unconditional) jump. At
1802 this point, no crossing jumps should be conditional. */
1803
1804 if (JUMP_P (last_insn)
1805 && (succ->flags & EDGE_CROSSING))
1806 {
1807 rtx label2, table;
1808
1809 gcc_assert (!any_condjump_p (last_insn));
1810
1811 /* Make sure the jump is not already an indirect or table jump. */
1812
1813 if (!computed_jump_p (last_insn)
1814 && !tablejump_p (last_insn, &label2, &table))
1815 {
1816 /* We have found a "crossing" unconditional branch. Now
1817 we must convert it to an indirect jump. First create a
1818 reference to the label, as the target for the jump. */
1819
1820 label = JUMP_LABEL (last_insn);
1821 label_addr = gen_rtx_LABEL_REF (Pmode, label);
1822 LABEL_NUSES (label) += 1;
1823
1824 /* Get a register to use for the indirect jump. */
1825
1826 new_reg = gen_reg_rtx (Pmode);
1827
1828 /* Generate the indirect jump sequence. */
1829
1830 start_sequence ();
1831 emit_move_insn (new_reg, label_addr);
1832 emit_indirect_jump (new_reg);
1833 indirect_jump_sequence = get_insns ();
1834 end_sequence ();
1835
1836 /* Make sure every instruction in the new jump sequence has
1837 its basic block set to be cur_bb. */
1838
1839 for (cur_insn = indirect_jump_sequence; cur_insn;
1840 cur_insn = NEXT_INSN (cur_insn))
1841 {
1842 if (!BARRIER_P (cur_insn))
1843 BLOCK_FOR_INSN (cur_insn) = cur_bb;
1844 if (JUMP_P (cur_insn))
1845 jump_insn = cur_insn;
1846 }
1847
1848 /* Insert the new (indirect) jump sequence immediately before
1849 the unconditional jump, then delete the unconditional jump. */
1850
1851 emit_insn_before (indirect_jump_sequence, last_insn);
1852 delete_insn (last_insn);
1853
1854 /* Make BB_END for cur_bb be the jump instruction (NOT the
1855 barrier instruction at the end of the sequence...). */
1856
1857 BB_END (cur_bb) = jump_insn;
1858 }
1859 }
1860 }
1861 }
1862
1863 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
1864
1865 static void
1866 add_reg_crossing_jump_notes (void)
1867 {
1868 basic_block bb;
1869 edge e;
1870 edge_iterator ei;
1871
1872 FOR_EACH_BB (bb)
1873 FOR_EACH_EDGE (e, ei, bb->succs)
1874 if ((e->flags & EDGE_CROSSING)
1875 && JUMP_P (BB_END (e->src)))
1876 add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
1877 }
1878
1879 /* Verify, in the basic block chain, that there is at most one switch
1880 between hot/cold partitions. This is modelled on
1881 rtl_verify_flow_info_1, but it cannot go inside that function
1882 because this condition will not be true until after
1883 reorder_basic_blocks is called. */
1884
1885 static void
1886 verify_hot_cold_block_grouping (void)
1887 {
1888 basic_block bb;
1889 int err = 0;
1890 bool switched_sections = false;
1891 int current_partition = 0;
1892
1893 FOR_EACH_BB (bb)
1894 {
1895 if (!current_partition)
1896 current_partition = BB_PARTITION (bb);
1897 if (BB_PARTITION (bb) != current_partition)
1898 {
1899 if (switched_sections)
1900 {
1901 error ("multiple hot/cold transitions found (bb %i)",
1902 bb->index);
1903 err = 1;
1904 }
1905 else
1906 {
1907 switched_sections = true;
1908 current_partition = BB_PARTITION (bb);
1909 }
1910 }
1911 }
1912
1913 gcc_assert(!err);
1914 }
1915
1916 /* Reorder basic blocks. The main entry point to this file. The caller
1917 must already have put the IR into cfg_layout mode. */
1918
1919 static void
1920 reorder_basic_blocks (void)
1921 {
1922 int n_traces;
1923 int i;
1924 struct trace *traces;
1925
1926 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
1927
1928 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1929 return;
1930
1931 set_edge_can_fallthru_flag ();
1932 mark_dfs_back_edges ();
1933
1934 /* We are estimating the length of uncond jump insn only once since the code
1935 for getting the insn length always returns the minimal length now. */
1936 if (uncond_jump_length == 0)
1937 uncond_jump_length = get_uncond_jump_length ();
1938
1939 /* We need to know some information for each basic block. */
1940 array_size = GET_ARRAY_SIZE (last_basic_block);
1941 bbd = XNEWVEC (bbro_basic_block_data, array_size);
1942 for (i = 0; i < array_size; i++)
1943 {
1944 bbd[i].start_of_trace = -1;
1945 bbd[i].end_of_trace = -1;
1946 bbd[i].in_trace = -1;
1947 bbd[i].visited = 0;
1948 bbd[i].heap = NULL;
1949 bbd[i].node = NULL;
1950 }
1951
1952 traces = XNEWVEC (struct trace, n_basic_blocks);
1953 n_traces = 0;
1954 find_traces (&n_traces, traces);
1955 connect_traces (n_traces, traces);
1956 FREE (traces);
1957 FREE (bbd);
1958
1959 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
1960
1961 if (dump_file)
1962 dump_flow_info (dump_file, dump_flags);
1963
1964 if (flag_reorder_blocks_and_partition)
1965 verify_hot_cold_block_grouping ();
1966 }
1967
1968 /* Determine which partition the first basic block in the function
1969 belongs to, then find the first basic block in the current function
1970 that belongs to a different section, and insert a
1971 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
1972 instruction stream. When writing out the assembly code,
1973 encountering this note will make the compiler switch between the
1974 hot and cold text sections. */
1975
1976 static void
1977 insert_section_boundary_note (void)
1978 {
1979 basic_block bb;
1980 rtx new_note;
1981 int first_partition = 0;
1982
1983 if (!flag_reorder_blocks_and_partition)
1984 return;
1985
1986 FOR_EACH_BB (bb)
1987 {
1988 if (!first_partition)
1989 first_partition = BB_PARTITION (bb);
1990 if (BB_PARTITION (bb) != first_partition)
1991 {
1992 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
1993 BB_HEAD (bb));
1994 /* ??? This kind of note always lives between basic blocks,
1995 but add_insn_before will set BLOCK_FOR_INSN anyway. */
1996 BLOCK_FOR_INSN (new_note) = NULL;
1997 break;
1998 }
1999 }
2000 }
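/* As an illustrative sketch only (section names are target-dependent and
   this is not taken from a real dump), the single
   NOTE_INSN_SWITCH_TEXT_SECTIONS note inserted above typically shows up
   in the emitted assembly as one section switch per function, e.g. on an
   ELF target:

           .text
       foo:
           ...                     hot blocks
           .section .text.unlikely
           ...                     cold blocks

   which is the "at most one hot/cold transition" property that
   verify_hot_cold_block_grouping checks.  */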
2001
2002 /* Duplicate the blocks containing computed gotos. This basically unfactors
2003 computed gotos that were factored early on in the compilation process to
2004 speed up edge-based data flow. We used to not unfactor them again,
2005 which can seriously pessimize code with many computed jumps in the source
2006 code, such as interpreters. See e.g. PR15242. */
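/* As an illustrative sketch (a hypothetical source fragment, not part of
   this file), consider an interpreter written with GCC's labels-as-values
   extension:

       static const void *dispatch[] = { &&op_add, &&op_sub };
       ...
     op_add:
       ...
       goto *dispatch[*pc++];
     op_sub:
       ...
       goto *dispatch[*pc++];

   Early in compilation these computed gotos are factored into one common
   block that performs the single indirect jump, and every handler ends in
   a plain jump to that block.  This pass duplicates the common block back
   into each sufficiently small predecessor, so that every handler again
   ends in its own indirect jump, which usually helps indirect branch
   prediction in code like the above.  */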
2007
2008 static bool
2009 gate_duplicate_computed_gotos (void)
2010 {
2011 if (targetm.cannot_modify_jumps_p ())
2012 return false;
2013 return (optimize > 0
2014 && flag_expensive_optimizations
2015 && ! optimize_function_for_size_p (cfun));
2016 }
2017
2018
2019 static unsigned int
2020 duplicate_computed_gotos (void)
2021 {
2022 basic_block bb, new_bb;
2023 bitmap candidates;
2024 int max_size;
2025
2026 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2027 return 0;
2028
2029 clear_bb_flags ();
2030 cfg_layout_initialize (0);
2031
2032 /* We estimate the length of an uncond jump insn only once,
2033 since the code for computing insn lengths now always returns
2034 the minimal length. */
2035 if (uncond_jump_length == 0)
2036 uncond_jump_length = get_uncond_jump_length ();
2037
2038 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2039 candidates = BITMAP_ALLOC (NULL);
2040
2041 /* Look for blocks that end in a computed jump, and see if such blocks
2042 are suitable for unfactoring. If a block is a candidate for unfactoring,
2043 mark it in the candidates bitmap. */
2044 FOR_EACH_BB (bb)
2045 {
2046 rtx insn;
2047 edge e;
2048 edge_iterator ei;
2049 int size, all_flags;
2050
2051 /* Build the reorder chain for the original order of blocks. */
2052 if (bb->next_bb != EXIT_BLOCK_PTR)
2053 bb->aux = bb->next_bb;
2054
2055 /* Obviously the block has to end in a computed jump. */
2056 if (!computed_jump_p (BB_END (bb)))
2057 continue;
2058
2059 /* Only consider blocks that can be duplicated. */
2060 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2061 || !can_duplicate_block_p (bb))
2062 continue;
2063
2064 /* Make sure that the block is small enough. */
2065 size = 0;
2066 FOR_BB_INSNS (bb, insn)
2067 if (INSN_P (insn))
2068 {
2069 size += get_attr_min_length (insn);
2070 if (size > max_size)
2071 break;
2072 }
2073 if (size > max_size)
2074 continue;
2075
2076 /* Final check: there must not be any incoming abnormal edges. */
2077 all_flags = 0;
2078 FOR_EACH_EDGE (e, ei, bb->preds)
2079 all_flags |= e->flags;
2080 if (all_flags & EDGE_COMPLEX)
2081 continue;
2082
2083 bitmap_set_bit (candidates, bb->index);
2084 }
2085
2086 /* Nothing to do if there are no candidate blocks to duplicate. */
2087 if (bitmap_empty_p (candidates))
2088 goto done;
2089
2090 /* Duplicate computed gotos. */
2091 FOR_EACH_BB (bb)
2092 {
2093 if (bb->flags & BB_VISITED)
2094 continue;
2095
2096 bb->flags |= BB_VISITED;
2097
2098 /* BB must have one outgoing edge. That edge must not lead to
2099 the exit block or the next block.
2100 The destination must have more than one predecessor. */
2101 if (!single_succ_p (bb)
2102 || single_succ (bb) == EXIT_BLOCK_PTR
2103 || single_succ (bb) == bb->next_bb
2104 || single_pred_p (single_succ (bb)))
2105 continue;
2106
2107 /* The successor block has to be a duplication candidate. */
2108 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2109 continue;
2110
2111 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2112 new_bb->aux = bb->aux;
2113 bb->aux = new_bb;
2114 new_bb->flags |= BB_VISITED;
2115 }
2116
2117 done:
2118 cfg_layout_finalize ();
2119
2120 BITMAP_FREE (candidates);
2121 return 0;
2122 }
2123
2124 struct rtl_opt_pass pass_duplicate_computed_gotos =
2125 {
2126 {
2127 RTL_PASS,
2128 "compgotos", /* name */
2129 gate_duplicate_computed_gotos, /* gate */
2130 duplicate_computed_gotos, /* execute */
2131 NULL, /* sub */
2132 NULL, /* next */
2133 0, /* static_pass_number */
2134 TV_REORDER_BLOCKS, /* tv_id */
2135 0, /* properties_required */
2136 0, /* properties_provided */
2137 0, /* properties_destroyed */
2138 0, /* todo_flags_start */
2139 TODO_verify_rtl_sharing,/* todo_flags_finish */
2140 }
2141 };
2142
2143
2144 /* This function is the main 'entrance' for the optimization that
2145 partitions hot and cold basic blocks into separate sections of the
2146 .o file (to improve performance and cache locality). Ideally it
2147 would be called after all optimizations that rearrange the CFG have
2148 been called. However part of this optimization may introduce new
2149 register usage, so it must be called before register allocation has
2150 occurred. This means that this optimization is actually called
2151 well before the optimization that reorders basic blocks (see
2152 function above).
2153
2154 This optimization checks the feedback information to determine
2155 which basic blocks are hot/cold and updates flags on the basic blocks
2156 to indicate which section they belong in. This information is
2157 later used for writing out sections in the .o file. Because hot
2158 and cold sections can be arbitrarily large (within the bounds of
2159 memory), far beyond the size of a single function, it is necessary
2160 to fix up all edges that cross section boundaries, to make sure the
2161 instructions used can actually span the required distance. The
2162 fixes are described below.
2163
2164 Fall-through edges must be changed into jumps; it is not safe or
2165 legal to fall through across a section boundary. Whenever a
2166 fall-through edge crossing a section boundary is encountered, a new
2167 basic block is inserted (in the same section as the fall-through
2168 source), and the fall through edge is redirected to the new basic
2169 block. The new basic block contains an unconditional jump to the
2170 original fall-through target. (If the unconditional jump is
2171 insufficient to cross section boundaries, that is dealt with a
2172 little later, see below).
2173
2174 In order to deal with architectures that have short conditional
2175 branches (which cannot span all of memory) we take any conditional
2176 jump that attempts to cross a section boundary and add a level of
2177 indirection: it becomes a conditional jump to a new basic block, in
2178 the same section. The new basic block contains an unconditional
2179 jump to the original target, in the other section.
2180
2181 For those architectures whose unconditional branch is also
2182 incapable of reaching all of memory, those unconditional jumps are
2183 converted into indirect jumps, through a register.
2184
2185 IMPORTANT NOTE: This optimization causes some messy interactions
2186 with the cfg cleanup optimizations; those optimizations want to
2187 merge blocks wherever possible, and to collapse indirect jump
2188 sequences (change "A jumps to B jumps to C" directly into "A jumps
2189 to C"). Those optimizations can undo the jump fixes that
2190 partitioning is required to make (see above), in order to ensure
2191 that jumps attempting to cross section boundaries are really able
2192 to cover whatever distance the jump requires (on many architectures
2193 conditional or unconditional jumps are not able to reach all of
2194 memory). Therefore tests have to be inserted into each such
2195 optimization to make sure that it does not undo the fixes necessary to
2196 cross partition boundaries. This would be much less of a problem
2197 if we could perform this optimization later in the compilation, but
2198 unfortunately the fact that we may need to create indirect jumps
2199 (through registers) requires that this optimization be performed
2200 before register allocation.
2201
2202 Hot and cold basic blocks are partitioned and put in separate
2203 sections of the .o file, to reduce paging and improve cache
2204 performance (hopefully). This can result in bits of code from the
2205 same function being widely separated in the .o file. However, this
2206 separation is not visible in the current bb structure. Therefore we must take
2207 care to ensure that: 1). There are no fall_thru edges that cross
2208 between sections; 2). For those architectures which have "short"
2209 conditional branches, all conditional branches that attempt to
2210 cross between sections are converted to unconditional branches;
2211 and, 3). For those architectures which have "short" unconditional
2212 branches, all unconditional branches that attempt to cross between
2213 sections are converted to indirect jumps.
2214
2215 The code for fixing up fall_thru edges that cross between hot and
2216 cold basic blocks does so by creating new basic blocks containing
2217 unconditional branches to the appropriate label in the "other"
2218 section. The new basic block is then put in the same (hot or cold)
2219 section as the original conditional branch, and the fall_thru edge
2220 is modified to fall into the new basic block instead. By adding
2221 this level of indirection we end up with only unconditional branches
2222 crossing between hot and cold sections.
2223
2224 Conditional branches are dealt with by adding a level of indirection.
2225 A new basic block is added in the same (hot/cold) section as the
2226 conditional branch, and the conditional branch is retargeted to the
2227 new basic block. The new basic block contains an unconditional branch
2228 to the original target of the conditional branch (in the other section).
2229
2230 Unconditional branches are dealt with by converting them into
2231 indirect jumps. */
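/* As an illustrative sketch (hypothetical blocks and labels, not taken
   from a real dump), suppose hot block A falls through to cold block B:

       before:                        after fix_up_fall_thru_edges:
         A  (hot)  ...                  A  (hot)  ...  [falls through]
                   [falls through]      A' (hot)  jump .L_B
         B  (cold) ...                  B  (cold) .L_B: ...

   so the only crossing edge left is the unconditional jump in A'.  A
   crossing conditional branch "if (cc) goto .L_B" is handled the same
   way by fix_crossing_conditional_branches: it is retargeted to a new
   block in its own section that contains just a jump to .L_B.  If the
   target lacks long unconditional branches, that remaining jump is then
   rewritten as an indirect jump through a register by
   fix_crossing_unconditional_branches.  */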
2232
2233 static unsigned
2234 partition_hot_cold_basic_blocks (void)
2235 {
2236 VEC(edge, heap) *crossing_edges;
2237
2238 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2239 return 0;
2240
2241 df_set_flags (DF_DEFER_INSN_RESCAN);
2242
2243 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2244 if (crossing_edges == NULL)
2245 return 0;
2246
2247 /* Make sure the source of any crossing edge ends in a jump and the
2248 destination of any crossing edge has a label. */
2249 add_labels_and_missing_jumps (crossing_edges);
2250
2251 /* Convert all crossing fall_thru edges into non-crossing fall-thrus
2252 to new blocks that end in unconditional jumps (which jump to the
2253 original fall-through dest). */
2254 fix_up_fall_thru_edges ();
2255
2256 /* If the architecture does not have conditional branches that can
2257 span all of memory, convert crossing conditional branches into
2258 crossing unconditional branches. */
2259 if (!HAS_LONG_COND_BRANCH)
2260 fix_crossing_conditional_branches ();
2261
2262 /* If the architecture does not have unconditional branches that
2263 can span all of memory, convert crossing unconditional branches
2264 into indirect jumps. Since adding an indirect jump also adds
2265 a new register usage, update the register usage information as
2266 well. */
2267 if (!HAS_LONG_UNCOND_BRANCH)
2268 fix_crossing_unconditional_branches ();
2269
2270 add_reg_crossing_jump_notes ();
2271
2272 /* Clear bb->aux fields that the above routines were using. */
2273 clear_aux_for_blocks ();
2274
2275 VEC_free (edge, heap, crossing_edges);
2276
2277 /* ??? FIXME: DF generates the bb info for a block immediately.
2278 And by immediately, I mean *during* creation of the block.
2279
2280 #0 df_bb_refs_collect
2281 #1 in df_bb_refs_record
2282 #2 in create_basic_block_structure
2283
2284 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2285 will *always* fail, because no edges can have been added to the
2286 block yet. Which of course means we don't add the right
2287 artificial refs, which means we fail df_verify (much) later.
2288
2289 The cleanest solution would seem to be to make DF_DEFER_INSN_RESCAN
2290 also imply that we should not grab data from the new blocks that
2291 those new insns are in. In this way one can create the block, link
2292 it up properly, and have everything Just Work later, when deferred
2293 insns are processed.
2294
2295 In the meantime, we have no other option but to throw away all
2296 of the DF data and recompute it all. */
2297 if (cfun->eh->lp_array)
2298 {
2299 df_finish_pass (true);
2300 df_scan_alloc (NULL);
2301 df_scan_blocks ();
2302 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2303 data. We blindly generated all of them when creating the new
2304 landing pad. Delete those assignments we don't use. */
2305 df_set_flags (DF_LR_RUN_DCE);
2306 df_analyze ();
2307 }
2308
2309 return TODO_verify_flow | TODO_verify_rtl_sharing;
2310 }
2311 \f
2312 static bool
2313 gate_handle_reorder_blocks (void)
2314 {
2315 if (targetm.cannot_modify_jumps_p ())
2316 return false;
2317 /* Don't reorder blocks when optimizing for size because extra jump insns may
2318 be created; also a barrier may create extra padding.
2319
2320 More correctly, we should have a block reordering mode that tries to
2321 minimize the combined size of all the jumps. This would more or less
2322 automatically remove extra jumps, but would also try to use more short
2323 jumps instead of long jumps. */
2324 if (!optimize_function_for_speed_p (cfun))
2325 return false;
2326 return (optimize > 0
2327 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2328 }
2329
2330
2331 /* Reorder basic blocks. */
2332 static unsigned int
2333 rest_of_handle_reorder_blocks (void)
2334 {
2335 basic_block bb;
2336
2337 /* Last attempt to optimize the CFG, since scheduling, peepholing and insn
2338 splitting may have introduced more crossjumping opportunities. */
2339 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2340
2341 reorder_basic_blocks ();
2342 cleanup_cfg (CLEANUP_EXPENSIVE);
2343
2344 FOR_EACH_BB (bb)
2345 if (bb->next_bb != EXIT_BLOCK_PTR)
2346 bb->aux = bb->next_bb;
2347 cfg_layout_finalize ();
2348
2349 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */
2350 insert_section_boundary_note ();
2351 return 0;
2352 }
2353
2354 struct rtl_opt_pass pass_reorder_blocks =
2355 {
2356 {
2357 RTL_PASS,
2358 "bbro", /* name */
2359 gate_handle_reorder_blocks, /* gate */
2360 rest_of_handle_reorder_blocks, /* execute */
2361 NULL, /* sub */
2362 NULL, /* next */
2363 0, /* static_pass_number */
2364 TV_REORDER_BLOCKS, /* tv_id */
2365 0, /* properties_required */
2366 0, /* properties_provided */
2367 0, /* properties_destroyed */
2368 0, /* todo_flags_start */
2369 TODO_verify_rtl_sharing, /* todo_flags_finish */
2370 }
2371 };
2372
2373 static bool
2374 gate_handle_partition_blocks (void)
2375 {
2376 /* The optimization to partition hot/cold basic blocks into separate
2377 sections of the .o file does not work well with linkonce or with
2378 user-defined section attributes. Don't call it if either case
2379 arises. */
2380 return (flag_reorder_blocks_and_partition
2381 && optimize
2382 /* See gate_handle_reorder_blocks. We should not partition if
2383 we are going to omit the reordering. */
2384 && optimize_function_for_speed_p (cfun)
2385 && !DECL_ONE_ONLY (current_function_decl)
2386 && !user_defined_section_attribute);
2387 }
2388
2389 struct rtl_opt_pass pass_partition_blocks =
2390 {
2391 {
2392 RTL_PASS,
2393 "bbpart", /* name */
2394 gate_handle_partition_blocks, /* gate */
2395 partition_hot_cold_basic_blocks, /* execute */
2396 NULL, /* sub */
2397 NULL, /* next */
2398 0, /* static_pass_number */
2399 TV_REORDER_BLOCKS, /* tv_id */
2400 PROP_cfglayout, /* properties_required */
2401 0, /* properties_provided */
2402 0, /* properties_destroyed */
2403 0, /* todo_flags_start */
2404 0 /* todo_flags_finish */
2405 }
2406 };