/* Basic block reordering routines for the GNU compiler.
   Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first
   (see function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block
   is lower than Branch Threshold, or the frequency of the successor is
   lower than Exec Threshold, the successor will be a seed in one of the
   next rounds.  Each round has these parameters lower than the previous
   one.  The last round has to have these parameters set to zero so that
   the remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (those that have not been "sent" to the next round)
   will be other seeds for this round, and the secondary traces will start
   from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block
   of the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, it first checks whether there is an edge from
   the last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there
   exists a basic block BB such that BB is a successor of the last block of
   one trace and BB is a predecessor of the first block of another trace.
   In this case, BB is duplicated and the traces are connected through this
   duplicate.  The remaining traces are simply connected, so there will be
   a jump to the beginning of the rest of the trace.

   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "flags.h"
#include "timevar.h"
#include "output.h"
#include "cfglayout.h"
#include "fibheap.h"
#include "target.h"
#include "function.h"
#include "tm_p.h"
#include "obstack.h"
#include "expr.h"
#include "params.h"
#include "toplev.h"
#include "tree-pass.h"
#include "df.h"

#ifndef HAVE_conditional_execution
#define HAVE_conditional_execution 0
#endif

/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
#define N_ROUNDS 5

/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */

#ifndef HAVE_return
#define HAVE_return 0
#define gen_return() NULL_RTX
#endif


/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};

/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};

/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
   block the edge destination is not duplicated while connecting traces.  */
#define DUPLICATION_THRESHOLD 100

/* Length of unconditional jump instruction.  */
static int uncond_jump_length;

/* Structure to hold needed information for each basic block.  */
typedef struct bbro_basic_block_data_def
{
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
  int start_of_trace;

  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
  int end_of_trace;

  /* Which trace is the bb in?  */
  int in_trace;

  /* Which heap is BB in (if any)?  */
  fibheap_t heap;

  /* Which heap node is BB in (if any)?  */
  fibnode_t node;
} bbro_basic_block_data;

/* The current size of the following dynamic array.  */
static int array_size;

/* The array which holds needed information for basic blocks.  */
static bbro_basic_block_data *bbd;

/* To avoid frequent reallocation the size of arrays is greater than needed,
   the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
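
/* For example, GET_ARRAY_SIZE (100) == ((100 / 4) + 1) * 5 == 130,
   which is indeed at least 1.25 * 100 (illustrative only).  */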

/* Free the memory and set the pointer to NULL.  */
#define FREE(P) (gcc_assert (P), free (P), P = 0)

/* Structure for holding information about a trace.  */
struct trace
{
  /* First and last basic block of the trace.  */
  basic_block first, last;

  /* The round of the STC creation which this trace was found in.  */
  int round;

  /* The length (i.e. the number of basic blocks) of the trace.  */
  int length;
};

/* Maximum frequency and count of one of the entry blocks.  */
static int max_entry_frequency;
static gcov_type max_entry_count;

/* Local function prototypes.  */
static void find_traces (int *, struct trace *);
static basic_block rotate_loop (edge, struct trace *, int);
static void mark_bb_visited (basic_block, int);
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
                                 int, fibheap_t *, int);
static basic_block copy_bb (basic_block, edge, basic_block, int);
static fibheapkey_t bb_to_key (basic_block);
static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
static void connect_traces (int, struct trace *);
static bool copy_bb_p (basic_block, int);
static int get_uncond_jump_length (void);
static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
static void find_rarely_executed_basic_blocks_and_crossing_edges (edge **,
                                                                  int *,
                                                                  int *);
static void add_labels_and_missing_jumps (edge *, int);
static void add_reg_crossing_jump_notes (void);
static void fix_up_fall_thru_edges (void);
static void fix_edges_for_rarely_executed_code (edge *, int);
static void fix_crossing_conditional_branches (void);
static void fix_crossing_unconditional_branches (void);

/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are:
   1) the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be
   collected into traces until the very last round); or 2) there will
   be another round, and the basic block is not "hot enough" for the
   current round of trace collection.  */

static bool
push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
                      int exec_th, gcov_type count_th)
{
  bool there_exists_another_round;
  bool block_not_hot_enough;

  there_exists_another_round = round < number_of_rounds - 1;

  block_not_hot_enough = (bb->frequency < exec_th
                          || bb->count < count_th
                          || probably_never_executed_bb_p (bb));

  if (there_exists_another_round
      && block_not_hot_enough)
    return true;
  else
    return false;
}

/* Find the traces for Software Trace Cache.  Chain each trace through
   the ->aux field of its basic blocks.  Store the number of traces to
   N_TRACES and the description of the traces to TRACES.  */

static void
find_traces (int *n_traces, struct trace *traces)
{
  int i;
  int number_of_rounds;
  edge e;
  edge_iterator ei;
  fibheap_t heap;

  /* Add one extra round of trace collection when partitioning hot/cold
     basic blocks into separate sections.  The last round is for all the
     cold blocks (and ONLY the cold blocks).  */

  number_of_rounds = N_ROUNDS - 1;

  /* Insert entry points of function into heap.  */
  heap = fibheap_new ();
  max_entry_frequency = 0;
  max_entry_count = 0;
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      bbd[e->dest->index].heap = heap;
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
                                                 e->dest);
      if (e->dest->frequency > max_entry_frequency)
        max_entry_frequency = e->dest->frequency;
      if (e->dest->count > max_entry_count)
        max_entry_count = e->dest->count;
    }

  /* Find the traces.  */
  for (i = 0; i < number_of_rounds; i++)
    {
      gcov_type count_threshold;

      if (dump_file)
        fprintf (dump_file, "STC - round %d\n", i + 1);

      if (max_entry_count < INT_MAX / 1000)
        count_threshold = max_entry_count * exec_threshold[i] / 1000;
      else
        count_threshold = max_entry_count / 1000 * exec_threshold[i];
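      /* Note: for large counts the second form divides by 1000 first;
         this loses a little precision but keeps the multiplication
         from overflowing gcov_type.  */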

      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
                           max_entry_frequency * exec_threshold[i] / 1000,
                           count_threshold, traces, n_traces, i, &heap,
                           number_of_rounds);
    }
  fibheap_delete (heap);

  if (dump_file)
    {
      for (i = 0; i < *n_traces; i++)
        {
          basic_block bb;
          fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
                   traces[i].round + 1);
          for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
            fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
          fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
        }
      fflush (dump_file);
    }
}

/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
   (with sequential number TRACE_N).  */

static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
{
  basic_block bb;

  /* Information about the best end (end after rotation) of the loop.  */
  basic_block best_bb = NULL;
  edge best_edge = NULL;
  int best_freq = -1;
  gcov_type best_count = -1;
  /* The best edge is preferred when its destination is not visited yet
     or is a start block of some trace.  */
  bool is_preferred = false;

  /* Find the most frequent edge that goes out from current trace.  */
  bb = back_edge->dest;
  do
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && e->dest->il.rtl->visited != trace_n
            && (e->flags & EDGE_CAN_FALLTHRU)
            && !(e->flags & EDGE_COMPLEX))
        {
          if (is_preferred)
            {
              /* The best edge is preferred.  */
              if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is also preferred.  */
                  int freq = EDGE_FREQUENCY (e);
                  if (freq > best_freq || e->count > best_count)
                    {
                      best_freq = freq;
                      best_count = e->count;
                      best_edge = e;
                      best_bb = bb;
                    }
                }
            }
          else
            {
              if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is preferred.  */
                  is_preferred = true;
                  best_freq = EDGE_FREQUENCY (e);
                  best_count = e->count;
                  best_edge = e;
                  best_bb = bb;
                }
              else
                {
                  int freq = EDGE_FREQUENCY (e);
                  if (!best_edge || freq > best_freq || e->count > best_count)
                    {
                      best_freq = freq;
                      best_count = e->count;
                      best_edge = e;
                      best_bb = bb;
                    }
                }
            }
        }
      bb = bb->aux;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
         the trace.  */
      if (back_edge->dest == trace->first)
        {
          trace->first = best_bb->aux;
        }
      else
        {
          basic_block prev_bb;

          for (prev_bb = trace->first;
               prev_bb->aux != back_edge->dest;
               prev_bb = prev_bb->aux)
            ;
          prev_bb->aux = best_bb->aux;

          /* Try to get rid of uncond jump to cond jump.  */
          if (single_succ_p (prev_bb))
            {
              basic_block header = single_succ (prev_bb);

              /* Duplicate HEADER if it is a small block containing cond jump
                 in the end.  */
              if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
                  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
                                     NULL_RTX))
                copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
            }
        }
    }
  else
    {
      /* We have not found a suitable loop tail, so do no rotation.  */
      best_bb = back_edge->src;
    }
  best_bb->aux = NULL;
  return best_bb;
}
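
/* Illustration (not exhaustive): suppose a trace is the loop
   H -> X -> Y with back edge Y -> H, and X has the most frequent
   suitable edge leaving the loop.  Then best_bb == X, and the rotation
   above rewires the aux chain to Y -> H -> X, so the trace ends in the
   block from which the loop is most likely exited.  */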

/* Mark that BB was visited in trace number TRACE.  */

static void
mark_bb_visited (basic_block bb, int trace)
{
  bb->il.rtl->visited = trace;
  if (bbd[bb->index].heap)
    {
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;
    }
}

/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH,
   i.e. do not include into traces basic blocks whose probability is lower
   than BRANCH_TH or whose frequency is lower than EXEC_TH (or whose count
   is lower than COUNT_TH).  Store the new traces into TRACES and modify
   the number of traces *N_TRACES.  Set the round (which the trace belongs
   to) to ROUND.  The function expects starting basic blocks to be in
   *HEAP; at the end it deletes *HEAP and stores starting points for the
   next round into new *HEAP.  */

static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
                     struct trace *traces, int *n_traces, int round,
                     fibheap_t *heap, int number_of_rounds)
{
  /* Heap for discarded basic blocks which are possible starting points for
     the next round.  */
  fibheap_t new_heap = fibheap_new ();

  while (!fibheap_empty (*heap))
    {
      basic_block bb;
      struct trace *trace;
      edge best_edge, e;
      fibheapkey_t key;
      edge_iterator ei;

      bb = fibheap_extract_min (*heap);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;

      if (dump_file)
        fprintf (dump_file, "Getting bb %d\n", bb->index);

      /* If the BB's frequency is too low, send BB to the next round.  When
         partitioning hot/cold blocks into separate sections, make sure all
         the cold blocks (and ONLY the cold blocks) go into the (extra) final
         round.  */

      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
                                count_th))
        {
          int key = bb_to_key (bb);
          bbd[bb->index].heap = new_heap;
          bbd[bb->index].node = fibheap_insert (new_heap, key, bb);

          if (dump_file)
            fprintf (dump_file,
                     "  Possible start point of next round: %d (key: %d)\n",
                     bb->index, key);
          continue;
        }

      trace = traces + *n_traces;
      trace->first = bb;
      trace->round = round;
      trace->length = 0;
      bbd[bb->index].in_trace = *n_traces;
      (*n_traces)++;

      do
        {
          int prob, freq;
          bool ends_in_call;

          /* The probability and frequency of the best edge.  */
          int best_prob = INT_MIN / 2;
          int best_freq = INT_MIN / 2;

          best_edge = NULL;
          mark_bb_visited (bb, *n_traces);
          trace->length++;

          if (dump_file)
            fprintf (dump_file, "Basic block %d was visited in trace %d\n",
                     bb->index, *n_traces - 1);

          ends_in_call = block_ends_with_call_p (bb);

          /* Select the successor that will be placed after BB.  */
          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              gcc_assert (!(e->flags & EDGE_FAKE));

              if (e->dest == EXIT_BLOCK_PTR)
                continue;

              if (e->dest->il.rtl->visited
                  && e->dest->il.rtl->visited != *n_traces)
                continue;

              if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
                continue;

              prob = e->probability;
              freq = e->dest->frequency;

              /* The only sensible preference for a call instruction is the
                 fallthru edge.  Don't bother selecting anything else.  */
              if (ends_in_call)
                {
                  if (e->flags & EDGE_CAN_FALLTHRU)
                    {
                      best_edge = e;
                      best_prob = prob;
                      best_freq = freq;
                    }
                  continue;
                }

              /* Edge that cannot be fallthru or improbable or infrequent
                 successor (i.e. it is an unsuitable successor).  */
              if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
                  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
                  || e->count < count_th)
                continue;

              /* If partitioning hot/cold basic blocks, don't consider edges
                 that cross section boundaries.  */

              if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
                                 best_edge))
                {
                  best_edge = e;
                  best_prob = prob;
                  best_freq = freq;
                }
            }

          /* If the best destination has multiple predecessors, and can be
             duplicated cheaper than a jump, don't allow it to be added
             to a trace.  We'll duplicate it when connecting traces.  */
          if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
              && copy_bb_p (best_edge->dest, 0))
            best_edge = NULL;

          /* Add all non-selected successors to the heaps.  */
          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              if (e == best_edge
                  || e->dest == EXIT_BLOCK_PTR
                  || e->dest->il.rtl->visited)
                continue;

              key = bb_to_key (e->dest);

              if (bbd[e->dest->index].heap)
                {
                  /* E->DEST is already in some heap.  */
                  if (key != bbd[e->dest->index].node->key)
                    {
                      if (dump_file)
                        {
                          fprintf (dump_file,
                                   "Changing key for bb %d from %ld to %ld.\n",
                                   e->dest->index,
                                   (long) bbd[e->dest->index].node->key,
                                   key);
                        }
                      fibheap_replace_key (bbd[e->dest->index].heap,
                                           bbd[e->dest->index].node, key);
                    }
                }
              else
                {
                  fibheap_t which_heap = *heap;

                  prob = e->probability;
                  freq = EDGE_FREQUENCY (e);

                  if (!(e->flags & EDGE_CAN_FALLTHRU)
                      || (e->flags & EDGE_COMPLEX)
                      || prob < branch_th || freq < exec_th
                      || e->count < count_th)
                    {
                      /* When partitioning hot/cold basic blocks, make sure
                         the cold blocks (and only the cold blocks) all get
                         pushed to the last round of trace collection.  */

                      if (push_to_next_round_p (e->dest, round,
                                                number_of_rounds,
                                                exec_th, count_th))
                        which_heap = new_heap;
                    }

                  bbd[e->dest->index].heap = which_heap;
                  bbd[e->dest->index].node = fibheap_insert (which_heap,
                                                             key, e->dest);

                  if (dump_file)
                    {
                      fprintf (dump_file,
                               "  Possible start of %s round: %d (key: %ld)\n",
                               (which_heap == new_heap) ? "next" : "this",
                               e->dest->index, (long) key);
                    }
                }
            }

          if (best_edge) /* Suitable successor was found.  */
            {
              if (best_edge->dest->il.rtl->visited == *n_traces)
                {
                  /* We do nothing with one basic block loops.  */
                  if (best_edge->dest != bb)
                    {
                      if (EDGE_FREQUENCY (best_edge)
                          > 4 * best_edge->dest->frequency / 5)
                        {
                          /* The loop has at least 4 iterations.  (The back
                             edge carries more than 4/5 of the loop header's
                             frequency, so the header runs more than 5 times
                             per entry into the loop.)  If the loop header
                             is not the first block of the function, we can
                             rotate the loop.  */

                          if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
                            {
                              if (dump_file)
                                {
                                  fprintf (dump_file,
                                           "Rotating loop %d - %d\n",
                                           best_edge->dest->index, bb->index);
                                }
                              bb->aux = best_edge->dest;
                              bbd[best_edge->dest->index].in_trace =
                                (*n_traces) - 1;
                              bb = rotate_loop (best_edge, trace, *n_traces);
                            }
                        }
                      else
                        {
                          /* The loop has less than 4 iterations.  */

                          if (single_succ_p (bb)
                              && copy_bb_p (best_edge->dest, !optimize_size))
                            {
                              bb = copy_bb (best_edge->dest, best_edge, bb,
                                            *n_traces);
                              trace->length++;
                            }
                        }
                    }

                  /* Terminate the trace.  */
                  break;
                }
              else
                {
                  /* Check for a situation

                      A
                     /|
                    B |
                     \|
                      C

                    where
                    EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
                      >= EDGE_FREQUENCY (AC).
                    (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC))
                    Best ordering is then A B C.

                    This situation is created for example by:

                    if (A) B;
                    C;  */

                  FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e != best_edge
                        && (e->flags & EDGE_CAN_FALLTHRU)
                        && !(e->flags & EDGE_COMPLEX)
                        && !e->dest->il.rtl->visited
                        && single_pred_p (e->dest)
                        && !(e->flags & EDGE_CROSSING)
                        && single_succ_p (e->dest)
                        && (single_succ_edge (e->dest)->flags
                            & EDGE_CAN_FALLTHRU)
                        && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
                        && single_succ (e->dest) == best_edge->dest
                        && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
                      {
                        best_edge = e;
                        if (dump_file)
                          fprintf (dump_file, "Selecting BB %d\n",
                                   best_edge->dest->index);
                        break;
                      }

                  bb->aux = best_edge->dest;
                  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
                  bb = best_edge->dest;
                }
            }
        }
      while (best_edge);
      trace->last = bb;
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
      bbd[trace->last->index].end_of_trace = *n_traces - 1;

      /* The trace is terminated so we have to recount the keys in heap
         (some block can have a lower key because now one of its predecessors
         is an end of the trace).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->dest == EXIT_BLOCK_PTR
              || e->dest->il.rtl->visited)
            continue;

          if (bbd[e->dest->index].heap)
            {
              key = bb_to_key (e->dest);
              if (key != bbd[e->dest->index].node->key)
                {
                  if (dump_file)
                    {
                      fprintf (dump_file,
                               "Changing key for bb %d from %ld to %ld.\n",
                               e->dest->index,
                               (long) bbd[e->dest->index].node->key, key);
                    }
                  fibheap_replace_key (bbd[e->dest->index].heap,
                                       bbd[e->dest->index].node,
                                       key);
                }
            }
        }
    }

  fibheap_delete (*heap);

  /* "Return" the new heap.  */
  *heap = new_heap;
}

/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark OLD_BB visited and update pass' data
   structures (TRACE is the number of the trace which OLD_BB is duplicated
   to).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
{
  basic_block new_bb;

  new_bb = duplicate_block (old_bb, e, bb);
  BB_COPY_PARTITION (new_bb, old_bb);

  gcc_assert (e->dest == new_bb);
  gcc_assert (!e->dest->il.rtl->visited);

  if (dump_file)
    fprintf (dump_file,
             "Duplicated bb %d (created bb %d)\n",
             old_bb->index, new_bb->index);
  new_bb->il.rtl->visited = trace;
  new_bb->aux = bb->aux;
  bb->aux = new_bb;

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
      for (i = array_size; i < new_size; i++)
        {
          bbd[i].start_of_trace = -1;
          bbd[i].in_trace = -1;
          bbd[i].end_of_trace = -1;
          bbd[i].heap = NULL;
          bbd[i].node = NULL;
        }
      array_size = new_size;

      if (dump_file)
        {
          fprintf (dump_file,
                   "Growing the dynamic array to %d elements.\n",
                   array_size);
        }
    }

  bbd[new_bb->index].in_trace = trace;

  return new_bb;
}

/* Compute and return the key (for the heap) of the basic block BB.  */

static fibheapkey_t
bb_to_key (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int priority = 0;

  /* Do not start in probably never executed blocks.  */

  if (BB_PARTITION (bb) == BB_COLD_PARTITION
      || probably_never_executed_bb_p (bb))
    return BB_FREQ_MAX;

  /* Prefer blocks whose predecessor is an end of some trace
     or whose predecessor edge is EDGE_DFS_BACK.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
          || (e->flags & EDGE_DFS_BACK))
        {
          int edge_freq = EDGE_FREQUENCY (e);

          if (edge_freq > priority)
            priority = edge_freq;
        }
    }

  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
  return -bb->frequency;
}
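
/* Note that fibheap_extract_min returns the block with the smallest key
   first, so the negated values above make high-frequency blocks (and
   blocks continuing an existing trace) the preferred seeds.  E.g. a
   block of frequency 1000 gets key -1000 and is extracted before a
   block of frequency 10 with key -10 (illustrative only).  */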

/* Return true when the edge E from basic block BB is better than the temporary
   best edge (details are in function).  The probability of edge E is PROB.
   The frequency of the successor is FREQ.  The current best probability is
   BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered to be equivalent when PROB does not differ much from
   BEST_PROB; similarly for frequency.  */

static bool
better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
               int best_freq, edge cur_best_edge)
{
  bool is_better_edge;

  /* The BEST_* values do not have to be best, but can be a bit smaller than
     maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
  else if (prob < best_prob - diff_prob)
    /* The edge has lower probability than the temporary best edge.  */
    is_better_edge = false;
  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
       probabilities.  The higher frequency of a successor now means
       that there is another edge going into that successor.
       This successor has lower frequency so it is better.  */
    is_better_edge = true;
  else if (freq > best_freq + diff_freq)
    /* This successor has higher frequency so it is worse.  */
    is_better_edge = false;
  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Select the previous successor.  */
    is_better_edge = true;
  else
    is_better_edge = false;

  /* If we are doing hot/cold partitioning, make sure that we always favor
     non-crossing edges over crossing edges.  */

  if (!is_better_edge
      && flag_reorder_blocks_and_partition
      && cur_best_edge
      && (cur_best_edge->flags & EDGE_CROSSING)
      && !(e->flags & EDGE_CROSSING))
    is_better_edge = true;

  return is_better_edge;
}
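
/* Worked example (illustrative only): with best_prob == 5000, the 10%
   band gives diff_prob == 500, so an edge with prob > 5500 wins
   outright and one with prob < 4500 loses outright; anything in
   between is treated as equivalent and decided by the successor
   frequencies, and then by layout (e->dest->prev_bb == bb).  */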

/* Connect traces in array TRACES, N_TRACES is the count of traces.  */

static void
connect_traces (int n_traces, struct trace *traces)
{
  int i;
  bool *connected;
  bool two_passes;
  int last_trace;
  int current_pass;
  int current_partition;
  int freq_threshold;
  gcov_type count_threshold;

  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
  if (max_entry_count < INT_MAX / 1000)
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
  else
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;

  connected = XCNEWVEC (bool, n_traces);
  last_trace = -1;
  current_pass = 1;
  current_partition = BB_PARTITION (traces[0].first);
  two_passes = false;

  if (flag_reorder_blocks_and_partition)
    for (i = 0; i < n_traces && !two_passes; i++)
      if (BB_PARTITION (traces[0].first)
          != BB_PARTITION (traces[i].first))
        two_passes = true;

  for (i = 0; i < n_traces || (two_passes && current_pass == 1); i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (i >= n_traces)
        {
          gcc_assert (two_passes && current_pass == 1);
          i = 0;
          t = i;
          current_pass = 2;
          if (current_partition == BB_HOT_PARTITION)
            current_partition = BB_COLD_PARTITION;
          else
            current_partition = BB_HOT_PARTITION;
        }

      if (connected[t])
        continue;

      if (two_passes
          && BB_PARTITION (traces[t].first) != current_partition)
        continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
        {
          edge_iterator ei;
          best = NULL;
          best_len = 0;
          FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
            {
              int si = e->src->index;

              if (e->src != ENTRY_BLOCK_PTR
                  && (e->flags & EDGE_CAN_FALLTHRU)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[si].end_of_trace >= 0
                  && !connected[bbd[si].end_of_trace]
                  && (BB_PARTITION (e->src) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
                          && traces[bbd[si].end_of_trace].length > best_len)))
                {
                  best = e;
                  best_len = traces[bbd[si].end_of_trace].length;
                }
            }
          if (best)
            {
              best->src->aux = best->dest;
              t2 = bbd[best->src->index].end_of_trace;
              connected[t2] = true;

              if (dump_file)
                {
                  fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
            }
          else
            break;
        }

      if (last_trace >= 0)
        traces[last_trace].last->aux = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
        {
          /* Find the continuation of the chain.  */
          edge_iterator ei;
          best = NULL;
          best_len = 0;
          FOR_EACH_EDGE (e, ei, traces[t].last->succs)
            {
              int di = e->dest->index;

              if (e->dest != EXIT_BLOCK_PTR
                  && (e->flags & EDGE_CAN_FALLTHRU)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[di].start_of_trace >= 0
                  && !connected[bbd[di].start_of_trace]
                  && (BB_PARTITION (e->dest) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
                          && traces[bbd[di].start_of_trace].length > best_len)))
                {
                  best = e;
                  best_len = traces[bbd[di].start_of_trace].length;
                }
            }

          if (best)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
              t = bbd[best->dest->index].start_of_trace;
              traces[last_trace].last->aux = traces[t].first;
              connected[t] = true;
              last_trace = t;
            }
          else
            {
              /* Try to connect the traces by duplication of 1 block.  */
              edge e2;
              basic_block next_bb = NULL;
              bool try_copy = false;

              FOR_EACH_EDGE (e, ei, traces[t].last->succs)
                if (e->dest != EXIT_BLOCK_PTR
                    && (e->flags & EDGE_CAN_FALLTHRU)
                    && !(e->flags & EDGE_COMPLEX)
                    && (!best || e->probability > best->probability))
                  {
                    edge_iterator ei;
                    edge best2 = NULL;
                    int best2_len = 0;

                    /* If the destination is a start of a trace which is only
                       one block long, then no need to search the successor
                       blocks of the trace.  Accept it.  */
                    if (bbd[e->dest->index].start_of_trace >= 0
                        && traces[bbd[e->dest->index].start_of_trace].length
                           == 1)
                      {
                        best = e;
                        try_copy = true;
                        continue;
                      }

                    FOR_EACH_EDGE (e2, ei, e->dest->succs)
                      {
                        int di = e2->dest->index;

                        if (e2->dest == EXIT_BLOCK_PTR
                            || ((e2->flags & EDGE_CAN_FALLTHRU)
                                && !(e2->flags & EDGE_COMPLEX)
                                && bbd[di].start_of_trace >= 0
                                && !connected[bbd[di].start_of_trace]
                                && (BB_PARTITION (e2->dest) == current_partition)
                                && (EDGE_FREQUENCY (e2) >= freq_threshold)
                                && (e2->count >= count_threshold)
                                && (!best2
                                    || e2->probability > best2->probability
                                    || (e2->probability == best2->probability
                                        && traces[bbd[di].start_of_trace].length
                                           > best2_len))))
                          {
                            best = e;
                            best2 = e2;
                            if (e2->dest != EXIT_BLOCK_PTR)
                              best2_len = traces[bbd[di].start_of_trace].length;
                            else
                              best2_len = INT_MAX;
                            next_bb = e2->dest;
                            try_copy = true;
                          }
                      }
                  }

              if (flag_reorder_blocks_and_partition)
                try_copy = false;

              /* Copy tiny blocks always; copy larger blocks only when the
                 edge is traversed frequently enough.  */
              if (try_copy
                  && copy_bb_p (best->dest,
                                !optimize_size
                                && EDGE_FREQUENCY (best) >= freq_threshold
                                && best->count >= count_threshold))
                {
                  basic_block new_bb;

                  if (dump_file)
                    {
                      fprintf (dump_file, "Connection: %d %d ",
                               traces[t].last->index, best->dest->index);
                      if (!next_bb)
                        fputc ('\n', dump_file);
                      else if (next_bb == EXIT_BLOCK_PTR)
                        fprintf (dump_file, "exit\n");
                      else
                        fprintf (dump_file, "%d\n", next_bb->index);
                    }

                  new_bb = copy_bb (best->dest, best, traces[t].last, t);
                  traces[t].last = new_bb;
                  if (next_bb && next_bb != EXIT_BLOCK_PTR)
                    {
                      t = bbd[next_bb->index].start_of_trace;
                      traces[last_trace].last->aux = traces[t].first;
                      connected[t] = true;
                      last_trace = t;
                    }
                  else
                    break;      /* Stop finding the successor traces.  */
                }
              else
                break;  /* Stop finding the successor traces.  */
            }
        }
    }

  if (dump_file)
    {
      basic_block bb;

      fprintf (dump_file, "Final order:\n");
      for (bb = traces[0].first; bb; bb = bb->aux)
        fprintf (dump_file, "%d ", bb->index);
      fprintf (dump_file, "\n");
      fflush (dump_file);
    }

  FREE (connected);
}
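
/* Illustration (not exhaustive): given traces T1 = A-B and T2 = C-D,
   an edge B -> C lets the loop above simply chain T1 and T2.  If
   instead only B -> X and X -> C exist, where X belongs to some other
   trace, X may be duplicated (via copy_bb) so the layout becomes
   A B X' C D, keeping the frequent path fall-through.  */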

/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */

static bool
copy_bb_p (basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx insn;

  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  if (code_may_grow && maybe_hot_bb_p (bb))
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
        size += get_attr_min_length (insn);
    }

  if (size <= max_size)
    return true;

  if (dump_file)
    {
      fprintf (dump_file,
               "Block %d can't be copied because its size = %d.\n",
               bb->index, size);
    }

  return false;
}
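
/* Size budget example (illustrative only): by default a block may be
   copied if its insns are no longer than one unconditional jump; with
   CODE_MAY_GROW in a hot block, that budget is multiplied by the
   max-grow-copy-bb-insns parameter, trading code size for fewer taken
   branches.  */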

/* Return the length of unconditional jump instruction.  */

static int
get_uncond_jump_length (void)
{
  rtx label, jump;
  int length;

  label = emit_label_before (gen_label_rtx (), get_insns ());
  jump = emit_jump_insn (gen_jump (label));

  length = get_attr_min_length (jump);

  delete_insn (jump);
  delete_insn (label);
  return length;
}
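
/* The function above measures the jump length empirically: it emits a
   temporary label and a jump to it, asks the target for the insn's
   minimum length attribute, and deletes both insns again, so no real
   code is left behind.  */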

/* Find the basic blocks that are rarely executed and need to be moved to
   a separate section of the .o file (to cut down on paging and improve
   cache locality).  */

static void
find_rarely_executed_basic_blocks_and_crossing_edges (edge **crossing_edges,
                                                      int *n_crossing_edges,
                                                      int *max_idx)
{
  basic_block bb;
  bool has_hot_blocks = false;
  edge e;
  int i;
  edge_iterator ei;

  /* Mark which partition (hot/cold) each basic block belongs in.  */

  FOR_EACH_BB (bb)
    {
      if (probably_never_executed_bb_p (bb))
        BB_SET_PARTITION (bb, BB_COLD_PARTITION);
      else
        {
          BB_SET_PARTITION (bb, BB_HOT_PARTITION);
          has_hot_blocks = true;
        }
    }

  /* Mark every edge that crosses between sections.  */

  i = 0;
  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      {
        if (e->src != ENTRY_BLOCK_PTR
            && e->dest != EXIT_BLOCK_PTR
            && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
          {
            e->flags |= EDGE_CROSSING;
            if (i == *max_idx)
              {
                *max_idx *= 2;
                *crossing_edges = xrealloc (*crossing_edges,
                                            (*max_idx) * sizeof (edge));
              }
            (*crossing_edges)[i++] = e;
          }
        else
          e->flags &= ~EDGE_CROSSING;
      }
  *n_crossing_edges = i;
}

/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
   a jump) to unconditional jumps.  */

static void
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
{
  int i;
  basic_block src;
  basic_block dest;
  rtx label;
  rtx barrier;
  rtx new_jump;

  for (i = 0; i < n_crossing_edges; i++)
    {
      if (crossing_edges[i])
        {
          src = crossing_edges[i]->src;
          dest = crossing_edges[i]->dest;

          /* Make sure dest has a label.  */

          if (dest && (dest != EXIT_BLOCK_PTR))
            {
              label = block_label (dest);

              /* Make sure source block ends with a jump.  */

              if (src && (src != ENTRY_BLOCK_PTR))
                {
                  if (!JUMP_P (BB_END (src)))
                    /* bb just falls through.  */
                    {
                      /* Make sure there's only one successor.  */
                      gcc_assert (single_succ_p (src));

                      /* Find label in dest block.  */
                      label = block_label (dest);

                      new_jump = emit_jump_insn_after (gen_jump (label),
                                                       BB_END (src));
                      barrier = emit_barrier_after (new_jump);
                      JUMP_LABEL (new_jump) = label;
                      LABEL_NUSES (label) += 1;
                      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
                      /* Mark edge as non-fallthru.  */
                      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
                    } /* end: 'if (GET_CODE ... '  */
                } /* end: 'if (src && src->index...'  */
            } /* end: 'if (dest && dest->index...'  */
        } /* end: 'if (crossing_edges[i]...'  */
    } /* end for loop  */
}

/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump; we've already
   dealt with fall-through edges for blocks that didn't have a
   conditional jump in the call to add_labels_and_missing_jumps).
   Convert the fall-through edge to a non-crossing edge by inserting a
   new bb to fall-through into.  The new bb will contain an
   unconditional jump (crossing edge) to the original fall through
   destination.  */

static void
fix_up_fall_thru_edges (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  edge succ1;
  edge succ2;
  edge fall_thru;
  edge cond_jump = NULL;
  edge e;
  bool cond_jump_crosses;
  int invert_worked;
  rtx old_jump;
  rtx fall_thru_label;
  rtx barrier;

  FOR_EACH_BB (cur_bb)
    {
      fall_thru = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
        succ1 = EDGE_SUCC (cur_bb, 0);
      else
        succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
        succ2 = EDGE_SUCC (cur_bb, 1);
      else
        succ2 = NULL;

      /* Find the fall-through edge.  */

      if (succ1
          && (succ1->flags & EDGE_FALLTHRU))
        {
          fall_thru = succ1;
          cond_jump = succ2;
        }
      else if (succ2
               && (succ2->flags & EDGE_FALLTHRU))
        {
          fall_thru = succ2;
          cond_jump = succ1;
        }

      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
        {
          /* Check to see if the fall-thru edge is a crossing edge.  */

          if (fall_thru->flags & EDGE_CROSSING)
            {
              /* The fall_thru edge crosses; now check the cond jump edge, if
                 it exists.  */

              cond_jump_crosses = true;
              invert_worked = 0;
              old_jump = BB_END (cur_bb);

              /* Find the jump instruction, if there is one.  */

              if (cond_jump)
                {
                  if (!(cond_jump->flags & EDGE_CROSSING))
                    cond_jump_crosses = false;

                  /* We know the fall-thru edge crosses; if the cond
                     jump edge does NOT cross, and its destination is the
                     next block in the bb order, invert the jump
                     (i.e. fix it so the fall thru does not cross and
                     the cond jump does).  */

                  if (!cond_jump_crosses
                      && cur_bb->aux == cond_jump->dest)
                    {
                      /* Find label in fall_thru block.  We've already added
                         any missing labels, so there must be one.  */

                      fall_thru_label = block_label (fall_thru->dest);

                      if (old_jump && fall_thru_label)
                        invert_worked = invert_jump (old_jump,
                                                     fall_thru_label, 0);
                      if (invert_worked)
                        {
                          fall_thru->flags &= ~EDGE_FALLTHRU;
                          cond_jump->flags |= EDGE_FALLTHRU;
                          update_br_prob_note (cur_bb);
                          e = fall_thru;
                          fall_thru = cond_jump;
                          cond_jump = e;
                          cond_jump->flags |= EDGE_CROSSING;
                          fall_thru->flags &= ~EDGE_CROSSING;
                        }
                    }
                }

              if (cond_jump_crosses || !invert_worked)
                {
                  /* This is the case where both edges out of the basic
                     block are crossing edges.  Here we will fix up the
                     fall through edge.  The jump edge will be taken care
                     of later.  */

                  new_bb = force_nonfallthru (fall_thru);

                  if (new_bb)
                    {
                      new_bb->aux = cur_bb->aux;
                      cur_bb->aux = new_bb;

                      /* Make sure new fall-through bb is in same
                         partition as bb it's falling through from.  */

                      BB_COPY_PARTITION (new_bb, cur_bb);
                      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
                    }

                  /* Add barrier after new jump.  */

                  if (new_bb)
                    {
                      barrier = emit_barrier_after (BB_END (new_bb));
                      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                                  barrier);
                    }
                  else
                    {
                      barrier = emit_barrier_after (BB_END (cur_bb));
                      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                                  barrier);
                    }
                }
            }
        }
    }
}

/* This function checks the destination block of a "crossing jump" to
   see if it has any crossing predecessors that begin with a code label
   and end with an unconditional jump.  If so, it returns that predecessor
   block.  (This is to avoid creating lots of new basic blocks that all
   contain unconditional jumps to the same destination).  */

static basic_block
find_jump_block (basic_block jump_dest)
{
  basic_block source_bb = NULL;
  edge e;
  rtx insn;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, jump_dest->preds)
    if (e->flags & EDGE_CROSSING)
      {
        basic_block src = e->src;

        /* Check each predecessor to see if it has a label, and contains
           only one executable instruction, which is an unconditional jump.
           If so, we can use it.  */

        if (LABEL_P (BB_HEAD (src)))
          for (insn = BB_HEAD (src);
               !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
               insn = NEXT_INSN (insn))
            {
              if (INSN_P (insn)
                  && insn == BB_END (src)
                  && JUMP_P (insn)
                  && !any_condjump_p (insn))
                {
                  source_bb = src;
                  break;
                }
            }

        if (source_bb)
          break;
      }

  return source_bb;
}

/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block last_bb;
  basic_block dest;
  basic_block prev_bb;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx old_jump;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx new_label;
  rtx new_jump;
  rtx barrier;

  last_bb = EXIT_BLOCK_PTR->prev_bb;

  FOR_EACH_BB (cur_bb)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
        succ1 = EDGE_SUCC (cur_bb, 0);
      else
        succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
        succ2 = EDGE_SUCC (cur_bb, 1);
      else
        succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
         can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
        crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
        crossing_edge = succ2;

      if (crossing_edge)
        {
          old_jump = BB_END (cur_bb);

          /* Check to make sure the jump instruction is a
             conditional jump.  */

          set_src = NULL_RTX;

          if (any_condjump_p (old_jump))
            {
              if (GET_CODE (PATTERN (old_jump)) == SET)
                set_src = SET_SRC (PATTERN (old_jump));
              else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
                {
                  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
                  if (GET_CODE (set_src) == SET)
                    set_src = SET_SRC (set_src);
                  else
                    set_src = NULL_RTX;
                }
            }

          if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
            {
              if (GET_CODE (XEXP (set_src, 1)) == PC)
                old_label = XEXP (set_src, 2);
              else if (GET_CODE (XEXP (set_src, 2)) == PC)
                old_label = XEXP (set_src, 1);

              /* Check to see if new bb for jumping to that dest has
                 already been created; if so, use it; if not, create
                 a new one.  */

              new_bb = find_jump_block (crossing_edge->dest);

              if (new_bb)
                new_label = block_label (new_bb);
              else
                {
                  /* Create new basic block to be dest for
                     conditional jump.  */

                  new_bb = create_basic_block (NULL, NULL, last_bb);
                  new_bb->aux = last_bb->aux;
                  last_bb->aux = new_bb;
                  prev_bb = last_bb;
                  last_bb = new_bb;

                  /* Put appropriate instructions in new bb.  */

                  new_label = gen_label_rtx ();
                  emit_label_before (new_label, BB_HEAD (new_bb));
                  BB_HEAD (new_bb) = new_label;

                  if (GET_CODE (old_label) == LABEL_REF)
                    {
                      old_label = JUMP_LABEL (old_jump);
                      new_jump = emit_jump_insn_after (gen_jump
                                                       (old_label),
                                                       BB_END (new_bb));
                    }
                  else
                    {
                      gcc_assert (HAVE_return
                                  && GET_CODE (old_label) == RETURN);
                      new_jump = emit_jump_insn_after (gen_return (),
                                                       BB_END (new_bb));
                    }

                  barrier = emit_barrier_after (new_jump);
                  JUMP_LABEL (new_jump) = old_label;
                  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                              barrier);

                  /* Make sure new bb is in same partition as source
                     of conditional branch.  */
                  BB_COPY_PARTITION (new_bb, cur_bb);
                }

              /* Make old jump branch to new bb.  */

              redirect_jump (old_jump, new_label, 0);

              /* Remove crossing_edge as predecessor of 'dest'.  */

              dest = crossing_edge->dest;

              redirect_edge_succ (crossing_edge, new_bb);

              /* Make a new edge from new_bb to old dest; new edge
                 will be a successor for new_bb and a predecessor
                 for 'dest'.  */

              if (EDGE_COUNT (new_bb->succs) == 0)
                new_edge = make_edge (new_bb, dest, 0);
              else
                new_edge = EDGE_SUCC (new_bb, 0);

              crossing_edge->flags &= ~EDGE_CROSSING;
              new_edge->flags |= EDGE_CROSSING;
            }
        }
    }
}

/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx last_insn;
  rtx label;
  rtx label_addr;
  rtx indirect_jump_sequence;
  rtx jump_insn = NULL_RTX;
  rtx new_reg;
  rtx cur_insn;
  edge succ;

  FOR_EACH_BB (cur_bb)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
        continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
         this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
          && (succ->flags & EDGE_CROSSING))
        {
          rtx label2, table;

          gcc_assert (!any_condjump_p (last_insn));

          /* Make sure the jump is not already an indirect or table jump.  */

          if (!computed_jump_p (last_insn)
              && !tablejump_p (last_insn, &label2, &table))
            {
              /* We have found a "crossing" unconditional branch.  Now
                 we must convert it to an indirect jump.  First create
                 reference of label, as target for jump.  */

              label = JUMP_LABEL (last_insn);
              label_addr = gen_rtx_LABEL_REF (Pmode, label);
              LABEL_NUSES (label) += 1;

              /* Get a register to use for the indirect jump.  */

              new_reg = gen_reg_rtx (Pmode);

              /* Generate the indirect jump sequence.  */

              start_sequence ();
              emit_move_insn (new_reg, label_addr);
              emit_indirect_jump (new_reg);
              indirect_jump_sequence = get_insns ();
              end_sequence ();

              /* Make sure every instruction in the new jump sequence has
                 its basic block set to be cur_bb.  */

              for (cur_insn = indirect_jump_sequence; cur_insn;
                   cur_insn = NEXT_INSN (cur_insn))
                {
                  if (!BARRIER_P (cur_insn))
                    BLOCK_FOR_INSN (cur_insn) = cur_bb;
                  if (JUMP_P (cur_insn))
                    jump_insn = cur_insn;
                }

              /* Insert the new (indirect) jump sequence immediately before
                 the unconditional jump, then delete the unconditional jump.  */

              emit_insn_before (indirect_jump_sequence, last_insn);
              delete_insn (last_insn);

              /* Make BB_END for cur_bb be the jump instruction (NOT the
                 barrier instruction at the end of the sequence...).  */

              BB_END (cur_bb) = jump_insn;
            }
        }
    }
}
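
/* Conceptually, the conversion above turns

     jump .L_cold_target

   into the two-insn sequence

     reg = &.L_cold_target
     jump *reg

   so the branch distance no longer matters (illustrative pseudo-RTL;
   the actual insns are target specific).  */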

/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
          && JUMP_P (BB_END (e->src)))
        REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
                                                         NULL_RTX,
                                                         REG_NOTES (BB_END
                                                                    (e->src)));
}
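
/* These notes give later passes a cheap way to recognize
   section-crossing jumps: e.g. duplicate_computed_gotos below declines
   to duplicate any block whose jump carries a REG_CROSSING_JUMP note,
   and such jumps are likewise exempted from optimizations that would
   undo the fixes made above (see the IMPORTANT NOTE before
   partition_hot_cold_basic_blocks at the end of this file).  */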

/* Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However, this
   is not visible in the current bb structure.  Therefore we must take
   care to ensure that: (1) there are no fall_thru edges that cross
   between sections; (2) for those architectures which have "short"
   conditional branches, all conditional branches that attempt to
   cross between sections are converted to unconditional branches;
   and (3) for those architectures which have "short" unconditional
   branches, all unconditional branches that attempt to cross between
   sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */
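
/* Sketch (illustrative, not target code) of the fall_thru fix just
   described: a crossing fall-thru edge

       hot :  bb A   ----(fall thru)---->   cold:  bb B

   is rewritten as

       hot :  bb A   --(fall thru)-->  bb A'  [jump L_B]
       cold:  bb B   (starts with label L_B)

   so the only thing crossing the section boundary is an explicit
   unconditional jump, which steps (2) and (3) above can then lengthen
   if the target's branches cannot span all of memory.  */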

static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
                                    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Convert all crossing fall_thru edges into non-crossing edges that
     reach the original fall-thru destination via an unconditional
     jump.  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    fix_crossing_unconditional_branches ();

  add_reg_crossing_jump_notes ();
}

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
        current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
        {
          if (switched_sections)
            {
              error ("multiple hot/cold transitions found (bb %i)",
                     bb->index);
              err = 1;
            }
          else
            {
              switched_sections = true;
              current_partition = BB_PARTITION (bb);
            }
        }
    }

  gcc_assert (!err);
}

/* Reorder basic blocks.  The main entry point to this file.  */

void
reorder_basic_blocks (void)
{
  int n_traces;
  int i;
  struct trace *traces;

  gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We estimate the length of an uncond jump insn only once, since the
     code for getting the insn length always returns the minimal length
     now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = XNEWVEC (bbro_basic_block_data, array_size);
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = XNEWVEC (struct trace, n_basic_blocks);
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  relink_block_chain (/*stay_in_cfglayout_mode=*/true);

  if (dump_file)
    dump_flow_info (dump_file, dump_flags);

  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}

/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */

static void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (flag_reorder_blocks_and_partition)
    FOR_EACH_BB (bb)
      {
        if (!first_partition)
          first_partition = BB_PARTITION (bb);
        if (BB_PARTITION (bb) != first_partition)
          {
            new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
                                         BB_HEAD (bb));
            break;
          }
      }
}
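
/* For illustration only: on a typical ELF target, the note emitted
   above marks the point where the assembly printer switches from
   something like

       .text                        ;; hot blocks
       ...
       .section .text.unlikely      ;; cold blocks
       ...

   The exact section names and directives are target-dependent.  */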

/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  Leaving them factored can seriously
   pessimize code with many computed jumps in the source code, such as
   interpreters.  See e.g. PR15242.  */
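
/* For illustration (hypothetical interpreter, not from this file): with
   a factored dispatch block,

       handler_add:  ...; goto dispatch;
       handler_sub:  ...; goto dispatch;
       dispatch:     goto *dispatch_table[*pc++];

   every handler funnels through one shared indirect jump, which is
   nearly unpredictable.  Unfactoring duplicates the dispatch block into
   each predecessor,

       handler_add:  ...; goto *dispatch_table[*pc++];
       handler_sub:  ...; goto *dispatch_table[*pc++];

   giving each handler its own indirect jump and the branch predictor a
   chance to learn per-handler targets.  The code below performs the
   equivalent transformation on the RTL CFG.  */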

static bool
gate_duplicate_computed_gotos (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
}


static unsigned int
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return 0;

  cfg_layout_initialize (0);

  /* We are estimating the length of uncond jump insn only once
     since the code for getting the insn length always returns
     the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
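  /* I.e. a candidate block may be at most roughly as large as
     PARAM_MAX_GOTO_DUPLICATION_INSNS unconditional jumps; the limit is
     tunable with --param max-goto-duplication-insns.  */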
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for
     unfactoring, mark it in the candidates.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
        bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
        continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
          || !can_duplicate_block_p (bb))
        continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
        if (INSN_P (insn))
          {
            size += get_attr_min_length (insn);
            if (size > max_size)
              break;
          }
      if (size > max_size)
        continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
        all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
        continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
        continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
         the exit block or the next block.
         The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
          || single_succ (bb) == EXIT_BLOCK_PTR
          || single_succ (bb) == bb->next_bb
          || single_pred_p (single_succ (bb)))
        continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
        continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
  return 0;
}

struct tree_opt_pass pass_duplicate_computed_gotos =
{
  "compgotos",                          /* name */
  gate_duplicate_computed_gotos,        /* gate */
  duplicate_computed_gotos,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};


/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However, part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information
   is later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall-through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below.)

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above), in order to ensure
   that jumps attempting to cross section boundaries are really able
   to cover whatever distance the jump requires (on many architectures
   conditional or unconditional jumps are not able to reach all of
   memory).  Therefore tests have to be inserted into each such
   optimization to make sure that it does not undo stuff necessary to
   cross partition boundaries.  This would be much less of a problem
   if we could perform this optimization later in the compilation, but
   unfortunately the fact that we may need to create indirect jumps
   (through registers) requires that this optimization be performed
   before register allocation.  */
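
/* Sketch (illustrative, not target code) of the conditional branch fix
   described above: a crossing conditional branch

       hot :  if (cc) jump L_cold;

   is split into

       hot :  if (cc) jump L_new;
       hot :  L_new: jump L_cold;     ;; unconditional; may later become
                                      ;; an indirect jump, see above
       cold:  L_cold: ...

   The conditional branch now stays within its own section, and only
   the (long or indirect) unconditional jump crosses the boundary.  */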

static void
partition_hot_cold_basic_blocks (void)
{
  basic_block cur_bb;
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  crossing_edges = XCNEWVEC (edge, max_edges);

  cfg_layout_initialize (0);

  FOR_EACH_BB (cur_bb)
    if (cur_bb->index >= NUM_FIXED_BLOCKS
        && cur_bb->next_bb->index >= NUM_FIXED_BLOCKS)
      cur_bb->aux = cur_bb->next_bb;

  find_rarely_executed_basic_blocks_and_crossing_edges (&crossing_edges,
                                                        &n_crossing_edges,
                                                        &max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);

  cfg_layout_finalize ();
}
\f
static bool
gate_handle_reorder_blocks (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  return (optimize > 0);
}


/* Reorder basic blocks.  */
static unsigned int
rest_of_handle_reorder_blocks (void)
{
  basic_block bb;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  cfg_layout_initialize (CLEANUP_EXPENSIVE);

  if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
    {
      timevar_push (TV_TRACER);
      tracer ();
      timevar_pop (TV_TRACER);
    }

  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
    reorder_basic_blocks ();
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
      || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
    cleanup_cfg (CLEANUP_EXPENSIVE);

  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  cfg_layout_finalize ();

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
  return 0;
}

struct tree_opt_pass pass_reorder_blocks =
{
  "bbro",                               /* name */
  gate_handle_reorder_blocks,           /* gate */
  rest_of_handle_reorder_blocks,        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  'B'                                   /* letter */
};

static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */

  return (flag_reorder_blocks_and_partition
          && !DECL_ONE_ONLY (current_function_decl)
          && !user_defined_section_attribute);
}

/* Partition hot and cold basic blocks.  */
static unsigned int
rest_of_handle_partition_blocks (void)
{
  partition_hot_cold_basic_blocks ();
  return 0;
}

struct tree_opt_pass pass_partition_blocks =
{
  "bbpart",                             /* name */
  gate_handle_partition_blocks,         /* gate */
  rest_of_handle_partition_blocks,      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};

