gcc/cfgloopmanip.c
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "basic-block.h"
26 #include "cfgloop.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "gimple-iterator.h"
30 #include "gimplify-me.h"
31 #include "tree-ssa-loop-manip.h"
32 #include "dumpfile.h"
33
34 static void copy_loops_to (struct loop **, int,
35 struct loop *);
36 static void loop_redirect_edge (edge, basic_block);
37 static void remove_bbs (basic_block *, int);
38 static bool rpe_enum_p (const_basic_block, const void *);
39 static int find_path (edge, basic_block **);
40 static void fix_loop_placements (struct loop *, bool *);
41 static bool fix_bb_placement (basic_block);
42 static void fix_bb_placements (basic_block, bool *, bitmap);
43
44 /* Checks whether basic block BB is dominated by DATA. */
45 static bool
46 rpe_enum_p (const_basic_block bb, const void *data)
47 {
48 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
49 }
50
51 /* Remove the basic blocks BBS. NBBS is the number of basic blocks. */
52
53 static void
54 remove_bbs (basic_block *bbs, int nbbs)
55 {
56 int i;
57
58 for (i = 0; i < nbbs; i++)
59 delete_basic_block (bbs[i]);
60 }
61
62 /* Find path -- i.e. the basic blocks dominated by edge E and put them
63 into array BBS, which will be allocated large enough to contain them.
64 E->dest must have exactly one predecessor for this to work (this is
65 easy to achieve, and we do not do it here because we do not want this
66 function to alter anything). The number of basic blocks in the
67 path is returned. */
68 static int
69 find_path (edge e, basic_block **bbs)
70 {
71 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
72
73 /* Find bbs in the path. */
74 *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
75 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
76 n_basic_blocks_for_fn (cfun), e->dest);
77 }
78
79 /* Fix placement of basic block BB inside loop hierarchy --
80 Let L be the loop to which BB belongs. Then every successor of BB must either
81 1) belong to some superloop of loop L, or
82 2) be a header of a loop K such that K->outer is a superloop of L.
83 Returns true if we had to move BB into another loop to enforce this condition,
84 false if the placement of BB was already correct (provided that placements
85 of its successors are correct). */
86 static bool
87 fix_bb_placement (basic_block bb)
88 {
89 edge e;
90 edge_iterator ei;
91 struct loop *loop = current_loops->tree_root, *act;
92
93 FOR_EACH_EDGE (e, ei, bb->succs)
94 {
95 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
96 continue;
97
98 act = e->dest->loop_father;
99 if (act->header == e->dest)
100 act = loop_outer (act);
101
102 if (flow_loop_nested_p (loop, act))
103 loop = act;
104 }
105
106 if (loop == bb->loop_father)
107 return false;
108
109 remove_bb_from_loops (bb);
110 add_bb_to_loop (bb, loop);
111
112 return true;
113 }
114
115 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
116 of LOOP to which at least one exit edge of LOOP leads, and set it
117 as the immediate superloop of LOOP. Return true if the immediate superloop
118 of LOOP changed.
119
120 IRRED_INVALIDATED is set to true if a change in the loop structures might
121 invalidate the information about irreducible regions. */
122
123 static bool
124 fix_loop_placement (struct loop *loop, bool *irred_invalidated)
125 {
126 unsigned i;
127 edge e;
128 vec<edge> exits = get_loop_exit_edges (loop);
129 struct loop *father = current_loops->tree_root, *act;
130 bool ret = false;
131
132 FOR_EACH_VEC_ELT (exits, i, e)
133 {
134 act = find_common_loop (loop, e->dest->loop_father);
135 if (flow_loop_nested_p (father, act))
136 father = act;
137 }
138
139 if (father != loop_outer (loop))
140 {
141 for (act = loop_outer (loop); act != father; act = loop_outer (act))
142 act->num_nodes -= loop->num_nodes;
143 flow_loop_tree_node_remove (loop);
144 flow_loop_tree_node_add (father, loop);
145
146 /* The exit edges of LOOP no longer exit its original immediate
147 superloops; remove them from the appropriate exit lists. */
148 FOR_EACH_VEC_ELT (exits, i, e)
149 {
150 /* We may need to recompute irreducible loops. */
151 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
152 *irred_invalidated = true;
153 rescan_loop_exit (e, false, false);
154 }
155
156 ret = true;
157 }
158
159 exits.release ();
160 return ret;
161 }
162
163 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
164 enforce the condition stated in the description of fix_bb_placement. We
165 start from basic block FROM that had some of its successors removed, so that
166 its placement may no longer be correct, and iteratively fix the placement of
167 its predecessors, which may change if the placement of FROM changed. Also fix
168 the placement of subloops of FROM->loop_father, which might also be altered due
169 to this change; the condition for them is similar, except that instead of
170 successors we consider edges coming out of the loops.
171
172 If the changes may invalidate the information about irreducible regions,
173 IRRED_INVALIDATED is set to true.
174
175 If LOOP_CLOSED_SSA_INVALIDATED is non-NULL then all basic blocks with
176 changed loop_father are collected there. */
177
178 static void
179 fix_bb_placements (basic_block from,
180 bool *irred_invalidated,
181 bitmap loop_closed_ssa_invalidated)
182 {
183 sbitmap in_queue;
184 basic_block *queue, *qtop, *qbeg, *qend;
185 struct loop *base_loop, *target_loop;
186 edge e;
187
188 /* We pass through blocks back-reachable from FROM, testing whether some
189 of their successors moved to an outer loop. It may be necessary to
190 iterate several times, but this is finite, as we stop unless we move
191 the basic block up the loop structure. The whole story is a bit
192 more complicated due to the presence of subloops, which are moved using
193 fix_loop_placement. */
194
195 base_loop = from->loop_father;
196 /* If we are already in the outermost loop, the basic blocks cannot be moved
197 outside of it. If FROM is the header of the base loop, it cannot be moved
198 outside of it, either. In both cases, we can end now. */
199 if (base_loop == current_loops->tree_root
200 || from == base_loop->header)
201 return;
202
203 in_queue = sbitmap_alloc (last_basic_block);
204 bitmap_clear (in_queue);
205 bitmap_set_bit (in_queue, from->index);
206 /* Prevent us from going out of the base_loop. */
207 bitmap_set_bit (in_queue, base_loop->header->index);
208
209 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
210 qtop = queue + base_loop->num_nodes + 1;
211 qbeg = queue;
212 qend = queue + 1;
213 *qbeg = from;
214
215 while (qbeg != qend)
216 {
217 edge_iterator ei;
218 from = *qbeg;
219 qbeg++;
220 if (qbeg == qtop)
221 qbeg = queue;
222 bitmap_clear_bit (in_queue, from->index);
223
224 if (from->loop_father->header == from)
225 {
226 /* Subloop header, maybe move the loop upward. */
227 if (!fix_loop_placement (from->loop_father, irred_invalidated))
228 continue;
229 target_loop = loop_outer (from->loop_father);
230 if (loop_closed_ssa_invalidated)
231 {
232 basic_block *bbs = get_loop_body (from->loop_father);
233 for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
234 bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
235 free (bbs);
236 }
237 }
238 else
239 {
240 /* Ordinary basic block. */
241 if (!fix_bb_placement (from))
242 continue;
243 target_loop = from->loop_father;
244 if (loop_closed_ssa_invalidated)
245 bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
246 }
247
248 FOR_EACH_EDGE (e, ei, from->succs)
249 {
250 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
251 *irred_invalidated = true;
252 }
253
254 /* Something has changed, insert predecessors into queue. */
255 FOR_EACH_EDGE (e, ei, from->preds)
256 {
257 basic_block pred = e->src;
258 struct loop *nca;
259
260 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
261 *irred_invalidated = true;
262
263 if (bitmap_bit_p (in_queue, pred->index))
264 continue;
265
266 /* If it is a subloop, then it either was not moved, or
267 the path up the loop tree from base_loop does not contain
268 it. */
269 nca = find_common_loop (pred->loop_father, base_loop);
270 if (pred->loop_father != base_loop
271 && (nca == base_loop
272 || nca != pred->loop_father))
273 pred = pred->loop_father->header;
274 else if (!flow_loop_nested_p (target_loop, pred->loop_father))
275 {
276 /* If PRED is already higher in the loop hierarchy than the
277 TARGET_LOOP to which we moved FROM, the change of the position
278 of FROM does not affect the position of PRED, so there is no
279 point in processing it. */
280 continue;
281 }
282
283 if (bitmap_bit_p (in_queue, pred->index))
284 continue;
285
286 /* Schedule the basic block. */
287 *qend = pred;
288 qend++;
289 if (qend == qtop)
290 qend = queue;
291 bitmap_set_bit (in_queue, pred->index);
292 }
293 }
294 free (in_queue);
295 free (queue);
296 }
297
298 /* Removes the path beginning at edge E, i.e. removes the basic blocks dominated
299 by E and updates loop structures and dominators. Return true if we were able
300 to remove the path, false otherwise (and nothing is affected then). */
301 bool
302 remove_path (edge e)
303 {
304 edge ae;
305 basic_block *rem_bbs, *bord_bbs, from, bb;
306 vec<basic_block> dom_bbs;
307 int i, nrem, n_bord_bbs;
308 sbitmap seen;
309 bool irred_invalidated = false;
310 edge_iterator ei;
311 struct loop *l, *f;
312
313 if (!can_remove_branch_p (e))
314 return false;
315
316 /* Keep track of whether we need to update information about irreducible
317 regions. This is the case if the removed area is a part of the
318 irreducible region, or if the set of basic blocks that belong to a loop
319 that is inside an irreducible region is changed, or if such a loop is
320 removed. */
321 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
322 irred_invalidated = true;
323
324 /* We need to check whether basic blocks are dominated by the edge
325 e, but we only have basic block dominators. This is easy to
326 fix -- when e->dest has exactly one predecessor, this corresponds
327 to the blocks dominated by e->dest; if not, we split the edge. */
328 if (!single_pred_p (e->dest))
329 e = single_pred_edge (split_edge (e));
330
331 /* It may happen that by removing the path we remove one or more loops
332 that E->src belongs to. In this case, first unloop those loops, then proceed
333 normally. We may assume that e->dest is not a header of any loop,
334 as it now has exactly one predecessor. */
335 for (l = e->src->loop_father; loop_outer (l); l = f)
336 {
337 f = loop_outer (l);
338 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
339 unloop (l, &irred_invalidated, NULL);
340 }
341
342 /* Identify the path. */
343 nrem = find_path (e, &rem_bbs);
344
345 n_bord_bbs = 0;
346 bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
347 seen = sbitmap_alloc (last_basic_block);
348 bitmap_clear (seen);
349
350 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
351 for (i = 0; i < nrem; i++)
352 bitmap_set_bit (seen, rem_bbs[i]->index);
353 if (!irred_invalidated)
354 FOR_EACH_EDGE (ae, ei, e->src->succs)
355 if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
356 && !bitmap_bit_p (seen, ae->dest->index)
357 && ae->flags & EDGE_IRREDUCIBLE_LOOP)
358 {
359 irred_invalidated = true;
360 break;
361 }
362
363 for (i = 0; i < nrem; i++)
364 {
365 bb = rem_bbs[i];
366 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
367 if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
368 && !bitmap_bit_p (seen, ae->dest->index))
369 {
370 bitmap_set_bit (seen, ae->dest->index);
371 bord_bbs[n_bord_bbs++] = ae->dest;
372
373 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
374 irred_invalidated = true;
375 }
376 }
377
378 /* Remove the path. */
379 from = e->src;
380 remove_branch (e);
381 dom_bbs.create (0);
382
383 /* Cancel loops contained in the path. */
384 for (i = 0; i < nrem; i++)
385 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
386 cancel_loop_tree (rem_bbs[i]->loop_father);
387
388 remove_bbs (rem_bbs, nrem);
389 free (rem_bbs);
390
391 /* Find blocks whose dominators may be affected. */
392 bitmap_clear (seen);
393 for (i = 0; i < n_bord_bbs; i++)
394 {
395 basic_block ldom;
396
397 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
398 if (bitmap_bit_p (seen, bb->index))
399 continue;
400 bitmap_set_bit (seen, bb->index);
401
402 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
403 ldom;
404 ldom = next_dom_son (CDI_DOMINATORS, ldom))
405 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
406 dom_bbs.safe_push (ldom);
407 }
408
409 free (seen);
410
411 /* Recount dominators. */
412 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
413 dom_bbs.release ();
414 free (bord_bbs);
415
416 /* Fix placements of basic blocks inside loops and the placement of
417 loops in the loop tree. */
418 fix_bb_placements (from, &irred_invalidated, NULL);
419 fix_loop_placements (from->loop_father, &irred_invalidated);
420
421 if (irred_invalidated
422 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
423 mark_irreducible_loops ();
424
425 return true;
426 }
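/* A minimal usage sketch (not from the GCC sources), assuming a caller
   such as an unswitching pass that has proven the exit edge DEAD_EXIT
   can never be taken.  The function and variable names below are
   illustrative only.  */
#if 0
static void
example_drop_dead_exit (edge dead_exit)
{
  /* remove_path re-checks can_remove_branch_p and returns false when
     the branch cannot be removed; in that case nothing is changed.  */
  if (remove_path (dead_exit))
    {
      /* The CFG, the dominators, the loop tree and the irreducible
         region marks have already been fixed up by remove_path; only
         pass-local data still needs updating here.  */
    }
}
#endif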
427
428 /* Creates a place for the new loop LOOP in the loops structure of FN. */
429
430 void
431 place_new_loop (struct function *fn, struct loop *loop)
432 {
433 loop->num = number_of_loops (fn);
434 vec_safe_push (loops_for_fn (fn)->larray, loop);
435 }
436
437 /* Given LOOP structure with filled header and latch, find the body of the
438 corresponding loop and add it to the loop tree. Insert LOOP as a son of
439 OUTER. */
440
441 void
442 add_loop (struct loop *loop, struct loop *outer)
443 {
444 basic_block *bbs;
445 int i, n;
446 struct loop *subloop;
447 edge e;
448 edge_iterator ei;
449
450 /* Add it to loop structure. */
451 place_new_loop (cfun, loop);
452 flow_loop_tree_node_add (outer, loop);
453
454 /* Find its nodes. */
455 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
456 n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
457
458 for (i = 0; i < n; i++)
459 {
460 if (bbs[i]->loop_father == outer)
461 {
462 remove_bb_from_loops (bbs[i]);
463 add_bb_to_loop (bbs[i], loop);
464 continue;
465 }
466
467 loop->num_nodes++;
468
469 /* If we find a direct subloop of OUTER, move it to LOOP. */
470 subloop = bbs[i]->loop_father;
471 if (loop_outer (subloop) == outer
472 && subloop->header == bbs[i])
473 {
474 flow_loop_tree_node_remove (subloop);
475 flow_loop_tree_node_add (loop, subloop);
476 }
477 }
478
479 /* Update the information about loop exit edges. */
480 for (i = 0; i < n; i++)
481 {
482 FOR_EACH_EDGE (e, ei, bbs[i]->succs)
483 {
484 rescan_loop_exit (e, false, false);
485 }
486 }
487
488 free (bbs);
489 }
490
491 /* Multiply all frequencies in LOOP by NUM/DEN. */
492
493 void
494 scale_loop_frequencies (struct loop *loop, int num, int den)
495 {
496 basic_block *bbs;
497
498 bbs = get_loop_body (loop);
499 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
500 free (bbs);
501 }
502
503 /* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
504 If ITERATION_BOUND is non-zero, scale even further if the loop is predicted
505 to iterate too many times. */
506
507 void
508 scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound)
509 {
510 gcov_type iterations = expected_loop_iterations_unbounded (loop);
511 edge e;
512 edge_iterator ei;
513
514 if (dump_file && (dump_flags & TDF_DETAILS))
515 fprintf (dump_file, ";; Scaling loop %i with scale %f, "
516 "bounding iterations to %i from guessed %i\n",
517 loop->num, (double)scale / REG_BR_PROB_BASE,
518 (int)iteration_bound, (int)iterations);
519
520 /* See if loop is predicted to iterate too many times. */
521 if (iteration_bound && iterations > 0
522 && apply_probability (iterations, scale) > iteration_bound)
523 {
524 /* Fixing the loop profile for a different trip count is not trivial; the exit
525 probabilities have to be updated to match, and frequencies propagated down
526 to the loop body.
527
528 We fully update only the simple case of a loop with a single exit that is
529 either from the latch or from the BB just before the latch and comes from a
530 BB with a simple conditional jump. This is OK for use in the vectorizer. */
531 e = single_exit (loop);
532 if (e)
533 {
534 edge other_e;
535 int freq_delta;
536 gcov_type count_delta;
537
538 FOR_EACH_EDGE (other_e, ei, e->src->succs)
539 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
540 && e != other_e)
541 break;
542
543 /* The probability of exit must be 1/ITERATION_BOUND. */
544 freq_delta = EDGE_FREQUENCY (e);
545 e->probability = REG_BR_PROB_BASE / iteration_bound;
546 other_e->probability = inverse_probability (e->probability);
547 freq_delta -= EDGE_FREQUENCY (e);
548
549 /* Adjust counts accordingly. */
550 count_delta = e->count;
551 e->count = apply_probability (e->src->count, e->probability);
552 other_e->count = apply_probability (e->src->count, other_e->probability);
553 count_delta -= e->count;
554
555 /* If the latch exists, change its frequency and count, since we changed
556 the probability of the exit. Theoretically we should update everything from
557 the source of the exit edge to the latch, but for the vectorizer this is enough. */
558 if (loop->latch
559 && loop->latch != e->src)
560 {
561 loop->latch->frequency += freq_delta;
562 if (loop->latch->frequency < 0)
563 loop->latch->frequency = 0;
564 loop->latch->count += count_delta;
565 if (loop->latch->count < 0)
566 loop->latch->count = 0;
567 }
568 }
569
570 /* Roughly speaking, we want to reduce the loop body profile by the
571 difference in loop iterations. However, we can do better if we
572 look at the actual profile, when it is available. */
573 scale = RDIV (iteration_bound * scale, iterations);
574 if (loop->header->count)
575 {
576 gcov_type count_in = 0;
577
578 FOR_EACH_EDGE (e, ei, loop->header->preds)
579 if (e->src != loop->latch)
580 count_in += e->count;
581
582 if (count_in != 0)
583 scale = GCOV_COMPUTE_SCALE (count_in * iteration_bound,
584 loop->header->count);
585 }
586 else if (loop->header->frequency)
587 {
588 int freq_in = 0;
589
590 FOR_EACH_EDGE (e, ei, loop->header->preds)
591 if (e->src != loop->latch)
592 freq_in += EDGE_FREQUENCY (e);
593
594 if (freq_in != 0)
595 scale = GCOV_COMPUTE_SCALE (freq_in * iteration_bound,
596 loop->header->frequency);
597 }
598 if (!scale)
599 scale = 1;
600 }
601
602 if (scale == REG_BR_PROB_BASE)
603 return;
604
605 /* Scale the actual probabilities. */
606 scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
607 if (dump_file && (dump_flags & TDF_DETAILS))
608 fprintf (dump_file, ";; guessed iterations are now %i\n",
609 (int)expected_loop_iterations_unbounded (loop));
610 }
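/* A usage sketch (not from the GCC sources), assuming a transformation
   that makes LOOP run roughly VF times fewer iterations and never more
   than NEW_BOUND of them; VF and NEW_BOUND are hypothetical pass-local
   values.  */
#if 0
static void
example_rescale_transformed_loop (struct loop *loop, int vf,
                                  gcov_type new_bound)
{
  /* The body now executes about 1/VF as often; scale frequencies and
     counts, and cap the iteration estimate at NEW_BOUND.  */
  scale_loop_profile (loop, REG_BR_PROB_BASE / vf, new_bound);
}
#endif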
611
612 /* Recompute dominance information for basic blocks outside LOOP. */
613
614 static void
615 update_dominators_in_loop (struct loop *loop)
616 {
617 vec<basic_block> dom_bbs = vNULL;
618 sbitmap seen;
619 basic_block *body;
620 unsigned i;
621
622 seen = sbitmap_alloc (last_basic_block);
623 bitmap_clear (seen);
624 body = get_loop_body (loop);
625
626 for (i = 0; i < loop->num_nodes; i++)
627 bitmap_set_bit (seen, body[i]->index);
628
629 for (i = 0; i < loop->num_nodes; i++)
630 {
631 basic_block ldom;
632
633 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
634 ldom;
635 ldom = next_dom_son (CDI_DOMINATORS, ldom))
636 if (!bitmap_bit_p (seen, ldom->index))
637 {
638 bitmap_set_bit (seen, ldom->index);
639 dom_bbs.safe_push (ldom);
640 }
641 }
642
643 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
644 free (body);
645 free (seen);
646 dom_bbs.release ();
647 }
648
649 /* Creates an if region as shown below. CONDITION is used to create
650 the test for the if.
651
652 |
653 | ------------- -------------
654 | | pred_bb | | pred_bb |
655 | ------------- -------------
656 | | |
657 | | | ENTRY_EDGE
658 | | ENTRY_EDGE V
659 | | ====> -------------
660 | | | cond_bb |
661 | | | CONDITION |
662 | | -------------
663 | V / \
664 | ------------- e_false / \ e_true
665 | | succ_bb | V V
666 | ------------- ----------- -----------
667 | | false_bb | | true_bb |
668 | ----------- -----------
669 | \ /
670 | \ /
671 | V V
672 | -------------
673 | | join_bb |
674 | -------------
675 | | exit_edge (result)
676 | V
677 | -----------
678 | | succ_bb |
679 | -----------
680 |
681 */
682
683 edge
684 create_empty_if_region_on_edge (edge entry_edge, tree condition)
685 {
686
687 basic_block cond_bb, true_bb, false_bb, join_bb;
688 edge e_true, e_false, exit_edge;
689 gimple cond_stmt;
690 tree simple_cond;
691 gimple_stmt_iterator gsi;
692
693 cond_bb = split_edge (entry_edge);
694
695 /* Insert condition in cond_bb. */
696 gsi = gsi_last_bb (cond_bb);
697 simple_cond =
698 force_gimple_operand_gsi (&gsi, condition, true, NULL,
699 false, GSI_NEW_STMT);
700 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
701 gsi = gsi_last_bb (cond_bb);
702 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
703
704 join_bb = split_edge (single_succ_edge (cond_bb));
705
706 e_true = single_succ_edge (cond_bb);
707 true_bb = split_edge (e_true);
708
709 e_false = make_edge (cond_bb, join_bb, 0);
710 false_bb = split_edge (e_false);
711
712 e_true->flags &= ~EDGE_FALLTHRU;
713 e_true->flags |= EDGE_TRUE_VALUE;
714 e_false->flags &= ~EDGE_FALLTHRU;
715 e_false->flags |= EDGE_FALSE_VALUE;
716
717 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
718 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
719 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
720 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
721
722 exit_edge = single_succ_edge (join_bb);
723
724 if (single_pred_p (exit_edge->dest))
725 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
726
727 return exit_edge;
728 }
729
730 /* create_empty_loop_on_edge
731 |
732 | - pred_bb - ------ pred_bb ------
733 | | | | iv0 = initial_value |
734 | -----|----- ---------|-----------
735 | | ______ | entry_edge
736 | | entry_edge / | |
737 | | ====> | -V---V- loop_header -------------
738 | V | | iv_before = phi (iv0, iv_after) |
739 | - succ_bb - | ---|-----------------------------
740 | | | | |
741 | ----------- | ---V--- loop_body ---------------
742 | | | iv_after = iv_before + stride |
743 | | | if (iv_before < upper_bound) |
744 | | ---|--------------\--------------
745 | | | \ exit_e
746 | | V \
747 | | - loop_latch - V- succ_bb -
748 | | | | | |
749 | | /------------- -----------
750 | \ ___ /
751
752 Creates an empty loop as shown above. IV_BEFORE is the SSA_NAME
753 that is used before the increment of the IV. IV_BEFORE should be used for
754 adding code to the body that uses the IV. OUTER is the outer loop in
755 which the new loop should be inserted.
756
757 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
758 inserted on the loop entry edge. This implies that this function
759 should be used only when the UPPER_BOUND expression is a loop
760 invariant. */
761
762 struct loop *
763 create_empty_loop_on_edge (edge entry_edge,
764 tree initial_value,
765 tree stride, tree upper_bound,
766 tree iv,
767 tree *iv_before,
768 tree *iv_after,
769 struct loop *outer)
770 {
771 basic_block loop_header, loop_latch, succ_bb, pred_bb;
772 struct loop *loop;
773 gimple_stmt_iterator gsi;
774 gimple_seq stmts;
775 gimple cond_expr;
776 tree exit_test;
777 edge exit_e;
778 int prob;
779
780 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
781
782 /* Create header, latch and wire up the loop. */
783 pred_bb = entry_edge->src;
784 loop_header = split_edge (entry_edge);
785 loop_latch = split_edge (single_succ_edge (loop_header));
786 succ_bb = single_succ (loop_latch);
787 make_edge (loop_header, succ_bb, 0);
788 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
789
790 /* Set immediate dominator information. */
791 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
792 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
793 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
794
795 /* Initialize a loop structure and put it in a loop hierarchy. */
796 loop = alloc_loop ();
797 loop->header = loop_header;
798 loop->latch = loop_latch;
799 add_loop (loop, outer);
800
801 /* TODO: Fix frequencies and counts. */
802 prob = REG_BR_PROB_BASE / 2;
803
804 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
805
806 /* Update dominators. */
807 update_dominators_in_loop (loop);
808
809 /* Modify edge flags. */
810 exit_e = single_exit (loop);
811 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
812 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
813
814 /* Construct IV code in loop. */
815 initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
816 if (stmts)
817 {
818 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
819 gsi_commit_edge_inserts ();
820 }
821
822 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
823 if (stmts)
824 {
825 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
826 gsi_commit_edge_inserts ();
827 }
828
829 gsi = gsi_last_bb (loop_header);
830 create_iv (initial_value, stride, iv, loop, &gsi, false,
831 iv_before, iv_after);
832
833 /* Insert loop exit condition. */
834 cond_expr = gimple_build_cond
835 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
836
837 exit_test = gimple_cond_lhs (cond_expr);
838 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
839 false, GSI_NEW_STMT);
840 gimple_cond_set_lhs (cond_expr, exit_test);
841 gsi = gsi_last_bb (exit_e->src);
842 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
843
844 split_block_after_labels (loop_header);
845
846 return loop;
847 }
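/* A usage sketch (not from the GCC sources) that builds an empty
   counting loop "for (civ = 0; civ < n; civ++)" on ENTRY inside OUTER.
   The temporary CIV created with create_tmp_var and the function name
   are illustrative; a real caller may already have a suitable IV
   variable.  */
#if 0
static struct loop *
example_build_counting_loop (edge entry, tree n, struct loop *outer)
{
  tree type = TREE_TYPE (n);
  tree civ = create_tmp_var (type, "civ");
  tree iv_before, iv_after;

  /* Statements that use IV_BEFORE can afterwards be added to the new
     loop body.  */
  return create_empty_loop_on_edge (entry,
                                    build_int_cst (type, 0), /* initial  */
                                    build_int_cst (type, 1), /* stride   */
                                    n,                       /* bound    */
                                    civ, &iv_before, &iv_after, outer);
}
#endif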
848
849 /* Make the area between HEADER_EDGE and LATCH_EDGE a loop by connecting
850 the latch to the header, and update the loop tree and dominators
851 accordingly. Everything between them plus LATCH_EDGE destination must
852 be dominated by HEADER_EDGE destination, and back-reachable from
853 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
854 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
855 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
856 Returns the newly created loop. Frequencies and counts in the new loop
857 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
858
859 struct loop *
860 loopify (edge latch_edge, edge header_edge,
861 basic_block switch_bb, edge true_edge, edge false_edge,
862 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
863 {
864 basic_block succ_bb = latch_edge->dest;
865 basic_block pred_bb = header_edge->src;
866 struct loop *loop = alloc_loop ();
867 struct loop *outer = loop_outer (succ_bb->loop_father);
868 int freq;
869 gcov_type cnt;
870 edge e;
871 edge_iterator ei;
872
873 loop->header = header_edge->dest;
874 loop->latch = latch_edge->src;
875
876 freq = EDGE_FREQUENCY (header_edge);
877 cnt = header_edge->count;
878
879 /* Redirect edges. */
880 loop_redirect_edge (latch_edge, loop->header);
881 loop_redirect_edge (true_edge, succ_bb);
882
883 /* During loop versioning, one of the switch_bb edges is already properly
884 set. Do not redirect it again unless redirect_all_edges is true. */
885 if (redirect_all_edges)
886 {
887 loop_redirect_edge (header_edge, switch_bb);
888 loop_redirect_edge (false_edge, loop->header);
889
890 /* Update dominators. */
891 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
892 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
893 }
894
895 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
896
897 /* Compute new loop. */
898 add_loop (loop, outer);
899
900 /* Add switch_bb to appropriate loop. */
901 if (switch_bb->loop_father)
902 remove_bb_from_loops (switch_bb);
903 add_bb_to_loop (switch_bb, outer);
904
905 /* Fix frequencies. */
906 if (redirect_all_edges)
907 {
908 switch_bb->frequency = freq;
909 switch_bb->count = cnt;
910 FOR_EACH_EDGE (e, ei, switch_bb->succs)
911 {
912 e->count = apply_probability (switch_bb->count, e->probability);
913 }
914 }
915 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
916 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
917 update_dominators_in_loop (loop);
918
919 return loop;
920 }
921
922 /* Remove the latch edge of a LOOP and update loops to indicate that
923 the LOOP was removed. After this function, the original loop latch will
924 have no successor, which the caller is expected to fix somehow.
925
926 If this may cause the information about irreducible regions to become
927 invalid, IRRED_INVALIDATED is set to true.
928
929 LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
930 basic blocks that had a non-trivial update of their loop_father. */
931
932 void
933 unloop (struct loop *loop, bool *irred_invalidated,
934 bitmap loop_closed_ssa_invalidated)
935 {
936 basic_block *body;
937 struct loop *ploop;
938 unsigned i, n;
939 basic_block latch = loop->latch;
940 bool dummy = false;
941
942 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
943 *irred_invalidated = true;
944
945 /* This is relatively straightforward. The dominators are unchanged, as
946 the loop header dominates the loop latch, so the only thing we have to care
947 about is the placement of loops and basic blocks inside the loop tree. We
948 move them all to loop_outer (loop), and then let fix_bb_placements do
949 its work. */
950
951 body = get_loop_body (loop);
952 n = loop->num_nodes;
953 for (i = 0; i < n; i++)
954 if (body[i]->loop_father == loop)
955 {
956 remove_bb_from_loops (body[i]);
957 add_bb_to_loop (body[i], loop_outer (loop));
958 }
959 free (body);
960
961 while (loop->inner)
962 {
963 ploop = loop->inner;
964 flow_loop_tree_node_remove (ploop);
965 flow_loop_tree_node_add (loop_outer (loop), ploop);
966 }
967
968 /* Remove the loop and free its data. */
969 delete_loop (loop);
970
971 remove_edge (single_succ_edge (latch));
972
973 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
974 there is an irreducible region inside the cancelled loop, the flags will
975 still be correct. */
976 fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
977 }
978
979 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
980 condition stated in description of fix_loop_placement holds for them.
981 It is used in the case when we have removed some edges coming out of LOOP,
982 which may cause the right placement of LOOP inside the loop tree to change.
983
984 IRRED_INVALIDATED is set to true if a change in the loop structures might
985 invalidate the information about irreducible regions. */
986
987 static void
988 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
989 {
990 struct loop *outer;
991
992 while (loop_outer (loop))
993 {
994 outer = loop_outer (loop);
995 if (!fix_loop_placement (loop, irred_invalidated))
996 break;
997
998 /* Changing the placement of a loop in the loop tree may alter the
999 validity of condition 2) of the description of fix_bb_placement
1000 for its preheader, because the successor is the header and belongs
1001 to the loop. So call fix_bb_placements to fix up the placement
1002 of the preheader and (possibly) of its predecessors. */
1003 fix_bb_placements (loop_preheader_edge (loop)->src,
1004 irred_invalidated, NULL);
1005 loop = outer;
1006 }
1007 }
1008
1009 /* Duplicate loop bounds and other information we store about
1010 the loop into its duplicate. */
1011
1012 void
1013 copy_loop_info (struct loop *loop, struct loop *target)
1014 {
1015 gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1016 target->any_upper_bound = loop->any_upper_bound;
1017 target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1018 target->any_estimate = loop->any_estimate;
1019 target->nb_iterations_estimate = loop->nb_iterations_estimate;
1020 target->estimate_state = loop->estimate_state;
1021 }
1022
1023 /* Creates a copy of LOOP as a subloop of the TARGET loop, placing the newly
1024 created loop into the loop structures. */
1025 struct loop *
1026 duplicate_loop (struct loop *loop, struct loop *target)
1027 {
1028 struct loop *cloop;
1029 cloop = alloc_loop ();
1030 place_new_loop (cfun, cloop);
1031
1032 copy_loop_info (loop, cloop);
1033
1034 /* Mark the new loop as copy of LOOP. */
1035 set_loop_copy (loop, cloop);
1036
1037 /* Add it to target. */
1038 flow_loop_tree_node_add (target, cloop);
1039
1040 return cloop;
1041 }
1042
1043 /* Copies structure of subloops of LOOP into TARGET loop, placing
1044 newly created loops into loop tree. */
1045 void
1046 duplicate_subloops (struct loop *loop, struct loop *target)
1047 {
1048 struct loop *aloop, *cloop;
1049
1050 for (aloop = loop->inner; aloop; aloop = aloop->next)
1051 {
1052 cloop = duplicate_loop (aloop, target);
1053 duplicate_subloops (aloop, cloop);
1054 }
1055 }
1056
1057 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1058 into TARGET loop, placing newly created loops into loop tree. */
1059 static void
1060 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1061 {
1062 struct loop *aloop;
1063 int i;
1064
1065 for (i = 0; i < n; i++)
1066 {
1067 aloop = duplicate_loop (copied_loops[i], target);
1068 duplicate_subloops (copied_loops[i], aloop);
1069 }
1070 }
1071
1072 /* Redirects edge E to basic block DEST. */
1073 static void
1074 loop_redirect_edge (edge e, basic_block dest)
1075 {
1076 if (e->dest == dest)
1077 return;
1078
1079 redirect_edge_and_branch_force (e, dest);
1080 }
1081
1082 /* Check whether LOOP's body can be duplicated. */
1083 bool
1084 can_duplicate_loop_p (const struct loop *loop)
1085 {
1086 int ret;
1087 basic_block *bbs = get_loop_body (loop);
1088
1089 ret = can_copy_bbs_p (bbs, loop->num_nodes);
1090 free (bbs);
1091
1092 return ret;
1093 }
1094
1095 /* Sets probability and count of edge E to zero. The probability and count
1096 are redistributed evenly among the remaining edges coming from E->src. */
1097
1098 static void
1099 set_zero_probability (edge e)
1100 {
1101 basic_block bb = e->src;
1102 edge_iterator ei;
1103 edge ae, last = NULL;
1104 unsigned n = EDGE_COUNT (bb->succs);
1105 gcov_type cnt = e->count, cnt1;
1106 unsigned prob = e->probability, prob1;
1107
1108 gcc_assert (n > 1);
1109 cnt1 = cnt / (n - 1);
1110 prob1 = prob / (n - 1);
1111
1112 FOR_EACH_EDGE (ae, ei, bb->succs)
1113 {
1114 if (ae == e)
1115 continue;
1116
1117 ae->probability += prob1;
1118 ae->count += cnt1;
1119 last = ae;
1120 }
1121
1122 /* Move the rest to one of the edges. */
1123 last->probability += prob % (n - 1);
1124 last->count += cnt % (n - 1);
1125
1126 e->probability = 0;
1127 e->count = 0;
1128 }
1129
1130 /* Duplicates the body of LOOP to the given edge E NDUPL times. Takes care of
1131 updating the loop structure and dominators. E's destination must be the LOOP
1132 header for this to work, i.e. it must be the entry or the latch edge of this
1133 loop; these are unique, as the loops must have preheaders for this function
1134 to work correctly (in case E is the latch, the function unrolls the loop; if
1135 E is the entry edge, it peels the loop). Stores the edges created by copying
1136 the ORIG edge from the copies corresponding to set bits in the WONT_EXIT
1137 bitmap (bit 0 corresponds to the original LOOP body, the other copies are
1138 numbered in the order given by control flow through them) into the TO_REMOVE
1139 array. Returns false if duplication is impossible. */
1140
1141 bool
1142 duplicate_loop_to_header_edge (struct loop *loop, edge e,
1143 unsigned int ndupl, sbitmap wont_exit,
1144 edge orig, vec<edge> *to_remove,
1145 int flags)
1146 {
1147 struct loop *target, *aloop;
1148 struct loop **orig_loops;
1149 unsigned n_orig_loops;
1150 basic_block header = loop->header, latch = loop->latch;
1151 basic_block *new_bbs, *bbs, *first_active;
1152 basic_block new_bb, bb, first_active_latch = NULL;
1153 edge ae, latch_edge;
1154 edge spec_edges[2], new_spec_edges[2];
1155 #define SE_LATCH 0
1156 #define SE_ORIG 1
1157 unsigned i, j, n;
1158 int is_latch = (latch == e->src);
1159 int scale_act = 0, *scale_step = NULL, scale_main = 0;
1160 int scale_after_exit = 0;
1161 int p, freq_in, freq_le, freq_out_orig;
1162 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1163 int add_irreducible_flag;
1164 basic_block place_after;
1165 bitmap bbs_to_scale = NULL;
1166 bitmap_iterator bi;
1167
1168 gcc_assert (e->dest == loop->header);
1169 gcc_assert (ndupl > 0);
1170
1171 if (orig)
1172 {
1173 /* ORIG must be an edge out of the loop. */
1174 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1175 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1176 }
1177
1178 n = loop->num_nodes;
1179 bbs = get_loop_body_in_dom_order (loop);
1180 gcc_assert (bbs[0] == loop->header);
1181 gcc_assert (bbs[n - 1] == loop->latch);
1182
1183 /* Check whether duplication is possible. */
1184 if (!can_copy_bbs_p (bbs, loop->num_nodes))
1185 {
1186 free (bbs);
1187 return false;
1188 }
1189 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1190
1191 /* In case we are doing loop peeling and the loop is in the middle of
1192 an irreducible region, the peeled copies will be inside it too. */
1193 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1194 gcc_assert (!is_latch || !add_irreducible_flag);
1195
1196 /* Find edge from latch. */
1197 latch_edge = loop_latch_edge (loop);
1198
1199 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1200 {
1201 /* Calculate the coefficients by which we have to scale the frequencies
1202 of the duplicated loop bodies. */
1203 freq_in = header->frequency;
1204 freq_le = EDGE_FREQUENCY (latch_edge);
1205 if (freq_in == 0)
1206 freq_in = 1;
1207 if (freq_in < freq_le)
1208 freq_in = freq_le;
1209 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1210 if (freq_out_orig > freq_in - freq_le)
1211 freq_out_orig = freq_in - freq_le;
1212 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1213 prob_pass_wont_exit =
1214 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1215
1216 if (orig
1217 && REG_BR_PROB_BASE - orig->probability != 0)
1218 {
1219 /* The blocks that are dominated by a removed exit edge ORIG have
1220 frequencies scaled by this. */
1221 scale_after_exit
1222 = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE,
1223 REG_BR_PROB_BASE - orig->probability);
1224 bbs_to_scale = BITMAP_ALLOC (NULL);
1225 for (i = 0; i < n; i++)
1226 {
1227 if (bbs[i] != orig->src
1228 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1229 bitmap_set_bit (bbs_to_scale, i);
1230 }
1231 }
1232
1233 scale_step = XNEWVEC (int, ndupl);
1234
1235 for (i = 1; i <= ndupl; i++)
1236 scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1237 ? prob_pass_wont_exit
1238 : prob_pass_thru;
1239
1240 /* Complete peeling is special, as the probability of exit in the last
1241 copy becomes 1. */
1242 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1243 {
1244 int wanted_freq = EDGE_FREQUENCY (e);
1245
1246 if (wanted_freq > freq_in)
1247 wanted_freq = freq_in;
1248
1249 gcc_assert (!is_latch);
1250 /* The first copy has the frequency of the incoming edge. Each subsequent
1251 frequency should be reduced by prob_pass_wont_exit. The caller
1252 should have managed the flags so that all copies except for the original
1253 loop have wont_exit set. */
1254 scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1255 /* Now simulate the duplication adjustments and compute header
1256 frequency of the last copy. */
1257 for (i = 0; i < ndupl; i++)
1258 wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
1259 scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1260 }
1261 else if (is_latch)
1262 {
1263 prob_pass_main = bitmap_bit_p (wont_exit, 0)
1264 ? prob_pass_wont_exit
1265 : prob_pass_thru;
1266 p = prob_pass_main;
1267 scale_main = REG_BR_PROB_BASE;
1268 for (i = 0; i < ndupl; i++)
1269 {
1270 scale_main += p;
1271 p = combine_probabilities (p, scale_step[i]);
1272 }
1273 scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
1274 scale_act = combine_probabilities (scale_main, prob_pass_main);
1275 }
1276 else
1277 {
1278 scale_main = REG_BR_PROB_BASE;
1279 for (i = 0; i < ndupl; i++)
1280 scale_main = combine_probabilities (scale_main, scale_step[i]);
1281 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1282 }
1283 for (i = 0; i < ndupl; i++)
1284 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1285 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1286 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1287 }
1288
1289 /* The loop that the new bbs will belong to. */
1290 target = e->src->loop_father;
1291
1292 /* Original loops. */
1293 n_orig_loops = 0;
1294 for (aloop = loop->inner; aloop; aloop = aloop->next)
1295 n_orig_loops++;
1296 orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1297 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1298 orig_loops[i] = aloop;
1299
1300 set_loop_copy (loop, target);
1301
1302 first_active = XNEWVEC (basic_block, n);
1303 if (is_latch)
1304 {
1305 memcpy (first_active, bbs, n * sizeof (basic_block));
1306 first_active_latch = latch;
1307 }
1308
1309 spec_edges[SE_ORIG] = orig;
1310 spec_edges[SE_LATCH] = latch_edge;
1311
1312 place_after = e->src;
1313 for (j = 0; j < ndupl; j++)
1314 {
1315 /* Copy loops. */
1316 copy_loops_to (orig_loops, n_orig_loops, target);
1317
1318 /* Copy bbs. */
1319 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1320 place_after, true);
1321 place_after = new_spec_edges[SE_LATCH]->src;
1322
1323 if (flags & DLTHE_RECORD_COPY_NUMBER)
1324 for (i = 0; i < n; i++)
1325 {
1326 gcc_assert (!new_bbs[i]->aux);
1327 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1328 }
1329
1330 /* Note whether the blocks and edges belong to an irreducible loop. */
1331 if (add_irreducible_flag)
1332 {
1333 for (i = 0; i < n; i++)
1334 new_bbs[i]->flags |= BB_DUPLICATED;
1335 for (i = 0; i < n; i++)
1336 {
1337 edge_iterator ei;
1338 new_bb = new_bbs[i];
1339 if (new_bb->loop_father == target)
1340 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1341
1342 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1343 if ((ae->dest->flags & BB_DUPLICATED)
1344 && (ae->src->loop_father == target
1345 || ae->dest->loop_father == target))
1346 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1347 }
1348 for (i = 0; i < n; i++)
1349 new_bbs[i]->flags &= ~BB_DUPLICATED;
1350 }
1351
1352 /* Redirect the special edges. */
1353 if (is_latch)
1354 {
1355 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1356 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1357 loop->header);
1358 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1359 latch = loop->latch = new_bbs[n - 1];
1360 e = latch_edge = new_spec_edges[SE_LATCH];
1361 }
1362 else
1363 {
1364 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1365 loop->header);
1366 redirect_edge_and_branch_force (e, new_bbs[0]);
1367 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1368 e = new_spec_edges[SE_LATCH];
1369 }
1370
1371 /* Record exit edge in this copy. */
1372 if (orig && bitmap_bit_p (wont_exit, j + 1))
1373 {
1374 if (to_remove)
1375 to_remove->safe_push (new_spec_edges[SE_ORIG]);
1376 set_zero_probability (new_spec_edges[SE_ORIG]);
1377
1378 /* Scale the frequencies of the blocks dominated by the exit. */
1379 if (bbs_to_scale)
1380 {
1381 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1382 {
1383 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1384 REG_BR_PROB_BASE);
1385 }
1386 }
1387 }
1388
1389 /* Record the first copy in the control flow order if it is not
1390 the original loop (i.e. in case of peeling). */
1391 if (!first_active_latch)
1392 {
1393 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1394 first_active_latch = new_bbs[n - 1];
1395 }
1396
1397 /* Set counts and frequencies. */
1398 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1399 {
1400 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1401 scale_act = combine_probabilities (scale_act, scale_step[j]);
1402 }
1403 }
1404 free (new_bbs);
1405 free (orig_loops);
1406
1407 /* Record the exit edge in the original loop body, and update the frequencies. */
1408 if (orig && bitmap_bit_p (wont_exit, 0))
1409 {
1410 if (to_remove)
1411 to_remove->safe_push (orig);
1412 set_zero_probability (orig);
1413
1414 /* Scale the frequencies of the blocks dominated by the exit. */
1415 if (bbs_to_scale)
1416 {
1417 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1418 {
1419 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1420 REG_BR_PROB_BASE);
1421 }
1422 }
1423 }
1424
1425 /* Update the original loop. */
1426 if (!is_latch)
1427 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1428 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1429 {
1430 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1431 free (scale_step);
1432 }
1433
1434 /* Update dominators of outer blocks if affected. */
1435 for (i = 0; i < n; i++)
1436 {
1437 basic_block dominated, dom_bb;
1438 vec<basic_block> dom_bbs;
1439 unsigned j;
1440
1441 bb = bbs[i];
1442 bb->aux = 0;
1443
1444 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1445 FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1446 {
1447 if (flow_bb_inside_loop_p (loop, dominated))
1448 continue;
1449 dom_bb = nearest_common_dominator (
1450 CDI_DOMINATORS, first_active[i], first_active_latch);
1451 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1452 }
1453 dom_bbs.release ();
1454 }
1455 free (first_active);
1456
1457 free (bbs);
1458 BITMAP_FREE (bbs_to_scale);
1459
1460 return true;
1461 }
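/* A usage sketch (not from the GCC sources) of driving this function
   through the latch edge to unroll LOOP N_UNROLL times, in the style of
   the unrolling passes.  EXIT is the exit edge found by the caller's
   own analysis; all names are illustrative.  */
#if 0
static bool
example_unroll (struct loop *loop, unsigned n_unroll, edge exit)
{
  sbitmap wont_exit = sbitmap_alloc (n_unroll + 1);
  vec<edge> to_remove = vNULL;
  unsigned i;
  edge e;
  bool ok;

  /* Assume the caller's analysis showed that the exit can only be taken
     from the original body, so every copy gets its wont_exit bit set.  */
  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, 0);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      n_unroll, wont_exit, exit,
                                      &to_remove, DLTHE_FLAG_UPDATE_FREQ);
  sbitmap_free (wont_exit);
  if (!ok)
    return false;

  /* The recorded copies of EXIT can no longer be taken; remove the
     paths behind them.  */
  FOR_EACH_VEC_ELT (to_remove, i, e)
    {
      bool removed = remove_path (e);
      gcc_assert (removed);
    }
  to_remove.release ();
  return true;
}
#endif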
1462
1463 /* A callback for make_forwarder_block, to redirect all edges except for
1464 MFB_KJ_EDGE to the entry part. E is the edge for which we should decide
1465 whether to redirect it. */
1466
1467 edge mfb_kj_edge;
1468 bool
1469 mfb_keep_just (edge e)
1470 {
1471 return e != mfb_kj_edge;
1472 }
1473
1474 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1475
1476 static bool
1477 has_preds_from_loop (basic_block block, struct loop *loop)
1478 {
1479 edge e;
1480 edge_iterator ei;
1481
1482 FOR_EACH_EDGE (e, ei, block->preds)
1483 if (e->src->loop_father == loop)
1484 return true;
1485 return false;
1486 }
1487
1488 /* Creates a pre-header for LOOP. Returns the newly created block. Unless
1489 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1490 entry; otherwise we also force preheader block to have only one successor.
1491 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1492 to be a fallthru predecessor to the loop header and to have only
1493 predecessors from outside of the loop.
1494 The function also updates dominators. */
1495
1496 basic_block
1497 create_preheader (struct loop *loop, int flags)
1498 {
1499 edge e, fallthru;
1500 basic_block dummy;
1501 int nentry = 0;
1502 bool irred = false;
1503 bool latch_edge_was_fallthru;
1504 edge one_succ_pred = NULL, single_entry = NULL;
1505 edge_iterator ei;
1506
1507 FOR_EACH_EDGE (e, ei, loop->header->preds)
1508 {
1509 if (e->src == loop->latch)
1510 continue;
1511 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1512 nentry++;
1513 single_entry = e;
1514 if (single_succ_p (e->src))
1515 one_succ_pred = e;
1516 }
1517 gcc_assert (nentry);
1518 if (nentry == 1)
1519 {
1520 bool need_forwarder_block = false;
1521
1522 /* We do not allow entry block to be the loop preheader, since we
1523 cannot emit code there. */
1524 if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1525 need_forwarder_block = true;
1526 else
1527 {
1528 /* If we want simple preheaders, also force the preheader to have
1529 just a single successor. */
1530 if ((flags & CP_SIMPLE_PREHEADERS)
1531 && !single_succ_p (single_entry->src))
1532 need_forwarder_block = true;
1533 /* If we want fallthru preheaders, also create a forwarder block when
1534 the preheader ends with a jump or has predecessors from the loop. */
1535 else if ((flags & CP_FALLTHRU_PREHEADERS)
1536 && (JUMP_P (BB_END (single_entry->src))
1537 || has_preds_from_loop (single_entry->src, loop)))
1538 need_forwarder_block = true;
1539 }
1540 if (! need_forwarder_block)
1541 return NULL;
1542 }
1543
1544 mfb_kj_edge = loop_latch_edge (loop);
1545 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1546 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1547 dummy = fallthru->src;
1548 loop->header = fallthru->dest;
1549
1550 /* Try to be clever in placing the newly created preheader. The idea is to
1551 avoid breaking any "fallthruness" relationship between blocks.
1552
1553 The preheader was created just before the header and all incoming edges
1554 to the header were redirected to the preheader, except the latch edge.
1555 So the only problematic case is when this latch edge was a fallthru
1556 edge: it is not one anymore after the preheader creation, so we have broken
1557 the fallthruness. We're therefore going to look for a better place. */
1558 if (latch_edge_was_fallthru)
1559 {
1560 if (one_succ_pred)
1561 e = one_succ_pred;
1562 else
1563 e = EDGE_PRED (dummy, 0);
1564
1565 move_block_after (dummy, e->src);
1566 }
1567
1568 if (irred)
1569 {
1570 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1571 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1572 }
1573
1574 if (dump_file)
1575 fprintf (dump_file, "Created preheader block for loop %i\n",
1576 loop->num);
1577
1578 if (flags & CP_FALLTHRU_PREHEADERS)
1579 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1580 && !JUMP_P (BB_END (dummy)));
1581
1582 return dummy;
1583 }
1584
1585 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1586
1587 void
1588 create_preheaders (int flags)
1589 {
1590 struct loop *loop;
1591
1592 if (!current_loops)
1593 return;
1594
1595 FOR_EACH_LOOP (loop, 0)
1596 create_preheader (loop, flags);
1597 loops_state_set (LOOPS_HAVE_PREHEADERS);
1598 }
1599
1600 /* Forces all loop latches to have only a single successor. */
1601
1602 void
1603 force_single_succ_latches (void)
1604 {
1605 struct loop *loop;
1606 edge e;
1607
1608 FOR_EACH_LOOP (loop, 0)
1609 {
1610 if (loop->latch != loop->header && single_succ_p (loop->latch))
1611 continue;
1612
1613 e = find_edge (loop->latch, loop->header);
1614 gcc_checking_assert (e != NULL);
1615
1616 split_edge (e);
1617 }
1618 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1619 }
1620
1621 /* This function is called from loop_version. It splits the entry edge
1622 of the loop we want to version, adds the versioning condition, and
1623 adjusts the edges to the two versions of the loop appropriately.
1624 E is the incoming edge. Returns the basic block containing the
1625 condition.
1626
1627 --- edge e ---- > [second_head]
1628
1629 Split it and insert new conditional expression and adjust edges.
1630
1631 --- edge e ---> [cond expr] ---> [first_head]
1632 |
1633 +---------> [second_head]
1634
1635 THEN_PROB is the probability of then branch of the condition. */
1636
1637 static basic_block
1638 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1639 edge e, void *cond_expr, unsigned then_prob)
1640 {
1641 basic_block new_head = NULL;
1642 edge e1;
1643
1644 gcc_assert (e->dest == second_head);
1645
1646 /* Split edge 'e'. This will create a new basic block, where we can
1647 insert conditional expr. */
1648 new_head = split_edge (e);
1649
1650 lv_add_condition_to_bb (first_head, second_head, new_head,
1651 cond_expr);
1652
1653 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1654 e = single_succ_edge (new_head);
1655 e1 = make_edge (new_head, first_head,
1656 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1657 e1->probability = then_prob;
1658 e->probability = REG_BR_PROB_BASE - then_prob;
1659 e1->count = apply_probability (e->count, e1->probability);
1660 e->count = apply_probability (e->count, e->probability);
1661
1662 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1663 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1664
1665 /* Adjust loop header phi nodes. */
1666 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1667
1668 return new_head;
1669 }
1670
1671 /* Main entry point for Loop Versioning transformation.
1672
1673 This transformation, given a condition and a loop, creates
1674 if (condition) { loop_copy1 } else { loop_copy2 },
1675 where loop_copy1 is the loop transformed in one way, and loop_copy2
1676 is the loop transformed in another way (or unchanged). 'condition'
1677 may be a run time test for things that were not resolved by static
1678 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1679
1680 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1681 is the ratio by which the frequencies in the original loop should
1682 be scaled. ELSE_SCALE is the ratio by which the frequencies in the
1683 new loop should be scaled.
1684
1685 If PLACE_AFTER is true, we place the new loop after LOOP in the
1686 instruction stream, otherwise it is placed before LOOP. */
1687
1688 struct loop *
1689 loop_version (struct loop *loop,
1690 void *cond_expr, basic_block *condition_bb,
1691 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1692 bool place_after)
1693 {
1694 basic_block first_head, second_head;
1695 edge entry, latch_edge, true_edge, false_edge;
1696 int irred_flag;
1697 struct loop *nloop;
1698 basic_block cond_bb;
1699
1700 /* Record the entry and latch edges for the loop. */
1701 entry = loop_preheader_edge (loop);
1702 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1703 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1704
1705 /* Note down head of loop as first_head. */
1706 first_head = entry->dest;
1707
1708 /* Duplicate loop. */
1709 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1710 NULL, NULL, NULL, 0))
1711 {
1712 entry->flags |= irred_flag;
1713 return NULL;
1714 }
1715
1716 /* After duplication the entry edge now points to the new loop head block.
1717 Note down the new head as second_head. */
1718 second_head = entry->dest;
1719
1720 /* Split loop entry edge and insert new block with cond expr. */
1721 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1722 entry, cond_expr, then_prob);
1723 if (condition_bb)
1724 *condition_bb = cond_bb;
1725
1726 if (!cond_bb)
1727 {
1728 entry->flags |= irred_flag;
1729 return NULL;
1730 }
1731
1732 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1733
1734 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1735 nloop = loopify (latch_edge,
1736 single_pred_edge (get_bb_copy (loop->header)),
1737 cond_bb, true_edge, false_edge,
1738 false /* Do not redirect all edges. */,
1739 then_scale, else_scale);
1740
1741 copy_loop_info (loop, nloop);
1742
1743 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1744 lv_flush_pending_stmts (latch_edge);
1745
1746 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1747 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1748 lv_flush_pending_stmts (false_edge);
1749 /* Adjust irreducible flag. */
1750 if (irred_flag)
1751 {
1752 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1753 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1754 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1755 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1756 }
1757
1758 if (place_after)
1759 {
1760 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1761 unsigned i;
1762
1763 after = loop->latch;
1764
1765 for (i = 0; i < nloop->num_nodes; i++)
1766 {
1767 move_block_after (bbs[i], after);
1768 after = bbs[i];
1769 }
1770 free (bbs);
1771 }
1772
1773 /* At this point condition_bb is the loop preheader with two successors,
1774 first_head and second_head. Make sure that each loop's preheader has only
1775 one successor. */
1776 split_edge (loop_preheader_edge (loop));
1777 split_edge (loop_preheader_edge (nloop));
1778
1779 return nloop;
1780 }
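/* A usage sketch (not from the GCC sources): version LOOP under a
   run-time test COND built by the caller, in the style of the
   vectorizer's versioning.  THEN_PROB is the estimated probability
   that the test succeeds; names are illustrative.  */
#if 0
static struct loop *
example_version_loop (struct loop *loop, tree cond, unsigned then_prob)
{
  basic_block condition_bb;

  return loop_version (loop, cond, &condition_bb,
                       then_prob,                    /* then edge prob  */
                       then_prob,                    /* scale original  */
                       REG_BR_PROB_BASE - then_prob, /* scale the copy  */
                       true /* place the copy after LOOP  */);
}
#endif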