/* gcc/tree-ssa-loop-ch.cc — "loop-ch improvements, part 5"
   (retrieved from the git.ipfire.org mirror of thirdparty/gcc.git).  */
/* Loop header copying on trees.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-inline.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "value-range.h"
#include "gimple-range.h"
#include "gimple-range-path.h"
#include "gimple-pretty-print.h"
#include "cfganal.h"
43
44 /* Return path query insteance for testing ranges of statements
45 in headers of LOOP contained in basic block BB.
46 Use RANGER instance. */
47
48 static path_range_query *
49 get_range_query (class loop *loop,
50 basic_block bb,
51 gimple_ranger &ranger)
52 {
53 auto_vec<basic_block, 8> path;
54 for (; bb != loop->header; bb = single_pred_edge (bb)->src)
55 path.safe_push (bb);
56 path.safe_push (loop->header);
57 path.safe_push (loop_preheader_edge (loop)->src);
58 return new path_range_query (ranger, path);
59 }
60
/* If BB ends in a conditional that exits loop L and whose outcome is
   statically determined in the first iteration of the loop, return the
   corresponding exit edge (which is then known not to be taken on the
   first iteration); return NULL otherwise.
   Formulate the correct path query to RANGER, caching it in QUERY.  */

static edge
static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
		  path_range_query *&query)
{
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  edge ret_e;

  if (!last)
    return NULL;

  edge true_e, false_e;
  extract_true_false_edges_from_block (bb, &true_e, &false_e);

  /* If neither edge is the exit edge, this is not a case we'd like to
     special-case.  */
  if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
    return NULL;

  /* The exit edge is avoided in the first iteration precisely when the
     condition evaluates to the opposite boolean value.  */
  int_range<1> desired_static_range;
  if (loop_exit_edge_p (l, true_e))
    {
      desired_static_range = range_false ();
      ret_e = true_e;
    }
  else
    {
      desired_static_range = range_true ();
      ret_e = false_e;
    }

  /* Lazily build the path query for the header chain containing LAST.  */
  if (!query)
    query = get_range_query (l, gimple_bb (last), ranger);

  int_range<2> r;
  if (!query->range_of_stmt (r, last))
    return NULL;
  return r == desired_static_range ? ret_e : NULL;
}
103
/* Return true if STMT is static in LOOP.  This means that its value
   is constant in the first iteration.
   Use RANGER and formulate query cached in QUERY.  */

static bool
loop_static_stmt_p (class loop *loop,
		    gimple_ranger &ranger,
		    path_range_query *&query,
		    gimple *stmt)
{
  /* Only statements producing a value of a type ranger understands
     can be analyzed.  */
  tree type = gimple_range_type (stmt);
  if (!type || !Value_Range::supports_type_p (type))
    return false;

  if (!query)
    query = get_range_query (loop, gimple_bb (stmt), ranger);

  Value_Range r (gimple_range_type (stmt));
  if (!query->range_of_stmt (r, stmt))
    return false;
  /* A singleton range means the value is a known constant on the
     first-iteration path.  */
  return r.singleton_p ();
}
126
127 /* Return true if OP is invariant. */
128
129 static bool
130 loop_invariant_op_p (class loop *loop,
131 tree op)
132 {
133 if (is_gimple_min_invariant (op))
134 return true;
135 if (SSA_NAME_IS_DEFAULT_DEF (op)
136 || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
137 return true;
138 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
139 }
140
/* Return true if OP is static in LOOP, i.e. its value is constant
   in the first iteration (recorded as bit 1 of the defining
   statement's UID by should_duplicate_loop_header_p).  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
154
155
/* Return true if OP combines outcome of static and
   loop invariant conditional (recorded as bit 2 of the defining
   statement's UID by should_duplicate_loop_header_p).  */

static bool
loop_combined_static_and_iv_p (class loop *loop,
			       tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
}
170
/* Decision about possibility of copying a given header.  Ordered so
   that everything >= ch_win is a definite win.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can copy it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
187
/* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
   instructions should be duplicated, limit is decreased by the actual
   amount.  Loop exits proved invariant are recorded in INVARIANT_EXITS
   and exits statically determined in the first iteration in STATIC_EXITS
   (either set may be NULL).

   As a side effect, encodes a classification of each analyzed statement
   in its gimple UID: bit 0 = loop invariant, bit 1 = static (constant in
   the first iteration), bit 2 = combines static and invariant operands.
   These bits are consumed by loop_invariant_op_p, loop_static_op_p and
   loop_combined_static_and_iv_p.  */

static ch_decision
should_duplicate_loop_header_p (basic_block header, class loop *loop,
				gimple_ranger *ranger,
				int *limit,
				hash_set <edge> *invariant_exits,
				hash_set <edge> *static_exits)
{
  gimple_stmt_iterator bsi;

  gcc_assert (!header->aux);

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Not duplicating bb %i: it is single succ.\n",
		 header->index);
      return ch_impossible;
    }

  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Not duplicating bb %i: both successors are in loop.\n",
		 loop->num);
      return ch_impossible;
    }

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Not duplicating bb %i: it has mutiple predecestors.\n",
		 header->index);
      return ch_impossible;
    }

  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header));
  if (!last)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Not duplicating bb %i: it does not end by conditional.\n",
		 header->index);
      return ch_impossible;
    }

  /* QUERY is built lazily by the helpers; we own it and must delete it
     on every return path below.  */
  path_range_query *query = NULL;

  /* Classify PHI results: bit 1 marks PHIs whose value is constant in
     the first iteration.  */
  for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
       gsi_next (&psi))
    if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
      {
	bool static_p = loop_static_stmt_p (loop, *ranger,
					    query, psi.phi ());
	gimple_set_uid (psi.phi (), static_p ? 2 : 0);
      }
  bool code_size_cost = false;

  /* Count number of instructions and punt on calls.
     Populate stmts INV flag to later apply heuristics to the
     kind of conditions we want to copy.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
	continue;

      if (is_gimple_debug (last))
	continue;

      /* The terminating conditional is analyzed separately below.  */
      if (gimple_code (last) == GIMPLE_COND)
	break;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " Analyzing: ");
	  print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
	}

      if (gimple_code (last) == GIMPLE_CALL
	  && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
	      /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
		 at current loop's header.  Don't copy in this case.  */
	      || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     " Not duplicating bb %i: it contains call.\n",
		     header->index);
	  if (query)
	    delete query;
	  return ch_impossible;
	}

      /* Classify whether the stmt is invariant in the loop.  Only pure
	 computations (no memory reads, no asm, only const calls) can be
	 invariant or static.  */
      gimple_set_uid (last, 0);
      if (!gimple_vuse (last)
	  && gimple_code (last) != GIMPLE_ASM
	  && (gimple_code (last) != GIMPLE_CALL
	      || gimple_call_flags (last) & ECF_CONST))
	{
	  bool inv = true, static_p = false;
	  ssa_op_iter i;
	  tree op;
	  FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
	    if (!loop_invariant_op_p (loop, op))
	      inv = false;
	  /* Assume that code is reasonably optimized and invariant
	     constants are already identified.  */
	  if (!inv)
	    static_p = loop_static_stmt_p (loop, *ranger, query, last);
	  gimple_set_uid (last, (inv ? 1 : 0) | (static_p ? 2 : 0));
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      if (inv)
		fprintf (dump_file, " Stmt is loop invariant\n");
	      if (static_p)
		fprintf (dump_file, " Stmt is static "
			 "(constant in the first iteration)\n");
	    }
	  /* Loop invariants will be optimized out in loop body after
	     duplication; do not account invariant computation in code
	     size costs.

	     Similarly static computations will be optimized out in the
	     duplicated header.  */
	  if (inv || static_p)
	    continue;

	  /* Match the following:
	     _1 = i_1 < 10   <- static condition
	     _2 = n != 0     <- loop invariant condition
	     _3 = _1 & _2    <- combined static and iv statement.  */
	  tree_code code;
	  if (gimple_code (last) == GIMPLE_ASSIGN
	      && ((code = gimple_assign_rhs_code (last)) == BIT_AND_EXPR
		  || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
	    {
	      tree op1 = gimple_assign_rhs1 (last);
	      tree op2 = gimple_assign_rhs2 (last);

	      if ((loop_invariant_op_p (loop, op1)
		   || loop_combined_static_and_iv_p (loop, op1)
		   || loop_static_op_p (loop, op1))
		  && (loop_invariant_op_p (loop, op2)
		      || loop_combined_static_and_iv_p (loop, op2)
		      || loop_static_op_p (loop, op2)))
		{
		  /* Duplicating loop header with combined conditional will
		     remove this statement in each copy.  But we account for
		     that later when seeing that condition.

		     Note that this may be overly optimistic for bit
		     operations where the static parameter may still result
		     in non-trivial bit operation.  */
		  gimple_set_uid (last, 4);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     " Stmt combines static and invariant op\n");
		  continue;
		}
	    }
	}

      int insns = estimate_num_insns (last, &eni_size_weights);
      *limit -= insns;
      if (insns)
	code_size_cost = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Acconting stmt as %i insns\n", insns);
      if (*limit < 0)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     " Not duplicating bb %i contains too many insns.\n",
		     header->index);
	  if (query)
	    delete query;
	  return ch_impossible;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " Analyzing: ");
      print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
    }

  /* If the condition tests a non-IV loop variant we do not want to rotate
     the loop further.  Unless this is the original loop header.  */
  tree lhs = gimple_cond_lhs (last);
  tree rhs = gimple_cond_rhs (last);
  bool lhs_invariant = loop_invariant_op_p (loop, lhs);
  bool rhs_invariant = loop_invariant_op_p (loop, rhs);

  /* Combined conditional is a result of if combining:

     _1 = i_1 < 10    <- static condition
     _2 = n != 0      <- loop invariant condition
     _3 = _1 & _2     <- combined static and iv statement
     if (_3 != 0)     <- combined conditional

     Combined conditionals will not be optimized out in either copy.
     However duplicated header simplifies to:

     if (n < 10)

     while loop body to

     if (i_1 < 10)

     So effectively the resulting code sequence will be of same length as
     the original code.

     Combined conditionals are never static or invariant, so save some work
     below.  */
  if (lhs_invariant != rhs_invariant
      && (lhs_invariant
	  || loop_combined_static_and_iv_p (loop, lhs))
      && (rhs_invariant
	  || loop_combined_static_and_iv_p (loop, rhs)))
    {
      if (query)
	delete query;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Conditional combines static and invariant op.\n");
      return ch_win;
    }

  edge static_exit = static_loop_exit (loop, header, *ranger, query);
  if (query)
    delete query;

  if (static_exit && static_exits)
    {
      static_exits->add (static_exit);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Will eliminate peeled conditional in bb %d.\n",
		 static_exit->src->index);
      /* Still look for invariant exits; exit may be both.  */
    }
  if (lhs_invariant && rhs_invariant)
    {
      if (invariant_exits)
	{
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, header->succs)
	    if (loop_exit_edge_p (loop, e))
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  fprintf (dump_file,
			   " Will elliminate invariant exit %i->%i\n",
			   e->src->index, e->dest->index);
		invariant_exits->add (e);
	      }
	}
      return ch_win_invariant_exit;
    }

  /* If the static exit fully optimizes out, it is a win to "duplicate"
     it.

     TODO: Even if duplication costs some size we may opt to do so in case
     exit probability is significant enough (do partial peeling).  */
  if (static_exit)
    return code_size_cost ? ch_possible_zero_cost : ch_win;

  /* We were not able to prove that the conditional will be eliminated;
     account its size against the budget.  */
  int insns = estimate_num_insns (last, &eni_size_weights);
  *limit -= insns;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     " Acconting stmt as %i insns\n", insns);
  if (*limit < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " Not duplicating bb %i contains too many insns.\n",
		 header->index);
      return ch_impossible;
    }

  return ch_possible;
}
486
487 /* Checks whether LOOP is a do-while style loop. */
488
489 static bool
490 do_while_loop_p (class loop *loop)
491 {
492 gimple *stmt = last_nondebug_stmt (loop->latch);
493
494 /* If the latch of the loop is not empty, it is not a do-while loop. */
495 if (stmt
496 && gimple_code (stmt) != GIMPLE_LABEL)
497 {
498 if (dump_file && (dump_flags & TDF_DETAILS))
499 fprintf (dump_file,
500 "Loop %i is not do-while loop: latch is not empty.\n",
501 loop->num);
502 return false;
503 }
504
505 /* If the latch does not have a single predecessor, it is not a
506 do-while loop. */
507 if (!single_pred_p (loop->latch))
508 {
509 if (dump_file && (dump_flags & TDF_DETAILS))
510 fprintf (dump_file,
511 "Loop %i is not do-while loop: latch has multiple "
512 "predecessors.\n", loop->num);
513 return false;
514 }
515 basic_block pred = single_pred (loop->latch);
516
517 /* If the latch predecessor doesn't exit the loop, it is not a
518 do-while loop. */
519 if (!loop_exits_from_bb_p (loop, pred))
520 {
521 if (dump_file && (dump_flags & TDF_DETAILS))
522 fprintf (dump_file,
523 "Loop %i is not do-while loop: latch predecessor "
524 "does not exit loop.\n", loop->num);
525 return false;
526 }
527 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred));
528 if (last
529 && (gimple_cond_lhs (last) == boolean_false_node
530 || gimple_cond_lhs (last) == boolean_true_node)
531 && gimple_cond_rhs (last) == boolean_false_node)
532 {
533 if (dump_file && (dump_flags & TDF_DETAILS))
534 fprintf (dump_file,
535 "Loop %i is not do-while loop: latch predecessor "
536 "contains exit we optimized out.\n", loop->num);
537 return false;
538 }
539
540 if (dump_file && (dump_flags & TDF_DETAILS))
541 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);
542
543 return true;
544 }
545
/* Update profile after header copying of LOOP.
   REGION is the original (in loop) sequence, REGION_COPY is the
   duplicated header (now outside of loop).  N_REGION is number of
   bbs duplicated.
   STATIC_EXITS are exit edges statically determined in the first
   iteration; their duplicated conditionals are folded away.
   INVARIANT_EXITS are edges in the loop body to be eliminated
   since they are loop invariants.
   ENTRY_COUNT is the profile count entering the duplicated region.

   So we expect the following:

   // region_copy_start entry will be scaled to entry_count
     if (cond1)        <- this condition will become false
			  and we update probabilities
       goto loop_exit;
     if (cond2)        <- this condition is loop invariant
       goto loop_exit;
     goto loop_header  <- this will be redirected to loop.
   // region_copy_end
   loop:
     <body>
   // region start
   loop_header:
     if (cond1)        <- we need to update probability here
       goto loop_exit;
     if (cond2)        <- and determine scaling factor here.
			  moreover cond2 is now always true
       goto loop_exit;
     else
       goto loop;
   // region end

   Adding support for more exits can be done similarly,
   but only consumer so far is tree-ssa-loop-ch and it uses only this
   to handle the common case of peeling headers which have
   conditionals known to be always true upon entry.  */

static void
update_profile_after_ch (class loop *loop,
			 basic_block *region, basic_block *region_copy,
			 unsigned n_region,
			 hash_set <edge> *invariant_exits,
			 hash_set <edge> *static_exits,
			 profile_count entry_count)
{
  for (unsigned int i = 0; i < n_region; i++)
    {
      edge exit_e, exit_e_copy, e, e_copy;
      /* Single-successor blocks (split edges) just pass the count
	 through.  */
      if (EDGE_COUNT (region[i]->succs) == 1)
	{
	  region_copy[i]->count = entry_count;
	  region[i]->count -= entry_count;
	  continue;
	}

      gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
      /* Identify the exit edge and the edge continuing the header
	 chain, in both the original and the copy.  */
      if (loop_exit_edge_p (loop,
			    EDGE_SUCC (region[i], 0)))
	{
	  exit_e = EDGE_SUCC (region[i], 0);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 0);
	  e = EDGE_SUCC (region[i], 1);
	  e_copy = EDGE_SUCC (region_copy[i], 1);
	}
      else
	{
	  exit_e = EDGE_SUCC (region[i], 1);
	  exit_e_copy = EDGE_SUCC (region_copy[i], 1);
	  e = EDGE_SUCC (region[i], 0);
	  e_copy = EDGE_SUCC (region_copy[i], 0);
	}
      gcc_assert (i == n_region - 1
		  || (e->dest == region[i + 1]
		      && e_copy->dest == region_copy[i + 1]));
      region_copy[i]->count = entry_count;
      profile_count exit_e_count = exit_e->count ();
      bool was_static = false;
      if (static_exits->contains (exit_e))
	{
	  /* Update profile and the conditional.
	     CFG update is done by caller.  */
	  static_exits->remove (exit_e);
	  was_static = true;
	  /* The copied exit is statically never taken; fold the copied
	     conditional accordingly.  */
	  e_copy->probability = profile_probability::always ();
	  exit_e_copy->probability = profile_probability::never ();
	  gcond *cond_stmt
	    = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
	  if (e_copy->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  /* Header copying is a special case of jump threading, so use
	     common code to update loop body exit condition.  */
	  update_bb_profile_for_threading (region[i], entry_count, e);
	}
      else
	region[i]->count -= region_copy[i]->count;
      if (invariant_exits->contains (exit_e))
	{
	  invariant_exits->remove (exit_e);
	  /* All exits will happen in exit_e_copy which is out of the
	     loop, so increase probability accordingly.
	     If the edge is eliminated_edge we already corrected
	     profile above.  */
	  if (entry_count.nonzero_p () && !was_static)
	    set_edge_probability_and_rescale_others
	      (exit_e_copy, exit_e_count.probability_in
	       (entry_count));
	  /* Eliminate in-loop conditional.  */
	  e->probability = profile_probability::always ();
	  exit_e->probability = profile_probability::never ();
	  gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
	  if (e->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	}
      /* The count flowing into the next block of the chain.  */
      entry_count = e_copy->count ();
    }
  /* Be sure that we have seen all invariant exit edges we were supposed
     to update.  We may have recorded some static exits we decided not to
     duplicate.  */
  gcc_checking_assert (invariant_exits->is_empty ());
}
670
671 namespace {
672
/* Common superclass for both header-copying phases.  */
class ch_base : public gimple_opt_pass
{
 protected:
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.
     Shared worker of both passes; returns TODO_* flags.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
};
687
/* Pass descriptor for the early header-copying pass ("ch").  */
const pass_data pass_data_ch =
{
  GIMPLE_PASS, /* type */
  "ch", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
700
/* The regular header-copying pass, run early in the loop pipeline.  */
class pass_ch : public ch_base
{
 public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers inbetween.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

 protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch
720
/* Pass descriptor for the pre-vectorization header-copying pass
   ("ch_vect").  */
const pass_data pass_data_ch_vect =
{
  GIMPLE_PASS, /* type */
  "ch_vect", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
733
/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  */
class pass_ch_vect : public ch_base
{
 public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

 protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
758
759 /* For all loops, copy the condition at the end of the loop body in front
760 of the loop. This is beneficial since it increases efficiency of
761 code motion optimizations. It also saves one jump on entry to the loop. */
762
763 unsigned int
764 ch_base::copy_headers (function *fun)
765 {
766 basic_block *bbs, *copied_bbs;
767 unsigned bbs_size;
768 bool changed = false;
769
770 if (number_of_loops (fun) <= 1)
771 return 0;
772
773 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
774 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
775 bbs_size = n_basic_blocks_for_fn (fun);
776
777 struct candidate
778 {
779 class loop *loop;
780 unsigned int nheaders;
781 hash_set <edge> *invariant_exits, *static_exits;
782 };
783 auto_vec<struct candidate> candidates;
784 auto_vec<std::pair<edge, loop_p> > copied;
785 auto_vec<class loop *> loops_to_unloop;
786 auto_vec<int> loops_to_unloop_nunroll;
787
788 mark_dfs_back_edges ();
789 gimple_ranger *ranger = new gimple_ranger;
790 for (auto loop : loops_list (cfun, 0))
791 {
792 int initial_limit = optimize_loop_for_speed_p (loop)
793 ? param_max_loop_header_insns : 0;
794 int remaining_limit = initial_limit;
795 if (dump_file && (dump_flags & TDF_DETAILS))
796 fprintf (dump_file,
797 "Analyzing loop %i\n", loop->num);
798
799 basic_block header = loop->header;
800 if (!get_max_loop_iterations_int (loop))
801 {
802 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Loop %d never loops.\n", loop->num);
804 scale_loop_profile (loop, profile_probability::always (), 0);
805 loops_to_unloop.safe_push (loop);
806 loops_to_unloop_nunroll.safe_push (0);
807 continue;
808 }
809
810 /* If the loop is already a do-while style one (either because it was
811 written as such, or because jump threading transformed it into one),
812 we might be in fact peeling the first iteration of the loop. This
813 in general is not a good idea. Also avoid touching infinite loops. */
814 if (!loop_has_exit_edges (loop)
815 || !process_loop_p (loop))
816 continue;
817
818 /* Iterate the header copying up to limit; this takes care of the cases
819 like while (a && b) {...}, where we want to have both of the conditions
820 copied. TODO -- handle while (a || b) - like cases, by not requiring
821 the header to have just a single successor and copying up to
822 postdominator. */
823 int nheaders = 0;
824 int last_win_nheaders = 0;
825 bool last_win_invariant_exit = false;
826 ch_decision ret;
827 hash_set <edge> *invariant_exits = new hash_set <edge>;
828 hash_set <edge> *static_exits = new hash_set <edge>;
829 while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
830 &remaining_limit,
831 invariant_exits,
832 static_exits))
833 != ch_impossible)
834 {
835 nheaders++;
836 if (ret >= ch_win)
837 {
838 last_win_nheaders = nheaders;
839 last_win_invariant_exit = (ret == ch_win_invariant_exit);
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, " Duplicating bb %i is a win\n",
842 header->index);
843 }
844 /* Duplicate BB if has zero cost but be sure it will not
845 imply duplication of other BBs. */
846 else if (ret == ch_possible_zero_cost
847 && (last_win_nheaders == nheaders - 1
848 || (last_win_nheaders == nheaders - 2
849 && last_win_invariant_exit)))
850 {
851 last_win_nheaders = nheaders;
852 last_win_invariant_exit = false;
853 if (dump_file && (dump_flags & TDF_DETAILS))
854 fprintf (dump_file,
855 " Duplicating bb %i is a win; it has zero cost\n",
856 header->index);
857 }
858 else
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, " May duplicate bb %i\n", header->index);
861
862 /* Find a successor of header that is inside a loop; i.e. the new
863 header after the condition is copied. */
864 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
865 header = EDGE_SUCC (header, 0)->dest;
866 else
867 header = EDGE_SUCC (header, 1)->dest;
868 }
869
870 /* Try to turn loop into do-while. This means ensuring that
871 last duplicated header will not have loop invariant exit. */
872 if (last_win_nheaders && last_win_invariant_exit
873 && nheaders > last_win_nheaders)
874 {
875 last_win_nheaders++;
876 if (dump_file && (dump_flags & TDF_DETAILS))
877 fprintf (dump_file,
878 " Duplicating additional BB to obtain do-while loop\n");
879 }
880 else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
881 {
882 last_win_nheaders = 1;
883 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file,
885 " Duplicating header BB to obtain do-while loop\n");
886 }
887
888 if (last_win_nheaders)
889 candidates.safe_push ({loop, last_win_nheaders,
890 invariant_exits, static_exits});
891 else
892 {
893 delete invariant_exits;
894 delete static_exits;
895 }
896 }
897 /* Do not use ranger after we change the IL and not have updated SSA. */
898 delete ranger;
899
900 for (auto candidate : candidates)
901 {
902 class loop *loop = candidate.loop;
903 if (dump_file && (dump_flags & TDF_DETAILS))
904 fprintf (dump_file,
905 "Copying headers of loop %i\n", loop->num);
906
907 basic_block header = loop->header;
908 edge nonexit = NULL;
909 edge exit = NULL;
910 unsigned int n_bbs = 0;
911 int nexits = 0;
912 profile_count exit_count = profile_count::zero ();
913 profile_count entry_count = profile_count::zero ();
914 edge e;
915 edge_iterator ei;
916
917 FOR_EACH_EDGE (e, ei, loop->header->preds)
918 if (e->src != loop->latch)
919 entry_count += e->count ();
920 while (n_bbs < candidate.nheaders)
921 {
922 if (dump_file && (dump_flags & TDF_DETAILS))
923 fprintf (dump_file, " Will duplicate bb %i\n", header->index);
924 /* Find a successor of header that is inside a loop; i.e. the new
925 header after the condition is copied. */
926 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
927 {
928 nonexit = EDGE_SUCC (header, 0);
929 exit = EDGE_SUCC (header, 1);
930 }
931 else
932 {
933 nonexit = EDGE_SUCC (header, 1);
934 exit = EDGE_SUCC (header, 0);
935 }
936 exit_count += exit->count ();
937 nexits++;
938 bbs[n_bbs++] = header;
939 gcc_assert (bbs_size > n_bbs);
940 header = nonexit->dest;
941 }
942
943 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file,
945 "Duplicating header of the loop %d up to edge %d->%d\n",
946 loop->num, exit->src->index, exit->dest->index);
947
948 /* Ensure that the header will have just the latch as a predecessor
949 inside the loop. */
950 if (!single_pred_p (nonexit->dest))
951 {
952 header = split_edge (nonexit);
953 exit = single_pred_edge (header);
954 }
955
956 edge entry = loop_preheader_edge (loop);
957
958 propagate_threaded_block_debug_into (exit->dest, entry->dest);
959 if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
960 true))
961 {
962 delete candidate.static_exits;
963 delete candidate.invariant_exits;
964 if (dump_file && (dump_flags & TDF_DETAILS))
965 fprintf (dump_file, "Duplication failed.\n");
966 continue;
967 }
968 if (loop->header->count.initialized_p ())
969 update_profile_after_ch (loop, bbs, copied_bbs, n_bbs,
970 candidate.invariant_exits,
971 candidate.static_exits,
972 entry_count);
973 delete candidate.static_exits;
974 delete candidate.invariant_exits;
975 copied.safe_push (std::make_pair (entry, loop));
976
977 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
978 this copying can introduce a case where we rely on undefined
979 signed overflow to eliminate the preheader condition, because
980 we assume that "j < j + 10" is true. We don't want to warn
981 about that case for -Wstrict-overflow, because in general we
982 don't warn about overflow involving loops. Prevent the
983 warning by setting the no_warning flag in the condition. */
984 if (warn_strict_overflow > 0)
985 {
986 unsigned int i;
987
988 for (i = 0; i < n_bbs; ++i)
989 {
990 gimple_stmt_iterator bsi;
991
992 for (bsi = gsi_start_bb (copied_bbs[i]);
993 !gsi_end_p (bsi);
994 gsi_next (&bsi))
995 {
996 gimple *stmt = gsi_stmt (bsi);
997 if (gimple_code (stmt) == GIMPLE_COND)
998 {
999 tree lhs = gimple_cond_lhs (stmt);
1000 if (gimple_cond_code (stmt) != EQ_EXPR
1001 && gimple_cond_code (stmt) != NE_EXPR
1002 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1003 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
1004 suppress_warning (stmt, OPT_Wstrict_overflow_);
1005 }
1006 else if (is_gimple_assign (stmt))
1007 {
1008 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1009 tree rhs1 = gimple_assign_rhs1 (stmt);
1010 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
1011 && rhs_code != EQ_EXPR
1012 && rhs_code != NE_EXPR
1013 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1014 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
1015 suppress_warning (stmt, OPT_Wstrict_overflow_);
1016 }
1017 }
1018 }
1019 }
1020
1021 /* Update header of the loop. */
1022 loop->header = header;
1023 /* Find correct latch. We only duplicate chain of conditionals so
1024 there should be precisely two edges to the new header. One entry
1025 edge and one to latch. */
1026 FOR_EACH_EDGE (e, ei, loop->header->preds)
1027 if (header != e->src)
1028 {
1029 loop->latch = e->src;
1030 break;
1031 }
1032 /* Ensure that the latch is simple. */
1033 if (!single_succ_p (loop_latch_edge (loop)->src))
1034 split_edge (loop_latch_edge (loop));
1035
1036 if (dump_file && (dump_flags & TDF_DETAILS))
1037 {
1038 if (do_while_loop_p (loop))
1039 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
1040 else
1041 fprintf (dump_file, "Loop %d is still not do-while loop.\n",
1042 loop->num);
1043 fprintf (dump_file, "Exit count: ");
1044 exit_count.dump (dump_file);
1045 fprintf (dump_file, "\nEntry count: ");
1046 entry_count.dump (dump_file);
1047 fprintf (dump_file, "\n");
1048 }
1049
1050 /* We possibly decreased number of iterations by 1.  */
1051 auto_vec<edge> exits = get_loop_exit_edges (loop);
1052 bool precise = (nexits == (int) exits.length ());
1053 /* Check that loop may not terminate in other way than via
1054 basic blocks we duplicated. */
1055 if (precise)
1056 {
1057 basic_block *bbs = get_loop_body (loop);
1058 for (unsigned i = 0; i < loop->num_nodes && precise; ++i)
1059 {
1060 basic_block bb = bbs[i];
1061 bool found_exit = false;
1062 FOR_EACH_EDGE (e, ei, bb->succs)
1063 if (!flow_bb_inside_loop_p (loop, e->dest))
1064 {
1065 found_exit = true;
1066 break;
1067 }
1068 /* If BB has exit, it was duplicated. */
1069 if (found_exit)
1070 continue;
1071 /* Give up on irreducible loops. */
1072 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1073 {
1074 precise = false;
1075 break;
1076 }
1077 /* Check that inner loops are finite. */
1078 for (class loop *l = bb->loop_father; l != loop && precise;
1079 l = loop_outer (l))
1080 if (!l->finite_p)
1081 {
1082 precise = false;
1083 break;
1084 }
1085 /* Verify that there is no statement that may terminate
1086 execution in a way not visible to CFG. */
1087 for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
1088 !gsi_end_p (bsi); gsi_next (&bsi))
1089 if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
1090 precise = false;
1091 }
1092 free (bbs);
1093 }
1094 if (precise
1095 && get_max_loop_iterations_int (loop) == 1)
1096 {
1097 if (dump_file && (dump_flags & TDF_DETAILS))
1098 fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
1099 scale_loop_profile (loop, profile_probability::always (), 0);
1100 loops_to_unloop.safe_push (loop);
1101 loops_to_unloop_nunroll.safe_push (0);
1102 }
1103 else if (precise)
1104 {
1105 if (dump_file && (dump_flags & TDF_DETAILS))
1106 fprintf (dump_file,
1107 "Peeled all exits:"
1108 " decreased number of iterations of loop %d by 1.\n",
1109 loop->num);
1110 adjust_loop_info_after_peeling (loop, 1, true);
1111 }
1112 else if (exit_count >= entry_count.apply_scale (9, 10))
1113 {
1114 if (dump_file && (dump_flags & TDF_DETAILS))
1115 fprintf (dump_file,
1116 "Peeled likely exits: likely decreased number "
1117 "of iterations of loop %d by 1.\n", loop->num);
1118 adjust_loop_info_after_peeling (loop, 1, false);
1119 }
1120 else if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file,
1122 "Not decreased number"
1123 " of iterations of loop %d; likely exits remains.\n",
1124 loop->num);
1125
1126 changed = true;
1127 }
1128
1129 if (changed)
1130 {
1131 update_ssa (TODO_update_ssa);
1132 /* After updating SSA form perform CSE on the loop header
1133 copies. This is esp. required for the pass before
1134 vectorization since nothing cleans up copied exit tests
1135 that can now be simplified. CSE from the entry of the
1136 region we copied till all loop exit blocks but not
1137 entering the loop itself. */
1138 for (unsigned i = 0; i < copied.length (); ++i)
1139 {
1140 edge entry = copied[i].first;
1141 loop_p loop = copied[i].second;
1142 auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
1143 bitmap exit_bbs = BITMAP_ALLOC (NULL);
1144 for (unsigned j = 0; j < exit_edges.length (); ++j)
1145 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
1146 bitmap_set_bit (exit_bbs, loop->header->index);
1147 do_rpo_vn (cfun, entry, exit_bbs);
1148 BITMAP_FREE (exit_bbs);
1149 }
1150 }
1151 if (!loops_to_unloop.is_empty ())
1152 {
1153 bool irred_invalidated;
1154 unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, NULL, &irred_invalidated);
1155 changed = true;
1156 }
1157 free (bbs);
1158 free (copied_bbs);
1159
1160 return changed ? TODO_cleanup_cfg : 0;
1161 }
1162
1163 /* Initialize the loop structures we need, and finalize after. */
1164
1165 unsigned int
1166 pass_ch::execute (function *fun)
1167 {
1168 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1169 | LOOPS_HAVE_SIMPLE_LATCHES
1170 | LOOPS_HAVE_RECORDED_EXITS);
1171
1172 unsigned int res = copy_headers (fun);
1173
1174 loop_optimizer_finalize ();
1175 return res;
1176 }
1177
1178 /* Assume an earlier phase has already initialized all the loop structures that
1179 we need here (and perhaps others too), and that these will be finalized by
1180 a later phase. */
1181
1182 unsigned int
1183 pass_ch_vect::execute (function *fun)
1184 {
1185 return copy_headers (fun);
1186 }
1187
1188 /* Apply header copying according to a very simple test of do-while shape. */
1189
1190 bool
1191 pass_ch::process_loop_p (class loop *)
1192 {
1193 return true;
1194 }
1195
1196 /* Apply header-copying to loops where we might enable vectorization. */
1197
1198 bool
1199 pass_ch_vect::process_loop_p (class loop *loop)
1200 {
1201 if (!flag_tree_loop_vectorize && !loop->force_vectorize)
1202 return false;
1203
1204 if (loop->dont_vectorize)
1205 return false;
1206
1207 /* The vectorizer won't handle anything with multiple exits, so skip. */
1208 edge exit = single_exit (loop);
1209 if (!exit)
1210 return false;
1211
1212 if (!do_while_loop_p (loop))
1213 return true;
1214
1215 return false;
1216 }
1217
1218 } // anon namespace
1219
1220 gimple_opt_pass *
1221 make_pass_ch_vect (gcc::context *ctxt)
1222 {
1223 return new pass_ch_vect (ctxt);
1224 }
1225
1226 gimple_opt_pass *
1227 make_pass_ch (gcc::context *ctxt)
1228 {
1229 return new pass_ch (ctxt);
1230 }