/* Loop splitting.
   Copyright (C) 2015-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "tree-cfg.h"
#include "tree-ssa.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "gimple-iterator.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "print-tree.h"
#include "value-query.h"

/* This file implements two kinds of loop splitting.

   One transformation of loops like:

   for (i = 0; i < 100; i++)
     {
       if (i < 50)
         A;
       else
         B;
     }

   into:

   for (i = 0; i < 50; i++)
     {
       A;
     }
   for (; i < 100; i++)
     {
       B;
     }

   */

/* Return true when BB inside LOOP is a potential iteration space
   split point, i.e. ends with a condition like "IV < comp", which
   is true on one side of the iteration space and false on the other,
   and the split point can be computed.  If so, also return the border
   point in *BORDER and the comparison induction variable in IV.  */

static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
               enum tree_code *guard_code)
{
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return NULL_TREE;

  enum tree_code code = gimple_cond_code (stmt);

  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (stmt);
  tree op1 = gimple_cond_rhs (stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  */
  if (!integer_zerop (iv2.step))
    {
      std::swap (op0, op1);
      std::swap (*iv, iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, op0, op1);
      update_stmt (stmt);
    }
  else if (integer_zerop (iv->step))
    return NULL_TREE;
  if (!integer_zerop (iv2.step))
    return NULL_TREE;
  if (!iv->no_overflow)
    return NULL_TREE;

  /* Only handle relational comparisons; for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      break;
    case NE_EXPR:
    case EQ_EXPR:
      /* If the test checks for the first iteration, we can handle NE/EQ
         with only one split loop.  */
      if (operand_equal_p (iv->base, iv2.base, 0))
        {
          if (code == EQ_EXPR)
            code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
          else
            code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
          break;
        }
      /* Similarly when the test checks for the minimal or maximal
         value of the range.  */
      else
        {
          int_range<2> r;
          get_global_range_query ()->range_of_expr (r, op0, stmt);
          if (!r.varying_p () && !r.undefined_p ()
              && TREE_CODE (op1) == INTEGER_CST)
            {
              wide_int val = wi::to_wide (op1);
              if (known_eq (val, r.lower_bound ()))
                {
                  code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
                  break;
                }
              else if (known_eq (val, r.upper_bound ()))
                {
                  code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
                  break;
                }
            }
        }
      /* TODO: We can compare with the exit condition; it seems that testing
         for the last iteration is a common case.  */
      return NULL_TREE;
    default:
      return NULL_TREE;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (dump_file, " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (dump_file, " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  *border = iv2.base;
  *guard_code = code;
  return op0;
}
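
/* Editorial illustration (an assumed example, not part of the original
   sources): for a loop

     for (i = 0; i < 100; i++)
       if (i < 50)   // BB ends in this condition
         ...

   the LHS `i' is a simple affine IV {0, +, 1} while the RHS `50' has a
   zero step, so the function returns `i' with *BORDER = 50 and
   *GUARD_CODE = LT_EXPR.  */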

/* Given a GUARD conditional stmt inside LOOP, which we want to make always
   true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
   (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
   exit test statement to loop back only if the GUARD statement will
   also be true/false in the next iteration.  */

static void
patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
                 bool initial_true)
{
  edge exit = single_exit (loop);
  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  gimple_cond_set_condition (stmt, gimple_cond_code (guard),
                             nextval, newbound);
  update_stmt (stmt);

  edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);

  exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (initial_true)
    {
      exit->flags |= EDGE_FALSE_VALUE;
      stay->flags |= EDGE_TRUE_VALUE;
    }
  else
    {
      exit->flags |= EDGE_TRUE_VALUE;
      stay->flags |= EDGE_FALSE_VALUE;
    }
}

/* Given an induction variable GUARD_IV, and its affine descriptor IV,
   find the loop phi node in LOOP defining it directly, or create
   such a phi node.  Return that phi node.  */

static gphi *
find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
{
  gimple *def = SSA_NAME_DEF_STMT (guard_iv);
  gphi *phi;
  if ((phi = dyn_cast <gphi *> (def))
      && gimple_bb (phi) == loop->header)
    return phi;

  /* XXX Create the PHI instead.  */
  return NULL;
}

/* Returns true if the exit values of all loop phi nodes can be
   determined easily (i.e. that connect_loop_phis can determine them).  */

static bool
easy_exit_values (class loop *loop)
{
  edge exit = single_exit (loop);
  edge latch = loop_latch_edge (loop);
  gphi_iterator psi;

  /* Currently we regard the exit values as easy if they are the same
     as the value over the backedge.  Which is the case if the definition
     of the backedge value dominates the exit edge.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gphi *phi = psi.phi ();
      tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
      basic_block bb;
      if (TREE_CODE (next) == SSA_NAME
          && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
          && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
        return false;
    }

  return true;
}
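
/* Editorial illustration (an assumed example): in the usual bottom-test
   form GIMPLE loops take after loop-header copying,

     do { sum = sum + a[i]; i = i + 1; } while (i < n);

   the backedge values of the `sum' and `i' PHIs are defined in the body,
   which dominates the exit test, so their exit values simply equal the
   values over the backedge and easy_exit_values returns true.  */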

/* This function updates the SSA form after connect_loops made a new
   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
   conditional).  I.e. the second loop can now be entered either
   via the original entry or via NEW_E, so the entry values of LOOP2
   phi nodes are either the original ones or those at the exit
   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
   this.  The loops need to fulfill easy_exit_values().  */

static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (psi_first);
       gsi_next (&psi_first), gsi_next (&psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using the original variable as a base for the new ssa name.
         This is necessary for virtual ops, and useful in order to avoid
         losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
          && useless_type_conversion_p (TREE_TYPE (next),
                                        TREE_TYPE (init)))
        new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
               && useless_type_conversion_p (TREE_TYPE (init),
                                             TREE_TYPE (next)))
        new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
                                          TREE_TYPE (init)))
        new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
                                       "unrinittmp");
      else
        new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
                                       "unrinittmp");

      gphi *newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}
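
/* Editorial sketch (an assumed example): if LOOP1's header has
   i_1 = PHI <0(preheader), i_2(latch)>, this creates in LOOP2's
   pre-header block a node like

     i_5 = PHI <0(skip_first), i_2(new_e)>

   and rewrites LOOP2's entry PHI argument to i_5, so LOOP2 starts
   either from the original initial value or from where LOOP1
   stopped.  */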

/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
   they are still equivalent and placed in two arms of a diamond, like so:

              .------if (cond)------.
              v                     v
            pre1                  pre2
              |                     |
        .--->h1                    h2<----.
        |     |                     |     |
        |    ex1---.          .---ex2     |
        |    /     |          |      \    |
        '---l1     X          |      l2---'
                   |          |
                   |          |
                   '--->join<---'

   This function transforms the program such that LOOP1 is conditionally
   falling through to LOOP2, or skipping it.  This is done by splitting
   the ex1->join edge at X in the diagram above, and inserting a condition
   whose one arm goes to pre2, resulting in this situation:

              .------if (cond)------.
              v                     v
            pre1       .--------->pre2
              |        |            |
        .--->h1        |           h2<----.
        |     |        |            |     |
        |    ex1---.   |      .---ex2     |
        |    /     v   |      |      \    |
        '---l1   skip--'      |      l2---'
                   |          |
                   |          |
                   '--->join<---'


   The condition used is the exit condition of LOOP1, which effectively means
   that when the first loop exits (for whatever reason) but the real original
   exit expression is still false, the second loop will be entered.
   The function returns the new edge cond->pre2.

   This doesn't update the SSA form, see connect_loop_phis for that.  */

static edge
connect_loops (class loop *loop1, class loop *loop2)
{
  edge exit = single_exit (loop1);
  basic_block skip_bb = split_edge (exit);
  gcond *skip_stmt;
  gimple_stmt_iterator gsi;
  edge new_e, skip_e;

  gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
  skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
                                 gimple_cond_lhs (stmt),
                                 gimple_cond_rhs (stmt),
                                 NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (skip_bb);
  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);

  skip_e = EDGE_SUCC (skip_bb, 0);
  skip_e->flags &= ~EDGE_FALLTHRU;
  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      skip_e->flags |= EDGE_TRUE_VALUE;
      new_e->flags |= EDGE_FALSE_VALUE;
    }
  else
    {
      skip_e->flags |= EDGE_FALSE_VALUE;
      new_e->flags |= EDGE_TRUE_VALUE;
    }

  new_e->probability = profile_probability::very_likely ();
  skip_e->probability = new_e->probability.invert ();

  return new_e;
}

/* This returns the new bound for iterations given the original iteration
   space in NITER, an arbitrary new bound BORDER, assumed to be some
   comparison value with a different IV, the initial value GUARD_INIT of
   that other IV, and the comparison code GUARD_CODE that compares
   that other IV with BORDER.  We return an SSA name, and place any
   necessary statements for that computation into *STMTS.

   For example for such a loop:

     for (i = beg, j = guard_init; i < end; i++, j++)
       if (j < border)   // this is supposed to be true/false
         ...

   we want to return a new bound (on j) that makes the loop iterate
   as long as the condition j < border stays true.  We also don't want
   to iterate more often than the original loop, so we have to introduce
   some cut-off as well (via min/max), effectively resulting in:

     newend = min (end+guard_init-beg, border)
     for (i = beg, j = guard_init; j < newend; i++, j++)
       if (j < border)
         ...

   Depending on the direction of the IVs and if the exit tests
   are strict or non-strict we need to use MIN or MAX,
   and add or subtract 1.  This routine computes newend above.  */

static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
                         tree border,
                         enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV, we need
     the loop-enter base, so subtract STEP once.  */
  tree controlbase = force_gimple_operand (niter->control.base,
                                           stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      controlstep = gimple_build (stmts, NEGATE_EXPR,
                                  TREE_TYPE (controlstep), controlstep);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
                              TREE_TYPE (controlbase),
                              controlbase, controlstep);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR,
                            TREE_TYPE (controlbase),
                            controlbase, controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
                                   true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      tree tem = gimple_convert (stmts, sizetype, enddiff);
      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
                              TREE_TYPE (enddiff),
                              end, tem);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
                            end, enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (stmts, sizetype, enddiff);
      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
                               TREE_TYPE (guard_init),
                               guard_init, enddiff);
    }
  else
    newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
                             guard_init, enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
        addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
        addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
        type2 = sizetype;
      newbound = gimple_build (stmts,
                               POINTER_TYPE_P (TREE_TYPE (newbound))
                               ? POINTER_PLUS_EXPR : PLUS_EXPR,
                               TREE_TYPE (newbound),
                               newbound,
                               build_int_cst (type2, addbound));
    }

  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
                              border, newbound);
  return newend;
}
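
/* Editorial worked example (assumed numbers, not from the original
   sources): for

     for (i = 0, j = 10; i < 20; i++, j++)
       if (j <= 15)
         ...

   end-beg is 20, so guard_init + (end-beg) = 30; NITER->cmp is LT_EXPR
   while the guard is LE_EXPR, hence addbound = -1 and minmax = MIN_EXPR,
   giving newend = MIN (15, 29) = 15.  The patched exit then loops back
   while the incremented j is <= 15, i.e. the first loop runs the
   iterations i = 0..5 and the second loop the remaining ones.  */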

/* Fix the two loops' bb counts after the split based on the split edge
   probability; don't adjust the bbs dominated by true branches of that
   loop to avoid dropping 1s down.  */
static void
fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
                         edge false_edge)
{
  /* Proportion the first loop's bb counts except those dominated by the
     true branch to avoid dropping 1s down.  */
  basic_block *bbs1, *bbs2;
  bbs1 = get_loop_body (loop1);
  unsigned j;
  for (j = 0; j < loop1->num_nodes; j++)
    if (bbs1[j] == loop1->latch
        /* Watch for the case where the true conditional is empty.  */
        || !single_pred_p (true_edge->dest)
        || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
      bbs1[j]->count
        = bbs1[j]->count.apply_probability (true_edge->probability);
  free (bbs1);

  /* Proportion the second loop's bb counts except those dominated by the
     false branch to avoid dropping 1s down.  */
  basic_block bbi_copy = get_bb_copy (false_edge->dest);
  bbs2 = get_loop_body (loop2);
  for (j = 0; j < loop2->num_nodes; j++)
    if (bbs2[j] == loop2->latch
        /* Watch for the case where the false conditional is empty.  */
        || !single_pred_p (bbi_copy)
        || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
      bbs2[j]->count
        = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
  free (bbs2);
}
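
/* Editorial illustration (assumed numbers): if the loop header executed
   1000 times and the guard's true edge has probability 60%, the blocks of
   the first loop copy are scaled to 600, except those dominated by the
   true branch, whose counts are kept; the second copy is scaled by the
   inverted probability to 400 in the same manner.  */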

/* Checks if LOOP contains a conditional block whose condition
   depends on which side in the iteration space it is, and if so
   splits the iteration space into two loops.  Returns true if the
   loop was split.  The iteration descriptor for the single exit of
   the loop is computed here.  */

static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
         (not the exit edge), and put the new exit condition in the new block.
         OTOH this executes some code unconditionally that might have been
         skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop1)
      || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
      || niter.cmp == ERROR_MARK)
    return false;
  if (niter.cmp == NE_EXPR)
    {
      if (!niter.control.no_overflow)
        return false;
      if (tree_int_cst_sign_bit (niter.control.step))
        niter.cmp = GT_EXPR;
      else
        niter.cmp = LT_EXPR;
    }

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  enum tree_code guard_code;
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
      {
        profile_count entry_count = loop_preheader_edge (loop1)->count ();
        /* Handling opposite steps is not implemented yet.  Neither
           is handling different step sizes.  */
        if ((tree_int_cst_sign_bit (iv.step)
             != tree_int_cst_sign_bit (niter.control.step))
            || !tree_int_cst_equal (iv.step, niter.control.step))
          continue;

        /* Find a loop PHI node that defines guard_iv directly,
           or create one doing that.  */
        gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
        if (!phi)
          continue;
        gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
        tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
                                                 loop_preheader_edge (loop1));

        /* Loop splitting is implemented by versioning the loop, placing
           the new loop after the old loop, making the first loop iterate
           as long as the conditional stays true (or false) and letting the
           second (new) loop handle the rest of the iterations.

           First we need to determine if the condition will start being true
           or false in the first loop.  */
        bool initial_true;
        switch (guard_code)
          {
          case LT_EXPR:
          case LE_EXPR:
            initial_true = !tree_int_cst_sign_bit (iv.step);
            break;
          case GT_EXPR:
          case GE_EXPR:
            initial_true = tree_int_cst_sign_bit (iv.step);
            break;
          default:
            gcc_unreachable ();
          }

        /* Build a condition that will skip the first loop when the
           guard condition won't ever be true (or false).  */
        gimple_seq stmts2;
        border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
        if (stmts2)
          gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
                                            stmts2);
        tree cond = fold_build2 (guard_code, boolean_type_node,
                                 guard_init, border);
        if (!initial_true)
          cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

        edge true_edge, false_edge;
        extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

        /* Now version the loop, placing loop2 after loop1, connecting
           them, and fix up SSA form for that.  */
        initialize_original_copy_tables ();
        basic_block cond_bb;

        profile_probability loop1_prob
          = integer_onep (cond) ? profile_probability::always ()
                                : true_edge->probability;
        /* TODO: It is a common case that we know both loops will be
           entered.  very_likely below is the probability, given by
           connect_loops, that the second loop will be entered.  We should
           work out the common case where it is always true.  */
        class loop *loop2 = loop_version (loop1, cond, &cond_bb,
                                          loop1_prob,
                                          /* Pass always as we will later
                                             redirect first loop to second
                                             loop.  */
                                          profile_probability::always (),
                                          profile_probability::always (),
                                          profile_probability::very_likely (),
                                          true);
        gcc_assert (loop2);
        /* Correct the probability of the edge cond_bb->preheader_of_loop2.  */
        single_pred_edge
          (loop_preheader_edge (loop2)->src)->probability
          = loop1_prob.invert ();

        fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);

        /* Fix the loops' exit probabilities after scaling.  */

        if (entry_count.initialized_p () && entry_count.nonzero_p ())
          {
            edge exit_to_latch1;
            if (EDGE_SUCC (exit1->src, 0) == exit1)
              exit_to_latch1 = EDGE_SUCC (exit1->src, 1);
            else
              exit_to_latch1 = EDGE_SUCC (exit1->src, 0);
            if (exit1->src->count.nonzero_p ())
              {
                /* The first loop is entered loop1_prob * entry_count times
                   and it needs to exit the same number of times.  */
                exit1->probability
                  = entry_count.apply_probability
                      (loop1_prob).probability_in (exit1->src->count);
                exit_to_latch1->probability = exit1->probability.invert ();
                scale_dominated_blocks_in_loop (loop1, exit1->src,
                                                exit_to_latch1->count (),
                                                exit_to_latch1->dest->count);
              }

            edge exit_to_latch2, exit2 = single_exit (loop2);
            if (EDGE_SUCC (exit2->src, 0) == exit2)
              exit_to_latch2 = EDGE_SUCC (exit2->src, 1);
            else
              exit_to_latch2 = EDGE_SUCC (exit2->src, 0);
            if (exit2->src->count.nonzero_p ())
              {
                /* The second loop is entered very_likely * entry_count times
                   and it needs to exit the same number of times.  */
                exit2->probability
                  = entry_count.apply_probability
                      (profile_probability::very_likely ())
                    .probability_in (exit2->src->count);
                exit_to_latch2->probability = exit2->probability.invert ();
                scale_dominated_blocks_in_loop (loop2, exit2->src,
                                                exit_to_latch2->count (),
                                                exit_to_latch2->dest->count);
              }
          }

        edge new_e = connect_loops (loop1, loop2);
        connect_loop_phis (loop1, loop2, new_e);

        /* The iterations of the second loop are now already
           exactly those that the first loop didn't do, but the
           iteration space of the first loop is still the original one.
           Compute the new bound for the guarding IV and patch the
           loop exit to use it instead of the original IV and bound.  */
        gimple_seq stmts = NULL;
        tree newend = compute_new_first_bound (&stmts, &niter, border,
                                               guard_code, guard_init);
        if (stmts)
          gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
                                            stmts);
        tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
        patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);

        /* TODO: Update any_estimate and upper bounds.  */

        /* Finally patch out the two copies of the condition to be always
           true/false (or opposite).  */
        gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
        gcond *force_false
          = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
        if (!initial_true)
          std::swap (force_true, force_false);
        gimple_cond_make_true (force_true);
        gimple_cond_make_false (force_false);
        update_stmt (force_true);
        update_stmt (force_false);

        free_original_copy_tables ();

        changed = true;
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, ";; Loop split.\n");

        if (dump_enabled_p ())
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt,
                           "loop split\n");

        /* Only deal with the first opportunity.  */
        break;
      }

  free (bbs);
  return changed;
}

/* Another transformation of loops like:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))  // expr is pure
         a_j = ...;  // change at least one a_j
       else
         S;          // not change any a_j
     }

   into:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))
         a_j = ...;
       else
         {
           S;
           i = NEXT ();
           break;
         }
     }

   for (; CHECK (i); i = NEXT ())
     {
       S;
     }

   */

/* Data structure to hold temporary information during loop split upon
   semi-invariant conditional statement.  */
class split_info {
public:
  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
  basic_block *bbs;

  /* All memory store/clobber statements in a loop.  */
  auto_vec<gimple *> memory_stores;

  /* Whether above memory stores vector has been filled.  */
  int need_init;

  /* Control dependencies of basic blocks in a loop.  */
  auto_vec<hash_set<basic_block> *> control_deps;

  split_info () : bbs (NULL), need_init (true) { }

  ~split_info ()
  {
    if (bbs)
      free (bbs);

    for (unsigned i = 0; i < control_deps.length (); i++)
      delete control_deps[i];
  }
};

/* Find all statements with memory-write effect in LOOP, including memory
   stores and non-pure function calls, and keep those in a vector.  This
   work is only done once, since the vector should stay constant during
   the analysis stage of semi-invariant conditions.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate the memory store vector has been filled.  */
  info->need_init = false;

  /* If the loop contains a memory operation, there must be a virtual PHI
     node in the loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected into a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in the loop header
     just links the first and the last virtual SSA names, by using the last
     as PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name, which
     is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
         and reversely start the process from the last SSA name towards the
         first, which ensures that this do-while will not touch SSA names
         defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
                  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          gphi *phi = as_a <gphi *> (stmt);

          for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
            {
              tree arg = gimple_phi_arg_def (stmt, i);

              if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
                worklist.safe_push (arg);
            }
        }
      else
        {
          tree prev = gimple_vuse (stmt);

          /* A non-pure call statement is conservatively assumed to impact
             all memory locations.  So place call statements ahead of other
             memory stores in the vector with the idea of using them as
             shortcut terminators to memory alias analysis.  */
          if (gimple_code (stmt) == GIMPLE_CALL)
            info->memory_stores.safe_push (stmt);
          else
            other_stores.safe_push (stmt);

          if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
            worklist.safe_push (prev);
        }
    } while (!worklist.is_empty ());

  info->memory_stores.safe_splice (other_stores);
}

/* Two basic blocks have equivalent control dependency if one dominates the
   other, and it is post-dominated by the latter.  Given a basic block BB
   in LOOP, find the farthest equivalent dominating basic block.  For BB,
   there is a constraint that BB does not post-dominate the loop header of
   LOOP, which means BB is control-dependent on at least one basic block in
   LOOP.  */

static basic_block
get_control_equiv_head_block (struct loop *loop, basic_block bb)
{
  while (!bb->aux)
    {
      basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

      gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));

      if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
        break;

      bb = dom_bb;
    }
  return bb;
}
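
/* Editorial illustration (an assumed example): for

     if (a)             // bb1
       {
         ...            // bb2, contains if (b) forming a small diamond
         ...            // bb5, the join of that diamond
       }

   bb2 dominates bb5 and is post-dominated by it, so both have the same
   control dependency (on bb1); starting from bb5 the walk stops at bb2,
   the farthest control-equivalent dominator.  */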

/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* If BB has the same control dependency as the loop header, it is not
     control-dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
         to BB.  No need to recompute that, and also set this information
         to other equivalent basic blocks.  */
      for (; bb != equiv_head;
           bb = get_immediate_dominator (CDI_DOMINATORS, bb))
        bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB; if a basic block is post-dominated by BB, extend
     the current post-dominating path to this block, otherwise it is
     another one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      if (pdom_visited.add (pdom_bb))
        continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
        {
          basic_block pred_bb = e->src;

          if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
            {
              dep_bbs->add (pred_bb);
              continue;
            }

          pred_bb = get_control_equiv_head_block (loop, pred_bb);

          if (pdom_visited.contains (pred_bb))
            continue;

          if (!pred_bb->aux)
            {
              pdom_worklist.safe_push (pred_bb);
              continue;
            }

          /* If the control dependency of a basic block is available, fast
             extend the post-dominating path using that information instead
             of advancing forward step-by-step.  */
          hash_set<basic_block> *pred_dep_bbs
            = (hash_set<basic_block> *) pred_bb->aux;

          for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
               iter != pred_dep_bbs->end (); ++iter)
            {
              basic_block pred_dep_bb = *iter;

              /* Basic blocks can either be in the control dependency of BB,
                 or must be post-dominated by BB; if so, extend the path
                 from these basic blocks.  */
              if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
                dep_bbs->add (pred_dep_bb);
              else if (!pdom_visited.contains (pred_dep_bb))
                pdom_worklist.safe_push (pred_dep_bb);
            }
        }
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in the loop so that we can reach
     them when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}

/* Forward declaration.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
                         const_basic_block skip_head,
                         hash_map<gimple *, bool> &stmt_stat);

/* Given STMT, a memory load or pure call statement, check whether it is
   impacted by some memory store in LOOP, excluding the trace starting from
   SKIP_HEAD (the trace is composed of SKIP_HEAD and those basic blocks
   dominated by it, and always corresponds to one branch of a conditional
   statement).  If SKIP_HEAD is NULL, all basic blocks of LOOP are
   checked.  */

static bool
vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
                       const_basic_block skip_head)
{
  split_info *info = (split_info *) loop->aux;
  tree rhs = NULL_TREE;
  ao_ref ref;
  gimple *store;
  unsigned i;

  /* Collect memory store/clobber statements if we haven't done so yet.  */
  if (info->need_init)
    find_vdef_in_loop (loop);

  if (is_gimple_assign (stmt))
    rhs = gimple_assign_rhs1 (stmt);

  ao_ref_init (&ref, rhs);

  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
    {
      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
      if (skip_head
          && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
        continue;

      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
        return false;
    }

  return true;
}
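
/* Editorial illustration (an assumed example): in

     while (...)
       {
         if (a[i] > max)   // load of `max'
           max = a[i];     // store, dominated by the true branch head
       }

   with SKIP_HEAD being the true branch's head, the only store to `max'
   is excluded, no remaining store may clobber the load, and the load of
   `max' is regarded as semi-invariant.  */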

/* Suppose one condition branch, led by SKIP_HEAD, is not executed from a
   certain iteration of LOOP onwards; check whether an SSA name (NAME)
   remains unchanged in the next iteration.  We call this characteristic
   semi-invariantness.  SKIP_HEAD might be NULL; if so, nothing is
   excluded, and all basic blocks and control flows in the loop will be
   considered.  The semi-invariant state of a checked statement is cached
   in the hash map STMT_STAT to avoid redundant computation in possible
   following re-checks.  */

static inline bool
ssa_semi_invariant_p (struct loop *loop, tree name,
                      const_basic_block skip_head,
                      hash_map<gimple *, bool> &stmt_stat)
{
  gimple *def = SSA_NAME_DEF_STMT (name);
  const_basic_block def_bb = gimple_bb (def);

  /* An SSA name defined outside the loop is definitely semi-invariant.  */
  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
    return true;

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return false;

  return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
}

/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL)
   are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
                                const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* A loop iteration PHI node is located in the loop header and has two
     source operands: one is an initial value coming from outside the loop,
     the other is a value through the latch of the loop, which is derived
     in the last iteration; we call the latter the latch value.  From the
     PHI node to the definition of the latch value, if, excluding the branch
     trace starting from SKIP_HEAD, there is no kind of value redefinition
     other than copy-assignment or the like, the SSA name defined by the
     PHI node is semi-invariant.

                     loop entry
                          |     .--- latch ---.
                          |     |             |
                          v     v             |
                  x_1 = PHI <x_0, x_3>        |
                          |                   |
                          v                   |
          .------- if (cond) -------.         |
          |                         |         |
          |                     [ SKIP ]      |
          |                         |         |
          |                     x_2 = ...     |
          |                         |         |
          '---- T ---->.<---- F ----'         |
                       |                      |
                       v                      |
              x_3 = PHI <x_1, x_2>            |
                       |                      |
                       '----------------------'

     Suppose that in a certain iteration, execution flow in the above graph
     goes through the true branch, which means that one source value to
     define x_3 in the false branch (x_2) is skipped; x_3 only comes from
     x_1, and x_1 in the next iterations is defined by x_3; we know that
     x_1 will never change if COND always chooses the true branch from then
     on.  */

  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
        return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
        return false;

      from = NULL_TREE;

      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          gphi *phi = as_a <gphi *> (stmt);

          for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
            {
              if (skip_head)
                {
                  const_edge e = gimple_phi_arg_edge (phi, i);

                  /* Don't consider redefinitions in excluded basic
                     blocks.  */
                  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
                    continue;
                }

              tree arg = gimple_phi_arg_def (phi, i);

              if (!from)
                from = arg;
              else if (!operand_equal_p (from, arg, 0))
                /* There is more than one source operand that provides
                   different values to the SSA name; it is variant.  */
                return false;
            }
        }
      else if (gimple_code (stmt) == GIMPLE_ASSIGN)
        {
          /* For a simple value copy, check its rhs instead.  */
          if (gimple_assign_ssa_name_copy_p (stmt))
            from = gimple_assign_rhs1 (stmt);
        }

      /* Any other kind of definition is deemed to introduce a new value
         to the SSA name.  */
      if (!from)
        return false;
    }
  return true;
}

/* Check whether conditional predicates that BB is control-dependent on are
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if
   non-NULL) are excluded from LOOP.  The semi-invariant state of checked
   statements is cached in the hash map STMT_STAT.  */

static bool
control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
                              const_basic_block skip_head,
                              hash_map<gimple *, bool> &stmt_stat)
{
  hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);

  if (!dep_bbs)
    return true;

  for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
       iter != dep_bbs->end (); ++iter)
    {
      gimple *last = *gsi_last_bb (*iter);
      if (!last)
        return false;

      /* Only check condition predicates.  */
      if (gimple_code (last) != GIMPLE_COND
          && gimple_code (last) != GIMPLE_SWITCH)
        return false;

      if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
        return false;
    }

  return true;
}

/* Check whether STMT is semi-invariant in LOOP: if all its operands are
   semi-invariant, then all its defined values are semi-invariant.  Basic
   blocks dominated by SKIP_HEAD (if non-NULL) are excluded from LOOP.
   The semi-invariant state of a checked statement is cached in the hash
   map STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
                         const_basic_block skip_head,
                         hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (stmt, &existed);

  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So
     set the state of the statement under check to be variant to ensure
     that.  */
  invar = false;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (stmt);

      if (gimple_bb (stmt) == loop->header)
        {
          /* If the entry value is subject to abnormal coalescing
             avoid the transform since we're going to duplicate the
             loop header and thus likely introduce overlapping life-ranges
             between the PHI def and the entry on the path when the
             first loop is skipped.  */
          tree entry_def
            = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
          if (TREE_CODE (entry_def) == SSA_NAME
              && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
            return false;
          invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
          return invar;
        }

      /* For a loop PHI node that is not located in the loop header, it is
         semi-invariant only if two conditions are met.  The first is that
         its source values are derived from CONSTANT (including a
         loop-invariant value), or from an SSA name defined by a
         semi-invariant loop iteration PHI node.  The second is that its
         source incoming edges are control-dependent on semi-invariant
         conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
        {
          const_edge e = gimple_phi_arg_edge (phi, i);
          tree arg = gimple_phi_arg_def (phi, i);

          if (TREE_CODE (arg) == SSA_NAME)
            {
              if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
                return false;

              /* If the source value is defined in the location from where
                 the source edge comes in, there is no need to check control
                 dependency again, since this has been done in the above SSA
                 name check stage.  */
              if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
                continue;
            }

          if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
                                             stmt_stat))
            return false;
        }
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* A volatile memory load or the return value of a normal (non-const/
         non-pure) call should not be treated as constant in each iteration
         of the loop.  */
      if (gimple_has_side_effects (stmt))
        return false;

      /* Check if any memory store may kill a memory load at this place.  */
      if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
        return false;

      /* Although an operand of a statement might be an SSA name, CONSTANT
         or VARDECL, here we only need to check SSA name operands.  This is
         because a check on VARDECL operands, which involve memory loads,
         must have been done prior to invocation of this function in
         vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
          return false;
    }

  if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
                                     stmt_stat))
    return false;

  /* Here we SHOULD NOT use invar = true, since the hash map might be
     changed due to a new insertion, and thus invar may point to invalid
     memory.  */
  stmt_stat.put (stmt, true);
  return true;
}

/* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
   blocks dominated by SKIP_HEAD (if non-NULL) are excluded from LOOP.  */

static bool
stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
                       const_basic_block skip_head)
{
  hash_map<gimple *, bool> stmt_stat;
  return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
}
1381 | ||
1382 | /* Determine when conditional statement never transfers execution to one of its | |
1383 | branch, whether we can remove the branch's leading basic block (BRANCH_BB) | |
1384 | and those basic blocks dominated by BRANCH_BB. */ | |
1385 | ||
1386 | static bool | |
1387 | branch_removable_p (basic_block branch_bb) | |
1388 | { | |
1389 | edge_iterator ei; | |
1390 | edge e; | |
1391 | ||
1392 | if (single_pred_p (branch_bb)) | |
1393 | return true; | |
1394 | ||
1395 | FOR_EACH_EDGE (e, ei, branch_bb->preds) | |
1396 | { | |
1397 | if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb)) | |
1398 | continue; | |
1399 | ||
1400 | if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src)) | |
1401 | continue; | |
1402 | ||
1403 | /* The branch can be reached from opposite branch, or from some | |
1404 | statement not dominated by the conditional statement. */ | |
1405 | return false; | |
1406 | } | |
1407 | ||
1408 | return true; | |
1409 | } | |
1410 | ||
1411 | /* Find out which branch of a conditional statement (COND) is invariant in the | |
1412 | execution context of LOOP. That is: once the branch is selected in certain | |
1413 | iteration of the loop, any operand that contributes to computation of the | |
1414 | conditional statement remains unchanged in all following iterations. */ | |
1415 | ||
1416 | static edge | |
1417 | get_cond_invariant_branch (struct loop *loop, gcond *cond) | |
1418 | { | |
1419 | basic_block cond_bb = gimple_bb (cond); | |
1420 | basic_block targ_bb[2]; | |
1421 | bool invar[2]; | |
1422 | unsigned invar_checks = 0; | |
1423 | ||
1424 | for (unsigned i = 0; i < 2; i++) | |
1425 | { | |
1426 | targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest; | |
1427 | ||
1428 | /* If one branch leads to the loop exit, there is no need to perform a loop | |
1429 | split upon this conditional statement. Firstly, the case where the exit | |
1430 | branch is semi-invariant is trivial, for the statement merely breaks out | |
1431 | of the loop. Secondly, if the opposite branch is semi-invariant, the | |
1432 | statement is truly loop-invariant, which is covered by loop unswitching. */ | |
1433 | if (!flow_bb_inside_loop_p (loop, targ_bb[i])) | |
1434 | return NULL; | |
1435 | } | |
1436 | ||
1437 | for (unsigned i = 0; i < 2; i++) | |
1438 | { | |
1439 | invar[!i] = false; | |
1440 | ||
1441 | if (!branch_removable_p (targ_bb[i])) | |
1442 | continue; | |
1443 | ||
1444 | /* Given a semi-invariant branch, if its opposite branch dominates the | |
1445 | loop latch, that branch and the trace following it are only executed | |
1446 | in the final iteration of the loop, i.e. they are not part of the | |
1447 | repeated body of the loop. As in the above case where the branch is | |
1448 | a loop exit, there is no need to split the loop. */ | |
1449 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i])) | |
1450 | continue; | |
1451 | ||
1452 | invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]); | |
1453 | invar_checks++; | |
1454 | } | |
1455 | ||
1456 | /* Neither both branches being invariant (handled by loop unswitching) nor | |
1457 | both being variant is what we want; exactly one must be semi-invariant. */ | |
1458 | if (invar[0] ^ !invar[1]) | |
1459 | return NULL; | |
1460 | ||
1461 | /* If the condition is in fact fully loop-invariant, do nothing. */ | |
1462 | if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL)) | |
1463 | return NULL; | |
1464 | ||
1465 | return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1); | |
1466 | } | |
1467 | ||
1468 | /* Calculate the code size increase, measured in estimated insns, of applying | |
1469 | a loop split upon a certain branch (BRANCH_EDGE) of a conditional statement. */ | |
1470 | ||
1471 | static int | |
1472 | compute_added_num_insns (struct loop *loop, const_edge branch_edge) | |
1473 | { | |
1474 | basic_block cond_bb = branch_edge->src; | |
1475 | unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge; | |
1476 | basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest; | |
1477 | basic_block *bbs = ((split_info *) loop->aux)->bbs; | |
1478 | int num = 0; | |
1479 | ||
1480 | for (unsigned i = 0; i < loop->num_nodes; i++) | |
1481 | { | |
1482 | /* Do not count basic blocks that are only in the opposite branch. */ | |
1483 | if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb)) | |
1484 | continue; | |
1485 | ||
1486 | num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); | |
1487 | } | |
1488 | ||
1489 | /* It is unnecessary to evaluate the expression of the conditional statement | |
1490 | in the new loop that contains only the invariant branch. The expression | |
1491 | should be a constant value (either true or false). Exclude the code size | |
1492 | of insns that contribute only to the computation of the expression. */ | |
1493 | ||
1494 | auto_vec<gimple *> worklist; | |
1495 | hash_set<gimple *> removed; | |
fe8ac82f | 1496 | gimple *stmt = last_nondebug_stmt (cond_bb); |
095f78c6 FX |
1497 | |
1498 | worklist.safe_push (stmt); | |
1499 | removed.add (stmt); | |
1500 | num -= estimate_num_insns (stmt, &eni_size_weights); | |
1501 | ||
1502 | do | |
1503 | { | |
1504 | ssa_op_iter opnd_iter; | |
1505 | use_operand_p opnd_p; | |
1506 | ||
1507 | stmt = worklist.pop (); | |
1508 | FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE) | |
1509 | { | |
1510 | tree opnd = USE_FROM_PTR (opnd_p); | |
1511 | ||
1512 | if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd)) | |
1513 | continue; | |
1514 | ||
1515 | gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd); | |
1516 | use_operand_p use_p; | |
1517 | imm_use_iterator use_iter; | |
1518 | ||
1519 | if (removed.contains (opnd_stmt) | |
1520 | || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt))) | |
1521 | continue; | |
1522 | ||
1523 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd) | |
1524 | { | |
1525 | gimple *use_stmt = USE_STMT (use_p); | |
1526 | ||
1527 | if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt)) | |
1528 | { | |
1529 | opnd_stmt = NULL; | |
1530 | break; | |
1531 | } | |
1532 | } | |
1533 | ||
1534 | if (opnd_stmt) | |
1535 | { | |
1536 | worklist.safe_push (opnd_stmt); | |
1537 | removed.add (opnd_stmt); | |
1538 | num -= estimate_num_insns (opnd_stmt, &eni_size_weights); | |
1539 | } | |
1540 | } | |
1541 | } while (!worklist.is_empty ()); | |
1542 | ||
1543 | gcc_assert (num >= 0); | |
1544 | return num; | |
1545 | } | |
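
/* A sketch of the size accounting above (hypothetical GIMPLE-like code):
   given

     t_1 = x_2 + y_3;
     if (t_1 < z_4) ...

   where t_1 has no use other than the condition, both the gcond and the
   definition of t_1 are subtracted from the estimate: in the new loop the
   condition folds to a constant, so these insns become dead.  */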
1546 | ||
1547 | /* Find the loop-invariant branch of a conditional statement (COND), if it | |
1548 | has one, and check whether it is eligible and profitable to perform a | |
1549 | loop split upon this branch in LOOP. */ | |
1550 | ||
1551 | static edge | |
1552 | get_cond_branch_to_split_loop (struct loop *loop, gcond *cond) | |
1553 | { | |
1554 | edge invar_branch = get_cond_invariant_branch (loop, cond); | |
1555 | if (!invar_branch) | |
1556 | return NULL; | |
1557 | ||
1558 | /* When accurate profile information is available and the execution | |
1559 | frequency of the branch is too low, skip the transformation. */ | |
1560 | profile_probability prob = invar_branch->probability; | |
1561 | if (prob.reliable_p ()) | |
1562 | { | |
028d4092 | 1563 | int thres = param_min_loop_cond_split_prob; |
095f78c6 FX |
1564 | |
1565 | if (prob < profile_probability::always ().apply_scale (thres, 100)) | |
1566 | return NULL; | |
1567 | } | |
1568 | ||
1569 | /* Disable the loop split if the code size increase exceeds a threshold. */ | |
028d4092 | 1570 | if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns) |
095f78c6 FX |
1571 | return NULL; |
1572 | ||
1573 | return invar_branch; | |
1574 | } | |
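
/* Both thresholds consulted above are tunable; assuming the usual mapping
   of param identifiers to command-line names, something like

     gcc -O3 -fsplit-loops --param=min-loop-cond-split-prob=50 \
         --param=max-peeled-insns=200 test.c

   raises the required branch probability and the permitted code-size
   growth, respectively.  */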
1575 | ||
1576 | /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some | |
1577 | conditional statement, perform loop split transformation illustrated | |
1578 | as the following graph. | |
1579 | ||
1580 | .-------T------ if (true) ------F------. | |
1581 | | .---------------. | | |
1582 | | | | | | |
1583 | v | v v | |
1584 | pre-header | pre-header | |
1585 | | .------------. | | .------------. | |
1586 | | | | | | | | | |
1587 | | v | | | v | | |
1588 | header | | header | | |
1589 | | | | | | | |
1590 | .--- if (cond) ---. | | .--- if (true) ---. | | |
1591 | | | | | | | | | |
1592 | invariant | | | invariant | | | |
1593 | | | | | | | | | |
1594 | '---T--->.<---F---' | | '---T--->.<---F---' | | |
1595 | | | / | | | |
1596 | stmts | / stmts | | |
1597 | | F T | | | |
1598 | / \ | / / \ | | |
1599 | .-------* * [ if (cond) ] .-------* * | | |
1600 | | | | | | | | |
1601 | | latch | | latch | | |
1602 | | | | | | | | |
1603 | | '------------' | '------------' | |
1604 | '------------------------. .-----------' | |
1605 | loop1 | | loop2 | |
1606 | v v | |
1607 | exits | |
1608 | ||
1609 | In the graph, loop1 represents the part derived from the original one, and | |
1610 | loop2 is duplicated using loop_version (), which corresponds to the part | |
1611 | of the original one being split out. On the original latch edge of loop1, | |
1612 | we insert a new conditional statement duplicated from the semi-invariant | |
1613 | cond; one of its branches goes back to the loop1 header as a latch edge, | |
1614 | and the other branch goes to the loop2 pre-header as an entry edge. Also, | |
1615 | in loop2 we abandon the variant branch of the conditional statement by | |
1616 | setting a constant bool condition, based on which branch is semi-invariant. */ | |
1617 | ||
1618 | static bool | |
1619 | do_split_loop_on_cond (struct loop *loop1, edge invar_branch) | |
1620 | { | |
1621 | basic_block cond_bb = invar_branch->src; | |
1622 | bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE); | |
64780df2 | 1623 | gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb)); |
095f78c6 FX |
1624 | |
1625 | gcc_assert (cond_bb->loop_father == loop1); | |
1626 | ||
1627 | if (dump_enabled_p ()) | |
1628 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond, | |
1629 | "loop split on semi-invariant condition at %s branch\n", | |
1630 | true_invar ? "true" : "false"); | |
1631 | ||
1632 | initialize_original_copy_tables (); | |
1633 | ||
1634 | struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, | |
cd5ae148 XL |
1635 | invar_branch->probability.invert (), |
1636 | invar_branch->probability, | |
095f78c6 FX |
1637 | profile_probability::always (), |
1638 | profile_probability::always (), | |
1639 | true); | |
1640 | if (!loop2) | |
1641 | { | |
1642 | free_original_copy_tables (); | |
1643 | return false; | |
1644 | } | |
1645 | ||
1646 | basic_block cond_bb_copy = get_bb_copy (cond_bb); | |
64780df2 | 1647 | gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy)); |
095f78c6 FX |
1648 | |
1649 | /* Replace the condition in loop2 with a constant bool so that the pass | |
1650 | manager removes the variant branch after the current pass completes. */ | |
1651 | if (true_invar) | |
1652 | gimple_cond_make_true (cond_copy); | |
1653 | else | |
1654 | gimple_cond_make_false (cond_copy); | |
1655 | ||
1656 | update_stmt (cond_copy); | |
1657 | ||
1658 | /* Insert a new conditional statement on the latch edge of loop1, whose | |
1659 | condition is duplicated from the semi-invariant one. This statement acts | |
1660 | as a switch that transfers execution from loop1 to loop2 once loop1 | |
1661 | enters its invariant state. */ | |
1662 | basic_block latch_bb = split_edge (loop_latch_edge (loop1)); | |
1663 | basic_block break_bb = split_edge (single_pred_edge (latch_bb)); | |
1664 | gimple *break_cond = gimple_build_cond (gimple_cond_code (cond), | |
1665 | gimple_cond_lhs (cond), | |
1666 | gimple_cond_rhs (cond), | |
1667 | NULL_TREE, NULL_TREE); | |
1668 | ||
1669 | gimple_stmt_iterator gsi = gsi_last_bb (break_bb); | |
1670 | gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); | |
1671 | ||
1672 | edge to_loop1 = single_succ_edge (break_bb); | |
1673 | edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); | |
1674 | ||
1675 | to_loop1->flags &= ~EDGE_FALLTHRU; | |
1676 | to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE; | |
1677 | to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE; | |
1678 | ||
095f78c6 FX |
1679 | /* Due to the introduction of a control flow edge from the loop1 latch to | |
1680 | the loop2 pre-header, we should update PHIs in loop2 to reflect this | |
1681 | connection between loop1 and loop2. */ | |
1682 | connect_loop_phis (loop1, loop2, to_loop2); | |
1683 | ||
cd5ae148 XL |
1684 | edge true_edge, false_edge, skip_edge1, skip_edge2; |
1685 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); | |
1686 | ||
cd5ae148 XL |
1687 | skip_edge1 = true_invar ? false_edge : true_edge; |
1688 | skip_edge2 = true_invar ? true_edge : false_edge; | |
44372676 | 1689 | fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2); |
cd5ae148 XL |
1690 | |
1691 | /* Fix the first loop's exit probability after scaling. */ | |
1692 | to_loop1->probability = invar_branch->probability.invert (); | |
1693 | to_loop2->probability = invar_branch->probability; | |
1694 | ||
095f78c6 FX |
1695 | free_original_copy_tables (); |
1696 | ||
095f78c6 FX |
1697 | return true; |
1698 | } | |
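
/* A source-level sketch of the net effect (hypothetical code; the actual
   transformation works on the CFG as drawn above, and the placement of
   the induction-variable update is simplified):

     // before
     for (i = 0; i < n; i++)
       {
         if (flag)
           f (i);
         else if (a[i])
           flag = true;
       }

     // after (conceptually)
     for (i = 0; i < n; i++)
       {
         if (flag)
           f (i);
         else if (a[i])
           flag = true;
         if (flag)            // switch duplicated onto the latch edge
           goto loop2;
       }
     goto done;
   loop2:
     for (i++; i < n; i++)
       f (i);                 // condition folded to true in loop2
   done:;                                                             */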
1699 | ||
1700 | /* Traverse all conditional statements in LOOP to find a good candidate | |
1701 | upon which we can do a loop split. */ | |
1702 | ||
1703 | static bool | |
1704 | split_loop_on_cond (struct loop *loop) | |
1705 | { | |
1706 | split_info *info = new split_info (); | |
1707 | basic_block *bbs = info->bbs = get_loop_body (loop); | |
1708 | bool do_split = false; | |
1709 | ||
1710 | /* Allocate an area to keep temporary info, and associate its address | |
1711 | with the loop's aux field. */ | |
1712 | loop->aux = info; | |
1713 | ||
1714 | for (unsigned i = 0; i < loop->num_nodes; i++) | |
1715 | bbs[i]->aux = NULL; | |
1716 | ||
1717 | for (unsigned i = 0; i < loop->num_nodes; i++) | |
1718 | { | |
1719 | basic_block bb = bbs[i]; | |
1720 | ||
1721 | /* We only consider conditional statements that are executed at most once | |
1722 | in each iteration of the loop, so skip statements in inner loops. */ | |
1723 | if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP)) | |
1724 | continue; | |
1725 | ||
1726 | /* Actually this check is not a hard constraint. With it, we can ensure | |
1727 | the conditional statement is always executed in each iteration. */ | |
1728 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1729 | continue; | |
1730 | ||
64780df2 RB |
1731 | gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)); |
1732 | if (!cond) | |
095f78c6 FX |
1733 | continue; |
1734 | ||
095f78c6 FX |
1735 | edge branch_edge = get_cond_branch_to_split_loop (loop, cond); |
1736 | ||
1737 | if (branch_edge) | |
1738 | { | |
1739 | do_split_loop_on_cond (loop, branch_edge); | |
1740 | do_split = true; | |
1741 | break; | |
1742 | } | |
1743 | } | |
1744 | ||
1745 | delete info; | |
1746 | loop->aux = NULL; | |
1747 | ||
1748 | return do_split; | |
1749 | } | |
1750 | ||
28df8730 MM |
1751 | /* Main entry point. Perform loop splitting on all suitable loops. */ |
1752 | ||
1753 | static unsigned int | |
1754 | tree_ssa_split_loops (void) | |
1755 | { | |
28df8730 MM |
1756 | bool changed = false; |
1757 | ||
1758 | gcc_assert (scev_initialized_p ()); | |
095f78c6 FX |
1759 | |
1760 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
1761 | ||
e41ba804 | 1762 | for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT)) |
28df8730 MM |
1763 | loop->aux = NULL; |
1764 | ||
1765 | /* Go through all loops starting from innermost. */ | |
e41ba804 | 1766 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
28df8730 | 1767 | { |
28df8730 MM |
1768 | if (loop->aux) |
1769 | { | |
1770 | /* If any of our inner loops was split, don't split us, | |
8354d0ab RB |
1771 | and mark our containing loop as having had splits as well. |
1772 | This allows for delaying SSA update. */ | |
28df8730 MM |
1773 | loop_outer (loop)->aux = loop; |
1774 | continue; | |
1775 | } | |
1776 | ||
095f78c6 FX |
1777 | if (optimize_loop_for_size_p (loop)) |
1778 | continue; | |
1779 | ||
1780 | if (split_loop (loop) || split_loop_on_cond (loop)) | |
28df8730 | 1781 | { |
095f78c6 FX |
1782 | /* Mark our containing loop as having had some split inner loops. */ |
1783 | loop_outer (loop)->aux = loop; | |
1784 | changed = true; | |
28df8730 MM |
1785 | } |
1786 | } | |
1787 | ||
e41ba804 | 1788 | for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT)) |
28df8730 MM |
1789 | loop->aux = NULL; |
1790 | ||
095f78c6 FX |
1791 | clear_aux_for_blocks (); |
1792 | ||
1793 | free_dominance_info (CDI_POST_DOMINATORS); | |
1794 | ||
28df8730 | 1795 | if (changed) |
a1ac9ffb RB |
1796 | { |
1797 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
1798 | return TODO_cleanup_cfg; | |
1799 | } | |
28df8730 MM |
1800 | return 0; |
1801 | } | |
1802 | ||
1803 | /* Loop splitting pass. */ | |
1804 | ||
1805 | namespace { | |
1806 | ||
1807 | const pass_data pass_data_loop_split = | |
1808 | { | |
1809 | GIMPLE_PASS, /* type */ | |
1810 | "lsplit", /* name */ | |
1811 | OPTGROUP_LOOP, /* optinfo_flags */ | |
1812 | TV_LOOP_SPLIT, /* tv_id */ | |
1813 | PROP_cfg, /* properties_required */ | |
1814 | 0, /* properties_provided */ | |
1815 | 0, /* properties_destroyed */ | |
1816 | 0, /* todo_flags_start */ | |
1817 | 0, /* todo_flags_finish */ | |
1818 | }; | |
1819 | ||
1820 | class pass_loop_split : public gimple_opt_pass | |
1821 | { | |
1822 | public: | |
1823 | pass_loop_split (gcc::context *ctxt) | |
1824 | : gimple_opt_pass (pass_data_loop_split, ctxt) | |
1825 | {} | |
1826 | ||
1827 | /* opt_pass methods: */ | |
725793af DM |
1828 | bool gate (function *) final override { return flag_split_loops != 0; } |
1829 | unsigned int execute (function *) final override; | |
28df8730 MM |
1830 | |
1831 | }; // class pass_loop_split | |
1832 | ||
1833 | unsigned int | |
1834 | pass_loop_split::execute (function *fun) | |
1835 | { | |
1836 | if (number_of_loops (fun) <= 1) | |
1837 | return 0; | |
1838 | ||
1839 | return tree_ssa_split_loops (); | |
1840 | } | |
1841 | ||
1842 | } // anon namespace | |
1843 | ||
1844 | gimple_opt_pass * | |
1845 | make_pass_loop_split (gcc::context *ctxt) | |
1846 | { | |
1847 | return new pass_loop_split (ctxt); | |
1848 | } |
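
/* Usage note (option names assumed from current GCC documentation): the
   pass is gated on -fsplit-loops, and its decisions can be inspected with
   the pass dump, e.g.:

     gcc -O3 -fsplit-loops -fdump-tree-lsplit-details test.c

   which writes them into the dump file test.c.*.lsplit.  */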