]>
Commit | Line | Data |
---|---|---|
1cc521f1 | 1 | /* Loop unroll-and-jam. |
8d9254fc | 2 | Copyright (C) 2017-2020 Free Software Foundation, Inc. |
1cc521f1 MM |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
8 | Free Software Foundation; either version 3, or (at your option) any | |
9 | later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
1cc521f1 MM |
23 | #include "tree-pass.h" |
24 | #include "backend.h" | |
25 | #include "tree.h" | |
26 | #include "gimple.h" | |
27 | #include "ssa.h" | |
28 | #include "fold-const.h" | |
29 | #include "tree-cfg.h" | |
30 | #include "tree-ssa.h" | |
31 | #include "tree-ssa-loop-niter.h" | |
32 | #include "tree-ssa-loop.h" | |
33 | #include "tree-ssa-loop-manip.h" | |
34 | #include "cfgloop.h" | |
35 | #include "tree-scalar-evolution.h" | |
36 | #include "gimple-iterator.h" | |
37 | #include "cfghooks.h" | |
38 | #include "tree-data-ref.h" | |
39 | #include "tree-ssa-loop-ivopts.h" | |
40 | #include "tree-vectorizer.h" | |
41 | ||
42 | /* Unroll and Jam transformation | |
43 | ||
44 | This is a combination of two transformations, where the second | |
45 | is not always valid. It's applicable if a loop nest has redundancies | |
46 | over the iterations of an outer loop while not having that with | |
47 | an inner loop. | |
48 | ||
49 | Given this nest: | |
50 | for (i) { | |
51 | for (j) { | |
52 | B(i,j) | |
53 | } | |
54 | } | |
55 | ||
56 | first unroll: | |
57 | for (i by 2) { | |
58 | for (j) { | |
59 | B(i,j) | |
60 | } | |
61 | for (j) { | |
62 | B(i+1,j) | |
63 | } | |
64 | } | |
65 | ||
66 | then fuse the two adjacent inner loops resulting from that: | |
67 | for (i by 2) { | |
68 | for (j) { | |
69 | B(i,j) | |
70 | B(i+1,j) | |
71 | } | |
72 | } | |
73 | ||
74 | As the order of evaluations of the body B changes this is valid | |
75 | only in certain situations: all distance vectors need to be forward. | |
76 | Additionally if there are multiple induction variables than just | |
77 | a counting control IV (j above) we can also deal with some situations. | |
78 | ||
79 | The validity is checked by unroll_jam_possible_p, and the data-dep | |
80 | testing below. | |
81 | ||
82 | A trivial example where the fusion is wrong would be when | |
83 | B(i,j) == x[j-1] = x[j]; | |
84 | for (i by 2) { | |
85 | for (j) { | |
86 | x[j-1] = x[j]; | |
87 | } | |
88 | for (j) { | |
89 | x[j-1] = x[j]; | |
90 | } | |
91 | } effect: move content to front by two elements | |
92 | --> | |
93 | for (i by 2) { | |
94 | for (j) { | |
95 | x[j-1] = x[j]; | |
96 | x[j-1] = x[j]; | |
97 | } | |
98 | } effect: move content to front by one element | |
99 | */ | |
100 | ||
101 | /* Modify the loop tree for the fact that all code once belonging | |
102 | to the OLD loop or the outer loop of OLD now is inside LOOP. */ | |
103 | ||
104 | static void | |
99b1c316 | 105 | merge_loop_tree (class loop *loop, class loop *old) |
1cc521f1 MM |
106 | { |
107 | basic_block *bbs; | |
108 | int i, n; | |
99b1c316 | 109 | class loop *subloop; |
1cc521f1 MM |
110 | edge e; |
111 | edge_iterator ei; | |
112 | ||
113 | /* Find its nodes. */ | |
114 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); | |
115 | n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun)); | |
116 | ||
117 | for (i = 0; i < n; i++) | |
118 | { | |
119 | /* If the block was direct child of OLD loop it's now part | |
7ba6738b | 120 | of LOOP. If it was outside OLD, then it moved into LOOP |
1cc521f1 MM |
121 | as well. This avoids changing the loop father for BBs |
122 | in inner loops of OLD. */ | |
123 | if (bbs[i]->loop_father == old | |
124 | || loop_depth (bbs[i]->loop_father) < loop_depth (old)) | |
125 | { | |
126 | remove_bb_from_loops (bbs[i]); | |
127 | add_bb_to_loop (bbs[i], loop); | |
128 | continue; | |
129 | } | |
130 | ||
131 | /* If we find a direct subloop of OLD, move it to LOOP. */ | |
132 | subloop = bbs[i]->loop_father; | |
133 | if (loop_outer (subloop) == old && subloop->header == bbs[i]) | |
134 | { | |
135 | flow_loop_tree_node_remove (subloop); | |
136 | flow_loop_tree_node_add (loop, subloop); | |
137 | } | |
138 | } | |
139 | ||
140 | /* Update the information about loop exit edges. */ | |
141 | for (i = 0; i < n; i++) | |
142 | { | |
143 | FOR_EACH_EDGE (e, ei, bbs[i]->succs) | |
144 | { | |
145 | rescan_loop_exit (e, false, false); | |
146 | } | |
147 | } | |
148 | ||
149 | loop->num_nodes = n; | |
150 | ||
151 | free (bbs); | |
152 | } | |
153 | ||
dc236397 | 154 | /* BB is part of the outer loop of an unroll-and-jam situation. |
1cc521f1 MM |
155 | Check if any statements therein would prevent the transformation. */ |
156 | ||
157 | static bool | |
158 | bb_prevents_fusion_p (basic_block bb) | |
159 | { | |
160 | gimple_stmt_iterator gsi; | |
161 | /* BB is duplicated by outer unrolling and then all N-1 first copies | |
dc236397 | 162 | move into the body of the fused inner loop. If BB exits the outer loop |
395552b5 | 163 | the last copy still does so, and the first N-1 copies are cancelled |
dc236397 MM |
164 | by loop unrolling, so also after fusion it's the exit block. |
165 | But there might be other reasons that prevent fusion: | |
1cc521f1 MM |
166 | * stores or unknown side-effects prevent fusion |
167 | * loads don't | |
168 | * computations into SSA names: these aren't problematic. Their | |
7ba6738b | 169 | result will be unused on the exit edges of the first N-1 copies |
1cc521f1 MM |
170 | (those aren't taken after unrolling). If they are used on the |
171 | other edge (the one leading to the outer latch block) they are | |
172 | loop-carried (on the outer loop) and the Nth copy of BB will | |
173 | compute them again (i.e. the first N-1 copies will be dead). */ | |
174 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
175 | { | |
176 | gimple *g = gsi_stmt (gsi); | |
177 | if (gimple_vdef (g) || gimple_has_side_effects (g)) | |
178 | return true; | |
179 | } | |
180 | return false; | |
181 | } | |
182 | ||
183 | /* Given an inner loop LOOP (of some OUTER loop) determine if | |
184 | we can safely fuse copies of it (generated by outer unrolling). | |
185 | If so return true, otherwise return false. */ | |
186 | ||
187 | static bool | |
99b1c316 | 188 | unroll_jam_possible_p (class loop *outer, class loop *loop) |
1cc521f1 MM |
189 | { |
190 | basic_block *bbs; | |
191 | int i, n; | |
99b1c316 | 192 | class tree_niter_desc niter; |
1cc521f1 MM |
193 | |
194 | /* When fusing the loops we skip the latch block | |
195 | of the first one, so it mustn't have any effects to | |
196 | preserve. */ | |
197 | if (!empty_block_p (loop->latch)) | |
198 | return false; | |
199 | ||
200 | if (!single_exit (loop)) | |
201 | return false; | |
202 | ||
203 | /* We need a perfect nest. Quick check for adjacent inner loops. */ | |
204 | if (outer->inner != loop || loop->next) | |
205 | return false; | |
206 | ||
dc236397 MM |
207 | /* Prevent head-controlled inner loops, that we usually have. |
208 | The guard block would need to be accepted | |
209 | (invariant condition either entering or skipping the loop), | |
210 | without also accepting arbitrary control flow. When unswitching | |
211 | ran before us (as with -O3) this won't be a problem because its | |
212 | outer loop unswitching will have moved out the invariant condition. | |
213 | ||
214 | If we do that we need to extend fuse_loops() to cope with this | |
215 | by threading through the (still invariant) copied condition | |
216 | between the two loop copies. */ | |
217 | if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header)) | |
218 | return false; | |
219 | ||
1cc521f1 MM |
220 | /* The number of iterations of the inner loop must be loop invariant |
221 | with respect to the outer loop. */ | |
222 | if (!number_of_iterations_exit (loop, single_exit (loop), &niter, | |
223 | false, true) | |
224 | || niter.cmp == ERROR_MARK | |
225 | || !integer_zerop (niter.may_be_zero) | |
226 | || !expr_invariant_in_loop_p (outer, niter.niter)) | |
227 | return false; | |
228 | ||
395552b5 MM |
229 | /* If the inner loop produces any values that are used inside the |
230 | outer loop (except the virtual op) then it can flow | |
231 | back (perhaps indirectly) into the inner loop. This prevents | |
232 | fusion: without fusion the value at the last iteration is used, | |
233 | with fusion the value after the initial iteration is used. | |
234 | ||
235 | If all uses are outside the outer loop this doesn't prevent fusion; | |
236 | the value of the last iteration is still used (and the values from | |
237 | all intermediate iterations are dead). */ | |
238 | gphi_iterator psi; | |
239 | for (psi = gsi_start_phis (single_exit (loop)->dest); | |
240 | !gsi_end_p (psi); gsi_next (&psi)) | |
241 | { | |
242 | imm_use_iterator imm_iter; | |
243 | use_operand_p use_p; | |
244 | tree op = gimple_phi_result (psi.phi ()); | |
245 | if (virtual_operand_p (op)) | |
246 | continue; | |
247 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op) | |
248 | { | |
249 | gimple *use_stmt = USE_STMT (use_p); | |
250 | if (!is_gimple_debug (use_stmt) | |
251 | && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt))) | |
252 | return false; | |
253 | } | |
254 | } | |
255 | ||
1cc521f1 MM |
256 | /* And check blocks belonging to just outer loop. */ |
257 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); | |
258 | n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun)); | |
259 | ||
260 | for (i = 0; i < n; i++) | |
dc236397 MM |
261 | if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i])) |
262 | break; | |
1cc521f1 MM |
263 | free (bbs); |
264 | if (i != n) | |
265 | return false; | |
266 | ||
267 | /* For now we can safely fuse copies of LOOP only if all | |
268 | loop carried variables are inductions (or the virtual op). | |
269 | ||
270 | We could handle reductions as well (the initial value in the second | |
271 | body would be the after-iter value of the first body) if it's over | |
272 | an associative and commutative operation. We wouldn't | |
273 | be able to handle unknown cycles. */ | |
1cc521f1 MM |
274 | for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) |
275 | { | |
276 | affine_iv iv; | |
277 | tree op = gimple_phi_result (psi.phi ()); | |
278 | ||
279 | if (virtual_operand_p (op)) | |
280 | continue; | |
281 | if (!simple_iv (loop, loop, op, &iv, true)) | |
282 | return false; | |
283 | /* The inductions must be regular, loop invariant step and initial | |
7ba6738b | 284 | value. */ |
1cc521f1 MM |
285 | if (!expr_invariant_in_loop_p (outer, iv.step) |
286 | || !expr_invariant_in_loop_p (outer, iv.base)) | |
287 | return false; | |
288 | /* XXX With more effort we could also be able to deal with inductions | |
7ba6738b | 289 | where the initial value is loop variant but a simple IV in the |
1cc521f1 MM |
290 | outer loop. The initial value for the second body would be |
291 | the original initial value plus iv.base.step. The next value | |
292 | for the fused loop would be the original next value of the first | |
293 | copy, _not_ the next value of the second body. */ | |
294 | } | |
295 | ||
296 | return true; | |
297 | } | |
298 | ||
299 | /* Fuse LOOP with all further neighbors. The loops are expected to | |
300 | be in appropriate form. */ | |
301 | ||
302 | static void | |
99b1c316 | 303 | fuse_loops (class loop *loop) |
1cc521f1 | 304 | { |
99b1c316 | 305 | class loop *next = loop->next; |
1cc521f1 MM |
306 | |
307 | while (next) | |
308 | { | |
309 | edge e; | |
310 | ||
311 | remove_branch (single_pred_edge (loop->latch)); | |
312 | /* Make delete_basic_block not fiddle with the loop structure. */ | |
313 | basic_block oldlatch = loop->latch; | |
314 | loop->latch = NULL; | |
315 | delete_basic_block (oldlatch); | |
316 | e = redirect_edge_and_branch (loop_latch_edge (next), | |
317 | loop->header); | |
318 | loop->latch = e->src; | |
319 | flush_pending_stmts (e); | |
320 | ||
321 | gcc_assert (EDGE_COUNT (next->header->preds) == 1); | |
322 | ||
323 | /* The PHI nodes of the second body (single-argument now) | |
7ba6738b | 324 | need adjustments to use the right values: either directly |
1cc521f1 MM |
325 | the value of the corresponding PHI in the first copy or |
326 | the one leaving the first body which unrolling did for us. | |
327 | ||
328 | See also unroll_jam_possible_p() for further possibilities. */ | |
329 | gphi_iterator psi_first, psi_second; | |
330 | e = single_pred_edge (next->header); | |
331 | for (psi_first = gsi_start_phis (loop->header), | |
332 | psi_second = gsi_start_phis (next->header); | |
333 | !gsi_end_p (psi_first); | |
334 | gsi_next (&psi_first), gsi_next (&psi_second)) | |
335 | { | |
336 | gphi *phi_first = psi_first.phi (); | |
337 | gphi *phi_second = psi_second.phi (); | |
338 | tree firstop = gimple_phi_result (phi_first); | |
339 | /* The virtual operand is correct already as it's | |
340 | always live at exit, hence has a LCSSA node and outer | |
341 | loop unrolling updated SSA form. */ | |
342 | if (virtual_operand_p (firstop)) | |
343 | continue; | |
344 | ||
345 | /* Due to unroll_jam_possible_p() we know that this is | |
346 | an induction. The second body goes over the same | |
347 | iteration space. */ | |
348 | add_phi_arg (phi_second, firstop, e, | |
349 | gimple_location (phi_first)); | |
350 | } | |
351 | gcc_assert (gsi_end_p (psi_second)); | |
352 | ||
353 | merge_loop_tree (loop, next); | |
354 | gcc_assert (!next->num_nodes); | |
99b1c316 | 355 | class loop *ln = next->next; |
1cc521f1 MM |
356 | delete_loop (next); |
357 | next = ln; | |
358 | } | |
359 | rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); | |
360 | } | |
361 | ||
92781ff1 MM |
362 | /* Return true if any of the access functions for dataref A |
363 | isn't invariant with respect to loop LOOP_NEST. */ | |
364 | static bool | |
365 | any_access_function_variant_p (const struct data_reference *a, | |
366 | const class loop *loop_nest) | |
367 | { | |
368 | unsigned int i; | |
369 | vec<tree> fns = DR_ACCESS_FNS (a); | |
370 | tree t; | |
371 | ||
372 | FOR_EACH_VEC_ELT (fns, i, t) | |
373 | if (!evolution_function_is_invariant_p (t, loop_nest->num)) | |
374 | return true; | |
375 | ||
376 | return false; | |
377 | } | |
378 | ||
1cc521f1 MM |
379 | /* Returns true if the distance in DDR can be determined and adjusts |
380 | the unroll factor in *UNROLL to make unrolling valid for that distance. | |
92781ff1 | 381 | Otherwise return false. DDR is with respect to the outer loop of INNER. |
1cc521f1 MM |
382 | |
383 | If this data dep can lead to a removed memory reference, increment | |
384 | *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor | |
385 | for this to happen. */ | |
386 | ||
387 | static bool | |
92781ff1 | 388 | adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr, |
1cc521f1 MM |
389 | unsigned *unroll, unsigned *profit_unroll, |
390 | unsigned *removed) | |
391 | { | |
392 | bool ret = false; | |
393 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known) | |
394 | { | |
395 | if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
396 | return false; | |
397 | unsigned i; | |
398 | lambda_vector dist_v; | |
399 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) | |
400 | { | |
401 | /* A distance (a,b) is at worst transformed into (a/N,b) by the | |
402 | unrolling (factor N), so the transformation is valid if | |
403 | a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll | |
404 | factor needs to be limited so that the first condition holds. | |
405 | That may limit the factor down to zero in the worst case. */ | |
406 | int dist = dist_v[0]; | |
407 | if (dist < 0) | |
408 | gcc_unreachable (); | |
409 | else if ((unsigned)dist >= *unroll) | |
410 | ; | |
92781ff1 MM |
411 | else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)) |
412 | { | |
413 | /* We have (a,0) with a < N, so this will be transformed into | |
414 | (0,0) after unrolling by N. This might potentially be a | |
415 | problem, if it's not a read-read dependency. */ | |
416 | if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))) | |
417 | ; | |
418 | else | |
419 | { | |
420 | /* So, at least one is a write, and we might reduce the | |
421 | distance vector to (0,0). This is still no problem | |
422 | if both data-refs are affine with respect to the inner | |
423 | loops. But if one of them is invariant with respect | |
424 | to an inner loop our reordering implicit in loop fusion | |
425 | corrupts the program, as our data dependences don't | |
426 | capture this. E.g. for: | |
427 | for (0 <= i < n) | |
428 | for (0 <= j < m) | |
429 | a[i][0] = a[i+1][0] + 2; // (1) | |
430 | b[i][j] = b[i+1][j] + 2; // (2) | |
431 | the distance vector for both statements is (-1,0), | |
432 | but exchanging the order for (2) is okay, while | |
433 | for (1) it is not. To see this, write out the original | |
434 | accesses (assume m is 2): | |
435 | a i j original | |
436 | 0 0 0 r a[1][0] b[1][0] | |
437 | 1 0 0 w a[0][0] b[0][0] | |
438 | 2 0 1 r a[1][0] b[1][1] | |
439 | 3 0 1 w a[0][0] b[0][1] | |
440 | 4 1 0 r a[2][0] b[2][0] | |
441 | 5 1 0 w a[1][0] b[1][0] | |
442 | after unroll-by-2 and fusion the accesses are done in | |
443 | this order (from column a): 0,1, 4,5, 2,3, i.e. this: | |
444 | a i j transformed | |
445 | 0 0 0 r a[1][0] b[1][0] | |
446 | 1 0 0 w a[0][0] b[0][0] | |
447 | 4 1 0 r a[2][0] b[2][0] | |
448 | 5 1 0 w a[1][0] b[1][0] | |
449 | 2 0 1 r a[1][0] b[1][1] | |
450 | 3 0 1 w a[0][0] b[0][1] | |
451 | Note how access 2 accesses the same element as access 5 | |
452 | for array 'a' but not for array 'b'. */ | |
453 | if (any_access_function_variant_p (DDR_A (ddr), inner) | |
454 | && any_access_function_variant_p (DDR_B (ddr), inner)) | |
455 | ; | |
456 | else | |
457 | /* And if any dataref of this pair is invariant with | |
458 | respect to the inner loop, we have no chance than | |
459 | to reduce the unroll factor. */ | |
460 | *unroll = dist; | |
461 | } | |
462 | } | |
463 | else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)) | |
1cc521f1 MM |
464 | ; |
465 | else | |
466 | *unroll = dist; | |
467 | ||
468 | /* With a distance (a,0) it's always profitable to unroll-and-jam | |
469 | (by a+1), because one memory reference will go away. With | |
470 | (a,b) and b != 0 that's less clear. We will increase the | |
471 | number of streams without lowering the number of mem refs. | |
472 | So for now only handle the first situation. */ | |
473 | if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)) | |
474 | { | |
475 | *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1); | |
476 | (*removed)++; | |
477 | } | |
478 | ||
479 | ret = true; | |
480 | } | |
481 | } | |
482 | return ret; | |
483 | } | |
484 | ||
485 | /* Main entry point for the unroll-and-jam transformation | |
486 | described above. */ | |
487 | ||
488 | static unsigned int | |
489 | tree_loop_unroll_and_jam (void) | |
490 | { | |
99b1c316 | 491 | class loop *loop; |
1cc521f1 MM |
492 | bool changed = false; |
493 | ||
494 | gcc_assert (scev_initialized_p ()); | |
495 | ||
496 | /* Go through all innermost loops. */ | |
497 | FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) | |
498 | { | |
99b1c316 | 499 | class loop *outer = loop_outer (loop); |
1cc521f1 MM |
500 | |
501 | if (loop_depth (loop) < 2 | |
502 | || optimize_loop_nest_for_size_p (outer)) | |
503 | continue; | |
504 | ||
505 | if (!unroll_jam_possible_p (outer, loop)) | |
506 | continue; | |
507 | ||
508 | vec<data_reference_p> datarefs; | |
509 | vec<ddr_p> dependences; | |
510 | unsigned unroll_factor, profit_unroll, removed; | |
99b1c316 | 511 | class tree_niter_desc desc; |
1cc521f1 MM |
512 | bool unroll = false; |
513 | ||
514 | auto_vec<loop_p, 3> loop_nest; | |
515 | dependences.create (10); | |
516 | datarefs.create (10); | |
517 | if (!compute_data_dependences_for_loop (outer, true, &loop_nest, | |
7ba6738b | 518 | &datarefs, &dependences)) |
1cc521f1 MM |
519 | { |
520 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
521 | fprintf (dump_file, "Cannot analyze data dependencies\n"); | |
522 | free_data_refs (datarefs); | |
523 | free_dependence_relations (dependences); | |
7ba6738b | 524 | continue; |
1cc521f1 MM |
525 | } |
526 | if (!datarefs.length ()) | |
527 | continue; | |
528 | ||
529 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
530 | dump_data_dependence_relations (dump_file, dependences); | |
531 | ||
532 | unroll_factor = (unsigned)-1; | |
533 | profit_unroll = 1; | |
534 | removed = 0; | |
535 | ||
536 | /* Check all dependencies. */ | |
537 | unsigned i; | |
538 | struct data_dependence_relation *ddr; | |
539 | FOR_EACH_VEC_ELT (dependences, i, ddr) | |
540 | { | |
541 | struct data_reference *dra, *drb; | |
542 | ||
543 | /* If the refs are independend there's nothing to do. */ | |
544 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
545 | continue; | |
546 | dra = DDR_A (ddr); | |
547 | drb = DDR_B (ddr); | |
548 | /* Nothing interesting for the self dependencies. */ | |
549 | if (dra == drb) | |
550 | continue; | |
551 | ||
552 | /* Now check the distance vector, for determining a sensible | |
553 | outer unroll factor, and for validity of merging the inner | |
554 | loop copies. */ | |
92781ff1 | 555 | if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll, |
1cc521f1 MM |
556 | &removed)) |
557 | { | |
558 | /* Couldn't get the distance vector. For two reads that's | |
7ba6738b | 559 | harmless (we assume we should unroll). For at least |
1cc521f1 MM |
560 | one write this means we can't check the dependence direction |
561 | and hence can't determine safety. */ | |
562 | ||
563 | if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb)) | |
564 | { | |
565 | unroll_factor = 0; | |
566 | break; | |
567 | } | |
568 | } | |
569 | } | |
570 | ||
571 | /* We regard a user-specified minimum percentage of zero as a request | |
7ba6738b | 572 | to ignore all profitability concerns and apply the transformation |
1cc521f1 | 573 | always. */ |
028d4092 | 574 | if (!param_unroll_jam_min_percent) |
92781ff1 | 575 | profit_unroll = MAX(2, profit_unroll); |
1cc521f1 | 576 | else if (removed * 100 / datarefs.length () |
028d4092 | 577 | < (unsigned)param_unroll_jam_min_percent) |
1cc521f1 MM |
578 | profit_unroll = 1; |
579 | if (unroll_factor > profit_unroll) | |
580 | unroll_factor = profit_unroll; | |
028d4092 ML |
581 | if (unroll_factor > (unsigned)param_unroll_jam_max_unroll) |
582 | unroll_factor = param_unroll_jam_max_unroll; | |
1cc521f1 MM |
583 | unroll = (unroll_factor > 1 |
584 | && can_unroll_loop_p (outer, unroll_factor, &desc)); | |
585 | ||
586 | if (unroll) | |
587 | { | |
588 | if (dump_enabled_p ()) | |
589 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, | |
590 | find_loop_location (outer), | |
591 | "applying unroll and jam with factor %d\n", | |
592 | unroll_factor); | |
593 | initialize_original_copy_tables (); | |
594 | tree_unroll_loop (outer, unroll_factor, single_dom_exit (outer), | |
595 | &desc); | |
596 | free_original_copy_tables (); | |
597 | fuse_loops (outer->inner); | |
598 | changed = true; | |
599 | } | |
600 | ||
601 | loop_nest.release (); | |
602 | free_dependence_relations (dependences); | |
603 | free_data_refs (datarefs); | |
604 | } | |
605 | ||
606 | if (changed) | |
607 | { | |
608 | scev_reset (); | |
609 | free_dominance_info (CDI_DOMINATORS); | |
610 | return TODO_cleanup_cfg; | |
611 | } | |
612 | return 0; | |
613 | } | |
614 | ||
615 | /* Pass boilerplate */ | |
616 | ||
617 | namespace { | |
618 | ||
619 | const pass_data pass_data_loop_jam = | |
620 | { | |
621 | GIMPLE_PASS, /* type */ | |
622 | "unrolljam", /* name */ | |
623 | OPTGROUP_LOOP, /* optinfo_flags */ | |
624 | TV_LOOP_JAM, /* tv_id */ | |
625 | PROP_cfg, /* properties_required */ | |
626 | 0, /* properties_provided */ | |
627 | 0, /* properties_destroyed */ | |
628 | 0, /* todo_flags_start */ | |
629 | 0, /* todo_flags_finish */ | |
630 | }; | |
631 | ||
632 | class pass_loop_jam : public gimple_opt_pass | |
633 | { | |
634 | public: | |
635 | pass_loop_jam (gcc::context *ctxt) | |
636 | : gimple_opt_pass (pass_data_loop_jam, ctxt) | |
637 | {} | |
638 | ||
639 | /* opt_pass methods: */ | |
640 | virtual bool gate (function *) { return flag_unroll_jam != 0; } | |
641 | virtual unsigned int execute (function *); | |
642 | ||
643 | }; | |
644 | ||
645 | unsigned int | |
646 | pass_loop_jam::execute (function *fun) | |
647 | { | |
648 | if (number_of_loops (fun) <= 1) | |
649 | return 0; | |
650 | ||
651 | return tree_loop_unroll_and_jam (); | |
652 | } | |
653 | ||
654 | } | |
655 | ||
656 | gimple_opt_pass * | |
657 | make_pass_loop_jam (gcc::context *ctxt) | |
658 | { | |
659 | return new pass_loop_jam (ctxt); | |
660 | } |