]>
Commit | Line | Data |
---|---|---|
dea61d92 | 1 | /* Loop distribution. |
d1e082c2 | 2 | Copyright (C) 2006-2013 Free Software Foundation, Inc. |
dea61d92 SP |
3 | Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
4 | and Sebastian Pop <sebastian.pop@amd.com>. | |
5 | ||
6 | This file is part of GCC. | |
b8698a0f | 7 | |
dea61d92 SP |
8 | GCC is free software; you can redistribute it and/or modify it |
9 | under the terms of the GNU General Public License as published by the | |
10 | Free Software Foundation; either version 3, or (at your option) any | |
11 | later version. | |
b8698a0f | 12 | |
dea61d92 SP |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
b8698a0f | 17 | |
dea61d92 SP |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* This pass performs loop distribution: for example, the loop | |
23 | ||
24 | |DO I = 2, N | |
25 | | A(I) = B(I) + C | |
26 | | D(I) = A(I-1)*E | |
27 | |ENDDO | |
28 | ||
b8698a0f | 29 | is transformed to |
dea61d92 SP |
30 | |
31 | |DOALL I = 2, N | |
32 | | A(I) = B(I) + C | |
33 | |ENDDO | |
34 | | | |
35 | |DOALL I = 2, N | |
36 | | D(I) = A(I-1)*E | |
37 | |ENDDO | |
38 | ||
39 | This pass uses an RDG, Reduced Dependence Graph built on top of the | |
40 | data dependence relations. The RDG is then topologically sorted to | |
41 | obtain a map of information producers/consumers based on which it | |
42 | generates the new loops. */ | |
43 | ||
44 | #include "config.h" | |
45 | #include "system.h" | |
46 | #include "coretypes.h" | |
dea61d92 | 47 | #include "tree-flow.h" |
dea61d92 | 48 | #include "cfgloop.h" |
dea61d92 SP |
49 | #include "tree-chrec.h" |
50 | #include "tree-data-ref.h" | |
51 | #include "tree-scalar-evolution.h" | |
52 | #include "tree-pass.h" | |
dea61d92 | 53 | |
b9fc0497 RB |
54 | enum partition_kind { |
55 | PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY | |
56 | }; | |
30d55936 | 57 | |
c61f8985 RG |
58 | typedef struct partition_s |
59 | { | |
60 | bitmap stmts; | |
7ad672e4 | 61 | bool has_writes; |
30d55936 | 62 | enum partition_kind kind; |
d0582dc1 RG |
63 | /* data-references a kind != PKIND_NORMAL partition is about. */ |
64 | data_reference_p main_dr; | |
65 | data_reference_p secondary_dr; | |
c61f8985 RG |
66 | } *partition_t; |
67 | ||
c61f8985 RG |
68 | |
69 | /* Allocate and initialize a partition from BITMAP. */ | |
70 | ||
71 | static partition_t | |
72 | partition_alloc (bitmap stmts) | |
73 | { | |
74 | partition_t partition = XCNEW (struct partition_s); | |
75 | partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL); | |
7ad672e4 | 76 | partition->has_writes = false; |
30d55936 | 77 | partition->kind = PKIND_NORMAL; |
c61f8985 RG |
78 | return partition; |
79 | } | |
80 | ||
81 | /* Free PARTITION. */ | |
82 | ||
83 | static void | |
84 | partition_free (partition_t partition) | |
85 | { | |
86 | BITMAP_FREE (partition->stmts); | |
87 | free (partition); | |
88 | } | |
89 | ||
30d55936 RG |
90 | /* Returns true if the partition can be generated as a builtin. */ |
91 | ||
92 | static bool | |
93 | partition_builtin_p (partition_t partition) | |
94 | { | |
b9fc0497 | 95 | return partition->kind > PKIND_REDUCTION; |
30d55936 | 96 | } |
c61f8985 | 97 | |
7ad672e4 RG |
98 | /* Returns true if the partition has an writes. */ |
99 | ||
100 | static bool | |
101 | partition_has_writes (partition_t partition) | |
102 | { | |
103 | return partition->has_writes; | |
104 | } | |
105 | ||
dea61d92 SP |
106 | /* If bit I is not set, it means that this node represents an |
107 | operation that has already been performed, and that should not be | |
108 | performed again. This is the subgraph of remaining important | |
109 | computations that is passed to the DFS algorithm for avoiding to | |
110 | include several times the same stores in different loops. */ | |
111 | static bitmap remaining_stmts; | |
112 | ||
113 | /* A node of the RDG is marked in this bitmap when it has as a | |
114 | predecessor a node that writes to memory. */ | |
115 | static bitmap upstream_mem_writes; | |
116 | ||
c07a8cb3 RG |
117 | /* Returns true when DEF is an SSA_NAME defined in LOOP and used after |
118 | the LOOP. */ | |
119 | ||
120 | static bool | |
121 | ssa_name_has_uses_outside_loop_p (tree def, loop_p loop) | |
122 | { | |
123 | imm_use_iterator imm_iter; | |
124 | use_operand_p use_p; | |
125 | ||
126 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) | |
e665269a RG |
127 | { |
128 | gimple use_stmt = USE_STMT (use_p); | |
129 | if (!is_gimple_debug (use_stmt) | |
130 | && loop != loop_containing_stmt (use_stmt)) | |
131 | return true; | |
132 | } | |
c07a8cb3 RG |
133 | |
134 | return false; | |
135 | } | |
136 | ||
137 | /* Returns true when STMT defines a scalar variable used after the | |
88af7c1a | 138 | loop LOOP. */ |
c07a8cb3 RG |
139 | |
140 | static bool | |
88af7c1a | 141 | stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt) |
c07a8cb3 | 142 | { |
88af7c1a RG |
143 | def_operand_p def_p; |
144 | ssa_op_iter op_iter; | |
c07a8cb3 | 145 | |
9ca86fc3 RG |
146 | if (gimple_code (stmt) == GIMPLE_PHI) |
147 | return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop); | |
148 | ||
88af7c1a RG |
149 | FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
150 | if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop)) | |
151 | return true; | |
c07a8cb3 | 152 | |
88af7c1a | 153 | return false; |
c07a8cb3 RG |
154 | } |
155 | ||
dea61d92 SP |
156 | /* Return a copy of LOOP placed before LOOP. */ |
157 | ||
158 | static struct loop * | |
159 | copy_loop_before (struct loop *loop) | |
160 | { | |
161 | struct loop *res; | |
162 | edge preheader = loop_preheader_edge (loop); | |
163 | ||
dea61d92 SP |
164 | initialize_original_copy_tables (); |
165 | res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader); | |
30d55936 | 166 | gcc_assert (res != NULL); |
dea61d92 | 167 | free_original_copy_tables (); |
2cfc56b9 | 168 | delete_update_ssa (); |
dea61d92 SP |
169 | |
170 | return res; | |
171 | } | |
172 | ||
173 | /* Creates an empty basic block after LOOP. */ | |
174 | ||
175 | static void | |
176 | create_bb_after_loop (struct loop *loop) | |
177 | { | |
178 | edge exit = single_exit (loop); | |
179 | ||
180 | if (!exit) | |
181 | return; | |
182 | ||
183 | split_edge (exit); | |
184 | } | |
185 | ||
186 | /* Generate code for PARTITION from the code in LOOP. The loop is | |
187 | copied when COPY_P is true. All the statements not flagged in the | |
188 | PARTITION bitmap are removed from the loop or from its copy. The | |
189 | statements are indexed in sequence inside a basic block, and the | |
30d55936 | 190 | basic blocks of a loop are taken in dom order. */ |
dea61d92 | 191 | |
30d55936 | 192 | static void |
c61f8985 RG |
193 | generate_loops_for_partition (struct loop *loop, partition_t partition, |
194 | bool copy_p) | |
dea61d92 SP |
195 | { |
196 | unsigned i, x; | |
726a989a | 197 | gimple_stmt_iterator bsi; |
dea61d92 SP |
198 | basic_block *bbs; |
199 | ||
200 | if (copy_p) | |
201 | { | |
202 | loop = copy_loop_before (loop); | |
30d55936 | 203 | gcc_assert (loop != NULL); |
dea61d92 SP |
204 | create_preheader (loop, CP_SIMPLE_PREHEADERS); |
205 | create_bb_after_loop (loop); | |
206 | } | |
207 | ||
dea61d92 SP |
208 | /* Remove stmts not in the PARTITION bitmap. The order in which we |
209 | visit the phi nodes and the statements is exactly as in | |
210 | stmts_from_loop. */ | |
211 | bbs = get_loop_body_in_dom_order (loop); | |
212 | ||
b03c3082 JJ |
213 | if (MAY_HAVE_DEBUG_STMTS) |
214 | for (x = 0, i = 0; i < loop->num_nodes; i++) | |
215 | { | |
216 | basic_block bb = bbs[i]; | |
217 | ||
218 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
c61f8985 | 219 | if (!bitmap_bit_p (partition->stmts, x++)) |
b03c3082 JJ |
220 | reset_debug_uses (gsi_stmt (bsi)); |
221 | ||
222 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
223 | { | |
224 | gimple stmt = gsi_stmt (bsi); | |
225 | if (gimple_code (stmt) != GIMPLE_LABEL | |
226 | && !is_gimple_debug (stmt) | |
c61f8985 | 227 | && !bitmap_bit_p (partition->stmts, x++)) |
b03c3082 JJ |
228 | reset_debug_uses (stmt); |
229 | } | |
230 | } | |
231 | ||
dea61d92 SP |
232 | for (x = 0, i = 0; i < loop->num_nodes; i++) |
233 | { | |
234 | basic_block bb = bbs[i]; | |
dea61d92 | 235 | |
726a989a | 236 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
c61f8985 | 237 | if (!bitmap_bit_p (partition->stmts, x++)) |
2706a615 RG |
238 | { |
239 | gimple phi = gsi_stmt (bsi); | |
ea057359 | 240 | if (virtual_operand_p (gimple_phi_result (phi))) |
2706a615 RG |
241 | mark_virtual_phi_result_for_renaming (phi); |
242 | remove_phi_node (&bsi, true); | |
243 | } | |
dea61d92 | 244 | else |
726a989a | 245 | gsi_next (&bsi); |
dea61d92 | 246 | |
726a989a | 247 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
2706a615 RG |
248 | { |
249 | gimple stmt = gsi_stmt (bsi); | |
b03c3082 JJ |
250 | if (gimple_code (stmt) != GIMPLE_LABEL |
251 | && !is_gimple_debug (stmt) | |
c61f8985 | 252 | && !bitmap_bit_p (partition->stmts, x++)) |
2706a615 RG |
253 | { |
254 | unlink_stmt_vdef (stmt); | |
255 | gsi_remove (&bsi, true); | |
256 | release_defs (stmt); | |
257 | } | |
258 | else | |
259 | gsi_next (&bsi); | |
260 | } | |
dea61d92 SP |
261 | } |
262 | ||
263 | free (bbs); | |
dea61d92 SP |
264 | } |
265 | ||
d0582dc1 | 266 | /* Build the size argument for a memory operation call. */ |
3661e899 | 267 | |
d0582dc1 RG |
268 | static tree |
269 | build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter) | |
3661e899 | 270 | { |
d0582dc1 RG |
271 | tree size; |
272 | size = fold_build2_loc (loc, MULT_EXPR, sizetype, | |
273 | fold_convert_loc (loc, sizetype, nb_iter), | |
274 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
275 | return fold_convert_loc (loc, size_type_node, size); | |
276 | } | |
277 | ||
278 | /* Build an address argument for a memory operation call. */ | |
279 | ||
280 | static tree | |
281 | build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) | |
282 | { | |
283 | tree addr_base; | |
284 | ||
285 | addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); | |
286 | addr_base = fold_convert_loc (loc, sizetype, addr_base); | |
287 | ||
288 | /* Test for a negative stride, iterating over every element. */ | |
289 | if (tree_int_cst_sgn (DR_STEP (dr)) == -1) | |
290 | { | |
291 | addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, | |
292 | fold_convert_loc (loc, sizetype, nb_bytes)); | |
293 | addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, | |
294 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
295 | } | |
296 | ||
297 | return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); | |
3661e899 TB |
298 | } |
299 | ||
401f3a81 JJ |
300 | /* If VAL memory representation contains the same value in all bytes, |
301 | return that value, otherwise return -1. | |
302 | E.g. for 0x24242424 return 0x24, for IEEE double | |
303 | 747708026454360457216.0 return 0x44, etc. */ | |
304 | ||
305 | static int | |
306 | const_with_all_bytes_same (tree val) | |
307 | { | |
308 | unsigned char buf[64]; | |
309 | int i, len; | |
310 | ||
311 | if (integer_zerop (val) | |
312 | || real_zerop (val) | |
313 | || (TREE_CODE (val) == CONSTRUCTOR | |
314 | && !TREE_CLOBBER_P (val) | |
315 | && CONSTRUCTOR_NELTS (val) == 0)) | |
316 | return 0; | |
317 | ||
318 | if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) | |
319 | return -1; | |
320 | ||
321 | len = native_encode_expr (val, buf, sizeof (buf)); | |
322 | if (len == 0) | |
323 | return -1; | |
324 | for (i = 1; i < len; i++) | |
325 | if (buf[i] != buf[0]) | |
326 | return -1; | |
327 | return buf[0]; | |
328 | } | |
329 | ||
30d55936 | 330 | /* Generate a call to memset for PARTITION in LOOP. */ |
dea61d92 | 331 | |
cfee318d | 332 | static void |
d0582dc1 | 333 | generate_memset_builtin (struct loop *loop, partition_t partition) |
dea61d92 | 334 | { |
30d55936 RG |
335 | gimple_stmt_iterator gsi; |
336 | gimple stmt, fn_call; | |
d0582dc1 | 337 | tree nb_iter, mem, fn, nb_bytes; |
30d55936 | 338 | location_t loc; |
b6dd5261 | 339 | tree val; |
30d55936 | 340 | |
d0582dc1 | 341 | stmt = DR_STMT (partition->main_dr); |
30d55936 | 342 | loc = gimple_location (stmt); |
30d55936 RG |
343 | if (gimple_bb (stmt) == loop->latch) |
344 | nb_iter = number_of_latch_executions (loop); | |
345 | else | |
346 | nb_iter = number_of_exit_cond_executions (loop); | |
347 | ||
348 | /* The new statements will be placed before LOOP. */ | |
349 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
dea61d92 | 350 | |
d0582dc1 RG |
351 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); |
352 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | |
353 | false, GSI_CONTINUE_LINKING); | |
354 | mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
355 | mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, | |
356 | false, GSI_CONTINUE_LINKING); | |
dea61d92 | 357 | |
b6dd5261 RG |
358 | /* This exactly matches the pattern recognition in classify_partition. */ |
359 | val = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
360 | /* Handle constants like 0x15151515 and similarly |
361 | floating point constants etc. where all bytes are the same. */ | |
362 | int bytev = const_with_all_bytes_same (val); | |
363 | if (bytev != -1) | |
364 | val = build_int_cst (integer_type_node, bytev); | |
365 | else if (TREE_CODE (val) == INTEGER_CST) | |
366 | val = fold_convert (integer_type_node, val); | |
367 | else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) | |
b6dd5261 | 368 | { |
401f3a81 JJ |
369 | gimple cstmt; |
370 | tree tem = make_ssa_name (integer_type_node, NULL); | |
371 | cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); | |
372 | gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); | |
373 | val = tem; | |
b6dd5261 RG |
374 | } |
375 | ||
e79983f4 | 376 | fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); |
b6dd5261 | 377 | fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); |
d0582dc1 | 378 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
dea61d92 SP |
379 | |
380 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b6dd5261 RG |
381 | { |
382 | fprintf (dump_file, "generated memset"); | |
401f3a81 | 383 | if (bytev == 0) |
b6dd5261 | 384 | fprintf (dump_file, " zero\n"); |
b6dd5261 RG |
385 | else |
386 | fprintf (dump_file, "\n"); | |
387 | } | |
dea61d92 SP |
388 | } |
389 | ||
d0582dc1 RG |
390 | /* Generate a call to memcpy for PARTITION in LOOP. */ |
391 | ||
392 | static void | |
393 | generate_memcpy_builtin (struct loop *loop, partition_t partition) | |
394 | { | |
395 | gimple_stmt_iterator gsi; | |
396 | gimple stmt, fn_call; | |
397 | tree nb_iter, dest, src, fn, nb_bytes; | |
398 | location_t loc; | |
399 | enum built_in_function kind; | |
400 | ||
401 | stmt = DR_STMT (partition->main_dr); | |
402 | loc = gimple_location (stmt); | |
403 | if (gimple_bb (stmt) == loop->latch) | |
404 | nb_iter = number_of_latch_executions (loop); | |
405 | else | |
406 | nb_iter = number_of_exit_cond_executions (loop); | |
407 | ||
408 | /* The new statements will be placed before LOOP. */ | |
409 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
410 | ||
411 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); | |
412 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | |
413 | false, GSI_CONTINUE_LINKING); | |
414 | dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
415 | src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); | |
416 | if (ptr_derefs_may_alias_p (dest, src)) | |
417 | kind = BUILT_IN_MEMMOVE; | |
418 | else | |
419 | kind = BUILT_IN_MEMCPY; | |
420 | ||
421 | dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, | |
422 | false, GSI_CONTINUE_LINKING); | |
423 | src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, | |
424 | false, GSI_CONTINUE_LINKING); | |
425 | fn = build_fold_addr_expr (builtin_decl_implicit (kind)); | |
426 | fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); | |
427 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | |
428 | ||
429 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
430 | { | |
431 | if (kind == BUILT_IN_MEMCPY) | |
432 | fprintf (dump_file, "generated memcpy\n"); | |
433 | else | |
434 | fprintf (dump_file, "generated memmove\n"); | |
435 | } | |
436 | } | |
437 | ||
30d55936 | 438 | /* Remove and destroy the loop LOOP. */ |
dea61d92 | 439 | |
30d55936 RG |
440 | static void |
441 | destroy_loop (struct loop *loop) | |
dea61d92 | 442 | { |
30d55936 RG |
443 | unsigned nbbs = loop->num_nodes; |
444 | edge exit = single_exit (loop); | |
445 | basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; | |
dea61d92 | 446 | basic_block *bbs; |
30d55936 | 447 | unsigned i; |
dea61d92 SP |
448 | |
449 | bbs = get_loop_body_in_dom_order (loop); | |
450 | ||
30d55936 RG |
451 | redirect_edge_pred (exit, src); |
452 | exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); | |
453 | exit->flags |= EDGE_FALLTHRU; | |
454 | cancel_loop_tree (loop); | |
455 | rescan_loop_exit (exit, false, true); | |
dea61d92 | 456 | |
30d55936 | 457 | for (i = 0; i < nbbs; i++) |
c014f6f5 RG |
458 | { |
459 | /* We have made sure to not leave any dangling uses of SSA | |
460 | names defined in the loop. With the exception of virtuals. | |
461 | Make sure we replace all uses of virtual defs that will remain | |
462 | outside of the loop with the bare symbol as delete_basic_block | |
463 | will release them. */ | |
464 | gimple_stmt_iterator gsi; | |
465 | for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
466 | { | |
467 | gimple phi = gsi_stmt (gsi); | |
ea057359 | 468 | if (virtual_operand_p (gimple_phi_result (phi))) |
c014f6f5 RG |
469 | mark_virtual_phi_result_for_renaming (phi); |
470 | } | |
471 | for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
472 | { | |
473 | gimple stmt = gsi_stmt (gsi); | |
474 | tree vdef = gimple_vdef (stmt); | |
475 | if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
476 | mark_virtual_operand_for_renaming (vdef); | |
477 | } | |
478 | delete_basic_block (bbs[i]); | |
479 | } | |
dea61d92 | 480 | free (bbs); |
30d55936 RG |
481 | |
482 | set_immediate_dominator (CDI_DOMINATORS, dest, | |
483 | recompute_dominator (CDI_DOMINATORS, dest)); | |
dea61d92 SP |
484 | } |
485 | ||
30d55936 | 486 | /* Generates code for PARTITION. */ |
dea61d92 | 487 | |
30d55936 | 488 | static void |
d0582dc1 | 489 | generate_code_for_partition (struct loop *loop, |
be6b029b | 490 | partition_t partition, bool copy_p) |
dea61d92 | 491 | { |
30d55936 RG |
492 | switch (partition->kind) |
493 | { | |
494 | case PKIND_MEMSET: | |
d0582dc1 RG |
495 | generate_memset_builtin (loop, partition); |
496 | /* If this is the last partition for which we generate code, we have | |
497 | to destroy the loop. */ | |
498 | if (!copy_p) | |
499 | destroy_loop (loop); | |
500 | break; | |
501 | ||
502 | case PKIND_MEMCPY: | |
503 | generate_memcpy_builtin (loop, partition); | |
30d55936 RG |
504 | /* If this is the last partition for which we generate code, we have |
505 | to destroy the loop. */ | |
506 | if (!copy_p) | |
507 | destroy_loop (loop); | |
508 | break; | |
509 | ||
b9fc0497 RB |
510 | case PKIND_REDUCTION: |
511 | /* Reductions all have to be in the last partition. */ | |
512 | gcc_assert (!copy_p); | |
30d55936 RG |
513 | case PKIND_NORMAL: |
514 | generate_loops_for_partition (loop, partition, copy_p); | |
515 | break; | |
516 | ||
517 | default: | |
518 | gcc_unreachable (); | |
519 | } | |
dea61d92 SP |
520 | } |
521 | ||
522 | ||
523 | /* Returns true if the node V of RDG cannot be recomputed. */ | |
524 | ||
525 | static bool | |
526 | rdg_cannot_recompute_vertex_p (struct graph *rdg, int v) | |
527 | { | |
528 | if (RDG_MEM_WRITE_STMT (rdg, v)) | |
529 | return true; | |
530 | ||
531 | return false; | |
532 | } | |
533 | ||
534 | /* Returns true when the vertex V has already been generated in the | |
535 | current partition (V is in PROCESSED), or when V belongs to another | |
536 | partition and cannot be recomputed (V is not in REMAINING_STMTS). */ | |
537 | ||
538 | static inline bool | |
539 | already_processed_vertex_p (bitmap processed, int v) | |
540 | { | |
541 | return (bitmap_bit_p (processed, v) | |
542 | || !bitmap_bit_p (remaining_stmts, v)); | |
543 | } | |
544 | ||
545 | /* Returns NULL when there is no anti-dependence among the successors | |
546 | of vertex V, otherwise returns the edge with the anti-dep. */ | |
547 | ||
548 | static struct graph_edge * | |
549 | has_anti_dependence (struct vertex *v) | |
550 | { | |
551 | struct graph_edge *e; | |
552 | ||
553 | if (v->succ) | |
554 | for (e = v->succ; e; e = e->succ_next) | |
555 | if (RDGE_TYPE (e) == anti_dd) | |
556 | return e; | |
557 | ||
558 | return NULL; | |
559 | } | |
560 | ||
561 | /* Returns true when V has an anti-dependence edge among its successors. */ | |
562 | ||
563 | static bool | |
564 | predecessor_has_mem_write (struct graph *rdg, struct vertex *v) | |
565 | { | |
566 | struct graph_edge *e; | |
567 | ||
568 | if (v->pred) | |
569 | for (e = v->pred; e; e = e->pred_next) | |
570 | if (bitmap_bit_p (upstream_mem_writes, e->src) | |
571 | /* Don't consider flow channels: a write to memory followed | |
572 | by a read from memory. These channels allow the split of | |
573 | the RDG in different partitions. */ | |
574 | && !RDG_MEM_WRITE_STMT (rdg, e->src)) | |
575 | return true; | |
576 | ||
577 | return false; | |
578 | } | |
579 | ||
580 | /* Initializes the upstream_mem_writes bitmap following the | |
581 | information from RDG. */ | |
582 | ||
583 | static void | |
584 | mark_nodes_having_upstream_mem_writes (struct graph *rdg) | |
585 | { | |
586 | int v, x; | |
587 | bitmap seen = BITMAP_ALLOC (NULL); | |
588 | ||
589 | for (v = rdg->n_vertices - 1; v >= 0; v--) | |
590 | if (!bitmap_bit_p (seen, v)) | |
591 | { | |
592 | unsigned i; | |
9771b263 DN |
593 | vec<int> nodes; |
594 | nodes.create (3); | |
dea61d92 SP |
595 | |
596 | graphds_dfs (rdg, &v, 1, &nodes, false, NULL); | |
597 | ||
9771b263 | 598 | FOR_EACH_VEC_ELT (nodes, i, x) |
dea61d92 | 599 | { |
fcaa4ca4 | 600 | if (!bitmap_set_bit (seen, x)) |
dea61d92 SP |
601 | continue; |
602 | ||
dea61d92 SP |
603 | if (RDG_MEM_WRITE_STMT (rdg, x) |
604 | || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) | |
605 | /* In anti dependences the read should occur before | |
606 | the write, this is why both the read and the write | |
607 | should be placed in the same partition. */ | |
608 | || has_anti_dependence (&(rdg->vertices[x]))) | |
609 | { | |
dea61d92 SP |
610 | bitmap_set_bit (upstream_mem_writes, x); |
611 | } | |
612 | } | |
613 | ||
9771b263 | 614 | nodes.release (); |
dea61d92 SP |
615 | } |
616 | } | |
617 | ||
618 | /* Returns true when vertex u has a memory write node as a predecessor | |
619 | in RDG. */ | |
620 | ||
621 | static bool | |
622 | has_upstream_mem_writes (int u) | |
623 | { | |
624 | return bitmap_bit_p (upstream_mem_writes, u); | |
625 | } | |
626 | ||
c61f8985 | 627 | static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t, |
7ad672e4 | 628 | bitmap, bitmap); |
dea61d92 | 629 | |
dea61d92 SP |
630 | /* Flag the uses of U stopping following the information from |
631 | upstream_mem_writes. */ | |
632 | ||
633 | static void | |
c61f8985 | 634 | rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops, |
7ad672e4 | 635 | bitmap processed) |
dea61d92 | 636 | { |
dea61d92 SP |
637 | use_operand_p use_p; |
638 | struct vertex *x = &(rdg->vertices[u]); | |
726a989a | 639 | gimple stmt = RDGV_STMT (x); |
dea61d92 SP |
640 | struct graph_edge *anti_dep = has_anti_dependence (x); |
641 | ||
642 | /* Keep in the same partition the destination of an antidependence, | |
643 | because this is a store to the exact same location. Putting this | |
644 | in another partition is bad for cache locality. */ | |
645 | if (anti_dep) | |
646 | { | |
647 | int v = anti_dep->dest; | |
648 | ||
649 | if (!already_processed_vertex_p (processed, v)) | |
650 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
7ad672e4 | 651 | processed); |
dea61d92 SP |
652 | } |
653 | ||
726a989a | 654 | if (gimple_code (stmt) != GIMPLE_PHI) |
dea61d92 | 655 | { |
5006671f | 656 | if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P) |
dea61d92 SP |
657 | { |
658 | tree use = USE_FROM_PTR (use_p); | |
659 | ||
b9fc0497 RB |
660 | if (TREE_CODE (use) == SSA_NAME |
661 | && !SSA_NAME_IS_DEFAULT_DEF (use)) | |
dea61d92 | 662 | { |
726a989a | 663 | gimple def_stmt = SSA_NAME_DEF_STMT (use); |
dea61d92 SP |
664 | int v = rdg_vertex_for_stmt (rdg, def_stmt); |
665 | ||
666 | if (v >= 0 | |
667 | && !already_processed_vertex_p (processed, v)) | |
668 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
7ad672e4 | 669 | processed); |
dea61d92 SP |
670 | } |
671 | } | |
672 | } | |
673 | ||
726a989a | 674 | if (is_gimple_assign (stmt) && has_upstream_mem_writes (u)) |
dea61d92 | 675 | { |
726a989a | 676 | tree op0 = gimple_assign_lhs (stmt); |
dea61d92 SP |
677 | |
678 | /* Scalar channels don't have enough space for transmitting data | |
679 | between tasks, unless we add more storage by privatizing. */ | |
680 | if (is_gimple_reg (op0)) | |
681 | { | |
682 | use_operand_p use_p; | |
683 | imm_use_iterator iter; | |
684 | ||
685 | FOR_EACH_IMM_USE_FAST (use_p, iter, op0) | |
686 | { | |
687 | int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p)); | |
688 | ||
689 | if (!already_processed_vertex_p (processed, v)) | |
690 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
7ad672e4 | 691 | processed); |
dea61d92 SP |
692 | } |
693 | } | |
694 | } | |
695 | } | |
696 | ||
697 | /* Flag V from RDG as part of PARTITION, and also flag its loop number | |
698 | in LOOPS. */ | |
699 | ||
700 | static void | |
7ad672e4 | 701 | rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops) |
dea61d92 SP |
702 | { |
703 | struct loop *loop; | |
704 | ||
c61f8985 | 705 | if (!bitmap_set_bit (partition->stmts, v)) |
dea61d92 SP |
706 | return; |
707 | ||
708 | loop = loop_containing_stmt (RDG_STMT (rdg, v)); | |
709 | bitmap_set_bit (loops, loop->num); | |
dea61d92 SP |
710 | |
711 | if (rdg_cannot_recompute_vertex_p (rdg, v)) | |
712 | { | |
7ad672e4 | 713 | partition->has_writes = true; |
dea61d92 SP |
714 | bitmap_clear_bit (remaining_stmts, v); |
715 | } | |
716 | } | |
717 | ||
718 | /* Flag in the bitmap PARTITION the vertex V and all its predecessors. | |
fa10beec | 719 | Also flag their loop number in LOOPS. */ |
dea61d92 SP |
720 | |
721 | static void | |
c61f8985 | 722 | rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition, |
7ad672e4 | 723 | bitmap loops, bitmap processed) |
dea61d92 SP |
724 | { |
725 | unsigned i; | |
9771b263 DN |
726 | vec<int> nodes; |
727 | nodes.create (3); | |
dea61d92 SP |
728 | int x; |
729 | ||
730 | bitmap_set_bit (processed, v); | |
7ad672e4 | 731 | rdg_flag_uses (rdg, v, partition, loops, processed); |
dea61d92 | 732 | graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); |
7ad672e4 | 733 | rdg_flag_vertex (rdg, v, partition, loops); |
dea61d92 | 734 | |
9771b263 | 735 | FOR_EACH_VEC_ELT (nodes, i, x) |
dea61d92 | 736 | if (!already_processed_vertex_p (processed, x)) |
7ad672e4 | 737 | rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed); |
dea61d92 | 738 | |
9771b263 | 739 | nodes.release (); |
dea61d92 SP |
740 | } |
741 | ||
742 | /* Initialize CONDS with all the condition statements from the basic | |
743 | blocks of LOOP. */ | |
744 | ||
745 | static void | |
9771b263 | 746 | collect_condition_stmts (struct loop *loop, vec<gimple> *conds) |
dea61d92 SP |
747 | { |
748 | unsigned i; | |
749 | edge e; | |
9771b263 | 750 | vec<edge> exits = get_loop_exit_edges (loop); |
dea61d92 | 751 | |
9771b263 | 752 | FOR_EACH_VEC_ELT (exits, i, e) |
dea61d92 | 753 | { |
726a989a | 754 | gimple cond = last_stmt (e->src); |
dea61d92 SP |
755 | |
756 | if (cond) | |
9771b263 | 757 | conds->safe_push (cond); |
dea61d92 SP |
758 | } |
759 | ||
9771b263 | 760 | exits.release (); |
dea61d92 SP |
761 | } |
762 | ||
763 | /* Add to PARTITION all the exit condition statements for LOOPS | |
764 | together with all their dependent statements determined from | |
765 | RDG. */ | |
766 | ||
767 | static void | |
c61f8985 | 768 | rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition, |
7ad672e4 | 769 | bitmap processed) |
dea61d92 SP |
770 | { |
771 | unsigned i; | |
772 | bitmap_iterator bi; | |
9771b263 DN |
773 | vec<gimple> conds; |
774 | conds.create (3); | |
dea61d92 SP |
775 | |
776 | EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi) | |
0fc822d0 | 777 | collect_condition_stmts (get_loop (cfun, i), &conds); |
dea61d92 | 778 | |
9771b263 | 779 | while (!conds.is_empty ()) |
dea61d92 | 780 | { |
9771b263 | 781 | gimple cond = conds.pop (); |
dea61d92 SP |
782 | int v = rdg_vertex_for_stmt (rdg, cond); |
783 | bitmap new_loops = BITMAP_ALLOC (NULL); | |
784 | ||
785 | if (!already_processed_vertex_p (processed, v)) | |
7ad672e4 | 786 | rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed); |
dea61d92 SP |
787 | |
788 | EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi) | |
fcaa4ca4 | 789 | if (bitmap_set_bit (loops, i)) |
0fc822d0 | 790 | collect_condition_stmts (get_loop (cfun, i), &conds); |
dea61d92 SP |
791 | |
792 | BITMAP_FREE (new_loops); | |
793 | } | |
01be8516 | 794 | |
9771b263 | 795 | conds.release (); |
dea61d92 SP |
796 | } |
797 | ||
dea61d92 SP |
798 | /* Returns a bitmap in which all the statements needed for computing |
799 | the strongly connected component C of the RDG are flagged, also | |
800 | including the loop exit conditions. */ | |
801 | ||
c61f8985 | 802 | static partition_t |
7ad672e4 | 803 | build_rdg_partition_for_component (struct graph *rdg, rdgc c) |
dea61d92 SP |
804 | { |
805 | int i, v; | |
c61f8985 | 806 | partition_t partition = partition_alloc (NULL); |
dea61d92 SP |
807 | bitmap loops = BITMAP_ALLOC (NULL); |
808 | bitmap processed = BITMAP_ALLOC (NULL); | |
809 | ||
9771b263 | 810 | FOR_EACH_VEC_ELT (c->vertices, i, v) |
dea61d92 | 811 | if (!already_processed_vertex_p (processed, v)) |
7ad672e4 | 812 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed); |
dea61d92 | 813 | |
7ad672e4 | 814 | rdg_flag_loop_exits (rdg, loops, partition, processed); |
dea61d92 SP |
815 | |
816 | BITMAP_FREE (processed); | |
817 | BITMAP_FREE (loops); | |
818 | return partition; | |
819 | } | |
820 | ||
821 | /* Free memory for COMPONENTS. */ | |
822 | ||
823 | static void | |
9771b263 | 824 | free_rdg_components (vec<rdgc> components) |
dea61d92 SP |
825 | { |
826 | int i; | |
827 | rdgc x; | |
828 | ||
9771b263 | 829 | FOR_EACH_VEC_ELT (components, i, x) |
dea61d92 | 830 | { |
9771b263 | 831 | x->vertices.release (); |
dea61d92 SP |
832 | free (x); |
833 | } | |
01be8516 | 834 | |
9771b263 | 835 | components.release (); |
dea61d92 SP |
836 | } |
837 | ||
838 | /* Build the COMPONENTS vector with the strongly connected components | |
839 | of RDG in which the STARTING_VERTICES occur. */ | |
840 | ||
841 | static void | |
9771b263 DN |
842 | rdg_build_components (struct graph *rdg, vec<int> starting_vertices, |
843 | vec<rdgc> *components) | |
dea61d92 SP |
844 | { |
845 | int i, v; | |
846 | bitmap saved_components = BITMAP_ALLOC (NULL); | |
847 | int n_components = graphds_scc (rdg, NULL); | |
9771b263 DN |
848 | /* ??? Macros cannot process template types with more than one |
849 | argument, so we need this typedef. */ | |
850 | typedef vec<int> vec_int_heap; | |
851 | vec<int> *all_components = XNEWVEC (vec_int_heap, n_components); | |
dea61d92 SP |
852 | |
853 | for (i = 0; i < n_components; i++) | |
9771b263 | 854 | all_components[i].create (3); |
dea61d92 SP |
855 | |
856 | for (i = 0; i < rdg->n_vertices; i++) | |
9771b263 | 857 | all_components[rdg->vertices[i].component].safe_push (i); |
dea61d92 | 858 | |
9771b263 | 859 | FOR_EACH_VEC_ELT (starting_vertices, i, v) |
dea61d92 SP |
860 | { |
861 | int c = rdg->vertices[v].component; | |
862 | ||
fcaa4ca4 | 863 | if (bitmap_set_bit (saved_components, c)) |
dea61d92 SP |
864 | { |
865 | rdgc x = XCNEW (struct rdg_component); | |
866 | x->num = c; | |
867 | x->vertices = all_components[c]; | |
868 | ||
9771b263 | 869 | components->safe_push (x); |
dea61d92 SP |
870 | } |
871 | } | |
872 | ||
873 | for (i = 0; i < n_components; i++) | |
874 | if (!bitmap_bit_p (saved_components, i)) | |
9771b263 | 875 | all_components[i].release (); |
dea61d92 SP |
876 | |
877 | free (all_components); | |
878 | BITMAP_FREE (saved_components); | |
879 | } | |
880 | ||
30d55936 RG |
881 | /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. |
882 | For the moment we detect only the memset zero pattern. */ | |
cfee318d | 883 | |
30d55936 RG |
884 | static void |
885 | classify_partition (loop_p loop, struct graph *rdg, partition_t partition) | |
cfee318d | 886 | { |
cfee318d | 887 | bitmap_iterator bi; |
30d55936 RG |
888 | unsigned i; |
889 | tree nb_iter; | |
d0582dc1 | 890 | data_reference_p single_load, single_store; |
b9fc0497 | 891 | bool volatiles_p = false; |
30d55936 RG |
892 | |
893 | partition->kind = PKIND_NORMAL; | |
d0582dc1 RG |
894 | partition->main_dr = NULL; |
895 | partition->secondary_dr = NULL; | |
30d55936 | 896 | |
c61f8985 | 897 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
30d55936 RG |
898 | { |
899 | gimple stmt = RDG_STMT (rdg, i); | |
900 | ||
901 | if (gimple_has_volatile_ops (stmt)) | |
b9fc0497 | 902 | volatiles_p = true; |
cfee318d | 903 | |
30d55936 RG |
904 | /* If the stmt has uses outside of the loop fail. |
905 | ??? If the stmt is generated in another partition that | |
906 | is not created as builtin we can ignore this. */ | |
9ca86fc3 | 907 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
30d55936 RG |
908 | { |
909 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
910 | fprintf (dump_file, "not generating builtin, partition has " | |
911 | "scalar uses outside of the loop\n"); | |
b9fc0497 | 912 | partition->kind = PKIND_REDUCTION; |
30d55936 RG |
913 | return; |
914 | } | |
915 | } | |
916 | ||
b9fc0497 RB |
917 | /* Perform general partition disqualification for builtins. */ |
918 | if (volatiles_p | |
919 | || !flag_tree_loop_distribute_patterns) | |
920 | return; | |
921 | ||
922 | nb_iter = number_of_exit_cond_executions (loop); | |
923 | if (!nb_iter || nb_iter == chrec_dont_know) | |
924 | return; | |
925 | ||
d0582dc1 RG |
926 | /* Detect memset and memcpy. */ |
927 | single_load = NULL; | |
928 | single_store = NULL; | |
30d55936 RG |
929 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
930 | { | |
931 | gimple stmt = RDG_STMT (rdg, i); | |
d0582dc1 RG |
932 | data_reference_p dr; |
933 | unsigned j; | |
30d55936 RG |
934 | |
935 | if (gimple_code (stmt) == GIMPLE_PHI) | |
936 | continue; | |
937 | ||
938 | /* Any scalar stmts are ok. */ | |
939 | if (!gimple_vuse (stmt)) | |
940 | continue; | |
941 | ||
d0582dc1 RG |
942 | /* Otherwise just regular loads/stores. */ |
943 | if (!gimple_assign_single_p (stmt)) | |
944 | return; | |
945 | ||
946 | /* But exactly one store and/or load. */ | |
9771b263 | 947 | for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j) |
30d55936 | 948 | { |
d0582dc1 RG |
949 | if (DR_IS_READ (dr)) |
950 | { | |
951 | if (single_load != NULL) | |
952 | return; | |
953 | single_load = dr; | |
954 | } | |
955 | else | |
956 | { | |
957 | if (single_store != NULL) | |
958 | return; | |
959 | single_store = dr; | |
960 | } | |
30d55936 | 961 | } |
30d55936 RG |
962 | } |
963 | ||
d0582dc1 RG |
964 | if (single_store && !single_load) |
965 | { | |
966 | gimple stmt = DR_STMT (single_store); | |
967 | tree rhs = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
968 | if (const_with_all_bytes_same (rhs) == -1 |
969 | && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs)) | |
970 | || (TYPE_MODE (TREE_TYPE (rhs)) | |
971 | != TYPE_MODE (unsigned_char_type_node)))) | |
d0582dc1 RG |
972 | return; |
973 | if (TREE_CODE (rhs) == SSA_NAME | |
974 | && !SSA_NAME_IS_DEFAULT_DEF (rhs) | |
975 | && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) | |
976 | return; | |
977 | if (!adjacent_dr_p (single_store)) | |
978 | return; | |
979 | partition->kind = PKIND_MEMSET; | |
980 | partition->main_dr = single_store; | |
981 | } | |
982 | else if (single_store && single_load) | |
983 | { | |
984 | gimple store = DR_STMT (single_store); | |
985 | gimple load = DR_STMT (single_load); | |
986 | /* Direct aggregate copy or via an SSA name temporary. */ | |
987 | if (load != store | |
988 | && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) | |
989 | return; | |
990 | if (!adjacent_dr_p (single_store) | |
991 | || !adjacent_dr_p (single_load) | |
992 | || !operand_equal_p (DR_STEP (single_store), | |
993 | DR_STEP (single_load), 0)) | |
994 | return; | |
f20132e7 RG |
995 | /* Now check that if there is a dependence this dependence is |
996 | of a suitable form for memmove. */ | |
6e1aa848 | 997 | vec<loop_p> loops = vNULL; |
f20132e7 | 998 | ddr_p ddr; |
9771b263 | 999 | loops.safe_push (loop); |
f20132e7 RG |
1000 | ddr = initialize_data_dependence_relation (single_load, single_store, |
1001 | loops); | |
1002 | compute_affine_dependence (ddr, loop); | |
f20132e7 RG |
1003 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
1004 | { | |
1005 | free_dependence_relation (ddr); | |
9771b263 | 1006 | loops.release (); |
f20132e7 RG |
1007 | return; |
1008 | } | |
1009 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known) | |
1010 | { | |
1011 | if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
1012 | { | |
1013 | free_dependence_relation (ddr); | |
9771b263 | 1014 | loops.release (); |
f20132e7 RG |
1015 | return; |
1016 | } | |
1017 | lambda_vector dist_v; | |
9771b263 | 1018 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
f20132e7 RG |
1019 | { |
1020 | int dist = dist_v[index_in_loop_nest (loop->num, | |
1021 | DDR_LOOP_NEST (ddr))]; | |
1022 | if (dist > 0 && !DDR_REVERSED_P (ddr)) | |
1023 | { | |
1024 | free_dependence_relation (ddr); | |
9771b263 | 1025 | loops.release (); |
f20132e7 RG |
1026 | return; |
1027 | } | |
1028 | } | |
1029 | } | |
b7ce70b3 | 1030 | free_dependence_relation (ddr); |
9771b263 | 1031 | loops.release (); |
d0582dc1 RG |
1032 | partition->kind = PKIND_MEMCPY; |
1033 | partition->main_dr = single_store; | |
1034 | partition->secondary_dr = single_load; | |
1035 | } | |
cfee318d SP |
1036 | } |
1037 | ||
1fa0c180 RG |
1038 | /* For a data reference REF, return the declaration of its base |
1039 | address or NULL_TREE if the base is not determined. */ | |
1040 | ||
1041 | static tree | |
1042 | ref_base_address (data_reference_p dr) | |
1043 | { | |
1044 | tree base_address = DR_BASE_ADDRESS (dr); | |
1045 | if (base_address | |
1046 | && TREE_CODE (base_address) == ADDR_EXPR) | |
1047 | return TREE_OPERAND (base_address, 0); | |
1048 | ||
1049 | return base_address; | |
1050 | } | |
1051 | ||
cfee318d SP |
1052 | /* Returns true when PARTITION1 and PARTITION2 have similar memory |
1053 | accesses in RDG. */ | |
1054 | ||
1055 | static bool | |
c61f8985 RG |
1056 | similar_memory_accesses (struct graph *rdg, partition_t partition1, |
1057 | partition_t partition2) | |
cfee318d | 1058 | { |
1fa0c180 | 1059 | unsigned i, j, k, l; |
cfee318d | 1060 | bitmap_iterator bi, bj; |
1fa0c180 RG |
1061 | data_reference_p ref1, ref2; |
1062 | ||
1063 | /* First check whether in the intersection of the two partitions are | |
1064 | any loads or stores. Common loads are the situation that happens | |
1065 | most often. */ | |
1066 | EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi) | |
1067 | if (RDG_MEM_WRITE_STMT (rdg, i) | |
1068 | || RDG_MEM_READS_STMT (rdg, i)) | |
1069 | return true; | |
cfee318d | 1070 | |
1fa0c180 | 1071 | /* Then check all data-references against each other. */ |
c61f8985 | 1072 | EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi) |
cfee318d SP |
1073 | if (RDG_MEM_WRITE_STMT (rdg, i) |
1074 | || RDG_MEM_READS_STMT (rdg, i)) | |
c61f8985 | 1075 | EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj) |
cfee318d SP |
1076 | if (RDG_MEM_WRITE_STMT (rdg, j) |
1077 | || RDG_MEM_READS_STMT (rdg, j)) | |
1fa0c180 | 1078 | { |
9771b263 | 1079 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1) |
1fa0c180 RG |
1080 | { |
1081 | tree base1 = ref_base_address (ref1); | |
1082 | if (base1) | |
9771b263 | 1083 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2) |
1fa0c180 RG |
1084 | if (base1 == ref_base_address (ref2)) |
1085 | return true; | |
1086 | } | |
1087 | } | |
cfee318d SP |
1088 | |
1089 | return false; | |
1090 | } | |
1091 | ||
dea61d92 SP |
1092 | /* Aggregate several components into a useful partition that is |
1093 | registered in the PARTITIONS vector. Partitions will be | |
1094 | distributed in different loops. */ | |
1095 | ||
1096 | static void | |
9771b263 DN |
1097 | rdg_build_partitions (struct graph *rdg, vec<rdgc> components, |
1098 | vec<int> *other_stores, | |
1099 | vec<partition_t> *partitions, bitmap processed) | |
dea61d92 SP |
1100 | { |
1101 | int i; | |
1102 | rdgc x; | |
c61f8985 | 1103 | partition_t partition = partition_alloc (NULL); |
dea61d92 | 1104 | |
9771b263 | 1105 | FOR_EACH_VEC_ELT (components, i, x) |
dea61d92 | 1106 | { |
c61f8985 | 1107 | partition_t np; |
9771b263 | 1108 | int v = x->vertices[0]; |
b8698a0f | 1109 | |
dea61d92 SP |
1110 | if (bitmap_bit_p (processed, v)) |
1111 | continue; | |
b8698a0f | 1112 | |
7ad672e4 | 1113 | np = build_rdg_partition_for_component (rdg, x); |
c61f8985 | 1114 | bitmap_ior_into (partition->stmts, np->stmts); |
7ad672e4 | 1115 | partition->has_writes = partition_has_writes (np); |
c61f8985 RG |
1116 | bitmap_ior_into (processed, np->stmts); |
1117 | partition_free (np); | |
dea61d92 | 1118 | |
7ad672e4 | 1119 | if (partition_has_writes (partition)) |
dea61d92 SP |
1120 | { |
1121 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1122 | { | |
1123 | fprintf (dump_file, "ldist useful partition:\n"); | |
c61f8985 | 1124 | dump_bitmap (dump_file, partition->stmts); |
dea61d92 SP |
1125 | } |
1126 | ||
9771b263 | 1127 | partitions->safe_push (partition); |
c61f8985 | 1128 | partition = partition_alloc (NULL); |
dea61d92 SP |
1129 | } |
1130 | } | |
1131 | ||
1132 | /* Add the nodes from the RDG that were not marked as processed, and | |
1133 | that are used outside the current loop. These are scalar | |
1134 | computations that are not yet part of previous partitions. */ | |
1135 | for (i = 0; i < rdg->n_vertices; i++) | |
1136 | if (!bitmap_bit_p (processed, i) | |
1137 | && rdg_defs_used_in_other_loops_p (rdg, i)) | |
9771b263 | 1138 | other_stores->safe_push (i); |
dea61d92 SP |
1139 | |
1140 | /* If there are still statements left in the OTHER_STORES array, | |
1141 | create other components and partitions with these stores and | |
1142 | their dependences. */ | |
9771b263 | 1143 | if (other_stores->length () > 0) |
dea61d92 | 1144 | { |
9771b263 DN |
1145 | vec<rdgc> comps; |
1146 | comps.create (3); | |
1147 | vec<int> foo; | |
1148 | foo.create (3); | |
dea61d92 SP |
1149 | |
1150 | rdg_build_components (rdg, *other_stores, &comps); | |
1151 | rdg_build_partitions (rdg, comps, &foo, partitions, processed); | |
1152 | ||
9771b263 | 1153 | foo.release (); |
dea61d92 SP |
1154 | free_rdg_components (comps); |
1155 | } | |
1156 | ||
1157 | /* If there is something left in the last partition, save it. */ | |
c61f8985 | 1158 | if (bitmap_count_bits (partition->stmts) > 0) |
9771b263 | 1159 | partitions->safe_push (partition); |
dea61d92 | 1160 | else |
c61f8985 | 1161 | partition_free (partition); |
dea61d92 SP |
1162 | } |
1163 | ||
1164 | /* Dump to FILE the PARTITIONS. */ | |
1165 | ||
1166 | static void | |
9771b263 | 1167 | dump_rdg_partitions (FILE *file, vec<partition_t> partitions) |
dea61d92 SP |
1168 | { |
1169 | int i; | |
c61f8985 | 1170 | partition_t partition; |
dea61d92 | 1171 | |
9771b263 | 1172 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1173 | debug_bitmap_file (file, partition->stmts); |
dea61d92 SP |
1174 | } |
1175 | ||
1176 | /* Debug PARTITIONS. */ | |
9771b263 | 1177 | extern void debug_rdg_partitions (vec<partition_t> ); |
dea61d92 | 1178 | |
24e47c76 | 1179 | DEBUG_FUNCTION void |
9771b263 | 1180 | debug_rdg_partitions (vec<partition_t> partitions) |
dea61d92 SP |
1181 | { |
1182 | dump_rdg_partitions (stderr, partitions); | |
1183 | } | |
1184 | ||
2b8aee8e SP |
1185 | /* Returns the number of read and write operations in the RDG. */ |
1186 | ||
1187 | static int | |
1188 | number_of_rw_in_rdg (struct graph *rdg) | |
1189 | { | |
1190 | int i, res = 0; | |
1191 | ||
1192 | for (i = 0; i < rdg->n_vertices; i++) | |
1193 | { | |
1194 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1195 | ++res; | |
1196 | ||
1197 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1198 | ++res; | |
1199 | } | |
1200 | ||
1201 | return res; | |
1202 | } | |
1203 | ||
1204 | /* Returns the number of read and write operations in a PARTITION of | |
1205 | the RDG. */ | |
1206 | ||
1207 | static int | |
c61f8985 | 1208 | number_of_rw_in_partition (struct graph *rdg, partition_t partition) |
2b8aee8e SP |
1209 | { |
1210 | int res = 0; | |
1211 | unsigned i; | |
1212 | bitmap_iterator ii; | |
1213 | ||
c61f8985 | 1214 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii) |
2b8aee8e SP |
1215 | { |
1216 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1217 | ++res; | |
1218 | ||
1219 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1220 | ++res; | |
1221 | } | |
1222 | ||
1223 | return res; | |
1224 | } | |
1225 | ||
1226 | /* Returns true when one of the PARTITIONS contains all the read or | |
1227 | write operations of RDG. */ | |
1228 | ||
1229 | static bool | |
9771b263 DN |
1230 | partition_contains_all_rw (struct graph *rdg, |
1231 | vec<partition_t> partitions) | |
2b8aee8e SP |
1232 | { |
1233 | int i; | |
c61f8985 | 1234 | partition_t partition; |
2b8aee8e SP |
1235 | int nrw = number_of_rw_in_rdg (rdg); |
1236 | ||
9771b263 | 1237 | FOR_EACH_VEC_ELT (partitions, i, partition) |
2b8aee8e SP |
1238 | if (nrw == number_of_rw_in_partition (rdg, partition)) |
1239 | return true; | |
1240 | ||
1241 | return false; | |
1242 | } | |
1243 | ||
dea61d92 SP |
1244 | /* Generate code from STARTING_VERTICES in RDG. Returns the number of |
1245 | distributed loops. */ | |
1246 | ||
1247 | static int | |
1248 | ldist_gen (struct loop *loop, struct graph *rdg, | |
9771b263 | 1249 | vec<int> starting_vertices) |
dea61d92 SP |
1250 | { |
1251 | int i, nbp; | |
9771b263 DN |
1252 | vec<rdgc> components; |
1253 | components.create (3); | |
1254 | vec<partition_t> partitions; | |
1255 | partitions.create (3); | |
1256 | vec<int> other_stores; | |
1257 | other_stores.create (3); | |
c61f8985 RG |
1258 | partition_t partition; |
1259 | bitmap processed = BITMAP_ALLOC (NULL); | |
be6b029b | 1260 | bool any_builtin; |
dea61d92 SP |
1261 | |
1262 | remaining_stmts = BITMAP_ALLOC (NULL); | |
1263 | upstream_mem_writes = BITMAP_ALLOC (NULL); | |
1264 | ||
1265 | for (i = 0; i < rdg->n_vertices; i++) | |
1266 | { | |
1267 | bitmap_set_bit (remaining_stmts, i); | |
1268 | ||
1269 | /* Save in OTHER_STORES all the memory writes that are not in | |
1270 | STARTING_VERTICES. */ | |
1271 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1272 | { | |
1273 | int v; | |
1274 | unsigned j; | |
1275 | bool found = false; | |
1276 | ||
9771b263 | 1277 | FOR_EACH_VEC_ELT (starting_vertices, j, v) |
dea61d92 SP |
1278 | if (i == v) |
1279 | { | |
1280 | found = true; | |
1281 | break; | |
1282 | } | |
1283 | ||
1284 | if (!found) | |
9771b263 | 1285 | other_stores.safe_push (i); |
dea61d92 SP |
1286 | } |
1287 | } | |
1288 | ||
1289 | mark_nodes_having_upstream_mem_writes (rdg); | |
1290 | rdg_build_components (rdg, starting_vertices, &components); | |
1291 | rdg_build_partitions (rdg, components, &other_stores, &partitions, | |
1292 | processed); | |
1293 | BITMAP_FREE (processed); | |
dea61d92 | 1294 | |
be6b029b | 1295 | any_builtin = false; |
9771b263 | 1296 | FOR_EACH_VEC_ELT (partitions, i, partition) |
be6b029b RG |
1297 | { |
1298 | classify_partition (loop, rdg, partition); | |
1299 | any_builtin |= partition_builtin_p (partition); | |
1300 | } | |
30d55936 | 1301 | |
c014f6f5 RG |
1302 | /* If we are only distributing patterns fuse all partitions that |
1303 | were not properly classified as builtins. Else fuse partitions | |
1304 | with similar memory accesses. */ | |
1305 | if (!flag_tree_loop_distribution) | |
1306 | { | |
1307 | partition_t into; | |
be6b029b RG |
1308 | /* If we did not detect any builtin simply bail out. */ |
1309 | if (!any_builtin) | |
1310 | { | |
1311 | nbp = 0; | |
1312 | goto ldist_done; | |
1313 | } | |
38ad2d07 RG |
1314 | /* Only fuse adjacent non-builtin partitions, see PR53616. |
1315 | ??? Use dependence information to improve partition ordering. */ | |
1316 | i = 0; | |
1317 | do | |
1318 | { | |
9771b263 | 1319 | for (; partitions.iterate (i, &into); ++i) |
38ad2d07 RG |
1320 | if (!partition_builtin_p (into)) |
1321 | break; | |
9771b263 | 1322 | for (++i; partitions.iterate (i, &partition); ++i) |
38ad2d07 RG |
1323 | if (!partition_builtin_p (partition)) |
1324 | { | |
1325 | bitmap_ior_into (into->stmts, partition->stmts); | |
b9fc0497 RB |
1326 | if (partition->kind == PKIND_REDUCTION) |
1327 | into->kind = PKIND_REDUCTION; | |
9771b263 | 1328 | partitions.ordered_remove (i); |
5eb010bc | 1329 | partition_free (partition); |
38ad2d07 RG |
1330 | i--; |
1331 | } | |
1332 | else | |
1333 | break; | |
1334 | } | |
9771b263 | 1335 | while ((unsigned) i < partitions.length ()); |
c014f6f5 RG |
1336 | } |
1337 | else | |
1338 | { | |
1339 | partition_t into; | |
1340 | int j; | |
9771b263 | 1341 | for (i = 0; partitions.iterate (i, &into); ++i) |
c014f6f5 RG |
1342 | { |
1343 | if (partition_builtin_p (into)) | |
1344 | continue; | |
1345 | for (j = i + 1; | |
9771b263 | 1346 | partitions.iterate (j, &partition); ++j) |
c014f6f5 RG |
1347 | { |
1348 | if (!partition_builtin_p (partition) | |
1349 | /* ??? The following is horribly inefficient, | |
1350 | we are re-computing and analyzing data-references | |
1351 | of the stmts in the partitions all the time. */ | |
1352 | && similar_memory_accesses (rdg, into, partition)) | |
1353 | { | |
1354 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1355 | { | |
1356 | fprintf (dump_file, "fusing partitions\n"); | |
1357 | dump_bitmap (dump_file, into->stmts); | |
1358 | dump_bitmap (dump_file, partition->stmts); | |
1359 | fprintf (dump_file, "because they have similar " | |
1360 | "memory accesses\n"); | |
1361 | } | |
1362 | bitmap_ior_into (into->stmts, partition->stmts); | |
b9fc0497 RB |
1363 | if (partition->kind == PKIND_REDUCTION) |
1364 | into->kind = PKIND_REDUCTION; | |
9771b263 | 1365 | partitions.ordered_remove (j); |
5eb010bc | 1366 | partition_free (partition); |
c014f6f5 RG |
1367 | j--; |
1368 | } | |
1369 | } | |
1370 | } | |
1371 | } | |
30d55936 | 1372 | |
b9fc0497 RB |
1373 | /* Fuse all reduction partitions into the last. */ |
1374 | if (partitions.length () > 1) | |
1375 | { | |
1376 | partition_t into = partitions.last (); | |
1377 | for (i = partitions.length () - 2; i >= 0; --i) | |
1378 | { | |
1379 | partition_t what = partitions[i]; | |
1380 | if (what->kind == PKIND_REDUCTION) | |
1381 | { | |
1382 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1383 | { | |
1384 | fprintf (dump_file, "fusing partitions\n"); | |
1385 | dump_bitmap (dump_file, into->stmts); | |
1386 | dump_bitmap (dump_file, what->stmts); | |
1387 | fprintf (dump_file, "because the latter has reductions\n"); | |
1388 | } | |
1389 | bitmap_ior_into (into->stmts, what->stmts); | |
1390 | into->kind = PKIND_REDUCTION; | |
1391 | partitions.ordered_remove (i); | |
5eb010bc | 1392 | partition_free (what); |
b9fc0497 RB |
1393 | } |
1394 | } | |
1395 | } | |
1396 | ||
9771b263 | 1397 | nbp = partitions.length (); |
a4293fa6 | 1398 | if (nbp == 0 |
9771b263 DN |
1399 | || (nbp == 1 && !partition_builtin_p (partitions[0])) |
1400 | || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) | |
c014f6f5 RG |
1401 | { |
1402 | nbp = 0; | |
1403 | goto ldist_done; | |
1404 | } | |
dea61d92 SP |
1405 | |
1406 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1407 | dump_rdg_partitions (dump_file, partitions); | |
1408 | ||
9771b263 | 1409 | FOR_EACH_VEC_ELT (partitions, i, partition) |
d0582dc1 | 1410 | generate_code_for_partition (loop, partition, i < nbp - 1); |
dea61d92 | 1411 | |
dea61d92 SP |
1412 | ldist_done: |
1413 | ||
1414 | BITMAP_FREE (remaining_stmts); | |
1415 | BITMAP_FREE (upstream_mem_writes); | |
1416 | ||
9771b263 | 1417 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1418 | partition_free (partition); |
dea61d92 | 1419 | |
9771b263 DN |
1420 | other_stores.release (); |
1421 | partitions.release (); | |
dea61d92 SP |
1422 | free_rdg_components (components); |
1423 | return nbp; | |
1424 | } | |
1425 | ||
1426 | /* Distributes the code from LOOP in such a way that producer | |
1427 | statements are placed before consumer statements. When STMTS is | |
1428 | NULL, performs the maximal distribution, if STMTS is not NULL, | |
1429 | tries to separate only these statements from the LOOP's body. | |
1430 | Returns the number of distributed loops. */ | |
1431 | ||
1432 | static int | |
9771b263 | 1433 | distribute_loop (struct loop *loop, vec<gimple> stmts) |
dea61d92 | 1434 | { |
e96d7dd7 | 1435 | int res = 0; |
dea61d92 | 1436 | struct graph *rdg; |
726a989a | 1437 | gimple s; |
dea61d92 | 1438 | unsigned i; |
9771b263 DN |
1439 | vec<int> vertices; |
1440 | vec<ddr_p> dependence_relations; | |
1441 | vec<data_reference_p> datarefs; | |
1442 | vec<loop_p> loop_nest; | |
1443 | ||
1444 | datarefs.create (10); | |
1445 | dependence_relations.create (100); | |
1446 | loop_nest.create (3); | |
01be8516 | 1447 | rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs); |
dea61d92 SP |
1448 | |
1449 | if (!rdg) | |
1450 | { | |
1451 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1452 | fprintf (dump_file, | |
1453 | "FIXME: Loop %d not distributed: failed to build the RDG.\n", | |
1454 | loop->num); | |
1455 | ||
01be8516 SP |
1456 | free_dependence_relations (dependence_relations); |
1457 | free_data_refs (datarefs); | |
9771b263 | 1458 | loop_nest.release (); |
dea61d92 SP |
1459 | return res; |
1460 | } | |
1461 | ||
9771b263 | 1462 | vertices.create (3); |
dea61d92 SP |
1463 | |
1464 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1465 | dump_rdg (dump_file, rdg); | |
1466 | ||
9771b263 | 1467 | FOR_EACH_VEC_ELT (stmts, i, s) |
dea61d92 SP |
1468 | { |
1469 | int v = rdg_vertex_for_stmt (rdg, s); | |
1470 | ||
1471 | if (v >= 0) | |
1472 | { | |
9771b263 | 1473 | vertices.safe_push (v); |
dea61d92 SP |
1474 | |
1475 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1476 | fprintf (dump_file, | |
1477 | "ldist asked to generate code for vertex %d\n", v); | |
1478 | } | |
1479 | } | |
1480 | ||
1481 | res = ldist_gen (loop, rdg, vertices); | |
9771b263 | 1482 | vertices.release (); |
dea61d92 | 1483 | free_rdg (rdg); |
01be8516 SP |
1484 | free_dependence_relations (dependence_relations); |
1485 | free_data_refs (datarefs); | |
9771b263 | 1486 | loop_nest.release (); |
dea61d92 SP |
1487 | return res; |
1488 | } | |
1489 | ||
1490 | /* Distribute all loops in the current function. */ | |
1491 | ||
1492 | static unsigned int | |
1493 | tree_loop_distribution (void) | |
1494 | { | |
1495 | struct loop *loop; | |
1496 | loop_iterator li; | |
c014f6f5 | 1497 | bool changed = false; |
1fa0c180 RG |
1498 | basic_block bb; |
1499 | ||
1500 | FOR_ALL_BB (bb) | |
1501 | { | |
1502 | gimple_stmt_iterator gsi; | |
1503 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1504 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1505 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1506 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1507 | } | |
dea61d92 | 1508 | |
c014f6f5 RG |
1509 | /* We can at the moment only distribute non-nested loops, thus restrict |
1510 | walking to innermost loops. */ | |
1511 | FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) | |
dea61d92 | 1512 | { |
6e1aa848 | 1513 | vec<gimple> work_list = vNULL; |
be6b029b | 1514 | basic_block *bbs; |
0e20c89f | 1515 | int num = loop->num; |
c014f6f5 | 1516 | int nb_generated_loops = 0; |
be6b029b | 1517 | unsigned int i; |
a3357f7d RG |
1518 | |
1519 | /* If the loop doesn't have a single exit we will fail anyway, | |
1520 | so do that early. */ | |
1521 | if (!single_exit (loop)) | |
1522 | continue; | |
dea61d92 | 1523 | |
f56f2d33 JH |
1524 | /* Only optimize hot loops. */ |
1525 | if (!optimize_loop_for_speed_p (loop)) | |
1526 | continue; | |
1527 | ||
c014f6f5 RG |
1528 | /* Only distribute loops with a header and latch for now. */ |
1529 | if (loop->num_nodes > 2) | |
1530 | continue; | |
1531 | ||
be6b029b RG |
1532 | /* Initialize the worklist with stmts we seed the partitions with. */ |
1533 | bbs = get_loop_body_in_dom_order (loop); | |
1534 | for (i = 0; i < loop->num_nodes; ++i) | |
1535 | { | |
1536 | gimple_stmt_iterator gsi; | |
1537 | for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1538 | { | |
1539 | gimple stmt = gsi_stmt (gsi); | |
b9fc0497 RB |
1540 | /* Distribute stmts which have defs that are used outside of |
1541 | the loop. */ | |
1542 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) | |
1543 | ; | |
1544 | /* Otherwise only distribute stores for now. */ | |
1545 | else if (!gimple_assign_single_p (stmt) | |
1546 | || is_gimple_reg (gimple_assign_lhs (stmt))) | |
be6b029b RG |
1547 | continue; |
1548 | ||
9771b263 | 1549 | work_list.safe_push (stmt); |
be6b029b RG |
1550 | } |
1551 | } | |
1552 | free (bbs); | |
c014f6f5 | 1553 | |
9771b263 | 1554 | if (work_list.length () > 0) |
c014f6f5 RG |
1555 | nb_generated_loops = distribute_loop (loop, work_list); |
1556 | ||
1557 | if (nb_generated_loops > 0) | |
1558 | changed = true; | |
dea61d92 SP |
1559 | |
1560 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1561 | { | |
1562 | if (nb_generated_loops > 1) | |
1563 | fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", | |
0e20c89f | 1564 | num, nb_generated_loops); |
dea61d92 | 1565 | else |
0e20c89f | 1566 | fprintf (dump_file, "Loop %d is the same.\n", num); |
dea61d92 SP |
1567 | } |
1568 | ||
9771b263 | 1569 | work_list.release (); |
dea61d92 SP |
1570 | } |
1571 | ||
c014f6f5 RG |
1572 | if (changed) |
1573 | { | |
525174a2 | 1574 | mark_virtual_operands_for_renaming (cfun); |
c014f6f5 RG |
1575 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1576 | } | |
1577 | ||
1578 | #ifdef ENABLE_CHECKING | |
1579 | verify_loop_structure (); | |
1580 | #endif | |
1581 | ||
5006671f | 1582 | return 0; |
dea61d92 SP |
1583 | } |
1584 | ||
1585 | static bool | |
1586 | gate_tree_loop_distribution (void) | |
1587 | { | |
20769d5e SP |
1588 | return flag_tree_loop_distribution |
1589 | || flag_tree_loop_distribute_patterns; | |
dea61d92 SP |
1590 | } |
1591 | ||
8ddbbcae | 1592 | struct gimple_opt_pass pass_loop_distribution = |
dea61d92 | 1593 | { |
8ddbbcae JH |
1594 | { |
1595 | GIMPLE_PASS, | |
dea61d92 | 1596 | "ldist", /* name */ |
2b4e6bf1 | 1597 | OPTGROUP_LOOP, /* optinfo_flags */ |
dea61d92 SP |
1598 | gate_tree_loop_distribution, /* gate */ |
1599 | tree_loop_distribution, /* execute */ | |
1600 | NULL, /* sub */ | |
1601 | NULL, /* next */ | |
1602 | 0, /* static_pass_number */ | |
1603 | TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ | |
1604 | PROP_cfg | PROP_ssa, /* properties_required */ | |
1605 | 0, /* properties_provided */ | |
1606 | 0, /* properties_destroyed */ | |
1607 | 0, /* todo_flags_start */ | |
bb313b93 | 1608 | TODO_verify_ssa /* todo_flags_finish */ |
8ddbbcae | 1609 | } |
dea61d92 | 1610 | }; |