]>
Commit | Line | Data |
---|---|---|
801c5610 | 1 | /* Loop distribution. |
7cf0dbf3 | 2 | Copyright (C) 2006, 2007, 2008, 2009, 2010 |
3 | Free Software Foundation, Inc. | |
801c5610 | 4 | Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
5 | and Sebastian Pop <sebastian.pop@amd.com>. | |
6 | ||
7 | This file is part of GCC. | |
48e1416a | 8 | |
801c5610 | 9 | GCC is free software; you can redistribute it and/or modify it |
10 | under the terms of the GNU General Public License as published by the | |
11 | Free Software Foundation; either version 3, or (at your option) any | |
12 | later version. | |
48e1416a | 13 | |
801c5610 | 14 | GCC is distributed in the hope that it will be useful, but WITHOUT |
15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 | for more details. | |
48e1416a | 18 | |
801c5610 | 19 | You should have received a copy of the GNU General Public License |
20 | along with GCC; see the file COPYING3. If not see | |
21 | <http://www.gnu.org/licenses/>. */ | |
22 | ||
23 | /* This pass performs loop distribution: for example, the loop | |
24 | ||
25 | |DO I = 2, N | |
26 | | A(I) = B(I) + C | |
27 | | D(I) = A(I-1)*E | |
28 | |ENDDO | |
29 | ||
48e1416a | 30 | is transformed to |
801c5610 | 31 | |
32 | |DOALL I = 2, N | |
33 | | A(I) = B(I) + C | |
34 | |ENDDO | |
35 | | | |
36 | |DOALL I = 2, N | |
37 | | D(I) = A(I-1)*E | |
38 | |ENDDO | |
39 | ||
40 | This pass uses an RDG, Reduced Dependence Graph built on top of the | |
41 | data dependence relations. The RDG is then topologically sorted to | |
42 | obtain a map of information producers/consumers based on which it | |
43 | generates the new loops. */ | |
44 | ||
45 | #include "config.h" | |
46 | #include "system.h" | |
47 | #include "coretypes.h" | |
48 | #include "tm.h" | |
801c5610 | 49 | #include "tree.h" |
801c5610 | 50 | #include "basic-block.h" |
51 | #include "diagnostic.h" | |
52 | #include "tree-flow.h" | |
53 | #include "tree-dump.h" | |
54 | #include "timevar.h" | |
55 | #include "cfgloop.h" | |
56 | #include "expr.h" | |
57 | #include "optabs.h" | |
58 | #include "tree-chrec.h" | |
59 | #include "tree-data-ref.h" | |
60 | #include "tree-scalar-evolution.h" | |
61 | #include "tree-pass.h" | |
62 | #include "lambda.h" | |
63 | #include "langhooks.h" | |
64 | #include "tree-vectorizer.h" | |
65 | ||
66 | /* If bit I is not set, it means that this node represents an | |
67 | operation that has already been performed, and that should not be | |
68 | performed again. This is the subgraph of remaining important | |
69 | computations that is passed to the DFS algorithm in order to avoid | |
70 | including the same stores several times in different loops. */ | |
71 | static bitmap remaining_stmts; | |
72 | ||
73 | /* A node of the RDG is marked in this bitmap when it has as a | |
74 | predecessor a node that writes to memory. */ | |
75 | static bitmap upstream_mem_writes; | |
76 | ||
77 | /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of | |
78 | ORIG_LOOP. */ | |
79 | ||
80 | static void | |
81 | update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop) | |
82 | { | |
83 | tree new_ssa_name; | |
75a70cf9 | 84 | gimple_stmt_iterator si_new, si_orig; |
801c5610 | 85 | edge orig_loop_latch = loop_latch_edge (orig_loop); |
86 | edge orig_entry_e = loop_preheader_edge (orig_loop); | |
87 | edge new_loop_entry_e = loop_preheader_edge (new_loop); | |
88 | ||
89 | /* Scan the phis in the headers of the old and new loops | |
90 | (they are organized in exactly the same order). */ | |
75a70cf9 | 91 | for (si_new = gsi_start_phis (new_loop->header), |
92 | si_orig = gsi_start_phis (orig_loop->header); | |
93 | !gsi_end_p (si_new) && !gsi_end_p (si_orig); | |
94 | gsi_next (&si_new), gsi_next (&si_orig)) | |
801c5610 | 95 | { |
75a70cf9 | 96 | tree def; |
efbcb6de | 97 | source_location locus; |
75a70cf9 | 98 | gimple phi_new = gsi_stmt (si_new); |
99 | gimple phi_orig = gsi_stmt (si_orig); | |
100 | ||
801c5610 | 101 | /* Add the first phi argument for the phi in NEW_LOOP (the one |
102 | associated with the entry of NEW_LOOP) */ | |
75a70cf9 | 103 | def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e); |
efbcb6de | 104 | locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e); |
105 | add_phi_arg (phi_new, def, new_loop_entry_e, locus); | |
801c5610 | 106 | |
107 | /* Add the second phi argument for the phi in NEW_LOOP (the one | |
108 | associated with the latch of NEW_LOOP) */ | |
109 | def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch); | |
efbcb6de | 110 | locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch); |
801c5610 | 111 | |
112 | if (TREE_CODE (def) == SSA_NAME) | |
113 | { | |
114 | new_ssa_name = get_current_def (def); | |
115 | ||
116 | if (!new_ssa_name) | |
117 | /* This only happens if there are no definitions inside the | |
44f1d7db | 118 | loop. Use the the invariant in the new loop as is. */ |
119 | new_ssa_name = def; | |
801c5610 | 120 | } |
121 | else | |
122 | /* Could be an integer. */ | |
123 | new_ssa_name = def; | |
124 | ||
efbcb6de | 125 | add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus); |
801c5610 | 126 | } |
127 | } | |
128 | ||
129 | /* Return a copy of LOOP placed before LOOP. */ | |
130 | ||
131 | static struct loop * | |
132 | copy_loop_before (struct loop *loop) | |
133 | { | |
134 | struct loop *res; | |
135 | edge preheader = loop_preheader_edge (loop); | |
136 | ||
137 | if (!single_exit (loop)) | |
138 | return NULL; | |
139 | ||
140 | initialize_original_copy_tables (); | |
141 | res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader); | |
142 | free_original_copy_tables (); | |
143 | ||
144 | if (!res) | |
145 | return NULL; | |
146 | ||
147 | update_phis_for_loop_copy (loop, res); | |
148 | rename_variables_in_loop (res); | |
149 | ||
150 | return res; | |
151 | } | |
152 | ||
153 | /* Creates an empty basic block after LOOP. */ | |
154 | ||
155 | static void | |
156 | create_bb_after_loop (struct loop *loop) | |
157 | { | |
158 | edge exit = single_exit (loop); | |
159 | ||
160 | if (!exit) | |
161 | return; | |
162 | ||
163 | split_edge (exit); | |
164 | } | |
165 | ||
166 | /* Generate code for PARTITION from the code in LOOP. The loop is | |
167 | copied when COPY_P is true. All the statements not flagged in the | |
168 | PARTITION bitmap are removed from the loop or from its copy. The | |
169 | statements are indexed in sequence inside a basic block, and the | |
170 | basic blocks of a loop are taken in dom order. Returns true when | |
171 | the code gen succeeded. */ | |
172 | ||
173 | static bool | |
174 | generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p) | |
175 | { | |
176 | unsigned i, x; | |
75a70cf9 | 177 | gimple_stmt_iterator bsi; |
801c5610 | 178 | basic_block *bbs; |
179 | ||
180 | if (copy_p) | |
181 | { | |
182 | loop = copy_loop_before (loop); | |
183 | create_preheader (loop, CP_SIMPLE_PREHEADERS); | |
184 | create_bb_after_loop (loop); | |
185 | } | |
186 | ||
187 | if (loop == NULL) | |
188 | return false; | |
189 | ||
190 | /* Remove stmts not in the PARTITION bitmap. The order in which we | |
191 | visit the phi nodes and the statements is exactly as in | |
192 | stmts_from_loop. */ | |
193 | bbs = get_loop_body_in_dom_order (loop); | |
194 | ||
195 | for (x = 0, i = 0; i < loop->num_nodes; i++) | |
196 | { | |
197 | basic_block bb = bbs[i]; | |
801c5610 | 198 | |
75a70cf9 | 199 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
801c5610 | 200 | if (!bitmap_bit_p (partition, x++)) |
75a70cf9 | 201 | remove_phi_node (&bsi, true); |
801c5610 | 202 | else |
75a70cf9 | 203 | gsi_next (&bsi); |
801c5610 | 204 | |
75a70cf9 | 205 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
206 | if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL | |
801c5610 | 207 | && !bitmap_bit_p (partition, x++)) |
75a70cf9 | 208 | gsi_remove (&bsi, false); |
801c5610 | 209 | else |
75a70cf9 | 210 | gsi_next (&bsi); |
801c5610 | 211 | |
212 | mark_virtual_ops_in_bb (bb); | |
213 | } | |
214 | ||
215 | free (bbs); | |
216 | return true; | |
217 | } | |
218 | ||
c821ef7d | 219 | /* Build the size argument for a memset call. */ |
880734c8 | 220 | |
221 | static inline tree | |
96eaf651 | 222 | build_size_arg_loc (location_t loc, tree nb_iter, tree op, |
223 | gimple_seq *stmt_list) | |
880734c8 | 224 | { |
96eaf651 | 225 | gimple_seq stmts; |
ad086ed4 | 226 | tree x = size_binop_loc (loc, MULT_EXPR, |
227 | fold_convert_loc (loc, sizetype, nb_iter), | |
228 | TYPE_SIZE_UNIT (TREE_TYPE (op))); | |
96eaf651 | 229 | x = force_gimple_operand (x, &stmts, true, NULL); |
230 | gimple_seq_add_seq (stmt_list, stmts); | |
880734c8 | 231 | |
96eaf651 | 232 | return x; |
880734c8 | 233 | } |
234 | ||
/* Generate a call to memset that zeroes the memory written by STMT
   through the reference OP0 over NB_ITER iterations, and insert the
   generated statements after BSI.  Only references whose step is
   exactly plus or minus the size of the type of OP0 are handled.
   Return true when the operation succeeded.  */

static bool
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
                      gimple_stmt_iterator bsi)
{
  tree addr_base, nb_bytes;
  bool res = false;
  gimple_seq stmt_list = NULL, stmts;
  gimple fn_call;
  tree mem, fn;
  gimple_stmt_iterator i;
  /* Temporary data reference used only to analyze OP0; it is released
     at the END label on every path.  */
  struct data_reference *dr = XCNEW (struct data_reference);
  location_t loc = gimple_location (stmt);

  DR_STMT (dr) = stmt;
  DR_REF (dr) = op0;
  if (!dr_analyze_innermost (dr))
    goto end;

  /* Test for a positive stride, iterating over every element: the
     step minus the element size must be zero.  */
  if (integer_zerop (size_binop (MINUS_EXPR,
                                 fold_convert (sizetype, DR_STEP (dr)),
                                 TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
    {
      /* The region starts at base address + offset + init.  */
      addr_base = fold_convert_loc (loc, sizetype,
                                    size_binop_loc (loc, PLUS_EXPR,
                                                    DR_OFFSET (dr),
                                                    DR_INIT (dr)));
      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
                                   TREE_TYPE (DR_BASE_ADDRESS (dr)),
                                   DR_BASE_ADDRESS (dr), addr_base);

      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
    }

  /* Test for a negative stride, iterating over every element: the
     element size plus the step must be zero.  */
  else if (integer_zerop (size_binop (PLUS_EXPR,
                                      TYPE_SIZE_UNIT (TREE_TYPE (op0)),
                                      fold_convert (sizetype, DR_STEP (dr)))))
    {
      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);

      /* The region starts at
         base address + offset + init - nb_bytes + element size.  */
      addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
      addr_base = fold_convert_loc (loc, sizetype, addr_base);
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
                                   TREE_TYPE (DR_BASE_ADDRESS (dr)),
                                   DR_BASE_ADDRESS (dr), addr_base);
    }
  else
    goto end;

  /* Gimplify the start address and queue the statements computing it.  */
  mem = force_gimple_operand (addr_base, &stmts, true, NULL);
  gimple_seq_add_seq (&stmt_list, stmts);

  /* Build the call memset (mem, 0, nb_bytes).  */
  fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]);
  fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
  gimple_seq_add_stmt (&stmt_list, fn_call);

  for (i = gsi_start (stmt_list); !gsi_end_p (i); gsi_next (&i))
    {
      gimple s = gsi_stmt (i);
      update_stmt_if_modified (s);
    }

  gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  res = true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "generated memset zero\n");

 end:
  free_data_ref (dr);
  return res;
}
314 | ||
a6c1b5f0 | 315 | /* Propagate phis in BB b to their uses and remove them. */ |
316 | ||
317 | static void | |
318 | prop_phis (basic_block b) | |
319 | { | |
320 | gimple_stmt_iterator psi; | |
321 | gimple_seq phis = phi_nodes (b); | |
322 | ||
323 | for (psi = gsi_start (phis); !gsi_end_p (psi); ) | |
324 | { | |
325 | gimple phi = gsi_stmt (psi); | |
326 | tree def = gimple_phi_result (phi), use = gimple_phi_arg_def (phi, 0); | |
327 | ||
328 | gcc_assert (gimple_phi_num_args (phi) == 1); | |
329 | ||
330 | if (!is_gimple_reg (def)) | |
331 | { | |
332 | imm_use_iterator iter; | |
333 | use_operand_p use_p; | |
334 | gimple stmt; | |
335 | ||
336 | FOR_EACH_IMM_USE_STMT (stmt, iter, def) | |
337 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
338 | SET_USE (use_p, use); | |
339 | } | |
340 | else | |
341 | replace_uses_by (def, use); | |
342 | ||
343 | remove_phi_node (&psi, true); | |
344 | } | |
345 | } | |
346 | ||
801c5610 | 347 | /* Tries to generate a builtin function for the instructions of LOOP |
348 | pointed to by the bits set in PARTITION. Returns true when the | |
349 | operation succeeded. */ | |
350 | ||
351 | static bool | |
352 | generate_builtin (struct loop *loop, bitmap partition, bool copy_p) | |
353 | { | |
354 | bool res = false; | |
355 | unsigned i, x = 0; | |
356 | basic_block *bbs; | |
75a70cf9 | 357 | gimple write = NULL; |
801c5610 | 358 | tree op0, op1; |
75a70cf9 | 359 | gimple_stmt_iterator bsi; |
801c5610 | 360 | tree nb_iter = number_of_exit_cond_executions (loop); |
361 | ||
362 | if (!nb_iter || nb_iter == chrec_dont_know) | |
363 | return false; | |
364 | ||
365 | bbs = get_loop_body_in_dom_order (loop); | |
366 | ||
367 | for (i = 0; i < loop->num_nodes; i++) | |
368 | { | |
369 | basic_block bb = bbs[i]; | |
801c5610 | 370 | |
75a70cf9 | 371 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
801c5610 | 372 | x++; |
373 | ||
75a70cf9 | 374 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
801c5610 | 375 | { |
75a70cf9 | 376 | gimple stmt = gsi_stmt (bsi); |
801c5610 | 377 | |
378 | if (bitmap_bit_p (partition, x++) | |
75a70cf9 | 379 | && is_gimple_assign (stmt) |
380 | && !is_gimple_reg (gimple_assign_lhs (stmt))) | |
801c5610 | 381 | { |
382 | /* Don't generate the builtins when there are more than | |
383 | one memory write. */ | |
384 | if (write != NULL) | |
385 | goto end; | |
386 | ||
387 | write = stmt; | |
bfa5edba | 388 | if (bb == loop->latch) |
389 | nb_iter = number_of_latch_executions (loop); | |
801c5610 | 390 | } |
391 | } | |
392 | } | |
393 | ||
394 | if (!write) | |
395 | goto end; | |
396 | ||
75a70cf9 | 397 | op0 = gimple_assign_lhs (write); |
398 | op1 = gimple_assign_rhs1 (write); | |
801c5610 | 399 | |
400 | if (!(TREE_CODE (op0) == ARRAY_REF | |
401 | || TREE_CODE (op0) == INDIRECT_REF)) | |
402 | goto end; | |
403 | ||
404 | /* The new statements will be placed before LOOP. */ | |
75a70cf9 | 405 | bsi = gsi_last_bb (loop_preheader_edge (loop)->src); |
801c5610 | 406 | |
75a70cf9 | 407 | if (gimple_assign_rhs_code (write) == INTEGER_CST |
408 | && (integer_zerop (op1) || real_zerop (op1))) | |
801c5610 | 409 | res = generate_memset_zero (write, op0, nb_iter, bsi); |
410 | ||
411 | /* If this is the last partition for which we generate code, we have | |
412 | to destroy the loop. */ | |
413 | if (res && !copy_p) | |
414 | { | |
415 | unsigned nbbs = loop->num_nodes; | |
416 | basic_block src = loop_preheader_edge (loop)->src; | |
417 | basic_block dest = single_exit (loop)->dest; | |
a6c1b5f0 | 418 | prop_phis (dest); |
801c5610 | 419 | make_edge (src, dest, EDGE_FALLTHRU); |
801c5610 | 420 | cancel_loop_tree (loop); |
421 | ||
422 | for (i = 0; i < nbbs; i++) | |
423 | delete_basic_block (bbs[i]); | |
82b19557 | 424 | |
425 | set_immediate_dominator (CDI_DOMINATORS, dest, | |
426 | recompute_dominator (CDI_DOMINATORS, dest)); | |
801c5610 | 427 | } |
428 | ||
429 | end: | |
430 | free (bbs); | |
431 | return res; | |
432 | } | |
433 | ||
434 | /* Generates code for PARTITION. For simple loops, this function can | |
435 | generate a built-in. */ | |
436 | ||
437 | static bool | |
438 | generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p) | |
439 | { | |
440 | if (generate_builtin (loop, partition, copy_p)) | |
441 | return true; | |
442 | ||
443 | return generate_loops_for_partition (loop, partition, copy_p); | |
444 | } | |
445 | ||
446 | ||
447 | /* Returns true if the node V of RDG cannot be recomputed. */ | |
448 | ||
449 | static bool | |
450 | rdg_cannot_recompute_vertex_p (struct graph *rdg, int v) | |
451 | { | |
452 | if (RDG_MEM_WRITE_STMT (rdg, v)) | |
453 | return true; | |
454 | ||
455 | return false; | |
456 | } | |
457 | ||
458 | /* Returns true when the vertex V has already been generated in the | |
459 | current partition (V is in PROCESSED), or when V belongs to another | |
460 | partition and cannot be recomputed (V is not in REMAINING_STMTS). */ | |
461 | ||
462 | static inline bool | |
463 | already_processed_vertex_p (bitmap processed, int v) | |
464 | { | |
465 | return (bitmap_bit_p (processed, v) | |
466 | || !bitmap_bit_p (remaining_stmts, v)); | |
467 | } | |
468 | ||
469 | /* Returns NULL when there is no anti-dependence among the successors | |
470 | of vertex V, otherwise returns the edge with the anti-dep. */ | |
471 | ||
472 | static struct graph_edge * | |
473 | has_anti_dependence (struct vertex *v) | |
474 | { | |
475 | struct graph_edge *e; | |
476 | ||
477 | if (v->succ) | |
478 | for (e = v->succ; e; e = e->succ_next) | |
479 | if (RDGE_TYPE (e) == anti_dd) | |
480 | return e; | |
481 | ||
482 | return NULL; | |
483 | } | |
484 | ||
485 | /* Returns true when V has an anti-dependence edge among its successors. */ | |
486 | ||
487 | static bool | |
488 | predecessor_has_mem_write (struct graph *rdg, struct vertex *v) | |
489 | { | |
490 | struct graph_edge *e; | |
491 | ||
492 | if (v->pred) | |
493 | for (e = v->pred; e; e = e->pred_next) | |
494 | if (bitmap_bit_p (upstream_mem_writes, e->src) | |
495 | /* Don't consider flow channels: a write to memory followed | |
496 | by a read from memory. These channels allow the split of | |
497 | the RDG in different partitions. */ | |
498 | && !RDG_MEM_WRITE_STMT (rdg, e->src)) | |
499 | return true; | |
500 | ||
501 | return false; | |
502 | } | |
503 | ||
504 | /* Initializes the upstream_mem_writes bitmap following the | |
505 | information from RDG. */ | |
506 | ||
507 | static void | |
508 | mark_nodes_having_upstream_mem_writes (struct graph *rdg) | |
509 | { | |
510 | int v, x; | |
511 | bitmap seen = BITMAP_ALLOC (NULL); | |
512 | ||
513 | for (v = rdg->n_vertices - 1; v >= 0; v--) | |
514 | if (!bitmap_bit_p (seen, v)) | |
515 | { | |
516 | unsigned i; | |
517 | VEC (int, heap) *nodes = VEC_alloc (int, heap, 3); | |
801c5610 | 518 | |
519 | graphds_dfs (rdg, &v, 1, &nodes, false, NULL); | |
520 | ||
521 | for (i = 0; VEC_iterate (int, nodes, i, x); i++) | |
522 | { | |
523 | if (bitmap_bit_p (seen, x)) | |
524 | continue; | |
525 | ||
526 | bitmap_set_bit (seen, x); | |
527 | ||
528 | if (RDG_MEM_WRITE_STMT (rdg, x) | |
529 | || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) | |
530 | /* In anti dependences the read should occur before | |
531 | the write, this is why both the read and the write | |
532 | should be placed in the same partition. */ | |
533 | || has_anti_dependence (&(rdg->vertices[x]))) | |
534 | { | |
801c5610 | 535 | bitmap_set_bit (upstream_mem_writes, x); |
536 | } | |
537 | } | |
538 | ||
539 | VEC_free (int, heap, nodes); | |
540 | } | |
541 | } | |
542 | ||
543 | /* Returns true when vertex u has a memory write node as a predecessor | |
544 | in RDG. */ | |
545 | ||
546 | static bool | |
547 | has_upstream_mem_writes (int u) | |
548 | { | |
549 | return bitmap_bit_p (upstream_mem_writes, u); | |
550 | } | |
551 | ||
552 | static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap, | |
553 | bitmap, bool *); | |
554 | ||
555 | /* Flag all the uses of U. */ | |
556 | ||
557 | static void | |
558 | rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, | |
559 | bitmap processed, bool *part_has_writes) | |
560 | { | |
561 | struct graph_edge *e; | |
562 | ||
563 | for (e = rdg->vertices[u].succ; e; e = e->succ_next) | |
564 | if (!bitmap_bit_p (processed, e->dest)) | |
565 | { | |
566 | rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops, | |
567 | processed, part_has_writes); | |
568 | rdg_flag_all_uses (rdg, e->dest, partition, loops, processed, | |
569 | part_has_writes); | |
570 | } | |
571 | } | |
572 | ||
573 | /* Flag the uses of U stopping following the information from | |
574 | upstream_mem_writes. */ | |
575 | ||
576 | static void | |
577 | rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, | |
578 | bitmap processed, bool *part_has_writes) | |
579 | { | |
801c5610 | 580 | use_operand_p use_p; |
581 | struct vertex *x = &(rdg->vertices[u]); | |
75a70cf9 | 582 | gimple stmt = RDGV_STMT (x); |
801c5610 | 583 | struct graph_edge *anti_dep = has_anti_dependence (x); |
584 | ||
585 | /* Keep in the same partition the destination of an antidependence, | |
586 | because this is a store to the exact same location. Putting this | |
587 | in another partition is bad for cache locality. */ | |
588 | if (anti_dep) | |
589 | { | |
590 | int v = anti_dep->dest; | |
591 | ||
592 | if (!already_processed_vertex_p (processed, v)) | |
593 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
594 | processed, part_has_writes); | |
595 | } | |
596 | ||
75a70cf9 | 597 | if (gimple_code (stmt) != GIMPLE_PHI) |
801c5610 | 598 | { |
dd277d48 | 599 | if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P) |
801c5610 | 600 | { |
601 | tree use = USE_FROM_PTR (use_p); | |
602 | ||
603 | if (TREE_CODE (use) == SSA_NAME) | |
604 | { | |
75a70cf9 | 605 | gimple def_stmt = SSA_NAME_DEF_STMT (use); |
801c5610 | 606 | int v = rdg_vertex_for_stmt (rdg, def_stmt); |
607 | ||
608 | if (v >= 0 | |
609 | && !already_processed_vertex_p (processed, v)) | |
610 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
611 | processed, part_has_writes); | |
612 | } | |
613 | } | |
614 | } | |
615 | ||
75a70cf9 | 616 | if (is_gimple_assign (stmt) && has_upstream_mem_writes (u)) |
801c5610 | 617 | { |
75a70cf9 | 618 | tree op0 = gimple_assign_lhs (stmt); |
801c5610 | 619 | |
620 | /* Scalar channels don't have enough space for transmitting data | |
621 | between tasks, unless we add more storage by privatizing. */ | |
622 | if (is_gimple_reg (op0)) | |
623 | { | |
624 | use_operand_p use_p; | |
625 | imm_use_iterator iter; | |
626 | ||
627 | FOR_EACH_IMM_USE_FAST (use_p, iter, op0) | |
628 | { | |
629 | int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p)); | |
630 | ||
631 | if (!already_processed_vertex_p (processed, v)) | |
632 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, | |
633 | processed, part_has_writes); | |
634 | } | |
635 | } | |
636 | } | |
637 | } | |
638 | ||
639 | /* Flag V from RDG as part of PARTITION, and also flag its loop number | |
640 | in LOOPS. */ | |
641 | ||
642 | static void | |
643 | rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops, | |
644 | bool *part_has_writes) | |
645 | { | |
646 | struct loop *loop; | |
647 | ||
648 | if (bitmap_bit_p (partition, v)) | |
649 | return; | |
650 | ||
651 | loop = loop_containing_stmt (RDG_STMT (rdg, v)); | |
652 | bitmap_set_bit (loops, loop->num); | |
653 | bitmap_set_bit (partition, v); | |
654 | ||
655 | if (rdg_cannot_recompute_vertex_p (rdg, v)) | |
656 | { | |
657 | *part_has_writes = true; | |
658 | bitmap_clear_bit (remaining_stmts, v); | |
659 | } | |
660 | } | |
661 | ||
662 | /* Flag in the bitmap PARTITION the vertex V and all its predecessors. | |
f0b5f617 | 663 | Also flag their loop number in LOOPS. */ |
801c5610 | 664 | |
665 | static void | |
666 | rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition, | |
667 | bitmap loops, bitmap processed, | |
668 | bool *part_has_writes) | |
669 | { | |
670 | unsigned i; | |
671 | VEC (int, heap) *nodes = VEC_alloc (int, heap, 3); | |
672 | int x; | |
673 | ||
674 | bitmap_set_bit (processed, v); | |
675 | rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes); | |
676 | graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); | |
677 | rdg_flag_vertex (rdg, v, partition, loops, part_has_writes); | |
678 | ||
679 | for (i = 0; VEC_iterate (int, nodes, i, x); i++) | |
680 | if (!already_processed_vertex_p (processed, x)) | |
681 | rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed, | |
682 | part_has_writes); | |
683 | ||
684 | VEC_free (int, heap, nodes); | |
685 | } | |
686 | ||
687 | /* Initialize CONDS with all the condition statements from the basic | |
688 | blocks of LOOP. */ | |
689 | ||
690 | static void | |
75a70cf9 | 691 | collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds) |
801c5610 | 692 | { |
693 | unsigned i; | |
694 | edge e; | |
695 | VEC (edge, heap) *exits = get_loop_exit_edges (loop); | |
696 | ||
697 | for (i = 0; VEC_iterate (edge, exits, i, e); i++) | |
698 | { | |
75a70cf9 | 699 | gimple cond = last_stmt (e->src); |
801c5610 | 700 | |
701 | if (cond) | |
75a70cf9 | 702 | VEC_safe_push (gimple, heap, *conds, cond); |
801c5610 | 703 | } |
704 | ||
705 | VEC_free (edge, heap, exits); | |
706 | } | |
707 | ||
708 | /* Add to PARTITION all the exit condition statements for LOOPS | |
709 | together with all their dependent statements determined from | |
710 | RDG. */ | |
711 | ||
712 | static void | |
713 | rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition, | |
714 | bitmap processed, bool *part_has_writes) | |
715 | { | |
716 | unsigned i; | |
717 | bitmap_iterator bi; | |
75a70cf9 | 718 | VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3); |
801c5610 | 719 | |
720 | EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi) | |
721 | collect_condition_stmts (get_loop (i), &conds); | |
722 | ||
75a70cf9 | 723 | while (!VEC_empty (gimple, conds)) |
801c5610 | 724 | { |
75a70cf9 | 725 | gimple cond = VEC_pop (gimple, conds); |
801c5610 | 726 | int v = rdg_vertex_for_stmt (rdg, cond); |
727 | bitmap new_loops = BITMAP_ALLOC (NULL); | |
728 | ||
729 | if (!already_processed_vertex_p (processed, v)) | |
730 | rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed, | |
731 | part_has_writes); | |
732 | ||
733 | EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi) | |
734 | if (!bitmap_bit_p (loops, i)) | |
735 | { | |
736 | bitmap_set_bit (loops, i); | |
737 | collect_condition_stmts (get_loop (i), &conds); | |
738 | } | |
739 | ||
740 | BITMAP_FREE (new_loops); | |
741 | } | |
742 | } | |
743 | ||
/* Flag all the nodes of RDG containing memory accesses that could
   potentially belong to arrays already accessed in the current
   PARTITION.  The loops of the flagged nodes are recorded in LOOPS,
   PROCESSED tracks the visited vertices, and every store that gets
   flagged here is removed from OTHER_STORES.  */

static void
rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
                                  bitmap loops, bitmap processed,
                                  VEC (int, heap) **other_stores)
{
  /* Sink for the "partition has writes" flag of the callees; its
     value is never read here.  */
  bool foo;
  unsigned i, n;
  int j, k, kk;
  bitmap_iterator ii;
  struct graph_edge *e;

  /* Only the vertices of PARTITION that read or write memory are of
     interest.  */
  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      {
        /* Look for unprocessed memory accesses J similar to I.  */
        for (j = 0; j < rdg->n_vertices; j++)
          if (!bitmap_bit_p (processed, j)
              && (RDG_MEM_WRITE_STMT (rdg, j)
                  || RDG_MEM_READS_STMT (rdg, j))
              && rdg_has_similar_memory_accesses (rdg, i, j))
            {
              /* Flag first the node J itself, and all the nodes that
                 are needed to compute J.  */
              rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
                                             processed, &foo);

              /* When J is a read, we want to coalesce in the same
                 PARTITION all the nodes that are using J: this is
                 needed for better cache locality.  */
              rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);

              /* Remove from OTHER_STORES the vertex that we flagged.  */
              if (RDG_MEM_WRITE_STMT (rdg, j))
                for (k = 0; VEC_iterate (int, *other_stores, k, kk); k++)
                  if (kk == j)
                    {
                      VEC_unordered_remove (int, *other_stores, k);
                      break;
                    }
            }

        /* If the node I has two uses, then keep these together in the
           same PARTITION.  The empty-bodied loop below only counts the
           successor edges of I into N.  */
        for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);

        if (n > 1)
          rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
      }
}
797 | ||
798 | /* Returns a bitmap in which all the statements needed for computing | |
799 | the strongly connected component C of the RDG are flagged, also | |
800 | including the loop exit conditions. */ | |
801 | ||
802 | static bitmap | |
803 | build_rdg_partition_for_component (struct graph *rdg, rdgc c, | |
804 | bool *part_has_writes, | |
805 | VEC (int, heap) **other_stores) | |
806 | { | |
807 | int i, v; | |
808 | bitmap partition = BITMAP_ALLOC (NULL); | |
809 | bitmap loops = BITMAP_ALLOC (NULL); | |
810 | bitmap processed = BITMAP_ALLOC (NULL); | |
811 | ||
812 | for (i = 0; VEC_iterate (int, c->vertices, i, v); i++) | |
813 | if (!already_processed_vertex_p (processed, v)) | |
814 | rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed, | |
815 | part_has_writes); | |
816 | ||
817 | /* Also iterate on the array of stores not in the starting vertices, | |
818 | and determine those vertices that have some memory affinity with | |
819 | the current nodes in the component: these are stores to the same | |
820 | arrays, i.e. we're taking care of cache locality. */ | |
821 | rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, | |
822 | other_stores); | |
823 | ||
824 | rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); | |
825 | ||
826 | BITMAP_FREE (processed); | |
827 | BITMAP_FREE (loops); | |
828 | return partition; | |
829 | } | |
830 | ||
831 | /* Free memory for COMPONENTS. */ | |
832 | ||
833 | static void | |
834 | free_rdg_components (VEC (rdgc, heap) *components) | |
835 | { | |
836 | int i; | |
837 | rdgc x; | |
838 | ||
839 | for (i = 0; VEC_iterate (rdgc, components, i, x); i++) | |
840 | { | |
841 | VEC_free (int, heap, x->vertices); | |
842 | free (x); | |
843 | } | |
844 | } | |
845 | ||
846 | /* Build the COMPONENTS vector with the strongly connected components | |
847 | of RDG in which the STARTING_VERTICES occur. */ | |
848 | ||
849 | static void | |
48e1416a | 850 | rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices, |
801c5610 | 851 | VEC (rdgc, heap) **components) |
852 | { | |
853 | int i, v; | |
854 | bitmap saved_components = BITMAP_ALLOC (NULL); | |
855 | int n_components = graphds_scc (rdg, NULL); | |
856 | VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components); | |
857 | ||
858 | for (i = 0; i < n_components; i++) | |
859 | all_components[i] = VEC_alloc (int, heap, 3); | |
860 | ||
861 | for (i = 0; i < rdg->n_vertices; i++) | |
862 | VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i); | |
863 | ||
864 | for (i = 0; VEC_iterate (int, starting_vertices, i, v); i++) | |
865 | { | |
866 | int c = rdg->vertices[v].component; | |
867 | ||
868 | if (!bitmap_bit_p (saved_components, c)) | |
869 | { | |
870 | rdgc x = XCNEW (struct rdg_component); | |
871 | x->num = c; | |
872 | x->vertices = all_components[c]; | |
873 | ||
874 | VEC_safe_push (rdgc, heap, *components, x); | |
875 | bitmap_set_bit (saved_components, c); | |
876 | } | |
877 | } | |
878 | ||
879 | for (i = 0; i < n_components; i++) | |
880 | if (!bitmap_bit_p (saved_components, i)) | |
881 | VEC_free (int, heap, all_components[i]); | |
882 | ||
883 | free (all_components); | |
884 | BITMAP_FREE (saved_components); | |
885 | } | |
886 | ||
801c5610 | 887 | /* Aggregate several components into a useful partition that is |
888 | registered in the PARTITIONS vector. Partitions will be | |
889 | distributed in different loops. */ | |
890 | ||
891 | static void | |
892 | rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components, | |
893 | VEC (int, heap) **other_stores, | |
894 | VEC (bitmap, heap) **partitions, bitmap processed) | |
895 | { | |
896 | int i; | |
897 | rdgc x; | |
898 | bitmap partition = BITMAP_ALLOC (NULL); | |
899 | ||
900 | for (i = 0; VEC_iterate (rdgc, components, i, x); i++) | |
901 | { | |
902 | bitmap np; | |
903 | bool part_has_writes = false; | |
904 | int v = VEC_index (int, x->vertices, 0); | |
48e1416a | 905 | |
801c5610 | 906 | if (bitmap_bit_p (processed, v)) |
907 | continue; | |
48e1416a | 908 | |
801c5610 | 909 | np = build_rdg_partition_for_component (rdg, x, &part_has_writes, |
910 | other_stores); | |
911 | bitmap_ior_into (partition, np); | |
912 | bitmap_ior_into (processed, np); | |
913 | BITMAP_FREE (np); | |
914 | ||
915 | if (part_has_writes) | |
916 | { | |
917 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
918 | { | |
919 | fprintf (dump_file, "ldist useful partition:\n"); | |
920 | dump_bitmap (dump_file, partition); | |
921 | } | |
922 | ||
923 | VEC_safe_push (bitmap, heap, *partitions, partition); | |
924 | partition = BITMAP_ALLOC (NULL); | |
925 | } | |
926 | } | |
927 | ||
928 | /* Add the nodes from the RDG that were not marked as processed, and | |
929 | that are used outside the current loop. These are scalar | |
930 | computations that are not yet part of previous partitions. */ | |
931 | for (i = 0; i < rdg->n_vertices; i++) | |
932 | if (!bitmap_bit_p (processed, i) | |
933 | && rdg_defs_used_in_other_loops_p (rdg, i)) | |
934 | VEC_safe_push (int, heap, *other_stores, i); | |
935 | ||
936 | /* If there are still statements left in the OTHER_STORES array, | |
937 | create other components and partitions with these stores and | |
938 | their dependences. */ | |
939 | if (VEC_length (int, *other_stores) > 0) | |
940 | { | |
941 | VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3); | |
942 | VEC (int, heap) *foo = VEC_alloc (int, heap, 3); | |
943 | ||
944 | rdg_build_components (rdg, *other_stores, &comps); | |
945 | rdg_build_partitions (rdg, comps, &foo, partitions, processed); | |
946 | ||
947 | VEC_free (int, heap, foo); | |
948 | free_rdg_components (comps); | |
949 | } | |
950 | ||
951 | /* If there is something left in the last partition, save it. */ | |
952 | if (bitmap_count_bits (partition) > 0) | |
953 | VEC_safe_push (bitmap, heap, *partitions, partition); | |
954 | else | |
955 | BITMAP_FREE (partition); | |
956 | } | |
957 | ||
958 | /* Dump to FILE the PARTITIONS. */ | |
959 | ||
960 | static void | |
961 | dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions) | |
962 | { | |
963 | int i; | |
964 | bitmap partition; | |
965 | ||
966 | for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | |
967 | debug_bitmap_file (file, partition); | |
968 | } | |
969 | ||
970 | /* Debug PARTITIONS. */ | |
971 | extern void debug_rdg_partitions (VEC (bitmap, heap) *); | |
972 | ||
4b987fac | 973 | DEBUG_FUNCTION void |
801c5610 | 974 | debug_rdg_partitions (VEC (bitmap, heap) *partitions) |
975 | { | |
976 | dump_rdg_partitions (stderr, partitions); | |
977 | } | |
978 | ||
577982d8 | 979 | /* Returns the number of read and write operations in the RDG. */ |
980 | ||
981 | static int | |
982 | number_of_rw_in_rdg (struct graph *rdg) | |
983 | { | |
984 | int i, res = 0; | |
985 | ||
986 | for (i = 0; i < rdg->n_vertices; i++) | |
987 | { | |
988 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
989 | ++res; | |
990 | ||
991 | if (RDG_MEM_READS_STMT (rdg, i)) | |
992 | ++res; | |
993 | } | |
994 | ||
995 | return res; | |
996 | } | |
997 | ||
998 | /* Returns the number of read and write operations in a PARTITION of | |
999 | the RDG. */ | |
1000 | ||
1001 | static int | |
1002 | number_of_rw_in_partition (struct graph *rdg, bitmap partition) | |
1003 | { | |
1004 | int res = 0; | |
1005 | unsigned i; | |
1006 | bitmap_iterator ii; | |
1007 | ||
1008 | EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii) | |
1009 | { | |
1010 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1011 | ++res; | |
1012 | ||
1013 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1014 | ++res; | |
1015 | } | |
1016 | ||
1017 | return res; | |
1018 | } | |
1019 | ||
1020 | /* Returns true when one of the PARTITIONS contains all the read or | |
1021 | write operations of RDG. */ | |
1022 | ||
1023 | static bool | |
1024 | partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions) | |
1025 | { | |
1026 | int i; | |
1027 | bitmap partition; | |
1028 | int nrw = number_of_rw_in_rdg (rdg); | |
1029 | ||
1030 | for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | |
1031 | if (nrw == number_of_rw_in_partition (rdg, partition)) | |
1032 | return true; | |
1033 | ||
1034 | return false; | |
1035 | } | |
1036 | ||
801c5610 | 1037 | /* Generate code from STARTING_VERTICES in RDG. Returns the number of |
1038 | distributed loops. */ | |
1039 | ||
1040 | static int | |
1041 | ldist_gen (struct loop *loop, struct graph *rdg, | |
1042 | VEC (int, heap) *starting_vertices) | |
1043 | { | |
1044 | int i, nbp; | |
1045 | VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3); | |
1046 | VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3); | |
1047 | VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3); | |
1048 | bitmap partition, processed = BITMAP_ALLOC (NULL); | |
1049 | ||
1050 | remaining_stmts = BITMAP_ALLOC (NULL); | |
1051 | upstream_mem_writes = BITMAP_ALLOC (NULL); | |
1052 | ||
1053 | for (i = 0; i < rdg->n_vertices; i++) | |
1054 | { | |
1055 | bitmap_set_bit (remaining_stmts, i); | |
1056 | ||
1057 | /* Save in OTHER_STORES all the memory writes that are not in | |
1058 | STARTING_VERTICES. */ | |
1059 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1060 | { | |
1061 | int v; | |
1062 | unsigned j; | |
1063 | bool found = false; | |
1064 | ||
1065 | for (j = 0; VEC_iterate (int, starting_vertices, j, v); j++) | |
1066 | if (i == v) | |
1067 | { | |
1068 | found = true; | |
1069 | break; | |
1070 | } | |
1071 | ||
1072 | if (!found) | |
1073 | VEC_safe_push (int, heap, other_stores, i); | |
1074 | } | |
1075 | } | |
1076 | ||
1077 | mark_nodes_having_upstream_mem_writes (rdg); | |
1078 | rdg_build_components (rdg, starting_vertices, &components); | |
1079 | rdg_build_partitions (rdg, components, &other_stores, &partitions, | |
1080 | processed); | |
1081 | BITMAP_FREE (processed); | |
1082 | nbp = VEC_length (bitmap, partitions); | |
1083 | ||
577982d8 | 1084 | if (nbp <= 1 |
1085 | || partition_contains_all_rw (rdg, partitions)) | |
801c5610 | 1086 | goto ldist_done; |
1087 | ||
1088 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1089 | dump_rdg_partitions (dump_file, partitions); | |
1090 | ||
1091 | for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | |
1092 | if (!generate_code_for_partition (loop, partition, i < nbp - 1)) | |
1093 | goto ldist_done; | |
1094 | ||
1095 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
1096 | update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa); | |
1097 | ||
1098 | ldist_done: | |
1099 | ||
1100 | BITMAP_FREE (remaining_stmts); | |
1101 | BITMAP_FREE (upstream_mem_writes); | |
1102 | ||
1103 | for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | |
1104 | BITMAP_FREE (partition); | |
1105 | ||
1106 | VEC_free (int, heap, other_stores); | |
1107 | VEC_free (bitmap, heap, partitions); | |
1108 | free_rdg_components (components); | |
1109 | return nbp; | |
1110 | } | |
1111 | ||
1112 | /* Distributes the code from LOOP in such a way that producer | |
1113 | statements are placed before consumer statements. When STMTS is | |
1114 | NULL, performs the maximal distribution, if STMTS is not NULL, | |
1115 | tries to separate only these statements from the LOOP's body. | |
1116 | Returns the number of distributed loops. */ | |
1117 | ||
1118 | static int | |
75a70cf9 | 1119 | distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts) |
801c5610 | 1120 | { |
eeb74f9f | 1121 | int res = 0; |
801c5610 | 1122 | struct graph *rdg; |
75a70cf9 | 1123 | gimple s; |
801c5610 | 1124 | unsigned i; |
1125 | VEC (int, heap) *vertices; | |
1126 | ||
1127 | if (loop->num_nodes > 2) | |
1128 | { | |
1129 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1130 | fprintf (dump_file, | |
1131 | "FIXME: Loop %d not distributed: it has more than two basic blocks.\n", | |
1132 | loop->num); | |
1133 | ||
1134 | return res; | |
1135 | } | |
1136 | ||
1137 | rdg = build_rdg (loop); | |
1138 | ||
1139 | if (!rdg) | |
1140 | { | |
1141 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1142 | fprintf (dump_file, | |
1143 | "FIXME: Loop %d not distributed: failed to build the RDG.\n", | |
1144 | loop->num); | |
1145 | ||
1146 | return res; | |
1147 | } | |
1148 | ||
1149 | vertices = VEC_alloc (int, heap, 3); | |
1150 | ||
1151 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1152 | dump_rdg (dump_file, rdg); | |
1153 | ||
75a70cf9 | 1154 | for (i = 0; VEC_iterate (gimple, stmts, i, s); i++) |
801c5610 | 1155 | { |
1156 | int v = rdg_vertex_for_stmt (rdg, s); | |
1157 | ||
1158 | if (v >= 0) | |
1159 | { | |
1160 | VEC_safe_push (int, heap, vertices, v); | |
1161 | ||
1162 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1163 | fprintf (dump_file, | |
1164 | "ldist asked to generate code for vertex %d\n", v); | |
1165 | } | |
1166 | } | |
1167 | ||
1168 | res = ldist_gen (loop, rdg, vertices); | |
1169 | VEC_free (int, heap, vertices); | |
1170 | free_rdg (rdg); | |
1171 | ||
1172 | return res; | |
1173 | } | |
1174 | ||
1175 | /* Distribute all loops in the current function. */ | |
1176 | ||
1177 | static unsigned int | |
1178 | tree_loop_distribution (void) | |
1179 | { | |
1180 | struct loop *loop; | |
1181 | loop_iterator li; | |
1182 | int nb_generated_loops = 0; | |
1183 | ||
1184 | FOR_EACH_LOOP (li, loop, 0) | |
1185 | { | |
75a70cf9 | 1186 | VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3); |
801c5610 | 1187 | |
1188 | /* With the following working list, we're asking distribute_loop | |
1189 | to separate the stores of the loop: when dependences allow, | |
1190 | it will end on having one store per loop. */ | |
1191 | stores_from_loop (loop, &work_list); | |
1192 | ||
1193 | /* A simple heuristic for cache locality is to not split stores | |
1194 | to the same array. Without this call, an unrolled loop would | |
1195 | be split into as many loops as unroll factor, each loop | |
1196 | storing in the same array. */ | |
1197 | remove_similar_memory_refs (&work_list); | |
1198 | ||
1199 | nb_generated_loops = distribute_loop (loop, work_list); | |
1200 | ||
1201 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1202 | { | |
1203 | if (nb_generated_loops > 1) | |
1204 | fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", | |
1205 | loop->num, nb_generated_loops); | |
1206 | else | |
1207 | fprintf (dump_file, "Loop %d is the same.\n", loop->num); | |
1208 | } | |
1209 | ||
1210 | verify_loop_structure (); | |
1211 | ||
75a70cf9 | 1212 | VEC_free (gimple, heap, work_list); |
801c5610 | 1213 | } |
1214 | ||
dd277d48 | 1215 | return 0; |
801c5610 | 1216 | } |
1217 | ||
1218 | static bool | |
1219 | gate_tree_loop_distribution (void) | |
1220 | { | |
1221 | return flag_tree_loop_distribution != 0; | |
1222 | } | |
1223 | ||
20099e35 | 1224 | struct gimple_opt_pass pass_loop_distribution = |
801c5610 | 1225 | { |
20099e35 | 1226 | { |
1227 | GIMPLE_PASS, | |
801c5610 | 1228 | "ldist", /* name */ |
1229 | gate_tree_loop_distribution, /* gate */ | |
1230 | tree_loop_distribution, /* execute */ | |
1231 | NULL, /* sub */ | |
1232 | NULL, /* next */ | |
1233 | 0, /* static_pass_number */ | |
1234 | TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ | |
1235 | PROP_cfg | PROP_ssa, /* properties_required */ | |
1236 | 0, /* properties_provided */ | |
1237 | 0, /* properties_destroyed */ | |
1238 | 0, /* todo_flags_start */ | |
a29ec3eb | 1239 | TODO_dump_func /* todo_flags_finish */ |
20099e35 | 1240 | } |
801c5610 | 1241 | }; |