/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   | A(I) = B(I) + C
   | D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   | A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   | D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, a Reduced Dependence Graph built on top of
   the data dependence relations.  The RDG is then topologically
   sorted to obtain a map of information producers/consumers based on
   which it generates the new loops.  */
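
/* When -ftree-loop-distribute-patterns is enabled, the pass also
   replaces partitions whose effect reduces to a known memory idiom
   with a library call (see classify_partition below).  As an
   illustrative sketch, a loop like

   |for (i = 0; i < N; i++)
   |  A[i] = 0;

   is replaced by a single call

   |memset (A, 0, N * sizeof (*A));

   and an element-wise copy loop is likewise replaced by memcpy, or by
   memmove when the source and destination may alias.  */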

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;

DEF_VEC_P (partition_t);
DEF_VEC_ALLOC_P (partition_t, heap);

/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition has any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    if (loop != loop_containing_stmt (USE_STMT (use_p)))
      return true;

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      tree block;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP) */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      block = gimple_phi_arg_block_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus, block);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP) */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
      block = gimple_phi_arg_block_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus,
                   block);
    }
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (!is_gimple_reg (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
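
/* As a worked example of the negative stride adjustment above (a
   sketch, assuming the data-reference analysis produced these
   values): for a loop filling A[N-1] down to A[0] with element size
   S, DR_OFFSET + DR_INIT is (N-1)*S and NB_BYTES is N*S, so the
   start address becomes BASE + (N-1)*S - N*S + S = BASE, i.e. the
   lowest accessed byte, which is what the generated memset/memcpy
   call expects.  */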

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
          gimple cstmt;
          tree tem = create_tmp_reg (integer_type_node, NULL);
          tem = make_ssa_name (tem, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
          gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
        fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
        fprintf (dump_file, " minus one\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol, as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (!is_gimple_reg (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}


/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}

/* Returns true when V has a predecessor that is flagged in
   UPSTREAM_MEM_WRITES and is not itself a memory write.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow splitting the
             RDG into different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (int, nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write; this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              {
                bitmap_set_bit (upstream_mem_writes, x);
              }
          }

        VEC_free (int, heap, nodes);
      }
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);

/* Flag the uses of U, stopping the traversal according to the
   information in UPSTREAM_MEM_WRITES.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  VEC_free (int, heap, nodes);
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}

/* Returns a partition in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        return;

      /* If the stmt has uses outside of the loop, fail.
         ??? If the stmt is generated in another partition that
         is not created as a builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          return;
        }
    }

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0;
           VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
            || integer_all_onesp (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
                && !TREE_CLOBBER_P (rhs))
            || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
                && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
                    == TYPE_MODE (unsigned_char_type_node)))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}
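
/* As an illustration of the classification above (a sketch, not an
   exhaustive statement of the checks): a loop body consisting of
   |A[i] = 0 with an adjacent store becomes PKIND_MEMSET, and
   |A[i] = B[i] with adjacent load and store of equal step becomes
   PKIND_MEMCPY; partitions with several stores, several loads, or
   volatile operands stay PKIND_NORMAL.  */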

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores.  Common loads are the situation
     that happens most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (data_reference_p, RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (data_reference_p,
                                    RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (partition_t, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      partition_t np;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          VEC_safe_push (partition_t, heap, *partitions, partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    VEC_safe_push (partition_t, heap, *partitions, partition);
  else
    partition_free (partition);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (partition_t, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (partition_t, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i)
        if (!partition_builtin_p (partition))
          {
            bitmap_ior_into (into->stmts, partition->stmts);
            VEC_ordered_remove (partition_t, partitions, i);
            i--;
          }
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               VEC_iterate (partition_t, partitions, j, partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ??? The following is horribly inefficient,
                     we are re-computing and analyzing data-references
                     of the stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  VEC_ordered_remove (partition_t, partitions, j);
                  j--;
                }
            }
        }
    }

  nbp = VEC_length (partition_t, partitions);
  if (nbp == 0
      || (nbp == 1
          && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
      || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    partition_free (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (partition_t, heap, partitions);
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      VEC (gimple, heap) *work_list = NULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Only distribute stores for now.
                 ??? We should also try to distribute scalar reductions,
                 thus SSA defs that have scalar uses outside of the loop.  */
              if (!gimple_assign_single_p (stmt)
                  || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              VEC_safe_push (gimple, heap, work_list, stmt);
            }
        }
      free (bbs);

      if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      VEC_free (gimple, heap, work_list);
    }

  if (changed)
    {
      mark_sym_for_renaming (gimple_vop (cfun));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa           /* todo_flags_finish */
 }
};