/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |    A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |    D(I) = A(I-1)*E
   |ENDDO
   This pass uses an RDG (Reduced Dependence Graph) built on top of
   the data dependence relations.  The RDG is then topologically
   sorted to obtain a map of information producers/consumers, based
   on which it generates the new loops.  */
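
/* Only innermost loops that have a single exit and whose body consists
   of exactly a header and a latch block are currently distributed; see
   tree_loop_distribution below.  */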

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

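/* The kind of a partition: a PKIND_NORMAL partition is emitted as a
   regular loop from the statements it contains; a PKIND_MEMSET or
   PKIND_MEMCPY partition is instead replaced by a call to memset,
   respectively memcpy/memmove.  */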
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* Data references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;

DEF_VEC_P (partition_t);
DEF_VEC_ALLOC_P (partition_t, heap);

/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition has any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (virtual_operand_p (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */
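
/* For example, for a data reference over 32-bit integers executed
   NB_ITER times, the returned size expression is NB_ITER * 4, folded
   at build time when NB_ITER is constant.  */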

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}

/* Build an address argument for a memory operation call.  */
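
/* The returned address is the lowest byte touched by the data
   reference: for a negative step, DR_INIT points at the first
   (highest-addressed) element accessed, so NB_BYTES is subtracted
   and one element size added back.  */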

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* Generate a call to memset for PARTITION in LOOP.  */
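
/* For example, once a partition has been classified as PKIND_MEMSET,
   a loop of the form

     for (i = 0; i < n; i++)
       a[i] = 0;

   is replaced by

     memset (a, 0, n * sizeof (*a));

   emitted in the block preceding LOOP.  */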

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  if (integer_zerop (val)
      || real_zerop (val)
      || TREE_CODE (val) == CONSTRUCTOR)
    val = integer_zero_node;
  else if (integer_all_onesp (val))
    val = build_int_cst (integer_type_node, -1);
  else
    {
      if (TREE_CODE (val) == INTEGER_CST)
        val = fold_convert (integer_type_node, val);
      else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
        {
          gimple cstmt;
          tree tem = make_ssa_name (integer_type_node, NULL);
          cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
          gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
          val = tem;
        }
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (integer_zerop (val))
        fprintf (dump_file, " zero\n");
      else if (integer_all_onesp (val))
        fprintf (dump_file, " minus one\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */
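
/* For example, a loop of the form

     for (i = 0; i < n; i++)
       a[i] = b[i];

   becomes a call to memcpy (a, b, n * sizeof (*a)), or to memmove
   when the two references may overlap.  */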

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */
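
/* COPY_P is false for the last partition to be emitted, which reuses
   LOOP itself; earlier partitions work on a copy of LOOP placed
   before it.  A builtin partition does not need the loop body at all,
   so the loop is destroyed once no later partition will use it.  */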

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}


/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}

/* Returns true when a predecessor of vertex V in RDG is marked as
   having upstream memory writes, ignoring predecessors that are
   themselves memory writes (flow channels).  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow the split of
             the RDG in different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (int, nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write; this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              {
                bitmap_set_bit (upstream_mem_writes, x);
              }
          }

        VEC_free (int, heap, nodes);
      }
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);

/* Flag the uses of U, stopping the recursion according to the
   information from upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (int, nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  VEC_free (int, heap, nodes);
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (edge, exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (i), &conds);

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}

/* Returns a partition in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (int, c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP.  For the moment we detect the memset and memcpy/memmove
   patterns.  */
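
/* For example, under the checks below (known iteration count, unit
   stride, no volatile operands, no scalar uses outside the loop), a
   partition containing only the store

     for (i = 0; i < n; i++)
       a[i] = -1;

   is classified as PKIND_MEMSET with partition->main_dr the store
   to a[i].  */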

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  if (!flag_tree_loop_distribute_patterns)
    return;

  /* Perform general partition disqualification for builtins.  */
  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        return;

      /* If the stmt has uses outside of the loop, fail.
         ??? If the stmt is generated in another partition that
         is not created as a builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          return;
        }
    }

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0;
           VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (!(integer_zerop (rhs)
            || integer_all_onesp (rhs)
            || real_zerop (rhs)
            || (TREE_CODE (rhs) == CONSTRUCTOR
                && !TREE_CLOBBER_P (rhs))
            || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
                && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
                    == TYPE_MODE (unsigned_char_type_node)))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      /* Now check that if there is a dependence, this dependence is
         of a suitable form for memmove.  */
      VEC (loop_p, heap) *loops = NULL;
      ddr_p ddr;
      VEC_safe_push (loop_p, heap, loops, loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          VEC_free (loop_p, heap, loops);
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              VEC_free (loop_p, heap, loops);
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  VEC_free (loop_p, heap, loops);
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      VEC_free (loop_p, heap, loops);
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (data_reference_p, RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (data_reference_p,
                                    RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */
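
/* Note that the function recurses once: stores that do not belong to
   any of the partitions built from COMPONENTS are collected in
   OTHER_STORES, and new components and partitions are built for
   them.  */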

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (partition_t, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (rdgc, components, i, x)
    {
      partition_t np;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          VEC_safe_push (partition_t, heap, *partitions, partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    VEC_safe_push (partition_t, heap, *partitions, partition);
  else
    partition_free (partition);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (partition_t, heap) *);

DEBUG_FUNCTION void
debug_rdg_partitions (VEC (partition_t, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           VEC (partition_t, heap) *partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */
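
/* The overall structure is: mark the nodes having upstream memory
   writes, build the strongly connected components and the partitions,
   classify each partition as a builtin where possible, fuse partitions
   according to the distribution mode, and finally emit the code for
   each partition.  */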

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns, fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ??? Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; VEC_iterate (partition_t, partitions, i, into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; VEC_iterate (partition_t, partitions, i, partition); ++i)
            if (!partition_builtin_p (partition))
              {
                bitmap_ior_into (into->stmts, partition->stmts);
                VEC_ordered_remove (partition_t, partitions, i);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < VEC_length (partition_t, partitions));
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; VEC_iterate (partition_t, partitions, i, into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               VEC_iterate (partition_t, partitions, j, partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ??? The following is horribly inefficient,
                     we are re-computing and analyzing data-references
                     of the stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  VEC_ordered_remove (partition_t, partitions, j);
                  j--;
                }
            }
        }
    }

  nbp = VEC_length (partition_t, partitions);
  if (nbp == 0
      || (nbp == 1
          && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
      || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
    partition_free (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (partition_t, heap, partitions);
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  VEC (loop_p, heap) *loop_nest;

  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 100);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      VEC_free (loop_p, heap, loop_nest);
      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (gimple, stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  VEC_free (loop_p, heap, loop_nest);
  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      VEC (gimple, heap) *work_list = NULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Only distribute stores for now.
                 ??? We should also try to distribute scalar reductions,
                 thus SSA defs that have scalar uses outside of the loop.  */
              if (!gimple_assign_single_p (stmt)
                  || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              VEC_safe_push (gimple, heap, work_list, stmt);
            }
        }
      free (bbs);

      if (VEC_length (gimple, work_list) > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      VEC_free (gimple, heap, work_list);
    }

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_ggc_collect
  | TODO_verify_ssa             /* todo_flags_finish */
 }
};