]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-loop-distribution.c
Update Copyright years for files modified in 2011 and/or 2012.
[thirdparty/gcc.git] / gcc / tree-loop-distribution.c
CommitLineData
801c5610 1/* Loop distribution.
71e45bc2 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
7cf0dbf3 3 Free Software Foundation, Inc.
801c5610 4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
6
7This file is part of GCC.
48e1416a 8
801c5610 9GCC is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 3, or (at your option) any
12later version.
48e1416a 13
801c5610 14GCC is distributed in the hope that it will be useful, but WITHOUT
15ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
48e1416a 18
801c5610 19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23/* This pass performs loop distribution: for example, the loop
24
25 |DO I = 2, N
26 | A(I) = B(I) + C
27 | D(I) = A(I-1)*E
28 |ENDDO
29
48e1416a 30 is transformed to
801c5610 31
32 |DOALL I = 2, N
33 | A(I) = B(I) + C
34 |ENDDO
35 |
36 |DOALL I = 2, N
37 | D(I) = A(I-1)*E
38 |ENDDO
39
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
44
45#include "config.h"
46#include "system.h"
47#include "coretypes.h"
801c5610 48#include "tree-flow.h"
801c5610 49#include "cfgloop.h"
801c5610 50#include "tree-chrec.h"
51#include "tree-data-ref.h"
52#include "tree-scalar-evolution.h"
53#include "tree-pass.h"
801c5610 54
f689d33d 55enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
d32bc1d7 56
543506e0 57typedef struct partition_s
58{
59 bitmap stmts;
e5edce84 60 bool has_writes;
d32bc1d7 61 enum partition_kind kind;
f689d33d 62 /* data-references a kind != PKIND_NORMAL partition is about. */
63 data_reference_p main_dr;
64 data_reference_p secondary_dr;
543506e0 65} *partition_t;
66
543506e0 67
68/* Allocate and initialize a partition from BITMAP. */
69
70static partition_t
71partition_alloc (bitmap stmts)
72{
73 partition_t partition = XCNEW (struct partition_s);
74 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
e5edce84 75 partition->has_writes = false;
d32bc1d7 76 partition->kind = PKIND_NORMAL;
543506e0 77 return partition;
78}
79
80/* Free PARTITION. */
81
82static void
83partition_free (partition_t partition)
84{
85 BITMAP_FREE (partition->stmts);
86 free (partition);
87}
88
d32bc1d7 89/* Returns true if the partition can be generated as a builtin. */
90
91static bool
92partition_builtin_p (partition_t partition)
93{
94 return partition->kind != PKIND_NORMAL;
95}
543506e0 96
e5edce84 97/* Returns true if the partition has an writes. */
98
99static bool
100partition_has_writes (partition_t partition)
101{
102 return partition->has_writes;
103}
104
801c5610 105/* If bit I is not set, it means that this node represents an
106 operation that has already been performed, and that should not be
107 performed again. This is the subgraph of remaining important
108 computations that is passed to the DFS algorithm for avoiding to
109 include several times the same stores in different loops. */
110static bitmap remaining_stmts;
111
112/* A node of the RDG is marked in this bitmap when it has as a
113 predecessor a node that writes to memory. */
114static bitmap upstream_mem_writes;
115
b5698e04 116/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
117 the LOOP. */
118
119static bool
120ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
121{
122 imm_use_iterator imm_iter;
123 use_operand_p use_p;
124
125 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
64a31469 126 {
127 gimple use_stmt = USE_STMT (use_p);
128 if (!is_gimple_debug (use_stmt)
129 && loop != loop_containing_stmt (use_stmt))
130 return true;
131 }
b5698e04 132
133 return false;
134}
135
136/* Returns true when STMT defines a scalar variable used after the
bfcf35ff 137 loop LOOP. */
b5698e04 138
139static bool
bfcf35ff 140stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
b5698e04 141{
bfcf35ff 142 def_operand_p def_p;
143 ssa_op_iter op_iter;
b5698e04 144
35ec0372 145 if (gimple_code (stmt) == GIMPLE_PHI)
146 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
147
bfcf35ff 148 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
149 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
150 return true;
b5698e04 151
bfcf35ff 152 return false;
b5698e04 153}
154
801c5610 155/* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
156 ORIG_LOOP. */
157
158static void
159update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
160{
161 tree new_ssa_name;
75a70cf9 162 gimple_stmt_iterator si_new, si_orig;
801c5610 163 edge orig_loop_latch = loop_latch_edge (orig_loop);
164 edge orig_entry_e = loop_preheader_edge (orig_loop);
165 edge new_loop_entry_e = loop_preheader_edge (new_loop);
166
167 /* Scan the phis in the headers of the old and new loops
168 (they are organized in exactly the same order). */
75a70cf9 169 for (si_new = gsi_start_phis (new_loop->header),
170 si_orig = gsi_start_phis (orig_loop->header);
171 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
172 gsi_next (&si_new), gsi_next (&si_orig))
801c5610 173 {
75a70cf9 174 tree def;
efbcb6de 175 source_location locus;
75a70cf9 176 gimple phi_new = gsi_stmt (si_new);
177 gimple phi_orig = gsi_stmt (si_orig);
178
801c5610 179 /* Add the first phi argument for the phi in NEW_LOOP (the one
180 associated with the entry of NEW_LOOP) */
75a70cf9 181 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
efbcb6de 182 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
60d535d2 183 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
801c5610 184
185 /* Add the second phi argument for the phi in NEW_LOOP (the one
186 associated with the latch of NEW_LOOP) */
187 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
efbcb6de 188 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
801c5610 189
190 if (TREE_CODE (def) == SSA_NAME)
191 {
192 new_ssa_name = get_current_def (def);
193
194 if (!new_ssa_name)
195 /* This only happens if there are no definitions inside the
44f1d7db 196 loop. Use the the invariant in the new loop as is. */
197 new_ssa_name = def;
801c5610 198 }
199 else
200 /* Could be an integer. */
201 new_ssa_name = def;
202
60d535d2 203 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
801c5610 204 }
205}
206
207/* Return a copy of LOOP placed before LOOP. */
208
209static struct loop *
210copy_loop_before (struct loop *loop)
211{
212 struct loop *res;
213 edge preheader = loop_preheader_edge (loop);
214
801c5610 215 initialize_original_copy_tables ();
216 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
d32bc1d7 217 gcc_assert (res != NULL);
801c5610 218 free_original_copy_tables ();
219
801c5610 220 update_phis_for_loop_copy (loop, res);
221 rename_variables_in_loop (res);
222
223 return res;
224}
225
226/* Creates an empty basic block after LOOP. */
227
228static void
229create_bb_after_loop (struct loop *loop)
230{
231 edge exit = single_exit (loop);
232
233 if (!exit)
234 return;
235
236 split_edge (exit);
237}
238
239/* Generate code for PARTITION from the code in LOOP. The loop is
240 copied when COPY_P is true. All the statements not flagged in the
241 PARTITION bitmap are removed from the loop or from its copy. The
242 statements are indexed in sequence inside a basic block, and the
d32bc1d7 243 basic blocks of a loop are taken in dom order. */
801c5610 244
d32bc1d7 245static void
543506e0 246generate_loops_for_partition (struct loop *loop, partition_t partition,
247 bool copy_p)
801c5610 248{
249 unsigned i, x;
75a70cf9 250 gimple_stmt_iterator bsi;
801c5610 251 basic_block *bbs;
252
253 if (copy_p)
254 {
255 loop = copy_loop_before (loop);
d32bc1d7 256 gcc_assert (loop != NULL);
801c5610 257 create_preheader (loop, CP_SIMPLE_PREHEADERS);
258 create_bb_after_loop (loop);
259 }
260
801c5610 261 /* Remove stmts not in the PARTITION bitmap. The order in which we
262 visit the phi nodes and the statements is exactly as in
263 stmts_from_loop. */
264 bbs = get_loop_body_in_dom_order (loop);
265
8ebd8f29 266 if (MAY_HAVE_DEBUG_STMTS)
267 for (x = 0, i = 0; i < loop->num_nodes; i++)
268 {
269 basic_block bb = bbs[i];
270
271 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
543506e0 272 if (!bitmap_bit_p (partition->stmts, x++))
8ebd8f29 273 reset_debug_uses (gsi_stmt (bsi));
274
275 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
276 {
277 gimple stmt = gsi_stmt (bsi);
278 if (gimple_code (stmt) != GIMPLE_LABEL
279 && !is_gimple_debug (stmt)
543506e0 280 && !bitmap_bit_p (partition->stmts, x++))
8ebd8f29 281 reset_debug_uses (stmt);
282 }
283 }
284
801c5610 285 for (x = 0, i = 0; i < loop->num_nodes; i++)
286 {
287 basic_block bb = bbs[i];
801c5610 288
75a70cf9 289 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
543506e0 290 if (!bitmap_bit_p (partition->stmts, x++))
ff24e1e2 291 {
292 gimple phi = gsi_stmt (bsi);
7c782c9b 293 if (virtual_operand_p (gimple_phi_result (phi)))
ff24e1e2 294 mark_virtual_phi_result_for_renaming (phi);
295 remove_phi_node (&bsi, true);
296 }
801c5610 297 else
75a70cf9 298 gsi_next (&bsi);
801c5610 299
75a70cf9 300 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
ff24e1e2 301 {
302 gimple stmt = gsi_stmt (bsi);
8ebd8f29 303 if (gimple_code (stmt) != GIMPLE_LABEL
304 && !is_gimple_debug (stmt)
543506e0 305 && !bitmap_bit_p (partition->stmts, x++))
ff24e1e2 306 {
307 unlink_stmt_vdef (stmt);
308 gsi_remove (&bsi, true);
309 release_defs (stmt);
310 }
311 else
312 gsi_next (&bsi);
313 }
801c5610 314 }
315
316 free (bbs);
801c5610 317}
318
f689d33d 319/* Build the size argument for a memory operation call. */
880734c8 320
f689d33d 321static tree
322build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
880734c8 323{
f689d33d 324 tree size;
325 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
326 fold_convert_loc (loc, sizetype, nb_iter),
327 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
328 return fold_convert_loc (loc, size_type_node, size);
329}
330
331/* Build an address argument for a memory operation call. */
332
333static tree
334build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
335{
336 tree addr_base;
337
338 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
339 addr_base = fold_convert_loc (loc, sizetype, addr_base);
340
341 /* Test for a negative stride, iterating over every element. */
342 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
343 {
344 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
345 fold_convert_loc (loc, sizetype, nb_bytes));
346 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
347 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
348 }
349
350 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
880734c8 351}
352
d32bc1d7 353/* Generate a call to memset for PARTITION in LOOP. */
801c5610 354
a136cad6 355static void
f689d33d 356generate_memset_builtin (struct loop *loop, partition_t partition)
801c5610 357{
d32bc1d7 358 gimple_stmt_iterator gsi;
359 gimple stmt, fn_call;
f689d33d 360 tree nb_iter, mem, fn, nb_bytes;
d32bc1d7 361 location_t loc;
0644fcba 362 tree val;
d32bc1d7 363
f689d33d 364 stmt = DR_STMT (partition->main_dr);
d32bc1d7 365 loc = gimple_location (stmt);
d32bc1d7 366 if (gimple_bb (stmt) == loop->latch)
367 nb_iter = number_of_latch_executions (loop);
368 else
369 nb_iter = number_of_exit_cond_executions (loop);
370
371 /* The new statements will be placed before LOOP. */
372 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
801c5610 373
f689d33d 374 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
375 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
376 false, GSI_CONTINUE_LINKING);
377 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
378 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
379 false, GSI_CONTINUE_LINKING);
801c5610 380
0644fcba 381 /* This exactly matches the pattern recognition in classify_partition. */
382 val = gimple_assign_rhs1 (stmt);
383 if (integer_zerop (val)
384 || real_zerop (val)
385 || TREE_CODE (val) == CONSTRUCTOR)
386 val = integer_zero_node;
387 else if (integer_all_onesp (val))
388 val = build_int_cst (integer_type_node, -1);
389 else
390 {
391 if (TREE_CODE (val) == INTEGER_CST)
392 val = fold_convert (integer_type_node, val);
393 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
394 {
395 gimple cstmt;
03d37e4e 396 tree tem = make_ssa_name (integer_type_node, NULL);
0644fcba 397 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
f689d33d 398 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
0644fcba 399 val = tem;
400 }
401 }
402
b9a16870 403 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
0644fcba 404 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
f689d33d 405 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
801c5610 406
407 if (dump_file && (dump_flags & TDF_DETAILS))
0644fcba 408 {
409 fprintf (dump_file, "generated memset");
410 if (integer_zerop (val))
411 fprintf (dump_file, " zero\n");
412 else if (integer_all_onesp (val))
413 fprintf (dump_file, " minus one\n");
414 else
415 fprintf (dump_file, "\n");
416 }
801c5610 417}
418
f689d33d 419/* Generate a call to memcpy for PARTITION in LOOP. */
420
421static void
422generate_memcpy_builtin (struct loop *loop, partition_t partition)
423{
424 gimple_stmt_iterator gsi;
425 gimple stmt, fn_call;
426 tree nb_iter, dest, src, fn, nb_bytes;
427 location_t loc;
428 enum built_in_function kind;
429
430 stmt = DR_STMT (partition->main_dr);
431 loc = gimple_location (stmt);
432 if (gimple_bb (stmt) == loop->latch)
433 nb_iter = number_of_latch_executions (loop);
434 else
435 nb_iter = number_of_exit_cond_executions (loop);
436
437 /* The new statements will be placed before LOOP. */
438 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
439
440 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
441 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
442 false, GSI_CONTINUE_LINKING);
443 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
444 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
445 if (ptr_derefs_may_alias_p (dest, src))
446 kind = BUILT_IN_MEMMOVE;
447 else
448 kind = BUILT_IN_MEMCPY;
449
450 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
451 false, GSI_CONTINUE_LINKING);
452 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
453 false, GSI_CONTINUE_LINKING);
454 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
455 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
456 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
457
458 if (dump_file && (dump_flags & TDF_DETAILS))
459 {
460 if (kind == BUILT_IN_MEMCPY)
461 fprintf (dump_file, "generated memcpy\n");
462 else
463 fprintf (dump_file, "generated memmove\n");
464 }
465}
466
d32bc1d7 467/* Remove and destroy the loop LOOP. */
801c5610 468
d32bc1d7 469static void
470destroy_loop (struct loop *loop)
801c5610 471{
d32bc1d7 472 unsigned nbbs = loop->num_nodes;
473 edge exit = single_exit (loop);
474 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
801c5610 475 basic_block *bbs;
d32bc1d7 476 unsigned i;
801c5610 477
478 bbs = get_loop_body_in_dom_order (loop);
479
d32bc1d7 480 redirect_edge_pred (exit, src);
481 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
482 exit->flags |= EDGE_FALLTHRU;
483 cancel_loop_tree (loop);
484 rescan_loop_exit (exit, false, true);
801c5610 485
d32bc1d7 486 for (i = 0; i < nbbs; i++)
54459dd6 487 {
488 /* We have made sure to not leave any dangling uses of SSA
489 names defined in the loop. With the exception of virtuals.
490 Make sure we replace all uses of virtual defs that will remain
491 outside of the loop with the bare symbol as delete_basic_block
492 will release them. */
493 gimple_stmt_iterator gsi;
494 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
495 {
496 gimple phi = gsi_stmt (gsi);
7c782c9b 497 if (virtual_operand_p (gimple_phi_result (phi)))
54459dd6 498 mark_virtual_phi_result_for_renaming (phi);
499 }
500 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
501 {
502 gimple stmt = gsi_stmt (gsi);
503 tree vdef = gimple_vdef (stmt);
504 if (vdef && TREE_CODE (vdef) == SSA_NAME)
505 mark_virtual_operand_for_renaming (vdef);
506 }
507 delete_basic_block (bbs[i]);
508 }
801c5610 509 free (bbs);
d32bc1d7 510
511 set_immediate_dominator (CDI_DOMINATORS, dest,
512 recompute_dominator (CDI_DOMINATORS, dest));
801c5610 513}
514
d32bc1d7 515/* Generates code for PARTITION. */
801c5610 516
d32bc1d7 517static void
f689d33d 518generate_code_for_partition (struct loop *loop,
6198d968 519 partition_t partition, bool copy_p)
801c5610 520{
d32bc1d7 521 switch (partition->kind)
522 {
523 case PKIND_MEMSET:
f689d33d 524 generate_memset_builtin (loop, partition);
525 /* If this is the last partition for which we generate code, we have
526 to destroy the loop. */
527 if (!copy_p)
528 destroy_loop (loop);
529 break;
530
531 case PKIND_MEMCPY:
532 generate_memcpy_builtin (loop, partition);
d32bc1d7 533 /* If this is the last partition for which we generate code, we have
534 to destroy the loop. */
535 if (!copy_p)
536 destroy_loop (loop);
537 break;
538
539 case PKIND_NORMAL:
540 generate_loops_for_partition (loop, partition, copy_p);
541 break;
542
543 default:
544 gcc_unreachable ();
545 }
801c5610 546}
547
548
549/* Returns true if the node V of RDG cannot be recomputed. */
550
551static bool
552rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
553{
554 if (RDG_MEM_WRITE_STMT (rdg, v))
555 return true;
556
557 return false;
558}
559
560/* Returns true when the vertex V has already been generated in the
561 current partition (V is in PROCESSED), or when V belongs to another
562 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
563
564static inline bool
565already_processed_vertex_p (bitmap processed, int v)
566{
567 return (bitmap_bit_p (processed, v)
568 || !bitmap_bit_p (remaining_stmts, v));
569}
570
571/* Returns NULL when there is no anti-dependence among the successors
572 of vertex V, otherwise returns the edge with the anti-dep. */
573
574static struct graph_edge *
575has_anti_dependence (struct vertex *v)
576{
577 struct graph_edge *e;
578
579 if (v->succ)
580 for (e = v->succ; e; e = e->succ_next)
581 if (RDGE_TYPE (e) == anti_dd)
582 return e;
583
584 return NULL;
585}
586
587/* Returns true when V has an anti-dependence edge among its successors. */
588
589static bool
590predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
591{
592 struct graph_edge *e;
593
594 if (v->pred)
595 for (e = v->pred; e; e = e->pred_next)
596 if (bitmap_bit_p (upstream_mem_writes, e->src)
597 /* Don't consider flow channels: a write to memory followed
598 by a read from memory. These channels allow the split of
599 the RDG in different partitions. */
600 && !RDG_MEM_WRITE_STMT (rdg, e->src))
601 return true;
602
603 return false;
604}
605
606/* Initializes the upstream_mem_writes bitmap following the
607 information from RDG. */
608
609static void
610mark_nodes_having_upstream_mem_writes (struct graph *rdg)
611{
612 int v, x;
613 bitmap seen = BITMAP_ALLOC (NULL);
614
615 for (v = rdg->n_vertices - 1; v >= 0; v--)
616 if (!bitmap_bit_p (seen, v))
617 {
618 unsigned i;
f1f41a6c 619 vec<int> nodes;
620 nodes.create (3);
801c5610 621
622 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
623
f1f41a6c 624 FOR_EACH_VEC_ELT (nodes, i, x)
801c5610 625 {
6ef9bbe0 626 if (!bitmap_set_bit (seen, x))
801c5610 627 continue;
628
801c5610 629 if (RDG_MEM_WRITE_STMT (rdg, x)
630 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
631 /* In anti dependences the read should occur before
632 the write, this is why both the read and the write
633 should be placed in the same partition. */
634 || has_anti_dependence (&(rdg->vertices[x])))
635 {
801c5610 636 bitmap_set_bit (upstream_mem_writes, x);
637 }
638 }
639
f1f41a6c 640 nodes.release ();
801c5610 641 }
642}
643
644/* Returns true when vertex u has a memory write node as a predecessor
645 in RDG. */
646
647static bool
648has_upstream_mem_writes (int u)
649{
650 return bitmap_bit_p (upstream_mem_writes, u);
651}
652
543506e0 653static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
e5edce84 654 bitmap, bitmap);
801c5610 655
801c5610 656/* Flag the uses of U stopping following the information from
657 upstream_mem_writes. */
658
659static void
543506e0 660rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
e5edce84 661 bitmap processed)
801c5610 662{
801c5610 663 use_operand_p use_p;
664 struct vertex *x = &(rdg->vertices[u]);
75a70cf9 665 gimple stmt = RDGV_STMT (x);
801c5610 666 struct graph_edge *anti_dep = has_anti_dependence (x);
667
668 /* Keep in the same partition the destination of an antidependence,
669 because this is a store to the exact same location. Putting this
670 in another partition is bad for cache locality. */
671 if (anti_dep)
672 {
673 int v = anti_dep->dest;
674
675 if (!already_processed_vertex_p (processed, v))
676 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
e5edce84 677 processed);
801c5610 678 }
679
75a70cf9 680 if (gimple_code (stmt) != GIMPLE_PHI)
801c5610 681 {
dd277d48 682 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
801c5610 683 {
684 tree use = USE_FROM_PTR (use_p);
685
686 if (TREE_CODE (use) == SSA_NAME)
687 {
75a70cf9 688 gimple def_stmt = SSA_NAME_DEF_STMT (use);
801c5610 689 int v = rdg_vertex_for_stmt (rdg, def_stmt);
690
691 if (v >= 0
692 && !already_processed_vertex_p (processed, v))
693 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
e5edce84 694 processed);
801c5610 695 }
696 }
697 }
698
75a70cf9 699 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
801c5610 700 {
75a70cf9 701 tree op0 = gimple_assign_lhs (stmt);
801c5610 702
703 /* Scalar channels don't have enough space for transmitting data
704 between tasks, unless we add more storage by privatizing. */
705 if (is_gimple_reg (op0))
706 {
707 use_operand_p use_p;
708 imm_use_iterator iter;
709
710 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
711 {
712 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
713
714 if (!already_processed_vertex_p (processed, v))
715 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
e5edce84 716 processed);
801c5610 717 }
718 }
719 }
720}
721
722/* Flag V from RDG as part of PARTITION, and also flag its loop number
723 in LOOPS. */
724
725static void
e5edce84 726rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
801c5610 727{
728 struct loop *loop;
729
543506e0 730 if (!bitmap_set_bit (partition->stmts, v))
801c5610 731 return;
732
733 loop = loop_containing_stmt (RDG_STMT (rdg, v));
734 bitmap_set_bit (loops, loop->num);
801c5610 735
736 if (rdg_cannot_recompute_vertex_p (rdg, v))
737 {
e5edce84 738 partition->has_writes = true;
801c5610 739 bitmap_clear_bit (remaining_stmts, v);
740 }
741}
742
743/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
f0b5f617 744 Also flag their loop number in LOOPS. */
801c5610 745
746static void
543506e0 747rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
e5edce84 748 bitmap loops, bitmap processed)
801c5610 749{
750 unsigned i;
f1f41a6c 751 vec<int> nodes;
752 nodes.create (3);
801c5610 753 int x;
754
755 bitmap_set_bit (processed, v);
e5edce84 756 rdg_flag_uses (rdg, v, partition, loops, processed);
801c5610 757 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
e5edce84 758 rdg_flag_vertex (rdg, v, partition, loops);
801c5610 759
f1f41a6c 760 FOR_EACH_VEC_ELT (nodes, i, x)
801c5610 761 if (!already_processed_vertex_p (processed, x))
e5edce84 762 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);
801c5610 763
f1f41a6c 764 nodes.release ();
801c5610 765}
766
767/* Initialize CONDS with all the condition statements from the basic
768 blocks of LOOP. */
769
770static void
f1f41a6c 771collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
801c5610 772{
773 unsigned i;
774 edge e;
f1f41a6c 775 vec<edge> exits = get_loop_exit_edges (loop);
801c5610 776
f1f41a6c 777 FOR_EACH_VEC_ELT (exits, i, e)
801c5610 778 {
75a70cf9 779 gimple cond = last_stmt (e->src);
801c5610 780
781 if (cond)
f1f41a6c 782 conds->safe_push (cond);
801c5610 783 }
784
f1f41a6c 785 exits.release ();
801c5610 786}
787
788/* Add to PARTITION all the exit condition statements for LOOPS
789 together with all their dependent statements determined from
790 RDG. */
791
792static void
543506e0 793rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
e5edce84 794 bitmap processed)
801c5610 795{
796 unsigned i;
797 bitmap_iterator bi;
f1f41a6c 798 vec<gimple> conds;
799 conds.create (3);
801c5610 800
801 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
802 collect_condition_stmts (get_loop (i), &conds);
803
f1f41a6c 804 while (!conds.is_empty ())
801c5610 805 {
f1f41a6c 806 gimple cond = conds.pop ();
801c5610 807 int v = rdg_vertex_for_stmt (rdg, cond);
808 bitmap new_loops = BITMAP_ALLOC (NULL);
809
810 if (!already_processed_vertex_p (processed, v))
e5edce84 811 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);
801c5610 812
813 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
6ef9bbe0 814 if (bitmap_set_bit (loops, i))
815 collect_condition_stmts (get_loop (i), &conds);
801c5610 816
817 BITMAP_FREE (new_loops);
818 }
a8af2e86 819
f1f41a6c 820 conds.release ();
801c5610 821}
822
801c5610 823/* Returns a bitmap in which all the statements needed for computing
824 the strongly connected component C of the RDG are flagged, also
825 including the loop exit conditions. */
826
543506e0 827static partition_t
e5edce84 828build_rdg_partition_for_component (struct graph *rdg, rdgc c)
801c5610 829{
830 int i, v;
543506e0 831 partition_t partition = partition_alloc (NULL);
801c5610 832 bitmap loops = BITMAP_ALLOC (NULL);
833 bitmap processed = BITMAP_ALLOC (NULL);
834
f1f41a6c 835 FOR_EACH_VEC_ELT (c->vertices, i, v)
801c5610 836 if (!already_processed_vertex_p (processed, v))
e5edce84 837 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);
801c5610 838
e5edce84 839 rdg_flag_loop_exits (rdg, loops, partition, processed);
801c5610 840
841 BITMAP_FREE (processed);
842 BITMAP_FREE (loops);
843 return partition;
844}
845
846/* Free memory for COMPONENTS. */
847
848static void
f1f41a6c 849free_rdg_components (vec<rdgc> components)
801c5610 850{
851 int i;
852 rdgc x;
853
f1f41a6c 854 FOR_EACH_VEC_ELT (components, i, x)
801c5610 855 {
f1f41a6c 856 x->vertices.release ();
801c5610 857 free (x);
858 }
a8af2e86 859
f1f41a6c 860 components.release ();
801c5610 861}
862
863/* Build the COMPONENTS vector with the strongly connected components
864 of RDG in which the STARTING_VERTICES occur. */
865
866static void
f1f41a6c 867rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
868 vec<rdgc> *components)
801c5610 869{
870 int i, v;
871 bitmap saved_components = BITMAP_ALLOC (NULL);
872 int n_components = graphds_scc (rdg, NULL);
f1f41a6c 873 /* ??? Macros cannot process template types with more than one
874 argument, so we need this typedef. */
875 typedef vec<int> vec_int_heap;
876 vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);
801c5610 877
878 for (i = 0; i < n_components; i++)
f1f41a6c 879 all_components[i].create (3);
801c5610 880
881 for (i = 0; i < rdg->n_vertices; i++)
f1f41a6c 882 all_components[rdg->vertices[i].component].safe_push (i);
801c5610 883
f1f41a6c 884 FOR_EACH_VEC_ELT (starting_vertices, i, v)
801c5610 885 {
886 int c = rdg->vertices[v].component;
887
6ef9bbe0 888 if (bitmap_set_bit (saved_components, c))
801c5610 889 {
890 rdgc x = XCNEW (struct rdg_component);
891 x->num = c;
892 x->vertices = all_components[c];
893
f1f41a6c 894 components->safe_push (x);
801c5610 895 }
896 }
897
898 for (i = 0; i < n_components; i++)
899 if (!bitmap_bit_p (saved_components, i))
f1f41a6c 900 all_components[i].release ();
801c5610 901
902 free (all_components);
903 BITMAP_FREE (saved_components);
904}
905
d32bc1d7 906/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
907 For the moment we detect only the memset zero pattern. */
a136cad6 908
d32bc1d7 909static void
910classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
a136cad6 911{
a136cad6 912 bitmap_iterator bi;
d32bc1d7 913 unsigned i;
914 tree nb_iter;
f689d33d 915 data_reference_p single_load, single_store;
d32bc1d7 916
917 partition->kind = PKIND_NORMAL;
f689d33d 918 partition->main_dr = NULL;
919 partition->secondary_dr = NULL;
d32bc1d7 920
54459dd6 921 if (!flag_tree_loop_distribute_patterns)
922 return;
923
d32bc1d7 924 /* Perform general partition disqualification for builtins. */
925 nb_iter = number_of_exit_cond_executions (loop);
926 if (!nb_iter || nb_iter == chrec_dont_know)
927 return;
a136cad6 928
543506e0 929 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
d32bc1d7 930 {
931 gimple stmt = RDG_STMT (rdg, i);
932
933 if (gimple_has_volatile_ops (stmt))
934 return;
a136cad6 935
d32bc1d7 936 /* If the stmt has uses outside of the loop fail.
937 ??? If the stmt is generated in another partition that
938 is not created as builtin we can ignore this. */
35ec0372 939 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
d32bc1d7 940 {
941 if (dump_file && (dump_flags & TDF_DETAILS))
942 fprintf (dump_file, "not generating builtin, partition has "
943 "scalar uses outside of the loop\n");
944 return;
945 }
946 }
947
f689d33d 948 /* Detect memset and memcpy. */
949 single_load = NULL;
950 single_store = NULL;
d32bc1d7 951 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
952 {
953 gimple stmt = RDG_STMT (rdg, i);
f689d33d 954 data_reference_p dr;
955 unsigned j;
d32bc1d7 956
957 if (gimple_code (stmt) == GIMPLE_PHI)
958 continue;
959
960 /* Any scalar stmts are ok. */
961 if (!gimple_vuse (stmt))
962 continue;
963
f689d33d 964 /* Otherwise just regular loads/stores. */
965 if (!gimple_assign_single_p (stmt))
966 return;
967
968 /* But exactly one store and/or load. */
f1f41a6c 969 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
d32bc1d7 970 {
f689d33d 971 if (DR_IS_READ (dr))
972 {
973 if (single_load != NULL)
974 return;
975 single_load = dr;
976 }
977 else
978 {
979 if (single_store != NULL)
980 return;
981 single_store = dr;
982 }
d32bc1d7 983 }
d32bc1d7 984 }
985
f689d33d 986 if (single_store && !single_load)
987 {
988 gimple stmt = DR_STMT (single_store);
989 tree rhs = gimple_assign_rhs1 (stmt);
990 if (!(integer_zerop (rhs)
991 || integer_all_onesp (rhs)
992 || real_zerop (rhs)
993 || (TREE_CODE (rhs) == CONSTRUCTOR
994 && !TREE_CLOBBER_P (rhs))
995 || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
996 && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
997 == TYPE_MODE (unsigned_char_type_node)))))
998 return;
999 if (TREE_CODE (rhs) == SSA_NAME
1000 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1001 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1002 return;
1003 if (!adjacent_dr_p (single_store))
1004 return;
1005 partition->kind = PKIND_MEMSET;
1006 partition->main_dr = single_store;
1007 }
1008 else if (single_store && single_load)
1009 {
1010 gimple store = DR_STMT (single_store);
1011 gimple load = DR_STMT (single_load);
1012 /* Direct aggregate copy or via an SSA name temporary. */
1013 if (load != store
1014 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1015 return;
1016 if (!adjacent_dr_p (single_store)
1017 || !adjacent_dr_p (single_load)
1018 || !operand_equal_p (DR_STEP (single_store),
1019 DR_STEP (single_load), 0))
1020 return;
7b6f8db4 1021 /* Now check that if there is a dependence this dependence is
1022 of a suitable form for memmove. */
1e094109 1023 vec<loop_p> loops = vNULL;
7b6f8db4 1024 ddr_p ddr;
f1f41a6c 1025 loops.safe_push (loop);
7b6f8db4 1026 ddr = initialize_data_dependence_relation (single_load, single_store,
1027 loops);
1028 compute_affine_dependence (ddr, loop);
7b6f8db4 1029 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1030 {
1031 free_dependence_relation (ddr);
f1f41a6c 1032 loops.release ();
7b6f8db4 1033 return;
1034 }
1035 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1036 {
1037 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1038 {
1039 free_dependence_relation (ddr);
f1f41a6c 1040 loops.release ();
7b6f8db4 1041 return;
1042 }
1043 lambda_vector dist_v;
f1f41a6c 1044 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
7b6f8db4 1045 {
1046 int dist = dist_v[index_in_loop_nest (loop->num,
1047 DDR_LOOP_NEST (ddr))];
1048 if (dist > 0 && !DDR_REVERSED_P (ddr))
1049 {
1050 free_dependence_relation (ddr);
f1f41a6c 1051 loops.release ();
7b6f8db4 1052 return;
1053 }
1054 }
1055 }
2792bf09 1056 free_dependence_relation (ddr);
f1f41a6c 1057 loops.release ();
f689d33d 1058 partition->kind = PKIND_MEMCPY;
1059 partition->main_dr = single_store;
1060 partition->secondary_dr = single_load;
1061 }
a136cad6 1062}
1063
f83623cc 1064/* For a data reference REF, return the declaration of its base
1065 address or NULL_TREE if the base is not determined. */
1066
1067static tree
1068ref_base_address (data_reference_p dr)
1069{
1070 tree base_address = DR_BASE_ADDRESS (dr);
1071 if (base_address
1072 && TREE_CODE (base_address) == ADDR_EXPR)
1073 return TREE_OPERAND (base_address, 0);
1074
1075 return base_address;
1076}
1077
a136cad6 1078/* Returns true when PARTITION1 and PARTITION2 have similar memory
1079 accesses in RDG. */
1080
1081static bool
543506e0 1082similar_memory_accesses (struct graph *rdg, partition_t partition1,
1083 partition_t partition2)
a136cad6 1084{
f83623cc 1085 unsigned i, j, k, l;
a136cad6 1086 bitmap_iterator bi, bj;
f83623cc 1087 data_reference_p ref1, ref2;
1088
1089 /* First check whether in the intersection of the two partitions are
1090 any loads or stores. Common loads are the situation that happens
1091 most often. */
1092 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1093 if (RDG_MEM_WRITE_STMT (rdg, i)
1094 || RDG_MEM_READS_STMT (rdg, i))
1095 return true;
a136cad6 1096
f83623cc 1097 /* Then check all data-references against each other. */
543506e0 1098 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
a136cad6 1099 if (RDG_MEM_WRITE_STMT (rdg, i)
1100 || RDG_MEM_READS_STMT (rdg, i))
543506e0 1101 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
a136cad6 1102 if (RDG_MEM_WRITE_STMT (rdg, j)
1103 || RDG_MEM_READS_STMT (rdg, j))
f83623cc 1104 {
f1f41a6c 1105 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
f83623cc 1106 {
1107 tree base1 = ref_base_address (ref1);
1108 if (base1)
f1f41a6c 1109 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
f83623cc 1110 if (base1 == ref_base_address (ref2))
1111 return true;
1112 }
1113 }
a136cad6 1114
1115 return false;
1116}
1117
801c5610 1118/* Aggregate several components into a useful partition that is
1119 registered in the PARTITIONS vector. Partitions will be
1120 distributed in different loops. */
1121
1122static void
f1f41a6c 1123rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
1124 vec<int> *other_stores,
1125 vec<partition_t> *partitions, bitmap processed)
801c5610 1126{
1127 int i;
1128 rdgc x;
543506e0 1129 partition_t partition = partition_alloc (NULL);
801c5610 1130
f1f41a6c 1131 FOR_EACH_VEC_ELT (components, i, x)
801c5610 1132 {
543506e0 1133 partition_t np;
f1f41a6c 1134 int v = x->vertices[0];
48e1416a 1135
801c5610 1136 if (bitmap_bit_p (processed, v))
1137 continue;
48e1416a 1138
e5edce84 1139 np = build_rdg_partition_for_component (rdg, x);
543506e0 1140 bitmap_ior_into (partition->stmts, np->stmts);
e5edce84 1141 partition->has_writes = partition_has_writes (np);
543506e0 1142 bitmap_ior_into (processed, np->stmts);
1143 partition_free (np);
801c5610 1144
e5edce84 1145 if (partition_has_writes (partition))
801c5610 1146 {
1147 if (dump_file && (dump_flags & TDF_DETAILS))
1148 {
1149 fprintf (dump_file, "ldist useful partition:\n");
543506e0 1150 dump_bitmap (dump_file, partition->stmts);
801c5610 1151 }
1152
f1f41a6c 1153 partitions->safe_push (partition);
543506e0 1154 partition = partition_alloc (NULL);
801c5610 1155 }
1156 }
1157
1158 /* Add the nodes from the RDG that were not marked as processed, and
1159 that are used outside the current loop. These are scalar
1160 computations that are not yet part of previous partitions. */
1161 for (i = 0; i < rdg->n_vertices; i++)
1162 if (!bitmap_bit_p (processed, i)
1163 && rdg_defs_used_in_other_loops_p (rdg, i))
f1f41a6c 1164 other_stores->safe_push (i);
801c5610 1165
1166 /* If there are still statements left in the OTHER_STORES array,
1167 create other components and partitions with these stores and
1168 their dependences. */
f1f41a6c 1169 if (other_stores->length () > 0)
801c5610 1170 {
f1f41a6c 1171 vec<rdgc> comps;
1172 comps.create (3);
1173 vec<int> foo;
1174 foo.create (3);
801c5610 1175
1176 rdg_build_components (rdg, *other_stores, &comps);
1177 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
1178
f1f41a6c 1179 foo.release ();
801c5610 1180 free_rdg_components (comps);
1181 }
1182
1183 /* If there is something left in the last partition, save it. */
543506e0 1184 if (bitmap_count_bits (partition->stmts) > 0)
f1f41a6c 1185 partitions->safe_push (partition);
801c5610 1186 else
543506e0 1187 partition_free (partition);
801c5610 1188}
1189
1190/* Dump to FILE the PARTITIONS. */
1191
1192static void
f1f41a6c 1193dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
801c5610 1194{
1195 int i;
543506e0 1196 partition_t partition;
801c5610 1197
f1f41a6c 1198 FOR_EACH_VEC_ELT (partitions, i, partition)
543506e0 1199 debug_bitmap_file (file, partition->stmts);
801c5610 1200}
1201
1202/* Debug PARTITIONS. */
f1f41a6c 1203extern void debug_rdg_partitions (vec<partition_t> );
801c5610 1204
4b987fac 1205DEBUG_FUNCTION void
f1f41a6c 1206debug_rdg_partitions (vec<partition_t> partitions)
801c5610 1207{
1208 dump_rdg_partitions (stderr, partitions);
1209}
1210
577982d8 1211/* Returns the number of read and write operations in the RDG. */
1212
1213static int
1214number_of_rw_in_rdg (struct graph *rdg)
1215{
1216 int i, res = 0;
1217
1218 for (i = 0; i < rdg->n_vertices; i++)
1219 {
1220 if (RDG_MEM_WRITE_STMT (rdg, i))
1221 ++res;
1222
1223 if (RDG_MEM_READS_STMT (rdg, i))
1224 ++res;
1225 }
1226
1227 return res;
1228}
1229
1230/* Returns the number of read and write operations in a PARTITION of
1231 the RDG. */
1232
1233static int
543506e0 1234number_of_rw_in_partition (struct graph *rdg, partition_t partition)
577982d8 1235{
1236 int res = 0;
1237 unsigned i;
1238 bitmap_iterator ii;
1239
543506e0 1240 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
577982d8 1241 {
1242 if (RDG_MEM_WRITE_STMT (rdg, i))
1243 ++res;
1244
1245 if (RDG_MEM_READS_STMT (rdg, i))
1246 ++res;
1247 }
1248
1249 return res;
1250}
1251
1252/* Returns true when one of the PARTITIONS contains all the read or
1253 write operations of RDG. */
1254
1255static bool
f1f41a6c 1256partition_contains_all_rw (struct graph *rdg,
1257 vec<partition_t> partitions)
577982d8 1258{
1259 int i;
543506e0 1260 partition_t partition;
577982d8 1261 int nrw = number_of_rw_in_rdg (rdg);
1262
f1f41a6c 1263 FOR_EACH_VEC_ELT (partitions, i, partition)
577982d8 1264 if (nrw == number_of_rw_in_partition (rdg, partition))
1265 return true;
1266
1267 return false;
1268}
1269
801c5610 1270/* Generate code from STARTING_VERTICES in RDG. Returns the number of
1271 distributed loops. */
1272
1273static int
1274ldist_gen (struct loop *loop, struct graph *rdg,
f1f41a6c 1275 vec<int> starting_vertices)
801c5610 1276{
1277 int i, nbp;
f1f41a6c 1278 vec<rdgc> components;
1279 components.create (3);
1280 vec<partition_t> partitions;
1281 partitions.create (3);
1282 vec<int> other_stores;
1283 other_stores.create (3);
543506e0 1284 partition_t partition;
1285 bitmap processed = BITMAP_ALLOC (NULL);
6198d968 1286 bool any_builtin;
801c5610 1287
1288 remaining_stmts = BITMAP_ALLOC (NULL);
1289 upstream_mem_writes = BITMAP_ALLOC (NULL);
1290
1291 for (i = 0; i < rdg->n_vertices; i++)
1292 {
1293 bitmap_set_bit (remaining_stmts, i);
1294
1295 /* Save in OTHER_STORES all the memory writes that are not in
1296 STARTING_VERTICES. */
1297 if (RDG_MEM_WRITE_STMT (rdg, i))
1298 {
1299 int v;
1300 unsigned j;
1301 bool found = false;
1302
f1f41a6c 1303 FOR_EACH_VEC_ELT (starting_vertices, j, v)
801c5610 1304 if (i == v)
1305 {
1306 found = true;
1307 break;
1308 }
1309
1310 if (!found)
f1f41a6c 1311 other_stores.safe_push (i);
801c5610 1312 }
1313 }
1314
1315 mark_nodes_having_upstream_mem_writes (rdg);
1316 rdg_build_components (rdg, starting_vertices, &components);
1317 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1318 processed);
1319 BITMAP_FREE (processed);
801c5610 1320
6198d968 1321 any_builtin = false;
f1f41a6c 1322 FOR_EACH_VEC_ELT (partitions, i, partition)
6198d968 1323 {
1324 classify_partition (loop, rdg, partition);
1325 any_builtin |= partition_builtin_p (partition);
1326 }
d32bc1d7 1327
54459dd6 1328 /* If we are only distributing patterns fuse all partitions that
1329 were not properly classified as builtins. Else fuse partitions
1330 with similar memory accesses. */
1331 if (!flag_tree_loop_distribution)
1332 {
1333 partition_t into;
6198d968 1334 /* If we did not detect any builtin simply bail out. */
1335 if (!any_builtin)
1336 {
1337 nbp = 0;
1338 goto ldist_done;
1339 }
ed74aa44 1340 /* Only fuse adjacent non-builtin partitions, see PR53616.
1341 ??? Use dependence information to improve partition ordering. */
1342 i = 0;
1343 do
1344 {
f1f41a6c 1345 for (; partitions.iterate (i, &into); ++i)
ed74aa44 1346 if (!partition_builtin_p (into))
1347 break;
f1f41a6c 1348 for (++i; partitions.iterate (i, &partition); ++i)
ed74aa44 1349 if (!partition_builtin_p (partition))
1350 {
1351 bitmap_ior_into (into->stmts, partition->stmts);
f1f41a6c 1352 partitions.ordered_remove (i);
ed74aa44 1353 i--;
1354 }
1355 else
1356 break;
1357 }
f1f41a6c 1358 while ((unsigned) i < partitions.length ());
54459dd6 1359 }
1360 else
1361 {
1362 partition_t into;
1363 int j;
f1f41a6c 1364 for (i = 0; partitions.iterate (i, &into); ++i)
54459dd6 1365 {
1366 if (partition_builtin_p (into))
1367 continue;
1368 for (j = i + 1;
f1f41a6c 1369 partitions.iterate (j, &partition); ++j)
54459dd6 1370 {
1371 if (!partition_builtin_p (partition)
1372 /* ??? The following is horribly inefficient,
1373 we are re-computing and analyzing data-references
1374 of the stmts in the partitions all the time. */
1375 && similar_memory_accesses (rdg, into, partition))
1376 {
1377 if (dump_file && (dump_flags & TDF_DETAILS))
1378 {
1379 fprintf (dump_file, "fusing partitions\n");
1380 dump_bitmap (dump_file, into->stmts);
1381 dump_bitmap (dump_file, partition->stmts);
1382 fprintf (dump_file, "because they have similar "
1383 "memory accesses\n");
1384 }
1385 bitmap_ior_into (into->stmts, partition->stmts);
f1f41a6c 1386 partitions.ordered_remove (j);
54459dd6 1387 j--;
1388 }
1389 }
1390 }
1391 }
d32bc1d7 1392
f1f41a6c 1393 nbp = partitions.length ();
58ccfbea 1394 if (nbp == 0
f1f41a6c 1395 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1396 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
54459dd6 1397 {
1398 nbp = 0;
1399 goto ldist_done;
1400 }
801c5610 1401
1402 if (dump_file && (dump_flags & TDF_DETAILS))
1403 dump_rdg_partitions (dump_file, partitions);
1404
f1f41a6c 1405 FOR_EACH_VEC_ELT (partitions, i, partition)
f689d33d 1406 generate_code_for_partition (loop, partition, i < nbp - 1);
801c5610 1407
801c5610 1408 ldist_done:
1409
1410 BITMAP_FREE (remaining_stmts);
1411 BITMAP_FREE (upstream_mem_writes);
1412
f1f41a6c 1413 FOR_EACH_VEC_ELT (partitions, i, partition)
543506e0 1414 partition_free (partition);
801c5610 1415
f1f41a6c 1416 other_stores.release ();
1417 partitions.release ();
801c5610 1418 free_rdg_components (components);
1419 return nbp;
1420}
1421
1422/* Distributes the code from LOOP in such a way that producer
1423 statements are placed before consumer statements. When STMTS is
1424 NULL, performs the maximal distribution, if STMTS is not NULL,
1425 tries to separate only these statements from the LOOP's body.
1426 Returns the number of distributed loops. */
1427
1428static int
f1f41a6c 1429distribute_loop (struct loop *loop, vec<gimple> stmts)
801c5610 1430{
eeb74f9f 1431 int res = 0;
801c5610 1432 struct graph *rdg;
75a70cf9 1433 gimple s;
801c5610 1434 unsigned i;
f1f41a6c 1435 vec<int> vertices;
1436 vec<ddr_p> dependence_relations;
1437 vec<data_reference_p> datarefs;
1438 vec<loop_p> loop_nest;
1439
1440 datarefs.create (10);
1441 dependence_relations.create (100);
1442 loop_nest.create (3);
a8af2e86 1443 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
801c5610 1444
1445 if (!rdg)
1446 {
1447 if (dump_file && (dump_flags & TDF_DETAILS))
1448 fprintf (dump_file,
1449 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1450 loop->num);
1451
a8af2e86 1452 free_dependence_relations (dependence_relations);
1453 free_data_refs (datarefs);
f1f41a6c 1454 loop_nest.release ();
801c5610 1455 return res;
1456 }
1457
f1f41a6c 1458 vertices.create (3);
801c5610 1459
1460 if (dump_file && (dump_flags & TDF_DETAILS))
1461 dump_rdg (dump_file, rdg);
1462
f1f41a6c 1463 FOR_EACH_VEC_ELT (stmts, i, s)
801c5610 1464 {
1465 int v = rdg_vertex_for_stmt (rdg, s);
1466
1467 if (v >= 0)
1468 {
f1f41a6c 1469 vertices.safe_push (v);
801c5610 1470
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1472 fprintf (dump_file,
1473 "ldist asked to generate code for vertex %d\n", v);
1474 }
1475 }
1476
1477 res = ldist_gen (loop, rdg, vertices);
f1f41a6c 1478 vertices.release ();
801c5610 1479 free_rdg (rdg);
a8af2e86 1480 free_dependence_relations (dependence_relations);
1481 free_data_refs (datarefs);
f1f41a6c 1482 loop_nest.release ();
801c5610 1483 return res;
1484}
1485
1486/* Distribute all loops in the current function. */
1487
1488static unsigned int
1489tree_loop_distribution (void)
1490{
1491 struct loop *loop;
1492 loop_iterator li;
54459dd6 1493 bool changed = false;
f83623cc 1494 basic_block bb;
1495
1496 FOR_ALL_BB (bb)
1497 {
1498 gimple_stmt_iterator gsi;
1499 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1500 gimple_set_uid (gsi_stmt (gsi), -1);
1501 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1502 gimple_set_uid (gsi_stmt (gsi), -1);
1503 }
801c5610 1504
54459dd6 1505 /* We can at the moment only distribute non-nested loops, thus restrict
1506 walking to innermost loops. */
1507 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
801c5610 1508 {
1e094109 1509 vec<gimple> work_list = vNULL;
6198d968 1510 basic_block *bbs;
95f41c04 1511 int num = loop->num;
54459dd6 1512 int nb_generated_loops = 0;
6198d968 1513 unsigned int i;
180f0a7e 1514
1515 /* If the loop doesn't have a single exit we will fail anyway,
1516 so do that early. */
1517 if (!single_exit (loop))
1518 continue;
801c5610 1519
5f38d9ef 1520 /* Only optimize hot loops. */
1521 if (!optimize_loop_for_speed_p (loop))
1522 continue;
1523
54459dd6 1524 /* Only distribute loops with a header and latch for now. */
1525 if (loop->num_nodes > 2)
1526 continue;
1527
6198d968 1528 /* Initialize the worklist with stmts we seed the partitions with. */
1529 bbs = get_loop_body_in_dom_order (loop);
1530 for (i = 0; i < loop->num_nodes; ++i)
1531 {
1532 gimple_stmt_iterator gsi;
1533 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1534 {
1535 gimple stmt = gsi_stmt (gsi);
1536 /* Only distribute stores for now.
1537 ??? We should also try to distribute scalar reductions,
1538 thus SSA defs that have scalar uses outside of the loop. */
1539 if (!gimple_assign_single_p (stmt)
1540 || is_gimple_reg (gimple_assign_lhs (stmt)))
1541 continue;
1542
f1f41a6c 1543 work_list.safe_push (stmt);
6198d968 1544 }
1545 }
1546 free (bbs);
54459dd6 1547
f1f41a6c 1548 if (work_list.length () > 0)
54459dd6 1549 nb_generated_loops = distribute_loop (loop, work_list);
1550
1551 if (nb_generated_loops > 0)
1552 changed = true;
801c5610 1553
1554 if (dump_file && (dump_flags & TDF_DETAILS))
1555 {
1556 if (nb_generated_loops > 1)
1557 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
95f41c04 1558 num, nb_generated_loops);
801c5610 1559 else
95f41c04 1560 fprintf (dump_file, "Loop %d is the same.\n", num);
801c5610 1561 }
1562
f1f41a6c 1563 work_list.release ();
801c5610 1564 }
1565
54459dd6 1566 if (changed)
1567 {
278611f2 1568 mark_virtual_operands_for_renaming (cfun);
54459dd6 1569 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1570 }
1571
1572#ifdef ENABLE_CHECKING
1573 verify_loop_structure ();
1574#endif
1575
dd277d48 1576 return 0;
801c5610 1577}
1578
1579static bool
1580gate_tree_loop_distribution (void)
1581{
0acf3477 1582 return flag_tree_loop_distribution
1583 || flag_tree_loop_distribute_patterns;
801c5610 1584}
1585
20099e35 1586struct gimple_opt_pass pass_loop_distribution =
801c5610 1587{
20099e35 1588 {
1589 GIMPLE_PASS,
801c5610 1590 "ldist", /* name */
c7875731 1591 OPTGROUP_LOOP, /* optinfo_flags */
801c5610 1592 gate_tree_loop_distribution, /* gate */
1593 tree_loop_distribution, /* execute */
1594 NULL, /* sub */
1595 NULL, /* next */
1596 0, /* static_pass_number */
1597 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1598 PROP_cfg | PROP_ssa, /* properties_required */
1599 0, /* properties_provided */
1600 0, /* properties_destroyed */
1601 0, /* todo_flags_start */
b5698e04 1602 TODO_ggc_collect
1603 | TODO_verify_ssa /* todo_flags_finish */
20099e35 1604 }
801c5610 1605};