/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   | A(I) = B(I) + C
   | D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   | A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   | D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

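/* Besides plain loop fission, when -ftree-loop-distribute-patterns is
   enabled the pass also tries to rewrite distributed partitions as
   library calls.  As a rough illustration, a loop such as

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   can be classified as PKIND_MEMSET and replaced by a call to memset,
   and a simple aggregate-copy loop can likewise become a memcpy or
   memmove call (see classify_partition, generate_memset_builtin and
   generate_memcpy_builtin below).  */
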
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

enum partition_kind {
    PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* Data references that a partition of kind != PKIND_NORMAL is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind > PKIND_REDUCTION;
}

/* Returns true if the partition has any writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid including
   the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

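/* As a small illustration of such a scalar dependence: in

   |sum = 0;
   |for (i = 0; i < n; i++)
   |  sum += a[i];
   |use (sum);

   the value of SUM defined inside the loop is used after it, so the
   statements computing SUM cannot be turned into a builtin and are
   kept together in the last generated partition (PKIND_REDUCTION).  */
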
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (virtual_operand_p (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

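/* To illustrate the negative stride case above: for a store to A[i]
   with i running from N-1 down to 0, DR_STEP is negative, so ADDR_BASE
   is rebased to the lowest byte accessed, &A[0], and the generated
   library call still processes the range in forward order.  */
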
/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

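/* Illustration of the code generated above: for a partition classified
   as PKIND_MEMSET from a loop like

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   the emitted call is roughly equivalent to

   |__builtin_memset (&a[0], 0, n * sizeof (a[0]));

   placed before the loop, with the size and address arguments built by
   build_size_arg_loc and build_addr_arg_loc.  */
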
d0582dc1
RG
390/* Generate a call to memcpy for PARTITION in LOOP. */
391
392static void
393generate_memcpy_builtin (struct loop *loop, partition_t partition)
394{
395 gimple_stmt_iterator gsi;
396 gimple stmt, fn_call;
397 tree nb_iter, dest, src, fn, nb_bytes;
398 location_t loc;
399 enum built_in_function kind;
400
401 stmt = DR_STMT (partition->main_dr);
402 loc = gimple_location (stmt);
403 if (gimple_bb (stmt) == loop->latch)
404 nb_iter = number_of_latch_executions (loop);
405 else
406 nb_iter = number_of_exit_cond_executions (loop);
407
408 /* The new statements will be placed before LOOP. */
409 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
410
411 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
412 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
413 false, GSI_CONTINUE_LINKING);
414 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
415 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
416 if (ptr_derefs_may_alias_p (dest, src))
417 kind = BUILT_IN_MEMMOVE;
418 else
419 kind = BUILT_IN_MEMCPY;
420
421 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
422 false, GSI_CONTINUE_LINKING);
423 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
424 false, GSI_CONTINUE_LINKING);
425 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
426 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
427 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
428
429 if (dump_file && (dump_flags & TDF_DETAILS))
430 {
431 if (kind == BUILT_IN_MEMCPY)
432 fprintf (dump_file, "generated memcpy\n");
433 else
434 fprintf (dump_file, "generated memmove\n");
435 }
436}
437
30d55936 438/* Remove and destroy the loop LOOP. */
dea61d92 439
30d55936
RG
440static void
441destroy_loop (struct loop *loop)
dea61d92 442{
30d55936
RG
443 unsigned nbbs = loop->num_nodes;
444 edge exit = single_exit (loop);
445 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 446 basic_block *bbs;
30d55936 447 unsigned i;
dea61d92
SP
448
449 bbs = get_loop_body_in_dom_order (loop);
450
30d55936
RG
451 redirect_edge_pred (exit, src);
452 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
453 exit->flags |= EDGE_FALLTHRU;
454 cancel_loop_tree (loop);
455 rescan_loop_exit (exit, false, true);
dea61d92 456
30d55936 457 for (i = 0; i < nbbs; i++)
c014f6f5
RG
458 {
459 /* We have made sure to not leave any dangling uses of SSA
460 names defined in the loop. With the exception of virtuals.
461 Make sure we replace all uses of virtual defs that will remain
462 outside of the loop with the bare symbol as delete_basic_block
463 will release them. */
464 gimple_stmt_iterator gsi;
465 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
466 {
467 gimple phi = gsi_stmt (gsi);
ea057359 468 if (virtual_operand_p (gimple_phi_result (phi)))
c014f6f5
RG
469 mark_virtual_phi_result_for_renaming (phi);
470 }
471 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
472 {
473 gimple stmt = gsi_stmt (gsi);
474 tree vdef = gimple_vdef (stmt);
475 if (vdef && TREE_CODE (vdef) == SSA_NAME)
476 mark_virtual_operand_for_renaming (vdef);
477 }
478 delete_basic_block (bbs[i]);
479 }
dea61d92 480 free (bbs);
30d55936
RG
481
482 set_immediate_dominator (CDI_DOMINATORS, dest,
483 recompute_dominator (CDI_DOMINATORS, dest));
dea61d92
SP
484}
485
30d55936 486/* Generates code for PARTITION. */
dea61d92 487
30d55936 488static void
d0582dc1 489generate_code_for_partition (struct loop *loop,
be6b029b 490 partition_t partition, bool copy_p)
dea61d92 491{
30d55936
RG
492 switch (partition->kind)
493 {
494 case PKIND_MEMSET:
d0582dc1
RG
495 generate_memset_builtin (loop, partition);
496 /* If this is the last partition for which we generate code, we have
497 to destroy the loop. */
498 if (!copy_p)
499 destroy_loop (loop);
500 break;
501
502 case PKIND_MEMCPY:
503 generate_memcpy_builtin (loop, partition);
30d55936
RG
504 /* If this is the last partition for which we generate code, we have
505 to destroy the loop. */
506 if (!copy_p)
507 destroy_loop (loop);
508 break;
509
b9fc0497
RB
510 case PKIND_REDUCTION:
511 /* Reductions all have to be in the last partition. */
512 gcc_assert (!copy_p);
30d55936
RG
513 case PKIND_NORMAL:
514 generate_loops_for_partition (loop, partition, copy_p);
515 break;
516
517 default:
518 gcc_unreachable ();
519 }
dea61d92
SP
520}
521
522
523/* Returns true if the node V of RDG cannot be recomputed. */
524
525static bool
526rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
527{
528 if (RDG_MEM_WRITE_STMT (rdg, v))
529 return true;
530
531 return false;
532}
533
534/* Returns true when the vertex V has already been generated in the
535 current partition (V is in PROCESSED), or when V belongs to another
536 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
537
538static inline bool
539already_processed_vertex_p (bitmap processed, int v)
540{
541 return (bitmap_bit_p (processed, v)
542 || !bitmap_bit_p (remaining_stmts, v));
543}
544
545/* Returns NULL when there is no anti-dependence among the successors
546 of vertex V, otherwise returns the edge with the anti-dep. */
547
548static struct graph_edge *
549has_anti_dependence (struct vertex *v)
550{
551 struct graph_edge *e;
552
553 if (v->succ)
554 for (e = v->succ; e; e = e->succ_next)
555 if (RDGE_TYPE (e) == anti_dd)
556 return e;
557
558 return NULL;
559}
560
/* Returns true when one of the predecessors of V has upstream memory
   writes.  */
562
563static bool
564predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
565{
566 struct graph_edge *e;
567
568 if (v->pred)
569 for (e = v->pred; e; e = e->pred_next)
570 if (bitmap_bit_p (upstream_mem_writes, e->src)
571 /* Don't consider flow channels: a write to memory followed
572 by a read from memory. These channels allow the split of
573 the RDG in different partitions. */
574 && !RDG_MEM_WRITE_STMT (rdg, e->src))
575 return true;
576
577 return false;
578}
579
580/* Initializes the upstream_mem_writes bitmap following the
581 information from RDG. */
582
583static void
584mark_nodes_having_upstream_mem_writes (struct graph *rdg)
585{
586 int v, x;
587 bitmap seen = BITMAP_ALLOC (NULL);
588
589 for (v = rdg->n_vertices - 1; v >= 0; v--)
590 if (!bitmap_bit_p (seen, v))
591 {
592 unsigned i;
9771b263
DN
593 vec<int> nodes;
594 nodes.create (3);
dea61d92
SP
595
596 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
597
9771b263 598 FOR_EACH_VEC_ELT (nodes, i, x)
dea61d92 599 {
fcaa4ca4 600 if (!bitmap_set_bit (seen, x))
dea61d92
SP
601 continue;
602
dea61d92
SP
603 if (RDG_MEM_WRITE_STMT (rdg, x)
604 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
605 /* In anti dependences the read should occur before
606 the write, this is why both the read and the write
607 should be placed in the same partition. */
608 || has_anti_dependence (&(rdg->vertices[x])))
609 {
dea61d92
SP
610 bitmap_set_bit (upstream_mem_writes, x);
611 }
612 }
613
9771b263 614 nodes.release ();
dea61d92
SP
615 }
616}
617
618/* Returns true when vertex u has a memory write node as a predecessor
619 in RDG. */
620
621static bool
622has_upstream_mem_writes (int u)
623{
624 return bitmap_bit_p (upstream_mem_writes, u);
625}
626
c61f8985 627static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
7ad672e4 628 bitmap, bitmap);
dea61d92 629
dea61d92
SP
630/* Flag the uses of U stopping following the information from
631 upstream_mem_writes. */
632
633static void
c61f8985 634rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
7ad672e4 635 bitmap processed)
dea61d92 636{
dea61d92
SP
637 use_operand_p use_p;
638 struct vertex *x = &(rdg->vertices[u]);
726a989a 639 gimple stmt = RDGV_STMT (x);
dea61d92
SP
640 struct graph_edge *anti_dep = has_anti_dependence (x);
641
642 /* Keep in the same partition the destination of an antidependence,
643 because this is a store to the exact same location. Putting this
644 in another partition is bad for cache locality. */
645 if (anti_dep)
646 {
647 int v = anti_dep->dest;
648
649 if (!already_processed_vertex_p (processed, v))
650 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
7ad672e4 651 processed);
dea61d92
SP
652 }
653
726a989a 654 if (gimple_code (stmt) != GIMPLE_PHI)
dea61d92 655 {
5006671f 656 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
dea61d92
SP
657 {
658 tree use = USE_FROM_PTR (use_p);
659
b9fc0497
RB
660 if (TREE_CODE (use) == SSA_NAME
661 && !SSA_NAME_IS_DEFAULT_DEF (use))
dea61d92 662 {
726a989a 663 gimple def_stmt = SSA_NAME_DEF_STMT (use);
dea61d92
SP
664 int v = rdg_vertex_for_stmt (rdg, def_stmt);
665
666 if (v >= 0
667 && !already_processed_vertex_p (processed, v))
668 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
7ad672e4 669 processed);
dea61d92
SP
670 }
671 }
672 }
673
726a989a 674 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
dea61d92 675 {
726a989a 676 tree op0 = gimple_assign_lhs (stmt);
dea61d92
SP
677
678 /* Scalar channels don't have enough space for transmitting data
679 between tasks, unless we add more storage by privatizing. */
680 if (is_gimple_reg (op0))
681 {
682 use_operand_p use_p;
683 imm_use_iterator iter;
684
685 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
686 {
687 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
688
689 if (!already_processed_vertex_p (processed, v))
690 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
7ad672e4 691 processed);
dea61d92
SP
692 }
693 }
694 }
695}
696
697/* Flag V from RDG as part of PARTITION, and also flag its loop number
698 in LOOPS. */
699
700static void
7ad672e4 701rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
dea61d92
SP
702{
703 struct loop *loop;
704
c61f8985 705 if (!bitmap_set_bit (partition->stmts, v))
dea61d92
SP
706 return;
707
708 loop = loop_containing_stmt (RDG_STMT (rdg, v));
709 bitmap_set_bit (loops, loop->num);
dea61d92
SP
710
711 if (rdg_cannot_recompute_vertex_p (rdg, v))
712 {
7ad672e4 713 partition->has_writes = true;
dea61d92
SP
714 bitmap_clear_bit (remaining_stmts, v);
715 }
716}
717
718/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
fa10beec 719 Also flag their loop number in LOOPS. */
dea61d92
SP
720
721static void
c61f8985 722rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
7ad672e4 723 bitmap loops, bitmap processed)
dea61d92
SP
724{
725 unsigned i;
9771b263
DN
726 vec<int> nodes;
727 nodes.create (3);
dea61d92
SP
728 int x;
729
730 bitmap_set_bit (processed, v);
7ad672e4 731 rdg_flag_uses (rdg, v, partition, loops, processed);
dea61d92 732 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
7ad672e4 733 rdg_flag_vertex (rdg, v, partition, loops);
dea61d92 734
9771b263 735 FOR_EACH_VEC_ELT (nodes, i, x)
dea61d92 736 if (!already_processed_vertex_p (processed, x))
7ad672e4 737 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);
dea61d92 738
9771b263 739 nodes.release ();
dea61d92
SP
740}
741
742/* Initialize CONDS with all the condition statements from the basic
743 blocks of LOOP. */
744
745static void
9771b263 746collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
dea61d92
SP
747{
748 unsigned i;
749 edge e;
9771b263 750 vec<edge> exits = get_loop_exit_edges (loop);
dea61d92 751
9771b263 752 FOR_EACH_VEC_ELT (exits, i, e)
dea61d92 753 {
726a989a 754 gimple cond = last_stmt (e->src);
dea61d92
SP
755
756 if (cond)
9771b263 757 conds->safe_push (cond);
dea61d92
SP
758 }
759
9771b263 760 exits.release ();
dea61d92
SP
761}
762
763/* Add to PARTITION all the exit condition statements for LOOPS
764 together with all their dependent statements determined from
765 RDG. */
766
767static void
c61f8985 768rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
7ad672e4 769 bitmap processed)
dea61d92
SP
770{
771 unsigned i;
772 bitmap_iterator bi;
9771b263
DN
773 vec<gimple> conds;
774 conds.create (3);
dea61d92
SP
775
776 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
0fc822d0 777 collect_condition_stmts (get_loop (cfun, i), &conds);
dea61d92 778
9771b263 779 while (!conds.is_empty ())
dea61d92 780 {
9771b263 781 gimple cond = conds.pop ();
dea61d92
SP
782 int v = rdg_vertex_for_stmt (rdg, cond);
783 bitmap new_loops = BITMAP_ALLOC (NULL);
784
785 if (!already_processed_vertex_p (processed, v))
7ad672e4 786 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);
dea61d92
SP
787
788 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
fcaa4ca4 789 if (bitmap_set_bit (loops, i))
0fc822d0 790 collect_condition_stmts (get_loop (cfun, i), &conds);
dea61d92
SP
791
792 BITMAP_FREE (new_loops);
793 }
01be8516 794
9771b263 795 conds.release ();
dea61d92
SP
796}
797
dea61d92
SP
798/* Returns a bitmap in which all the statements needed for computing
799 the strongly connected component C of the RDG are flagged, also
800 including the loop exit conditions. */
801
c61f8985 802static partition_t
7ad672e4 803build_rdg_partition_for_component (struct graph *rdg, rdgc c)
dea61d92
SP
804{
805 int i, v;
c61f8985 806 partition_t partition = partition_alloc (NULL);
dea61d92
SP
807 bitmap loops = BITMAP_ALLOC (NULL);
808 bitmap processed = BITMAP_ALLOC (NULL);
809
9771b263 810 FOR_EACH_VEC_ELT (c->vertices, i, v)
dea61d92 811 if (!already_processed_vertex_p (processed, v))
7ad672e4 812 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);
dea61d92 813
7ad672e4 814 rdg_flag_loop_exits (rdg, loops, partition, processed);
dea61d92
SP
815
816 BITMAP_FREE (processed);
817 BITMAP_FREE (loops);
818 return partition;
819}
820
821/* Free memory for COMPONENTS. */
822
823static void
9771b263 824free_rdg_components (vec<rdgc> components)
dea61d92
SP
825{
826 int i;
827 rdgc x;
828
9771b263 829 FOR_EACH_VEC_ELT (components, i, x)
dea61d92 830 {
9771b263 831 x->vertices.release ();
dea61d92
SP
832 free (x);
833 }
01be8516 834
9771b263 835 components.release ();
dea61d92
SP
836}
837
838/* Build the COMPONENTS vector with the strongly connected components
839 of RDG in which the STARTING_VERTICES occur. */
840
841static void
9771b263
DN
842rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
843 vec<rdgc> *components)
dea61d92
SP
844{
845 int i, v;
846 bitmap saved_components = BITMAP_ALLOC (NULL);
847 int n_components = graphds_scc (rdg, NULL);
9771b263
DN
848 /* ??? Macros cannot process template types with more than one
849 argument, so we need this typedef. */
850 typedef vec<int> vec_int_heap;
851 vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);
dea61d92
SP
852
853 for (i = 0; i < n_components; i++)
9771b263 854 all_components[i].create (3);
dea61d92
SP
855
856 for (i = 0; i < rdg->n_vertices; i++)
9771b263 857 all_components[rdg->vertices[i].component].safe_push (i);
dea61d92 858
9771b263 859 FOR_EACH_VEC_ELT (starting_vertices, i, v)
dea61d92
SP
860 {
861 int c = rdg->vertices[v].component;
862
fcaa4ca4 863 if (bitmap_set_bit (saved_components, c))
dea61d92
SP
864 {
865 rdgc x = XCNEW (struct rdg_component);
866 x->num = c;
867 x->vertices = all_components[c];
868
9771b263 869 components->safe_push (x);
dea61d92
SP
870 }
871 }
872
873 for (i = 0; i < n_components; i++)
874 if (!bitmap_bit_p (saved_components, i))
9771b263 875 all_components[i].release ();
dea61d92
SP
876
877 free (all_components);
878 BITMAP_FREE (saved_components);
879}
880
30d55936
RG
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns.  */
cfee318d 883
30d55936
RG
884static void
885classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
cfee318d 886{
cfee318d 887 bitmap_iterator bi;
30d55936
RG
888 unsigned i;
889 tree nb_iter;
d0582dc1 890 data_reference_p single_load, single_store;
b9fc0497 891 bool volatiles_p = false;
30d55936
RG
892
893 partition->kind = PKIND_NORMAL;
d0582dc1
RG
894 partition->main_dr = NULL;
895 partition->secondary_dr = NULL;
30d55936 896
c61f8985 897 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
30d55936
RG
898 {
899 gimple stmt = RDG_STMT (rdg, i);
900
901 if (gimple_has_volatile_ops (stmt))
b9fc0497 902 volatiles_p = true;
cfee318d 903
30d55936
RG
904 /* If the stmt has uses outside of the loop fail.
905 ??? If the stmt is generated in another partition that
906 is not created as builtin we can ignore this. */
9ca86fc3 907 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936
RG
908 {
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "not generating builtin, partition has "
911 "scalar uses outside of the loop\n");
b9fc0497 912 partition->kind = PKIND_REDUCTION;
30d55936
RG
913 return;
914 }
915 }
916
b9fc0497
RB
917 /* Perform general partition disqualification for builtins. */
918 if (volatiles_p
919 || !flag_tree_loop_distribute_patterns)
920 return;
921
922 nb_iter = number_of_exit_cond_executions (loop);
923 if (!nb_iter || nb_iter == chrec_dont_know)
924 return;
925
d0582dc1
RG
926 /* Detect memset and memcpy. */
927 single_load = NULL;
928 single_store = NULL;
30d55936
RG
929 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
930 {
931 gimple stmt = RDG_STMT (rdg, i);
d0582dc1
RG
932 data_reference_p dr;
933 unsigned j;
30d55936
RG
934
935 if (gimple_code (stmt) == GIMPLE_PHI)
936 continue;
937
938 /* Any scalar stmts are ok. */
939 if (!gimple_vuse (stmt))
940 continue;
941
d0582dc1
RG
942 /* Otherwise just regular loads/stores. */
943 if (!gimple_assign_single_p (stmt))
944 return;
945
946 /* But exactly one store and/or load. */
9771b263 947 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
30d55936 948 {
d0582dc1
RG
949 if (DR_IS_READ (dr))
950 {
951 if (single_load != NULL)
952 return;
953 single_load = dr;
954 }
955 else
956 {
957 if (single_store != NULL)
958 return;
959 single_store = dr;
960 }
30d55936 961 }
30d55936
RG
962 }
963
d0582dc1
RG
964 if (single_store && !single_load)
965 {
966 gimple stmt = DR_STMT (single_store);
967 tree rhs = gimple_assign_rhs1 (stmt);
401f3a81
JJ
968 if (const_with_all_bytes_same (rhs) == -1
969 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
970 || (TYPE_MODE (TREE_TYPE (rhs))
971 != TYPE_MODE (unsigned_char_type_node))))
d0582dc1
RG
972 return;
973 if (TREE_CODE (rhs) == SSA_NAME
974 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
975 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
976 return;
977 if (!adjacent_dr_p (single_store))
978 return;
979 partition->kind = PKIND_MEMSET;
980 partition->main_dr = single_store;
981 }
982 else if (single_store && single_load)
983 {
984 gimple store = DR_STMT (single_store);
985 gimple load = DR_STMT (single_load);
986 /* Direct aggregate copy or via an SSA name temporary. */
987 if (load != store
988 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
989 return;
990 if (!adjacent_dr_p (single_store)
991 || !adjacent_dr_p (single_load)
992 || !operand_equal_p (DR_STEP (single_store),
993 DR_STEP (single_load), 0))
994 return;
f20132e7
RG
995 /* Now check that if there is a dependence this dependence is
996 of a suitable form for memmove. */
6e1aa848 997 vec<loop_p> loops = vNULL;
f20132e7 998 ddr_p ddr;
9771b263 999 loops.safe_push (loop);
f20132e7
RG
1000 ddr = initialize_data_dependence_relation (single_load, single_store,
1001 loops);
1002 compute_affine_dependence (ddr, loop);
f20132e7
RG
1003 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1004 {
1005 free_dependence_relation (ddr);
9771b263 1006 loops.release ();
f20132e7
RG
1007 return;
1008 }
1009 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1010 {
1011 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1012 {
1013 free_dependence_relation (ddr);
9771b263 1014 loops.release ();
f20132e7
RG
1015 return;
1016 }
1017 lambda_vector dist_v;
9771b263 1018 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
f20132e7
RG
1019 {
1020 int dist = dist_v[index_in_loop_nest (loop->num,
1021 DDR_LOOP_NEST (ddr))];
1022 if (dist > 0 && !DDR_REVERSED_P (ddr))
1023 {
1024 free_dependence_relation (ddr);
9771b263 1025 loops.release ();
f20132e7
RG
1026 return;
1027 }
1028 }
1029 }
b7ce70b3 1030 free_dependence_relation (ddr);
9771b263 1031 loops.release ();
d0582dc1
RG
1032 partition->kind = PKIND_MEMCPY;
1033 partition->main_dr = single_store;
1034 partition->secondary_dr = single_load;
1035 }
cfee318d
SP
1036}
1037
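/* Illustration of the memcpy classification above: a partition whose
   only memory accesses come from

   |for (i = 0; i < n; i++)
   |  a[i] = b[i];

   has exactly one load and one store, both adjacent and with the same
   step, so it is classified as PKIND_MEMCPY; generate_memcpy_builtin
   later emits memcpy, or memmove when the two references may alias.  */
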
1fa0c180
RG
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */
1040
1041static tree
1042ref_base_address (data_reference_p dr)
1043{
1044 tree base_address = DR_BASE_ADDRESS (dr);
1045 if (base_address
1046 && TREE_CODE (base_address) == ADDR_EXPR)
1047 return TREE_OPERAND (base_address, 0);
1048
1049 return base_address;
1050}
1051
cfee318d
SP
1052/* Returns true when PARTITION1 and PARTITION2 have similar memory
1053 accesses in RDG. */
1054
1055static bool
c61f8985
RG
1056similar_memory_accesses (struct graph *rdg, partition_t partition1,
1057 partition_t partition2)
cfee318d 1058{
1fa0c180 1059 unsigned i, j, k, l;
cfee318d 1060 bitmap_iterator bi, bj;
1fa0c180
RG
1061 data_reference_p ref1, ref2;
1062
1063 /* First check whether in the intersection of the two partitions are
1064 any loads or stores. Common loads are the situation that happens
1065 most often. */
1066 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1067 if (RDG_MEM_WRITE_STMT (rdg, i)
1068 || RDG_MEM_READS_STMT (rdg, i))
1069 return true;
cfee318d 1070
1fa0c180 1071 /* Then check all data-references against each other. */
c61f8985 1072 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
cfee318d
SP
1073 if (RDG_MEM_WRITE_STMT (rdg, i)
1074 || RDG_MEM_READS_STMT (rdg, i))
c61f8985 1075 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
cfee318d
SP
1076 if (RDG_MEM_WRITE_STMT (rdg, j)
1077 || RDG_MEM_READS_STMT (rdg, j))
1fa0c180 1078 {
9771b263 1079 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1fa0c180
RG
1080 {
1081 tree base1 = ref_base_address (ref1);
1082 if (base1)
9771b263 1083 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1fa0c180
RG
1084 if (base1 == ref_base_address (ref2))
1085 return true;
1086 }
1087 }
cfee318d
SP
1088
1089 return false;
1090}
1091
dea61d92
SP
1092/* Aggregate several components into a useful partition that is
1093 registered in the PARTITIONS vector. Partitions will be
1094 distributed in different loops. */
1095
1096static void
9771b263
DN
1097rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
1098 vec<int> *other_stores,
1099 vec<partition_t> *partitions, bitmap processed)
dea61d92
SP
1100{
1101 int i;
1102 rdgc x;
c61f8985 1103 partition_t partition = partition_alloc (NULL);
dea61d92 1104
9771b263 1105 FOR_EACH_VEC_ELT (components, i, x)
dea61d92 1106 {
c61f8985 1107 partition_t np;
9771b263 1108 int v = x->vertices[0];
b8698a0f 1109
dea61d92
SP
1110 if (bitmap_bit_p (processed, v))
1111 continue;
b8698a0f 1112
7ad672e4 1113 np = build_rdg_partition_for_component (rdg, x);
c61f8985 1114 bitmap_ior_into (partition->stmts, np->stmts);
7ad672e4 1115 partition->has_writes = partition_has_writes (np);
c61f8985
RG
1116 bitmap_ior_into (processed, np->stmts);
1117 partition_free (np);
dea61d92 1118
7ad672e4 1119 if (partition_has_writes (partition))
dea61d92
SP
1120 {
1121 if (dump_file && (dump_flags & TDF_DETAILS))
1122 {
1123 fprintf (dump_file, "ldist useful partition:\n");
c61f8985 1124 dump_bitmap (dump_file, partition->stmts);
dea61d92
SP
1125 }
1126
9771b263 1127 partitions->safe_push (partition);
c61f8985 1128 partition = partition_alloc (NULL);
dea61d92
SP
1129 }
1130 }
1131
1132 /* Add the nodes from the RDG that were not marked as processed, and
1133 that are used outside the current loop. These are scalar
1134 computations that are not yet part of previous partitions. */
1135 for (i = 0; i < rdg->n_vertices; i++)
1136 if (!bitmap_bit_p (processed, i)
1137 && rdg_defs_used_in_other_loops_p (rdg, i))
9771b263 1138 other_stores->safe_push (i);
dea61d92
SP
1139
1140 /* If there are still statements left in the OTHER_STORES array,
1141 create other components and partitions with these stores and
1142 their dependences. */
9771b263 1143 if (other_stores->length () > 0)
dea61d92 1144 {
9771b263
DN
1145 vec<rdgc> comps;
1146 comps.create (3);
1147 vec<int> foo;
1148 foo.create (3);
dea61d92
SP
1149
1150 rdg_build_components (rdg, *other_stores, &comps);
1151 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
1152
9771b263 1153 foo.release ();
dea61d92
SP
1154 free_rdg_components (comps);
1155 }
1156
1157 /* If there is something left in the last partition, save it. */
c61f8985 1158 if (bitmap_count_bits (partition->stmts) > 0)
9771b263 1159 partitions->safe_push (partition);
dea61d92 1160 else
c61f8985 1161 partition_free (partition);
dea61d92
SP
1162}
1163
1164/* Dump to FILE the PARTITIONS. */
1165
1166static void
9771b263 1167dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
dea61d92
SP
1168{
1169 int i;
c61f8985 1170 partition_t partition;
dea61d92 1171
9771b263 1172 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1173 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
1174}
1175
1176/* Debug PARTITIONS. */
9771b263 1177extern void debug_rdg_partitions (vec<partition_t> );
dea61d92 1178
24e47c76 1179DEBUG_FUNCTION void
9771b263 1180debug_rdg_partitions (vec<partition_t> partitions)
dea61d92
SP
1181{
1182 dump_rdg_partitions (stderr, partitions);
1183}
1184
2b8aee8e
SP
1185/* Returns the number of read and write operations in the RDG. */
1186
1187static int
1188number_of_rw_in_rdg (struct graph *rdg)
1189{
1190 int i, res = 0;
1191
1192 for (i = 0; i < rdg->n_vertices; i++)
1193 {
1194 if (RDG_MEM_WRITE_STMT (rdg, i))
1195 ++res;
1196
1197 if (RDG_MEM_READS_STMT (rdg, i))
1198 ++res;
1199 }
1200
1201 return res;
1202}
1203
1204/* Returns the number of read and write operations in a PARTITION of
1205 the RDG. */
1206
1207static int
c61f8985 1208number_of_rw_in_partition (struct graph *rdg, partition_t partition)
2b8aee8e
SP
1209{
1210 int res = 0;
1211 unsigned i;
1212 bitmap_iterator ii;
1213
c61f8985 1214 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1215 {
1216 if (RDG_MEM_WRITE_STMT (rdg, i))
1217 ++res;
1218
1219 if (RDG_MEM_READS_STMT (rdg, i))
1220 ++res;
1221 }
1222
1223 return res;
1224}
1225
1226/* Returns true when one of the PARTITIONS contains all the read or
1227 write operations of RDG. */
1228
1229static bool
9771b263
DN
1230partition_contains_all_rw (struct graph *rdg,
1231 vec<partition_t> partitions)
2b8aee8e
SP
1232{
1233 int i;
c61f8985 1234 partition_t partition;
2b8aee8e
SP
1235 int nrw = number_of_rw_in_rdg (rdg);
1236
9771b263 1237 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1238 if (nrw == number_of_rw_in_partition (rdg, partition))
1239 return true;
1240
1241 return false;
1242}
1243
dea61d92
SP
1244/* Generate code from STARTING_VERTICES in RDG. Returns the number of
1245 distributed loops. */
1246
1247static int
1248ldist_gen (struct loop *loop, struct graph *rdg,
9771b263 1249 vec<int> starting_vertices)
dea61d92
SP
1250{
1251 int i, nbp;
9771b263
DN
1252 vec<rdgc> components;
1253 components.create (3);
1254 vec<partition_t> partitions;
1255 partitions.create (3);
1256 vec<int> other_stores;
1257 other_stores.create (3);
c61f8985
RG
1258 partition_t partition;
1259 bitmap processed = BITMAP_ALLOC (NULL);
be6b029b 1260 bool any_builtin;
dea61d92
SP
1261
1262 remaining_stmts = BITMAP_ALLOC (NULL);
1263 upstream_mem_writes = BITMAP_ALLOC (NULL);
1264
1265 for (i = 0; i < rdg->n_vertices; i++)
1266 {
1267 bitmap_set_bit (remaining_stmts, i);
1268
1269 /* Save in OTHER_STORES all the memory writes that are not in
1270 STARTING_VERTICES. */
1271 if (RDG_MEM_WRITE_STMT (rdg, i))
1272 {
1273 int v;
1274 unsigned j;
1275 bool found = false;
1276
9771b263 1277 FOR_EACH_VEC_ELT (starting_vertices, j, v)
dea61d92
SP
1278 if (i == v)
1279 {
1280 found = true;
1281 break;
1282 }
1283
1284 if (!found)
9771b263 1285 other_stores.safe_push (i);
dea61d92
SP
1286 }
1287 }
1288
1289 mark_nodes_having_upstream_mem_writes (rdg);
1290 rdg_build_components (rdg, starting_vertices, &components);
1291 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1292 processed);
1293 BITMAP_FREE (processed);
dea61d92 1294
be6b029b 1295 any_builtin = false;
9771b263 1296 FOR_EACH_VEC_ELT (partitions, i, partition)
be6b029b
RG
1297 {
1298 classify_partition (loop, rdg, partition);
1299 any_builtin |= partition_builtin_p (partition);
1300 }
30d55936 1301
c014f6f5
RG
1302 /* If we are only distributing patterns fuse all partitions that
1303 were not properly classified as builtins. Else fuse partitions
1304 with similar memory accesses. */
1305 if (!flag_tree_loop_distribution)
1306 {
1307 partition_t into;
be6b029b
RG
1308 /* If we did not detect any builtin simply bail out. */
1309 if (!any_builtin)
1310 {
1311 nbp = 0;
1312 goto ldist_done;
1313 }
38ad2d07
RG
1314 /* Only fuse adjacent non-builtin partitions, see PR53616.
1315 ??? Use dependence information to improve partition ordering. */
1316 i = 0;
1317 do
1318 {
9771b263 1319 for (; partitions.iterate (i, &into); ++i)
38ad2d07
RG
1320 if (!partition_builtin_p (into))
1321 break;
9771b263 1322 for (++i; partitions.iterate (i, &partition); ++i)
38ad2d07
RG
1323 if (!partition_builtin_p (partition))
1324 {
1325 bitmap_ior_into (into->stmts, partition->stmts);
b9fc0497
RB
1326 if (partition->kind == PKIND_REDUCTION)
1327 into->kind = PKIND_REDUCTION;
9771b263 1328 partitions.ordered_remove (i);
5eb010bc 1329 partition_free (partition);
38ad2d07
RG
1330 i--;
1331 }
1332 else
1333 break;
1334 }
9771b263 1335 while ((unsigned) i < partitions.length ());
c014f6f5
RG
1336 }
1337 else
1338 {
1339 partition_t into;
1340 int j;
9771b263 1341 for (i = 0; partitions.iterate (i, &into); ++i)
c014f6f5
RG
1342 {
1343 if (partition_builtin_p (into))
1344 continue;
1345 for (j = i + 1;
9771b263 1346 partitions.iterate (j, &partition); ++j)
c014f6f5
RG
1347 {
1348 if (!partition_builtin_p (partition)
1349 /* ??? The following is horribly inefficient,
1350 we are re-computing and analyzing data-references
1351 of the stmts in the partitions all the time. */
1352 && similar_memory_accesses (rdg, into, partition))
1353 {
1354 if (dump_file && (dump_flags & TDF_DETAILS))
1355 {
1356 fprintf (dump_file, "fusing partitions\n");
1357 dump_bitmap (dump_file, into->stmts);
1358 dump_bitmap (dump_file, partition->stmts);
1359 fprintf (dump_file, "because they have similar "
1360 "memory accesses\n");
1361 }
1362 bitmap_ior_into (into->stmts, partition->stmts);
b9fc0497
RB
1363 if (partition->kind == PKIND_REDUCTION)
1364 into->kind = PKIND_REDUCTION;
9771b263 1365 partitions.ordered_remove (j);
5eb010bc 1366 partition_free (partition);
c014f6f5
RG
1367 j--;
1368 }
1369 }
1370 }
1371 }
30d55936 1372
b9fc0497
RB
1373 /* Fuse all reduction partitions into the last. */
1374 if (partitions.length () > 1)
1375 {
1376 partition_t into = partitions.last ();
1377 for (i = partitions.length () - 2; i >= 0; --i)
1378 {
1379 partition_t what = partitions[i];
1380 if (what->kind == PKIND_REDUCTION)
1381 {
1382 if (dump_file && (dump_flags & TDF_DETAILS))
1383 {
1384 fprintf (dump_file, "fusing partitions\n");
1385 dump_bitmap (dump_file, into->stmts);
1386 dump_bitmap (dump_file, what->stmts);
1387 fprintf (dump_file, "because the latter has reductions\n");
1388 }
1389 bitmap_ior_into (into->stmts, what->stmts);
1390 into->kind = PKIND_REDUCTION;
1391 partitions.ordered_remove (i);
5eb010bc 1392 partition_free (what);
b9fc0497
RB
1393 }
1394 }
1395 }
1396
9771b263 1397 nbp = partitions.length ();
a4293fa6 1398 if (nbp == 0
9771b263
DN
1399 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1400 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
c014f6f5
RG
1401 {
1402 nbp = 0;
1403 goto ldist_done;
1404 }
dea61d92
SP
1405
1406 if (dump_file && (dump_flags & TDF_DETAILS))
1407 dump_rdg_partitions (dump_file, partitions);
1408
9771b263 1409 FOR_EACH_VEC_ELT (partitions, i, partition)
d0582dc1 1410 generate_code_for_partition (loop, partition, i < nbp - 1);
dea61d92 1411
dea61d92
SP
1412 ldist_done:
1413
1414 BITMAP_FREE (remaining_stmts);
1415 BITMAP_FREE (upstream_mem_writes);
1416
9771b263 1417 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1418 partition_free (partition);
dea61d92 1419
9771b263
DN
1420 other_stores.release ();
1421 partitions.release ();
dea61d92
SP
1422 free_rdg_components (components);
1423 return nbp;
1424}
1425
1426/* Distributes the code from LOOP in such a way that producer
1427 statements are placed before consumer statements. When STMTS is
1428 NULL, performs the maximal distribution, if STMTS is not NULL,
1429 tries to separate only these statements from the LOOP's body.
1430 Returns the number of distributed loops. */
1431
1432static int
9771b263 1433distribute_loop (struct loop *loop, vec<gimple> stmts)
dea61d92 1434{
e96d7dd7 1435 int res = 0;
dea61d92 1436 struct graph *rdg;
726a989a 1437 gimple s;
dea61d92 1438 unsigned i;
9771b263
DN
1439 vec<int> vertices;
1440 vec<ddr_p> dependence_relations;
1441 vec<data_reference_p> datarefs;
1442 vec<loop_p> loop_nest;
1443
1444 datarefs.create (10);
1445 dependence_relations.create (100);
1446 loop_nest.create (3);
01be8516 1447 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
dea61d92
SP
1448
1449 if (!rdg)
1450 {
1451 if (dump_file && (dump_flags & TDF_DETAILS))
1452 fprintf (dump_file,
1453 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1454 loop->num);
1455
01be8516
SP
1456 free_dependence_relations (dependence_relations);
1457 free_data_refs (datarefs);
9771b263 1458 loop_nest.release ();
dea61d92
SP
1459 return res;
1460 }
1461
9771b263 1462 vertices.create (3);
dea61d92
SP
1463
1464 if (dump_file && (dump_flags & TDF_DETAILS))
1465 dump_rdg (dump_file, rdg);
1466
9771b263 1467 FOR_EACH_VEC_ELT (stmts, i, s)
dea61d92
SP
1468 {
1469 int v = rdg_vertex_for_stmt (rdg, s);
1470
1471 if (v >= 0)
1472 {
9771b263 1473 vertices.safe_push (v);
dea61d92
SP
1474
1475 if (dump_file && (dump_flags & TDF_DETAILS))
1476 fprintf (dump_file,
1477 "ldist asked to generate code for vertex %d\n", v);
1478 }
1479 }
1480
1481 res = ldist_gen (loop, rdg, vertices);
9771b263 1482 vertices.release ();
dea61d92 1483 free_rdg (rdg);
01be8516
SP
1484 free_dependence_relations (dependence_relations);
1485 free_data_refs (datarefs);
9771b263 1486 loop_nest.release ();
dea61d92
SP
1487 return res;
1488}
1489
1490/* Distribute all loops in the current function. */
1491
1492static unsigned int
1493tree_loop_distribution (void)
1494{
1495 struct loop *loop;
1496 loop_iterator li;
c014f6f5 1497 bool changed = false;
1fa0c180
RG
1498 basic_block bb;
1499
1500 FOR_ALL_BB (bb)
1501 {
1502 gimple_stmt_iterator gsi;
1503 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1504 gimple_set_uid (gsi_stmt (gsi), -1);
1505 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1506 gimple_set_uid (gsi_stmt (gsi), -1);
1507 }
dea61d92 1508
c014f6f5
RG
1509 /* We can at the moment only distribute non-nested loops, thus restrict
1510 walking to innermost loops. */
1511 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
dea61d92 1512 {
6e1aa848 1513 vec<gimple> work_list = vNULL;
be6b029b 1514 basic_block *bbs;
0e20c89f 1515 int num = loop->num;
c014f6f5 1516 int nb_generated_loops = 0;
be6b029b 1517 unsigned int i;
a3357f7d
RG
1518
1519 /* If the loop doesn't have a single exit we will fail anyway,
1520 so do that early. */
1521 if (!single_exit (loop))
1522 continue;
dea61d92 1523
f56f2d33
JH
1524 /* Only optimize hot loops. */
1525 if (!optimize_loop_for_speed_p (loop))
1526 continue;
1527
c014f6f5
RG
1528 /* Only distribute loops with a header and latch for now. */
1529 if (loop->num_nodes > 2)
1530 continue;
1531
be6b029b
RG
1532 /* Initialize the worklist with stmts we seed the partitions with. */
1533 bbs = get_loop_body_in_dom_order (loop);
1534 for (i = 0; i < loop->num_nodes; ++i)
1535 {
1536 gimple_stmt_iterator gsi;
1537 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1538 {
1539 gimple stmt = gsi_stmt (gsi);
b9fc0497
RB
1540 /* Distribute stmts which have defs that are used outside of
1541 the loop. */
1542 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1543 ;
1544 /* Otherwise only distribute stores for now. */
1545 else if (!gimple_assign_single_p (stmt)
1546 || is_gimple_reg (gimple_assign_lhs (stmt)))
be6b029b
RG
1547 continue;
1548
9771b263 1549 work_list.safe_push (stmt);
be6b029b
RG
1550 }
1551 }
1552 free (bbs);
c014f6f5 1553
9771b263 1554 if (work_list.length () > 0)
c014f6f5
RG
1555 nb_generated_loops = distribute_loop (loop, work_list);
1556
1557 if (nb_generated_loops > 0)
1558 changed = true;
dea61d92
SP
1559
1560 if (dump_file && (dump_flags & TDF_DETAILS))
1561 {
1562 if (nb_generated_loops > 1)
1563 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
0e20c89f 1564 num, nb_generated_loops);
dea61d92 1565 else
0e20c89f 1566 fprintf (dump_file, "Loop %d is the same.\n", num);
dea61d92
SP
1567 }
1568
9771b263 1569 work_list.release ();
dea61d92
SP
1570 }
1571
c014f6f5
RG
1572 if (changed)
1573 {
525174a2 1574 mark_virtual_operands_for_renaming (cfun);
c014f6f5
RG
1575 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1576 }
1577
1578#ifdef ENABLE_CHECKING
1579 verify_loop_structure ();
1580#endif
1581
5006671f 1582 return 0;
dea61d92
SP
1583}
1584
1585static bool
1586gate_tree_loop_distribution (void)
1587{
20769d5e
SP
1588 return flag_tree_loop_distribution
1589 || flag_tree_loop_distribute_patterns;
dea61d92
SP
1590}
1591
8ddbbcae 1592struct gimple_opt_pass pass_loop_distribution =
dea61d92 1593{
8ddbbcae
JH
1594 {
1595 GIMPLE_PASS,
dea61d92 1596 "ldist", /* name */
2b4e6bf1 1597 OPTGROUP_LOOP, /* optinfo_flags */
dea61d92
SP
1598 gate_tree_loop_distribution, /* gate */
1599 tree_loop_distribution, /* execute */
1600 NULL, /* sub */
1601 NULL, /* next */
1602 0, /* static_pass_number */
1603 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1604 PROP_cfg | PROP_ssa, /* properties_required */
1605 0, /* properties_provided */
1606 0, /* properties_destroyed */
1607 0, /* todo_flags_start */
bb313b93 1608 TODO_verify_ssa /* todo_flags_finish */
8ddbbcae 1609 }
dea61d92 1610};