]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-loop-distribution.c
* Makefile.in (GIMPLE_H): Do not depend on TARGET_H.
[thirdparty/gcc.git] / gcc / tree-loop-distribution.c
CommitLineData
dea61d92 1/* Loop distribution.
b03c3082 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
c75c517d 3 Free Software Foundation, Inc.
dea61d92
SP
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
6
7This file is part of GCC.
b8698a0f 8
dea61d92
SP
9GCC is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 3, or (at your option) any
12later version.
b8698a0f 13
dea61d92
SP
14GCC is distributed in the hope that it will be useful, but WITHOUT
15ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
b8698a0f 18
dea61d92
SP
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23/* This pass performs loop distribution: for example, the loop
24
25 |DO I = 2, N
26 | A(I) = B(I) + C
27 | D(I) = A(I-1)*E
28 |ENDDO
29
b8698a0f 30 is transformed to
dea61d92
SP
31
32 |DOALL I = 2, N
33 | A(I) = B(I) + C
34 |ENDDO
35 |
36 |DOALL I = 2, N
37 | D(I) = A(I-1)*E
38 |ENDDO
39
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
44
45#include "config.h"
46#include "system.h"
47#include "coretypes.h"
dea61d92 48#include "tree-flow.h"
dea61d92 49#include "cfgloop.h"
dea61d92
SP
50#include "tree-chrec.h"
51#include "tree-data-ref.h"
52#include "tree-scalar-evolution.h"
53#include "tree-pass.h"
dea61d92 54
30d55936
RG
55enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
56
c61f8985
RG
57typedef struct partition_s
58{
59 bitmap stmts;
30d55936
RG
60 enum partition_kind kind;
61 /* Main statement a kind != PKIND_NORMAL partition is about. */
62 gimple main_stmt;
c61f8985
RG
63} *partition_t;
64
65DEF_VEC_P (partition_t);
66DEF_VEC_ALLOC_P (partition_t, heap);
67
68/* Allocate and initialize a partition from BITMAP. */
69
70static partition_t
71partition_alloc (bitmap stmts)
72{
73 partition_t partition = XCNEW (struct partition_s);
74 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
30d55936 75 partition->kind = PKIND_NORMAL;
c61f8985
RG
76 return partition;
77}
78
79/* Free PARTITION. */
80
81static void
82partition_free (partition_t partition)
83{
84 BITMAP_FREE (partition->stmts);
85 free (partition);
86}
87
30d55936
RG
88/* Returns true if the partition can be generated as a builtin. */
89
90static bool
91partition_builtin_p (partition_t partition)
92{
93 return partition->kind != PKIND_NORMAL;
94}
c61f8985 95
dea61d92
SP
96/* If bit I is not set, it means that this node represents an
97 operation that has already been performed, and that should not be
98 performed again. This is the subgraph of remaining important
99 computations that is passed to the DFS algorithm for avoiding to
100 include several times the same stores in different loops. */
101static bitmap remaining_stmts;
102
103/* A node of the RDG is marked in this bitmap when it has as a
104 predecessor a node that writes to memory. */
105static bitmap upstream_mem_writes;
106
c07a8cb3
RG
107/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
108 the LOOP. */
109
110static bool
111ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
112{
113 imm_use_iterator imm_iter;
114 use_operand_p use_p;
115
116 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
117 if (loop != loop_containing_stmt (USE_STMT (use_p)))
118 return true;
119
120 return false;
121}
122
123/* Returns true when STMT defines a scalar variable used after the
88af7c1a 124 loop LOOP. */
c07a8cb3
RG
125
126static bool
88af7c1a 127stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
c07a8cb3 128{
88af7c1a
RG
129 def_operand_p def_p;
130 ssa_op_iter op_iter;
c07a8cb3 131
9ca86fc3
RG
132 if (gimple_code (stmt) == GIMPLE_PHI)
133 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
134
88af7c1a
RG
135 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
136 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
137 return true;
c07a8cb3 138
88af7c1a 139 return false;
c07a8cb3
RG
140}
141
dea61d92
SP
142/* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
143 ORIG_LOOP. */
144
145static void
146update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
147{
148 tree new_ssa_name;
726a989a 149 gimple_stmt_iterator si_new, si_orig;
dea61d92
SP
150 edge orig_loop_latch = loop_latch_edge (orig_loop);
151 edge orig_entry_e = loop_preheader_edge (orig_loop);
152 edge new_loop_entry_e = loop_preheader_edge (new_loop);
153
154 /* Scan the phis in the headers of the old and new loops
155 (they are organized in exactly the same order). */
726a989a
RB
156 for (si_new = gsi_start_phis (new_loop->header),
157 si_orig = gsi_start_phis (orig_loop->header);
158 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
159 gsi_next (&si_new), gsi_next (&si_orig))
dea61d92 160 {
726a989a 161 tree def;
f5045c96 162 source_location locus;
726a989a
RB
163 gimple phi_new = gsi_stmt (si_new);
164 gimple phi_orig = gsi_stmt (si_orig);
165
dea61d92
SP
166 /* Add the first phi argument for the phi in NEW_LOOP (the one
167 associated with the entry of NEW_LOOP) */
726a989a 168 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
f5045c96
AM
169 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
170 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
dea61d92
SP
171
172 /* Add the second phi argument for the phi in NEW_LOOP (the one
173 associated with the latch of NEW_LOOP) */
174 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
f5045c96 175 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
dea61d92
SP
176
177 if (TREE_CODE (def) == SSA_NAME)
178 {
179 new_ssa_name = get_current_def (def);
180
181 if (!new_ssa_name)
182 /* This only happens if there are no definitions inside the
61226dc8
SP
183 loop. Use the the invariant in the new loop as is. */
184 new_ssa_name = def;
dea61d92
SP
185 }
186 else
187 /* Could be an integer. */
188 new_ssa_name = def;
189
f5045c96 190 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
dea61d92
SP
191 }
192}
193
194/* Return a copy of LOOP placed before LOOP. */
195
196static struct loop *
197copy_loop_before (struct loop *loop)
198{
199 struct loop *res;
200 edge preheader = loop_preheader_edge (loop);
201
dea61d92
SP
202 initialize_original_copy_tables ();
203 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
30d55936 204 gcc_assert (res != NULL);
dea61d92
SP
205 free_original_copy_tables ();
206
dea61d92
SP
207 update_phis_for_loop_copy (loop, res);
208 rename_variables_in_loop (res);
209
210 return res;
211}
212
213/* Creates an empty basic block after LOOP. */
214
215static void
216create_bb_after_loop (struct loop *loop)
217{
218 edge exit = single_exit (loop);
219
220 if (!exit)
221 return;
222
223 split_edge (exit);
224}
225
226/* Generate code for PARTITION from the code in LOOP. The loop is
227 copied when COPY_P is true. All the statements not flagged in the
228 PARTITION bitmap are removed from the loop or from its copy. The
229 statements are indexed in sequence inside a basic block, and the
30d55936 230 basic blocks of a loop are taken in dom order. */
dea61d92 231
30d55936 232static void
c61f8985
RG
233generate_loops_for_partition (struct loop *loop, partition_t partition,
234 bool copy_p)
dea61d92
SP
235{
236 unsigned i, x;
726a989a 237 gimple_stmt_iterator bsi;
dea61d92
SP
238 basic_block *bbs;
239
240 if (copy_p)
241 {
242 loop = copy_loop_before (loop);
30d55936 243 gcc_assert (loop != NULL);
dea61d92
SP
244 create_preheader (loop, CP_SIMPLE_PREHEADERS);
245 create_bb_after_loop (loop);
246 }
247
dea61d92
SP
248 /* Remove stmts not in the PARTITION bitmap. The order in which we
249 visit the phi nodes and the statements is exactly as in
250 stmts_from_loop. */
251 bbs = get_loop_body_in_dom_order (loop);
252
b03c3082
JJ
253 if (MAY_HAVE_DEBUG_STMTS)
254 for (x = 0, i = 0; i < loop->num_nodes; i++)
255 {
256 basic_block bb = bbs[i];
257
258 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
c61f8985 259 if (!bitmap_bit_p (partition->stmts, x++))
b03c3082
JJ
260 reset_debug_uses (gsi_stmt (bsi));
261
262 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
263 {
264 gimple stmt = gsi_stmt (bsi);
265 if (gimple_code (stmt) != GIMPLE_LABEL
266 && !is_gimple_debug (stmt)
c61f8985 267 && !bitmap_bit_p (partition->stmts, x++))
b03c3082
JJ
268 reset_debug_uses (stmt);
269 }
270 }
271
dea61d92
SP
272 for (x = 0, i = 0; i < loop->num_nodes; i++)
273 {
274 basic_block bb = bbs[i];
dea61d92 275
726a989a 276 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
c61f8985 277 if (!bitmap_bit_p (partition->stmts, x++))
2706a615
RG
278 {
279 gimple phi = gsi_stmt (bsi);
280 if (!is_gimple_reg (gimple_phi_result (phi)))
281 mark_virtual_phi_result_for_renaming (phi);
282 remove_phi_node (&bsi, true);
283 }
dea61d92 284 else
726a989a 285 gsi_next (&bsi);
dea61d92 286
726a989a 287 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
2706a615
RG
288 {
289 gimple stmt = gsi_stmt (bsi);
b03c3082
JJ
290 if (gimple_code (stmt) != GIMPLE_LABEL
291 && !is_gimple_debug (stmt)
c61f8985 292 && !bitmap_bit_p (partition->stmts, x++))
2706a615
RG
293 {
294 unlink_stmt_vdef (stmt);
295 gsi_remove (&bsi, true);
296 release_defs (stmt);
297 }
298 else
299 gsi_next (&bsi);
300 }
dea61d92
SP
301 }
302
303 free (bbs);
dea61d92
SP
304}
305
ada39f0b 306/* Build the size argument for a memset call. */
3661e899
TB
307
308static inline tree
fc81a369
RH
309build_size_arg_loc (location_t loc, tree nb_iter, tree op,
310 gimple_seq *stmt_list)
3661e899 311{
fc81a369 312 gimple_seq stmts;
0d82a1c8
RG
313 tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
314 fold_convert_loc (loc, size_type_node, nb_iter),
315 fold_convert_loc (loc, size_type_node,
316 TYPE_SIZE_UNIT (TREE_TYPE (op))));
fc81a369
RH
317 x = force_gimple_operand (x, &stmts, true, NULL);
318 gimple_seq_add_seq (stmt_list, stmts);
3661e899 319
fc81a369 320 return x;
3661e899
TB
321}
322
30d55936 323/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 324
cfee318d 325static void
30d55936 326generate_memset_builtin (struct loop *loop, partition_t partition)
dea61d92 327{
30d55936
RG
328 gimple_stmt_iterator gsi;
329 gimple stmt, fn_call;
330 tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
fc81a369 331 gimple_seq stmt_list = NULL, stmts;
dea61d92 332 struct data_reference *dr = XCNEW (struct data_reference);
30d55936
RG
333 location_t loc;
334 bool res;
335
336 stmt = partition->main_stmt;
337 loc = gimple_location (stmt);
338 op0 = gimple_assign_lhs (stmt);
339 if (gimple_bb (stmt) == loop->latch)
340 nb_iter = number_of_latch_executions (loop);
341 else
342 nb_iter = number_of_exit_cond_executions (loop);
343
344 /* The new statements will be placed before LOOP. */
345 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 346
dea61d92
SP
347 DR_STMT (dr) = stmt;
348 DR_REF (dr) = op0;
4e4452b6 349 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
cfee318d 350 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
5e37ea0e
SP
351
352 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
353 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
354 addr_base = fold_convert_loc (loc, sizetype, addr_base);
dea61d92
SP
355
356 /* Test for a negative stride, iterating over every element. */
0d82a1c8 357 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
dea61d92 358 {
fc81a369
RH
359 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
360 fold_convert_loc (loc, sizetype, nb_bytes));
6edd8198
AM
361 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
362 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
dea61d92 363 }
dea61d92 364
5d49b6a7
RG
365 addr_base = fold_build_pointer_plus_loc (loc,
366 DR_BASE_ADDRESS (dr), addr_base);
dea61d92 367 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
726a989a 368 gimple_seq_add_seq (&stmt_list, stmts);
dea61d92 369
e79983f4 370 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
726a989a
RB
371 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
372 gimple_seq_add_stmt (&stmt_list, fn_call);
30d55936 373 gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
dea61d92
SP
374
375 if (dump_file && (dump_flags & TDF_DETAILS))
376 fprintf (dump_file, "generated memset zero\n");
377
dea61d92 378 free_data_ref (dr);
dea61d92
SP
379}
380
30d55936 381/* Remove and destroy the loop LOOP. */
dea61d92 382
30d55936
RG
383static void
384destroy_loop (struct loop *loop)
dea61d92 385{
30d55936
RG
386 unsigned nbbs = loop->num_nodes;
387 edge exit = single_exit (loop);
388 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 389 basic_block *bbs;
30d55936 390 unsigned i;
dea61d92
SP
391
392 bbs = get_loop_body_in_dom_order (loop);
393
30d55936
RG
394 redirect_edge_pred (exit, src);
395 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
396 exit->flags |= EDGE_FALLTHRU;
397 cancel_loop_tree (loop);
398 rescan_loop_exit (exit, false, true);
dea61d92 399
30d55936
RG
400 for (i = 0; i < nbbs; i++)
401 delete_basic_block (bbs[i]);
dea61d92 402 free (bbs);
30d55936
RG
403
404 set_immediate_dominator (CDI_DOMINATORS, dest,
405 recompute_dominator (CDI_DOMINATORS, dest));
dea61d92
SP
406}
407
30d55936 408/* Generates code for PARTITION. */
dea61d92 409
30d55936 410static void
c61f8985
RG
411generate_code_for_partition (struct loop *loop, partition_t partition,
412 bool copy_p)
dea61d92 413{
30d55936
RG
414 switch (partition->kind)
415 {
416 case PKIND_MEMSET:
417 generate_memset_builtin (loop, partition);
418 /* If this is the last partition for which we generate code, we have
419 to destroy the loop. */
420 if (!copy_p)
421 destroy_loop (loop);
422 break;
423
424 case PKIND_NORMAL:
425 generate_loops_for_partition (loop, partition, copy_p);
426 break;
427
428 default:
429 gcc_unreachable ();
430 }
dea61d92
SP
431}
432
433
434/* Returns true if the node V of RDG cannot be recomputed. */
435
436static bool
437rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
438{
439 if (RDG_MEM_WRITE_STMT (rdg, v))
440 return true;
441
442 return false;
443}
444
445/* Returns true when the vertex V has already been generated in the
446 current partition (V is in PROCESSED), or when V belongs to another
447 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
448
449static inline bool
450already_processed_vertex_p (bitmap processed, int v)
451{
452 return (bitmap_bit_p (processed, v)
453 || !bitmap_bit_p (remaining_stmts, v));
454}
455
456/* Returns NULL when there is no anti-dependence among the successors
457 of vertex V, otherwise returns the edge with the anti-dep. */
458
459static struct graph_edge *
460has_anti_dependence (struct vertex *v)
461{
462 struct graph_edge *e;
463
464 if (v->succ)
465 for (e = v->succ; e; e = e->succ_next)
466 if (RDGE_TYPE (e) == anti_dd)
467 return e;
468
469 return NULL;
470}
471
472/* Returns true when V has an anti-dependence edge among its successors. */
473
474static bool
475predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
476{
477 struct graph_edge *e;
478
479 if (v->pred)
480 for (e = v->pred; e; e = e->pred_next)
481 if (bitmap_bit_p (upstream_mem_writes, e->src)
482 /* Don't consider flow channels: a write to memory followed
483 by a read from memory. These channels allow the split of
484 the RDG in different partitions. */
485 && !RDG_MEM_WRITE_STMT (rdg, e->src))
486 return true;
487
488 return false;
489}
490
491/* Initializes the upstream_mem_writes bitmap following the
492 information from RDG. */
493
494static void
495mark_nodes_having_upstream_mem_writes (struct graph *rdg)
496{
497 int v, x;
498 bitmap seen = BITMAP_ALLOC (NULL);
499
500 for (v = rdg->n_vertices - 1; v >= 0; v--)
501 if (!bitmap_bit_p (seen, v))
502 {
503 unsigned i;
504 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
dea61d92
SP
505
506 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
507
ac47786e 508 FOR_EACH_VEC_ELT (int, nodes, i, x)
dea61d92 509 {
fcaa4ca4 510 if (!bitmap_set_bit (seen, x))
dea61d92
SP
511 continue;
512
dea61d92
SP
513 if (RDG_MEM_WRITE_STMT (rdg, x)
514 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
515 /* In anti dependences the read should occur before
516 the write, this is why both the read and the write
517 should be placed in the same partition. */
518 || has_anti_dependence (&(rdg->vertices[x])))
519 {
dea61d92
SP
520 bitmap_set_bit (upstream_mem_writes, x);
521 }
522 }
523
524 VEC_free (int, heap, nodes);
525 }
526}
527
528/* Returns true when vertex u has a memory write node as a predecessor
529 in RDG. */
530
531static bool
532has_upstream_mem_writes (int u)
533{
534 return bitmap_bit_p (upstream_mem_writes, u);
535}
536
c61f8985
RG
537static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
538 bitmap, bitmap, bool *);
dea61d92 539
dea61d92
SP
540/* Flag the uses of U stopping following the information from
541 upstream_mem_writes. */
542
543static void
c61f8985 544rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
dea61d92
SP
545 bitmap processed, bool *part_has_writes)
546{
dea61d92
SP
547 use_operand_p use_p;
548 struct vertex *x = &(rdg->vertices[u]);
726a989a 549 gimple stmt = RDGV_STMT (x);
dea61d92
SP
550 struct graph_edge *anti_dep = has_anti_dependence (x);
551
552 /* Keep in the same partition the destination of an antidependence,
553 because this is a store to the exact same location. Putting this
554 in another partition is bad for cache locality. */
555 if (anti_dep)
556 {
557 int v = anti_dep->dest;
558
559 if (!already_processed_vertex_p (processed, v))
560 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
561 processed, part_has_writes);
562 }
563
726a989a 564 if (gimple_code (stmt) != GIMPLE_PHI)
dea61d92 565 {
5006671f 566 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
dea61d92
SP
567 {
568 tree use = USE_FROM_PTR (use_p);
569
570 if (TREE_CODE (use) == SSA_NAME)
571 {
726a989a 572 gimple def_stmt = SSA_NAME_DEF_STMT (use);
dea61d92
SP
573 int v = rdg_vertex_for_stmt (rdg, def_stmt);
574
575 if (v >= 0
576 && !already_processed_vertex_p (processed, v))
577 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
578 processed, part_has_writes);
579 }
580 }
581 }
582
726a989a 583 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
dea61d92 584 {
726a989a 585 tree op0 = gimple_assign_lhs (stmt);
dea61d92
SP
586
587 /* Scalar channels don't have enough space for transmitting data
588 between tasks, unless we add more storage by privatizing. */
589 if (is_gimple_reg (op0))
590 {
591 use_operand_p use_p;
592 imm_use_iterator iter;
593
594 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
595 {
596 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
597
598 if (!already_processed_vertex_p (processed, v))
599 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
600 processed, part_has_writes);
601 }
602 }
603 }
604}
605
606/* Flag V from RDG as part of PARTITION, and also flag its loop number
607 in LOOPS. */
608
609static void
c61f8985 610rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops,
dea61d92
SP
611 bool *part_has_writes)
612{
613 struct loop *loop;
614
c61f8985 615 if (!bitmap_set_bit (partition->stmts, v))
dea61d92
SP
616 return;
617
618 loop = loop_containing_stmt (RDG_STMT (rdg, v));
619 bitmap_set_bit (loops, loop->num);
dea61d92
SP
620
621 if (rdg_cannot_recompute_vertex_p (rdg, v))
622 {
623 *part_has_writes = true;
624 bitmap_clear_bit (remaining_stmts, v);
625 }
626}
627
628/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
fa10beec 629 Also flag their loop number in LOOPS. */
dea61d92
SP
630
631static void
c61f8985 632rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
dea61d92
SP
633 bitmap loops, bitmap processed,
634 bool *part_has_writes)
635{
636 unsigned i;
637 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
638 int x;
639
640 bitmap_set_bit (processed, v);
641 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
642 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
643 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
644
ac47786e 645 FOR_EACH_VEC_ELT (int, nodes, i, x)
dea61d92
SP
646 if (!already_processed_vertex_p (processed, x))
647 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
648 part_has_writes);
649
650 VEC_free (int, heap, nodes);
651}
652
653/* Initialize CONDS with all the condition statements from the basic
654 blocks of LOOP. */
655
656static void
726a989a 657collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
dea61d92
SP
658{
659 unsigned i;
660 edge e;
661 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
662
ac47786e 663 FOR_EACH_VEC_ELT (edge, exits, i, e)
dea61d92 664 {
726a989a 665 gimple cond = last_stmt (e->src);
dea61d92
SP
666
667 if (cond)
726a989a 668 VEC_safe_push (gimple, heap, *conds, cond);
dea61d92
SP
669 }
670
671 VEC_free (edge, heap, exits);
672}
673
674/* Add to PARTITION all the exit condition statements for LOOPS
675 together with all their dependent statements determined from
676 RDG. */
677
678static void
c61f8985 679rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
dea61d92
SP
680 bitmap processed, bool *part_has_writes)
681{
682 unsigned i;
683 bitmap_iterator bi;
726a989a 684 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
dea61d92
SP
685
686 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
687 collect_condition_stmts (get_loop (i), &conds);
688
726a989a 689 while (!VEC_empty (gimple, conds))
dea61d92 690 {
726a989a 691 gimple cond = VEC_pop (gimple, conds);
dea61d92
SP
692 int v = rdg_vertex_for_stmt (rdg, cond);
693 bitmap new_loops = BITMAP_ALLOC (NULL);
694
695 if (!already_processed_vertex_p (processed, v))
696 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
697 part_has_writes);
698
699 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
fcaa4ca4
NF
700 if (bitmap_set_bit (loops, i))
701 collect_condition_stmts (get_loop (i), &conds);
dea61d92
SP
702
703 BITMAP_FREE (new_loops);
704 }
01be8516
SP
705
706 VEC_free (gimple, heap, conds);
dea61d92
SP
707}
708
dea61d92
SP
709/* Returns a bitmap in which all the statements needed for computing
710 the strongly connected component C of the RDG are flagged, also
711 including the loop exit conditions. */
712
c61f8985 713static partition_t
dea61d92 714build_rdg_partition_for_component (struct graph *rdg, rdgc c,
cfee318d 715 bool *part_has_writes)
dea61d92
SP
716{
717 int i, v;
c61f8985 718 partition_t partition = partition_alloc (NULL);
dea61d92
SP
719 bitmap loops = BITMAP_ALLOC (NULL);
720 bitmap processed = BITMAP_ALLOC (NULL);
721
ac47786e 722 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
dea61d92
SP
723 if (!already_processed_vertex_p (processed, v))
724 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
725 part_has_writes);
726
dea61d92
SP
727 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
728
729 BITMAP_FREE (processed);
730 BITMAP_FREE (loops);
731 return partition;
732}
733
734/* Free memory for COMPONENTS. */
735
736static void
737free_rdg_components (VEC (rdgc, heap) *components)
738{
739 int i;
740 rdgc x;
741
ac47786e 742 FOR_EACH_VEC_ELT (rdgc, components, i, x)
dea61d92
SP
743 {
744 VEC_free (int, heap, x->vertices);
745 free (x);
746 }
01be8516
SP
747
748 VEC_free (rdgc, heap, components);
dea61d92
SP
749}
750
751/* Build the COMPONENTS vector with the strongly connected components
752 of RDG in which the STARTING_VERTICES occur. */
753
754static void
b8698a0f 755rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
dea61d92
SP
756 VEC (rdgc, heap) **components)
757{
758 int i, v;
759 bitmap saved_components = BITMAP_ALLOC (NULL);
760 int n_components = graphds_scc (rdg, NULL);
761 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
762
763 for (i = 0; i < n_components; i++)
764 all_components[i] = VEC_alloc (int, heap, 3);
765
766 for (i = 0; i < rdg->n_vertices; i++)
767 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
768
ac47786e 769 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
dea61d92
SP
770 {
771 int c = rdg->vertices[v].component;
772
fcaa4ca4 773 if (bitmap_set_bit (saved_components, c))
dea61d92
SP
774 {
775 rdgc x = XCNEW (struct rdg_component);
776 x->num = c;
777 x->vertices = all_components[c];
778
779 VEC_safe_push (rdgc, heap, *components, x);
dea61d92
SP
780 }
781 }
782
783 for (i = 0; i < n_components; i++)
784 if (!bitmap_bit_p (saved_components, i))
785 VEC_free (int, heap, all_components[i]);
786
787 free (all_components);
788 BITMAP_FREE (saved_components);
789}
790
30d55936
RG
791/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
792 For the moment we detect only the memset zero pattern. */
cfee318d 793
30d55936
RG
794static void
795classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
cfee318d 796{
cfee318d 797 bitmap_iterator bi;
30d55936
RG
798 unsigned i;
799 tree nb_iter;
800
801 partition->kind = PKIND_NORMAL;
802 partition->main_stmt = NULL;
803
804 /* Perform general partition disqualification for builtins. */
805 nb_iter = number_of_exit_cond_executions (loop);
806 if (!nb_iter || nb_iter == chrec_dont_know)
807 return;
cfee318d 808
c61f8985 809 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
30d55936
RG
810 {
811 gimple stmt = RDG_STMT (rdg, i);
812
813 if (gimple_has_volatile_ops (stmt))
814 return;
cfee318d 815
30d55936
RG
816 /* If the stmt has uses outside of the loop fail.
817 ??? If the stmt is generated in another partition that
818 is not created as builtin we can ignore this. */
9ca86fc3 819 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936
RG
820 {
821 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "not generating builtin, partition has "
823 "scalar uses outside of the loop\n");
824 return;
825 }
826 }
827
828 /* Detect memset. */
829 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
830 {
831 gimple stmt = RDG_STMT (rdg, i);
832
833 if (gimple_code (stmt) == GIMPLE_PHI)
834 continue;
835
836 /* Any scalar stmts are ok. */
837 if (!gimple_vuse (stmt))
838 continue;
839
840 /* Exactly one store. */
841 if (gimple_assign_single_p (stmt)
842 && !is_gimple_reg (gimple_assign_lhs (stmt)))
843 {
844 if (partition->main_stmt != NULL)
845 return;
846 partition->main_stmt = stmt;
847 }
848 else
849 return;
850 }
851
852 if (partition->main_stmt != NULL
853 && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
854 partition->kind = PKIND_MEMSET;
cfee318d
SP
855}
856
857/* Returns true when PARTITION1 and PARTITION2 have similar memory
858 accesses in RDG. */
859
860static bool
c61f8985
RG
861similar_memory_accesses (struct graph *rdg, partition_t partition1,
862 partition_t partition2)
cfee318d
SP
863{
864 unsigned i, j;
865 bitmap_iterator bi, bj;
866
c61f8985 867 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
cfee318d
SP
868 if (RDG_MEM_WRITE_STMT (rdg, i)
869 || RDG_MEM_READS_STMT (rdg, i))
c61f8985 870 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
cfee318d
SP
871 if (RDG_MEM_WRITE_STMT (rdg, j)
872 || RDG_MEM_READS_STMT (rdg, j))
873 if (rdg_has_similar_memory_accesses (rdg, i, j))
874 return true;
875
876 return false;
877}
878
879/* Fuse all the partitions from PARTITIONS that contain similar memory
880 references, i.e., we're taking care of cache locality. This
881 function does not fuse those partitions that contain patterns that
882 can be code generated with builtins. */
883
884static void
885fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
c61f8985 886 VEC (partition_t, heap) **partitions)
cfee318d
SP
887{
888 int p1, p2;
c61f8985 889 partition_t partition1, partition2;
cfee318d 890
c61f8985 891 FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
30d55936 892 if (!partition_builtin_p (partition1))
c61f8985 893 FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
cfee318d 894 if (p1 != p2
30d55936 895 && !partition_builtin_p (partition2)
cfee318d
SP
896 && similar_memory_accesses (rdg, partition1, partition2))
897 {
c61f8985
RG
898 bitmap_ior_into (partition1->stmts, partition2->stmts);
899 VEC_ordered_remove (partition_t, *partitions, p2);
cfee318d
SP
900 p2--;
901 }
902}
903
dea61d92
SP
904/* Aggregate several components into a useful partition that is
905 registered in the PARTITIONS vector. Partitions will be
906 distributed in different loops. */
907
908static void
909rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
910 VEC (int, heap) **other_stores,
c61f8985 911 VEC (partition_t, heap) **partitions, bitmap processed)
dea61d92
SP
912{
913 int i;
914 rdgc x;
c61f8985 915 partition_t partition = partition_alloc (NULL);
dea61d92 916
ac47786e 917 FOR_EACH_VEC_ELT (rdgc, components, i, x)
dea61d92 918 {
c61f8985 919 partition_t np;
dea61d92
SP
920 bool part_has_writes = false;
921 int v = VEC_index (int, x->vertices, 0);
b8698a0f 922
dea61d92
SP
923 if (bitmap_bit_p (processed, v))
924 continue;
b8698a0f 925
cfee318d 926 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
c61f8985
RG
927 bitmap_ior_into (partition->stmts, np->stmts);
928 bitmap_ior_into (processed, np->stmts);
929 partition_free (np);
dea61d92
SP
930
931 if (part_has_writes)
932 {
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 {
935 fprintf (dump_file, "ldist useful partition:\n");
c61f8985 936 dump_bitmap (dump_file, partition->stmts);
dea61d92
SP
937 }
938
c61f8985
RG
939 VEC_safe_push (partition_t, heap, *partitions, partition);
940 partition = partition_alloc (NULL);
dea61d92
SP
941 }
942 }
943
944 /* Add the nodes from the RDG that were not marked as processed, and
945 that are used outside the current loop. These are scalar
946 computations that are not yet part of previous partitions. */
947 for (i = 0; i < rdg->n_vertices; i++)
948 if (!bitmap_bit_p (processed, i)
949 && rdg_defs_used_in_other_loops_p (rdg, i))
950 VEC_safe_push (int, heap, *other_stores, i);
951
952 /* If there are still statements left in the OTHER_STORES array,
953 create other components and partitions with these stores and
954 their dependences. */
955 if (VEC_length (int, *other_stores) > 0)
956 {
957 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
958 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
959
960 rdg_build_components (rdg, *other_stores, &comps);
961 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
962
963 VEC_free (int, heap, foo);
964 free_rdg_components (comps);
965 }
966
967 /* If there is something left in the last partition, save it. */
c61f8985
RG
968 if (bitmap_count_bits (partition->stmts) > 0)
969 VEC_safe_push (partition_t, heap, *partitions, partition);
dea61d92 970 else
c61f8985 971 partition_free (partition);
dea61d92
SP
972}
973
974/* Dump to FILE the PARTITIONS. */
975
976static void
c61f8985 977dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
dea61d92
SP
978{
979 int i;
c61f8985 980 partition_t partition;
dea61d92 981
c61f8985
RG
982 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
983 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
984}
985
986/* Debug PARTITIONS. */
c61f8985 987extern void debug_rdg_partitions (VEC (partition_t, heap) *);
dea61d92 988
24e47c76 989DEBUG_FUNCTION void
c61f8985 990debug_rdg_partitions (VEC (partition_t, heap) *partitions)
dea61d92
SP
991{
992 dump_rdg_partitions (stderr, partitions);
993}
994
2b8aee8e
SP
995/* Returns the number of read and write operations in the RDG. */
996
997static int
998number_of_rw_in_rdg (struct graph *rdg)
999{
1000 int i, res = 0;
1001
1002 for (i = 0; i < rdg->n_vertices; i++)
1003 {
1004 if (RDG_MEM_WRITE_STMT (rdg, i))
1005 ++res;
1006
1007 if (RDG_MEM_READS_STMT (rdg, i))
1008 ++res;
1009 }
1010
1011 return res;
1012}
1013
1014/* Returns the number of read and write operations in a PARTITION of
1015 the RDG. */
1016
1017static int
c61f8985 1018number_of_rw_in_partition (struct graph *rdg, partition_t partition)
2b8aee8e
SP
1019{
1020 int res = 0;
1021 unsigned i;
1022 bitmap_iterator ii;
1023
c61f8985 1024 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1025 {
1026 if (RDG_MEM_WRITE_STMT (rdg, i))
1027 ++res;
1028
1029 if (RDG_MEM_READS_STMT (rdg, i))
1030 ++res;
1031 }
1032
1033 return res;
1034}
1035
1036/* Returns true when one of the PARTITIONS contains all the read or
1037 write operations of RDG. */
1038
1039static bool
c61f8985 1040partition_contains_all_rw (struct graph *rdg, VEC (partition_t, heap) *partitions)
2b8aee8e
SP
1041{
1042 int i;
c61f8985 1043 partition_t partition;
2b8aee8e
SP
1044 int nrw = number_of_rw_in_rdg (rdg);
1045
c61f8985 1046 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
2b8aee8e
SP
1047 if (nrw == number_of_rw_in_partition (rdg, partition))
1048 return true;
1049
1050 return false;
1051}
1052
dea61d92
SP
1053/* Generate code from STARTING_VERTICES in RDG. Returns the number of
1054 distributed loops. */
1055
1056static int
1057ldist_gen (struct loop *loop, struct graph *rdg,
1058 VEC (int, heap) *starting_vertices)
1059{
1060 int i, nbp;
1061 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
c61f8985 1062 VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
dea61d92 1063 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
c61f8985
RG
1064 partition_t partition;
1065 bitmap processed = BITMAP_ALLOC (NULL);
dea61d92
SP
1066
1067 remaining_stmts = BITMAP_ALLOC (NULL);
1068 upstream_mem_writes = BITMAP_ALLOC (NULL);
1069
1070 for (i = 0; i < rdg->n_vertices; i++)
1071 {
1072 bitmap_set_bit (remaining_stmts, i);
1073
1074 /* Save in OTHER_STORES all the memory writes that are not in
1075 STARTING_VERTICES. */
1076 if (RDG_MEM_WRITE_STMT (rdg, i))
1077 {
1078 int v;
1079 unsigned j;
1080 bool found = false;
1081
ac47786e 1082 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
dea61d92
SP
1083 if (i == v)
1084 {
1085 found = true;
1086 break;
1087 }
1088
1089 if (!found)
1090 VEC_safe_push (int, heap, other_stores, i);
1091 }
1092 }
1093
1094 mark_nodes_having_upstream_mem_writes (rdg);
1095 rdg_build_components (rdg, starting_vertices, &components);
1096 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1097 processed);
1098 BITMAP_FREE (processed);
dea61d92 1099
30d55936
RG
1100 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1101 classify_partition (loop, rdg, partition);
1102
1103 fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
1104
c61f8985 1105 nbp = VEC_length (partition_t, partitions);
a4293fa6
RG
1106 if (nbp == 0
1107 || (nbp == 1
30d55936 1108 && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
a4293fa6
RG
1109 || (nbp > 1
1110 && partition_contains_all_rw (rdg, partitions)))
dea61d92
SP
1111 goto ldist_done;
1112
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1114 dump_rdg_partitions (dump_file, partitions);
1115
c61f8985 1116 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
30d55936 1117 generate_code_for_partition (loop, partition, i < nbp - 1);
dea61d92
SP
1118
1119 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
e799d447
RG
1120 mark_sym_for_renaming (gimple_vop (cfun));
1121 update_ssa (TODO_update_ssa_only_virtuals);
dea61d92
SP
1122
1123 ldist_done:
1124
1125 BITMAP_FREE (remaining_stmts);
1126 BITMAP_FREE (upstream_mem_writes);
1127
c61f8985
RG
1128 FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
1129 partition_free (partition);
dea61d92
SP
1130
1131 VEC_free (int, heap, other_stores);
c61f8985 1132 VEC_free (partition_t, heap, partitions);
dea61d92
SP
1133 free_rdg_components (components);
1134 return nbp;
1135}
1136
1137/* Distributes the code from LOOP in such a way that producer
1138 statements are placed before consumer statements. When STMTS is
1139 NULL, performs the maximal distribution, if STMTS is not NULL,
1140 tries to separate only these statements from the LOOP's body.
1141 Returns the number of distributed loops. */
1142
1143static int
726a989a 1144distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
dea61d92 1145{
e96d7dd7 1146 int res = 0;
dea61d92 1147 struct graph *rdg;
726a989a 1148 gimple s;
dea61d92
SP
1149 unsigned i;
1150 VEC (int, heap) *vertices;
01be8516
SP
1151 VEC (ddr_p, heap) *dependence_relations;
1152 VEC (data_reference_p, heap) *datarefs;
1153 VEC (loop_p, heap) *loop_nest;
dea61d92
SP
1154
1155 if (loop->num_nodes > 2)
1156 {
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file,
1159 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1160 loop->num);
1161
1162 return res;
1163 }
1164
01be8516
SP
1165 datarefs = VEC_alloc (data_reference_p, heap, 10);
1166 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1167 loop_nest = VEC_alloc (loop_p, heap, 3);
1168 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
dea61d92
SP
1169
1170 if (!rdg)
1171 {
1172 if (dump_file && (dump_flags & TDF_DETAILS))
1173 fprintf (dump_file,
1174 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1175 loop->num);
1176
01be8516
SP
1177 free_dependence_relations (dependence_relations);
1178 free_data_refs (datarefs);
1179 VEC_free (loop_p, heap, loop_nest);
dea61d92
SP
1180 return res;
1181 }
1182
1183 vertices = VEC_alloc (int, heap, 3);
1184
1185 if (dump_file && (dump_flags & TDF_DETAILS))
1186 dump_rdg (dump_file, rdg);
1187
ac47786e 1188 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
dea61d92
SP
1189 {
1190 int v = rdg_vertex_for_stmt (rdg, s);
1191
1192 if (v >= 0)
1193 {
1194 VEC_safe_push (int, heap, vertices, v);
1195
1196 if (dump_file && (dump_flags & TDF_DETAILS))
1197 fprintf (dump_file,
1198 "ldist asked to generate code for vertex %d\n", v);
1199 }
1200 }
1201
1202 res = ldist_gen (loop, rdg, vertices);
1203 VEC_free (int, heap, vertices);
1204 free_rdg (rdg);
01be8516
SP
1205 free_dependence_relations (dependence_relations);
1206 free_data_refs (datarefs);
1207 VEC_free (loop_p, heap, loop_nest);
dea61d92
SP
1208 return res;
1209}
1210
1211/* Distribute all loops in the current function. */
1212
1213static unsigned int
1214tree_loop_distribution (void)
1215{
1216 struct loop *loop;
1217 loop_iterator li;
1218 int nb_generated_loops = 0;
1219
1220 FOR_EACH_LOOP (li, loop, 0)
1221 {
a3357f7d 1222 VEC (gimple, heap) *work_list = NULL;
0e20c89f 1223 int num = loop->num;
a3357f7d
RG
1224
1225 /* If the loop doesn't have a single exit we will fail anyway,
1226 so do that early. */
1227 if (!single_exit (loop))
1228 continue;
dea61d92 1229
20769d5e
SP
1230 /* If both flag_tree_loop_distribute_patterns and
1231 flag_tree_loop_distribution are set, then only
1232 distribute_patterns is executed. */
1233 if (flag_tree_loop_distribute_patterns)
1234 {
1235 /* With the following working list, we're asking
1236 distribute_loop to separate from the rest of the loop the
1237 stores of the form "A[i] = 0". */
1238 stores_zero_from_loop (loop, &work_list);
1239
1240 /* Do nothing if there are no patterns to be distributed. */
1241 if (VEC_length (gimple, work_list) > 0)
1242 nb_generated_loops = distribute_loop (loop, work_list);
1243 }
1244 else if (flag_tree_loop_distribution)
1245 {
1246 /* With the following working list, we're asking
1247 distribute_loop to separate the stores of the loop: when
1248 dependences allow, it will end on having one store per
1249 loop. */
1250 stores_from_loop (loop, &work_list);
1251
1252 /* A simple heuristic for cache locality is to not split
1253 stores to the same array. Without this call, an unrolled
1254 loop would be split into as many loops as unroll factor,
1255 each loop storing in the same array. */
1256 remove_similar_memory_refs (&work_list);
1257
1258 nb_generated_loops = distribute_loop (loop, work_list);
1259 }
dea61d92
SP
1260
1261 if (dump_file && (dump_flags & TDF_DETAILS))
1262 {
1263 if (nb_generated_loops > 1)
1264 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
0e20c89f 1265 num, nb_generated_loops);
dea61d92 1266 else
0e20c89f 1267 fprintf (dump_file, "Loop %d is the same.\n", num);
dea61d92
SP
1268 }
1269
510dbcce 1270#ifdef ENABLE_CHECKING
dea61d92 1271 verify_loop_structure ();
510dbcce 1272#endif
dea61d92 1273
726a989a 1274 VEC_free (gimple, heap, work_list);
dea61d92
SP
1275 }
1276
5006671f 1277 return 0;
dea61d92
SP
1278}
1279
1280static bool
1281gate_tree_loop_distribution (void)
1282{
20769d5e
SP
1283 return flag_tree_loop_distribution
1284 || flag_tree_loop_distribute_patterns;
dea61d92
SP
1285}
1286
8ddbbcae 1287struct gimple_opt_pass pass_loop_distribution =
dea61d92 1288{
8ddbbcae
JH
1289 {
1290 GIMPLE_PASS,
dea61d92
SP
1291 "ldist", /* name */
1292 gate_tree_loop_distribution, /* gate */
1293 tree_loop_distribution, /* execute */
1294 NULL, /* sub */
1295 NULL, /* next */
1296 0, /* static_pass_number */
1297 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1298 PROP_cfg | PROP_ssa, /* properties_required */
1299 0, /* properties_provided */
1300 0, /* properties_destroyed */
1301 0, /* todo_flags_start */
c07a8cb3
RG
1302 TODO_ggc_collect
1303 | TODO_verify_ssa /* todo_flags_finish */
8ddbbcae 1304 }
dea61d92 1305};