/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |  A(I) = B(I) + C
   |  D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |  A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |  D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

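/* Besides separating independent statements into their own loops, the pass
   can replace a recognized partition with a library call (see
   classify_partition, generate_memset_builtin and generate_memcpy_builtin
   below).  As an illustrative example, not taken verbatim from the sources:
   a loop such as

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   may be emitted as a single call memset (a, 0, n * sizeof (*a)).  */
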
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
4d648807 47#include "tree.h"
45b0be94 48#include "gimplify.h"
442b4905
AM
49#include "gimple-ssa.h"
50#include "tree-cfg.h"
51#include "tree-phinodes.h"
52#include "ssa-iterators.h"
53#include "tree-ssanames.h"
e28030cf 54#include "tree-ssa-loop-manip.h"
442b4905
AM
55#include "tree-ssa-loop.h"
56#include "tree-into-ssa.h"
7a300452 57#include "tree-ssa.h"
dea61d92 58#include "cfgloop.h"
dea61d92
SP
59#include "tree-chrec.h"
60#include "tree-data-ref.h"
61#include "tree-scalar-evolution.h"
62#include "tree-pass.h"
80ab0b19 63#include "gimple-pretty-print.h"
826a536d 64#include "tree-vectorizer.h"
80ab0b19
RB
65
66
67/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
68typedef struct rdg_vertex
69{
70 /* The statement represented by this vertex. */
71 gimple stmt;
72
73 /* Vector of data-references in this statement. */
74 vec<data_reference_p> datarefs;
75
76 /* True when the statement contains a write to memory. */
77 bool has_mem_write;
78
79 /* True when the statement contains a read from memory. */
80 bool has_mem_reads;
81} *rdg_vertex_p;
82
83#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
84#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
85#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
86#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
87#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
88#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
89#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
90#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
91
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)  ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#if 1
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
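/* Usage note (added for illustration): debug_rdg_vertex, debug_rdg and
   dot_rdg above are DEBUG_FUNCTIONs, i.e. they are kept so they can be
   invoked by hand from a debugger session, for example
   "call debug_rdg (rdg)" while stopped inside this pass.  */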

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow-dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control-dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition uses the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
          stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stack_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}


enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple phi = gsi_stmt (bsi);
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gimple phi = gsi_stmt (bsi);
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gimple_code (stmt) == GIMPLE_COND)
                {
                  gimple_cond_make_false (stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gimple_switch_set_index
                    (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

d0582dc1 699/* Build the size argument for a memory operation call. */
3661e899 700
d0582dc1
RG
701static tree
702build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
3661e899 703{
d0582dc1
RG
704 tree size;
705 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
706 fold_convert_loc (loc, sizetype, nb_iter),
707 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
708 return fold_convert_loc (loc, size_type_node, size);
709}
710
711/* Build an address argument for a memory operation call. */
712
713static tree
714build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
715{
716 tree addr_base;
717
718 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
719 addr_base = fold_convert_loc (loc, sizetype, addr_base);
720
721 /* Test for a negative stride, iterating over every element. */
722 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
723 {
724 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
725 fold_convert_loc (loc, sizetype, nb_bytes));
726 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
727 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
728 }
729
730 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
3661e899
TB
731}
732
401f3a81
JJ
/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

30d55936 763/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 764
cfee318d 765static void
d0582dc1 766generate_memset_builtin (struct loop *loop, partition_t partition)
dea61d92 767{
30d55936
RG
768 gimple_stmt_iterator gsi;
769 gimple stmt, fn_call;
818625cf 770 tree mem, fn, nb_bytes;
30d55936 771 location_t loc;
b6dd5261 772 tree val;
30d55936 773
d0582dc1 774 stmt = DR_STMT (partition->main_dr);
30d55936 775 loc = gimple_location (stmt);
30d55936
RG
776
777 /* The new statements will be placed before LOOP. */
778 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 779
818625cf 780 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
d0582dc1
RG
781 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
782 false, GSI_CONTINUE_LINKING);
783 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
784 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
785 false, GSI_CONTINUE_LINKING);
dea61d92 786
b6dd5261
RG
787 /* This exactly matches the pattern recognition in classify_partition. */
788 val = gimple_assign_rhs1 (stmt);
401f3a81
JJ
789 /* Handle constants like 0x15151515 and similarly
790 floating point constants etc. where all bytes are the same. */
791 int bytev = const_with_all_bytes_same (val);
792 if (bytev != -1)
793 val = build_int_cst (integer_type_node, bytev);
794 else if (TREE_CODE (val) == INTEGER_CST)
795 val = fold_convert (integer_type_node, val);
796 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
b6dd5261 797 {
401f3a81
JJ
798 gimple cstmt;
799 tree tem = make_ssa_name (integer_type_node, NULL);
800 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
801 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
802 val = tem;
b6dd5261
RG
803 }
804
e79983f4 805 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
b6dd5261 806 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
d0582dc1 807 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
dea61d92
SP
808
809 if (dump_file && (dump_flags & TDF_DETAILS))
b6dd5261
RG
810 {
811 fprintf (dump_file, "generated memset");
401f3a81 812 if (bytev == 0)
b6dd5261 813 fprintf (dump_file, " zero\n");
b6dd5261
RG
814 else
815 fprintf (dump_file, "\n");
816 }
dea61d92
SP
817}
818
d0582dc1
RG
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  stack_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns.  */
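
/* Illustrative (assumed) examples of the classification done below: a
   partition whose only memory access is "a[i] = c" with a byte-replicable
   constant C is classified as PKIND_MEMSET; one whose accesses form
   "a[i] = b[i]" with matching steps is classified as PKIND_MEMCPY;
   everything else stays PKIND_NORMAL.  */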

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                       gimple_bb (DR_STMT (single_store))))
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether there are any loads or stores in the
     intersection of the two partitions.  Common loads are the
     situation that happens most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            data_reference_p tem = dr1;
            dr1 = dr2;
            dr2 = tem;
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                data_reference_p tem = dr1;
                dr1 = dr2;
                dr2 = tem;
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographically positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  vec<partition_t> partitions;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  stack_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  partitions.create (3);
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);
  partitions.release ();

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple phi = gsi_stmt (gsi);
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_assign_single_p (stmt)
                       || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);

      work_list.release ();
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}