]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-loop-distribution.c
bootstrap-ubsan.mk (POSTSTAGE1_LDFLAGS): Add -ldl.
[thirdparty/gcc.git] / gcc / tree-loop-distribution.c
CommitLineData
dea61d92 1/* Loop distribution.
d1e082c2 2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
dea61d92
SP
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6This file is part of GCC.
b8698a0f 7
dea61d92
SP
8GCC is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
b8698a0f 12
dea61d92
SP
13GCC is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
b8698a0f 17
dea61d92
SP
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22/* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
b8698a0f 29 is transformed to
dea61d92
SP
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
4d648807 47#include "tree.h"
18f429e2 48#include "gimple.h"
5be5c238 49#include "gimple-iterator.h"
18f429e2 50#include "gimplify-me.h"
442b4905
AM
51#include "gimple-ssa.h"
52#include "tree-cfg.h"
53#include "tree-phinodes.h"
54#include "ssa-iterators.h"
55#include "tree-ssanames.h"
e28030cf 56#include "tree-ssa-loop-manip.h"
442b4905
AM
57#include "tree-ssa-loop.h"
58#include "tree-into-ssa.h"
7a300452 59#include "tree-ssa.h"
dea61d92 60#include "cfgloop.h"
dea61d92
SP
61#include "tree-chrec.h"
62#include "tree-data-ref.h"
63#include "tree-scalar-evolution.h"
64#include "tree-pass.h"
80ab0b19 65#include "gimple-pretty-print.h"
826a536d 66#include "tree-vectorizer.h"
80ab0b19
RB
67
68
69/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
70typedef struct rdg_vertex
71{
72 /* The statement represented by this vertex. */
73 gimple stmt;
74
75 /* Vector of data-references in this statement. */
76 vec<data_reference_p> datarefs;
77
78 /* True when the statement contains a write to memory. */
79 bool has_mem_write;
80
81 /* True when the statement contains a read from memory. */
82 bool has_mem_reads;
83} *rdg_vertex_p;
84
85#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
86#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
87#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
88#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
89#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
90#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
91#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
92#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
93
94/* Data dependence type. */
95
96enum rdg_dep_type
97{
98 /* Read After Write (RAW). */
99 flow_dd = 'f',
100
36875e8f
RB
101 /* Control dependence (execute conditional on). */
102 control_dd = 'c'
80ab0b19
RB
103};
104
105/* Dependence information attached to an edge of the RDG. */
106
107typedef struct rdg_edge
108{
109 /* Type of the dependence. */
110 enum rdg_dep_type type;
80ab0b19
RB
111} *rdg_edge_p;
112
113#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
80ab0b19 114
80ab0b19
RB
115/* Dump vertex I in RDG to FILE. */
116
117static void
118dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
119{
120 struct vertex *v = &(rdg->vertices[i]);
121 struct graph_edge *e;
122
123 fprintf (file, "(vertex %d: (%s%s) (in:", i,
124 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
125 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
126
127 if (v->pred)
128 for (e = v->pred; e; e = e->pred_next)
129 fprintf (file, " %d", e->src);
130
131 fprintf (file, ") (out:");
132
133 if (v->succ)
134 for (e = v->succ; e; e = e->succ_next)
135 fprintf (file, " %d", e->dest);
136
137 fprintf (file, ")\n");
138 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
139 fprintf (file, ")\n");
140}
141
142/* Call dump_rdg_vertex on stderr. */
143
144DEBUG_FUNCTION void
145debug_rdg_vertex (struct graph *rdg, int i)
146{
147 dump_rdg_vertex (stderr, rdg, i);
148}
149
80ab0b19
RB
150/* Dump the reduced dependence graph RDG to FILE. */
151
152static void
153dump_rdg (FILE *file, struct graph *rdg)
154{
80ab0b19 155 fprintf (file, "(rdg\n");
2fd5894f
RB
156 for (int i = 0; i < rdg->n_vertices; i++)
157 dump_rdg_vertex (file, rdg, i);
80ab0b19 158 fprintf (file, ")\n");
80ab0b19
RB
159}
160
161/* Call dump_rdg on stderr. */
162
163DEBUG_FUNCTION void
164debug_rdg (struct graph *rdg)
165{
166 dump_rdg (stderr, rdg);
167}
168
169static void
170dot_rdg_1 (FILE *file, struct graph *rdg)
171{
172 int i;
174ec470
RB
173 pretty_printer buffer;
174 pp_needs_newline (&buffer) = false;
175 buffer.buffer->stream = file;
80ab0b19
RB
176
177 fprintf (file, "digraph RDG {\n");
178
179 for (i = 0; i < rdg->n_vertices; i++)
180 {
181 struct vertex *v = &(rdg->vertices[i]);
182 struct graph_edge *e;
183
174ec470
RB
184 fprintf (file, "%d [label=\"[%d] ", i, i);
185 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
186 pp_flush (&buffer);
187 fprintf (file, "\"]\n");
188
80ab0b19
RB
189 /* Highlight reads from memory. */
190 if (RDG_MEM_READS_STMT (rdg, i))
191 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
192
193 /* Highlight stores to memory. */
194 if (RDG_MEM_WRITE_STMT (rdg, i))
195 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
196
197 if (v->succ)
198 for (e = v->succ; e; e = e->succ_next)
199 switch (RDGE_TYPE (e))
200 {
80ab0b19
RB
201 case flow_dd:
202 /* These are the most common dependences: don't print these. */
203 fprintf (file, "%d -> %d \n", i, e->dest);
204 break;
205
36875e8f
RB
206 case control_dd:
207 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
208 break;
209
80ab0b19
RB
210 default:
211 gcc_unreachable ();
212 }
213 }
214
215 fprintf (file, "}\n\n");
216}
217
218/* Display the Reduced Dependence Graph using dotty. */
219
220DEBUG_FUNCTION void
221dot_rdg (struct graph *rdg)
222{
174ec470
RB
223 /* When debugging, you may want to enable the following code. */
224#if 1
c3284718 225 FILE *file = popen ("dot -Tx11", "w");
174ec470
RB
226 if (!file)
227 return;
80ab0b19 228 dot_rdg_1 (file, rdg);
174ec470
RB
229 fflush (file);
230 close (fileno (file));
231 pclose (file);
80ab0b19
RB
232#else
233 dot_rdg_1 (stderr, rdg);
234#endif
235}
236
237/* Returns the index of STMT in RDG. */
238
239static int
240rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
241{
242 int index = gimple_uid (stmt);
243 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
244 return index;
245}
246
80ab0b19
RB
247/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
248 the index of DEF in RDG. */
249
250static void
251create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
252{
253 use_operand_p imm_use_p;
254 imm_use_iterator iterator;
255
256 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
257 {
258 struct graph_edge *e;
259 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
260
261 if (use < 0)
262 continue;
263
264 e = add_edge (rdg, idef, use);
265 e->data = XNEW (struct rdg_edge);
266 RDGE_TYPE (e) = flow_dd;
80ab0b19
RB
267 }
268}
269
36875e8f
RB
270/* Creates an edge for the control dependences of BB to the vertex V. */
271
272static void
273create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
274 int v, control_dependences *cd)
275{
276 bitmap_iterator bi;
277 unsigned edge_n;
278 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
279 0, edge_n, bi)
280 {
281 basic_block cond_bb = cd->get_edge (edge_n)->src;
282 gimple stmt = last_stmt (cond_bb);
283 if (stmt && is_ctrl_stmt (stmt))
284 {
285 struct graph_edge *e;
286 int c = rdg_vertex_for_stmt (rdg, stmt);
287 if (c < 0)
288 continue;
289
290 e = add_edge (rdg, c, v);
291 e->data = XNEW (struct rdg_edge);
292 RDGE_TYPE (e) = control_dd;
36875e8f
RB
293 }
294 }
295}
296
80ab0b19
RB
297/* Creates the edges of the reduced dependence graph RDG. */
298
299static void
447f3223 300create_rdg_flow_edges (struct graph *rdg)
80ab0b19
RB
301{
302 int i;
80ab0b19
RB
303 def_operand_p def_p;
304 ssa_op_iter iter;
305
80ab0b19
RB
306 for (i = 0; i < rdg->n_vertices; i++)
307 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
308 iter, SSA_OP_DEF)
309 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
447f3223 310}
36875e8f 311
447f3223
RB
312/* Creates the edges of the reduced dependence graph RDG. */
313
314static void
315create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
316{
317 int i;
318
319 for (i = 0; i < rdg->n_vertices; i++)
320 {
321 gimple stmt = RDG_STMT (rdg, i);
322 if (gimple_code (stmt) == GIMPLE_PHI)
323 {
324 edge_iterator ei;
325 edge e;
326 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
36875e8f 327 create_edge_for_control_dependence (rdg, e->src, i, cd);
447f3223
RB
328 }
329 else
330 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
331 }
80ab0b19
RB
332}
333
334/* Build the vertices of the reduced dependence graph RDG. Return false
335 if that failed. */
336
337static bool
338create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
339 vec<data_reference_p> *datarefs)
340{
341 int i;
342 gimple stmt;
343
344 FOR_EACH_VEC_ELT (stmts, i, stmt)
345 {
346 struct vertex *v = &(rdg->vertices[i]);
347
348 /* Record statement to vertex mapping. */
349 gimple_set_uid (stmt, i);
350
351 v->data = XNEW (struct rdg_vertex);
352 RDGV_STMT (v) = stmt;
353 RDGV_DATAREFS (v).create (0);
354 RDGV_HAS_MEM_WRITE (v) = false;
355 RDGV_HAS_MEM_READS (v) = false;
356 if (gimple_code (stmt) == GIMPLE_PHI)
357 continue;
358
359 unsigned drp = datarefs->length ();
360 if (!find_data_references_in_stmt (loop, stmt, datarefs))
361 return false;
362 for (unsigned j = drp; j < datarefs->length (); ++j)
363 {
364 data_reference_p dr = (*datarefs)[j];
365 if (DR_IS_READ (dr))
366 RDGV_HAS_MEM_READS (v) = true;
367 else
368 RDGV_HAS_MEM_WRITE (v) = true;
369 RDGV_DATAREFS (v).safe_push (dr);
370 }
371 }
372 return true;
373}
374
2fd5894f 375/* Initialize STMTS with all the statements of LOOP. The order in
80ab0b19
RB
376 which we discover statements is important as
377 generate_loops_for_partition is using the same traversal for
2fd5894f 378 identifying statements in loop copies. */
80ab0b19
RB
379
380static void
381stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
382{
383 unsigned int i;
384 basic_block *bbs = get_loop_body_in_dom_order (loop);
385
386 for (i = 0; i < loop->num_nodes; i++)
387 {
388 basic_block bb = bbs[i];
389 gimple_stmt_iterator bsi;
390 gimple stmt;
391
392 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
83a95546
RB
393 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
394 stmts->safe_push (gsi_stmt (bsi));
80ab0b19
RB
395
396 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
397 {
398 stmt = gsi_stmt (bsi);
399 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
400 stmts->safe_push (stmt);
401 }
402 }
403
404 free (bbs);
405}
406
80ab0b19
RB
407/* Free the reduced dependence graph RDG. */
408
409static void
410free_rdg (struct graph *rdg)
411{
412 int i;
413
414 for (i = 0; i < rdg->n_vertices; i++)
415 {
416 struct vertex *v = &(rdg->vertices[i]);
417 struct graph_edge *e;
418
419 for (e = v->succ; e; e = e->succ_next)
447f3223 420 free (e->data);
80ab0b19
RB
421
422 if (v->data)
423 {
424 gimple_set_uid (RDGV_STMT (v), -1);
425 free_data_refs (RDGV_DATAREFS (v));
426 free (v->data);
427 }
428 }
429
430 free_graph (rdg);
431}
432
433/* Build the Reduced Dependence Graph (RDG) with one vertex per
97463b2b 434 statement of the loop nest LOOP_NEST, and one edge per data dependence or
80ab0b19
RB
435 scalar dependence. */
436
437static struct graph *
36875e8f 438build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
80ab0b19
RB
439{
440 struct graph *rdg;
80ab0b19 441 vec<data_reference_p> datarefs;
80ab0b19 442
97463b2b 443 /* Create the RDG vertices from the stmts of the loop nest. */
07687835 444 stack_vec<gimple, 10> stmts;
97463b2b 445 stmts_from_loop (loop_nest[0], &stmts);
24f161fd 446 rdg = new_graph (stmts.length ());
80ab0b19 447 datarefs.create (10);
97463b2b 448 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
80ab0b19 449 {
97463b2b 450 datarefs.release ();
80ab0b19
RB
451 free_rdg (rdg);
452 return NULL;
453 }
454 stmts.release ();
97463b2b 455
447f3223
RB
456 create_rdg_flow_edges (rdg);
457 if (cd)
458 create_rdg_cd_edges (rdg, cd);
459
97463b2b 460 datarefs.release ();
80ab0b19
RB
461
462 return rdg;
463}
464
80ab0b19 465
dea61d92 466
b9fc0497 467enum partition_kind {
826a536d 468 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
b9fc0497 469};
30d55936 470
c61f8985
RG
471typedef struct partition_s
472{
473 bitmap stmts;
80ab0b19 474 bitmap loops;
826a536d 475 bool reduction_p;
30d55936 476 enum partition_kind kind;
d0582dc1
RG
477 /* data-references a kind != PKIND_NORMAL partition is about. */
478 data_reference_p main_dr;
479 data_reference_p secondary_dr;
818625cf 480 tree niter;
c61f8985
RG
481} *partition_t;
482
c61f8985
RG
483
484/* Allocate and initialize a partition from BITMAP. */
485
486static partition_t
80ab0b19 487partition_alloc (bitmap stmts, bitmap loops)
c61f8985
RG
488{
489 partition_t partition = XCNEW (struct partition_s);
490 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
80ab0b19 491 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
826a536d 492 partition->reduction_p = false;
30d55936 493 partition->kind = PKIND_NORMAL;
c61f8985
RG
494 return partition;
495}
496
497/* Free PARTITION. */
498
499static void
500partition_free (partition_t partition)
501{
502 BITMAP_FREE (partition->stmts);
80ab0b19 503 BITMAP_FREE (partition->loops);
c61f8985
RG
504 free (partition);
505}
506
30d55936
RG
507/* Returns true if the partition can be generated as a builtin. */
508
509static bool
510partition_builtin_p (partition_t partition)
511{
826a536d 512 return partition->kind != PKIND_NORMAL;
30d55936 513}
c61f8985 514
826a536d 515/* Returns true if the partition contains a reduction. */
7ad672e4
RG
516
517static bool
826a536d 518partition_reduction_p (partition_t partition)
7ad672e4 519{
826a536d 520 return partition->reduction_p;
7ad672e4
RG
521}
522
826a536d
RB
523/* Merge PARTITION into the partition DEST. */
524
525static void
526partition_merge_into (partition_t dest, partition_t partition)
527{
528 dest->kind = PKIND_NORMAL;
529 bitmap_ior_into (dest->stmts, partition->stmts);
530 if (partition_reduction_p (partition))
531 dest->reduction_p = true;
532}
533
534
c07a8cb3
RG
535/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
536 the LOOP. */
537
538static bool
539ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
540{
541 imm_use_iterator imm_iter;
542 use_operand_p use_p;
543
544 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
e665269a
RG
545 {
546 gimple use_stmt = USE_STMT (use_p);
547 if (!is_gimple_debug (use_stmt)
548 && loop != loop_containing_stmt (use_stmt))
549 return true;
550 }
c07a8cb3
RG
551
552 return false;
553}
554
555/* Returns true when STMT defines a scalar variable used after the
88af7c1a 556 loop LOOP. */
c07a8cb3
RG
557
558static bool
88af7c1a 559stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
c07a8cb3 560{
88af7c1a
RG
561 def_operand_p def_p;
562 ssa_op_iter op_iter;
c07a8cb3 563
9ca86fc3
RG
564 if (gimple_code (stmt) == GIMPLE_PHI)
565 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
566
88af7c1a
RG
567 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
568 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
569 return true;
c07a8cb3 570
88af7c1a 571 return false;
c07a8cb3
RG
572}
573
dea61d92
SP
574/* Return a copy of LOOP placed before LOOP. */
575
576static struct loop *
577copy_loop_before (struct loop *loop)
578{
579 struct loop *res;
580 edge preheader = loop_preheader_edge (loop);
581
dea61d92
SP
582 initialize_original_copy_tables ();
583 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
30d55936 584 gcc_assert (res != NULL);
dea61d92 585 free_original_copy_tables ();
2cfc56b9 586 delete_update_ssa ();
dea61d92
SP
587
588 return res;
589}
590
591/* Creates an empty basic block after LOOP. */
592
593static void
594create_bb_after_loop (struct loop *loop)
595{
596 edge exit = single_exit (loop);
597
598 if (!exit)
599 return;
600
601 split_edge (exit);
602}
603
604/* Generate code for PARTITION from the code in LOOP. The loop is
605 copied when COPY_P is true. All the statements not flagged in the
606 PARTITION bitmap are removed from the loop or from its copy. The
607 statements are indexed in sequence inside a basic block, and the
30d55936 608 basic blocks of a loop are taken in dom order. */
dea61d92 609
30d55936 610static void
c61f8985
RG
611generate_loops_for_partition (struct loop *loop, partition_t partition,
612 bool copy_p)
dea61d92 613{
2fd5894f 614 unsigned i;
726a989a 615 gimple_stmt_iterator bsi;
dea61d92
SP
616 basic_block *bbs;
617
618 if (copy_p)
619 {
620 loop = copy_loop_before (loop);
30d55936 621 gcc_assert (loop != NULL);
dea61d92
SP
622 create_preheader (loop, CP_SIMPLE_PREHEADERS);
623 create_bb_after_loop (loop);
624 }
625
2fd5894f 626 /* Remove stmts not in the PARTITION bitmap. */
dea61d92
SP
627 bbs = get_loop_body_in_dom_order (loop);
628
b03c3082 629 if (MAY_HAVE_DEBUG_STMTS)
2fd5894f 630 for (i = 0; i < loop->num_nodes; i++)
b03c3082
JJ
631 {
632 basic_block bb = bbs[i];
633
634 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2fd5894f
RB
635 {
636 gimple phi = gsi_stmt (bsi);
637 if (!virtual_operand_p (gimple_phi_result (phi))
638 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
639 reset_debug_uses (phi);
640 }
b03c3082
JJ
641
642 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
643 {
644 gimple stmt = gsi_stmt (bsi);
645 if (gimple_code (stmt) != GIMPLE_LABEL
646 && !is_gimple_debug (stmt)
2fd5894f 647 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
b03c3082
JJ
648 reset_debug_uses (stmt);
649 }
650 }
651
2fd5894f 652 for (i = 0; i < loop->num_nodes; i++)
dea61d92
SP
653 {
654 basic_block bb = bbs[i];
dea61d92 655
726a989a 656 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
2fd5894f
RB
657 {
658 gimple phi = gsi_stmt (bsi);
659 if (!virtual_operand_p (gimple_phi_result (phi))
660 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
2706a615 661 remove_phi_node (&bsi, true);
2fd5894f
RB
662 else
663 gsi_next (&bsi);
664 }
dea61d92 665
726a989a 666 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
2706a615
RG
667 {
668 gimple stmt = gsi_stmt (bsi);
b03c3082
JJ
669 if (gimple_code (stmt) != GIMPLE_LABEL
670 && !is_gimple_debug (stmt)
2fd5894f 671 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
2706a615 672 {
36875e8f
RB
673 /* Choose an arbitrary path through the empty CFG part
674 that this unnecessary control stmt controls. */
675 if (gimple_code (stmt) == GIMPLE_COND)
676 {
677 gimple_cond_make_false (stmt);
678 update_stmt (stmt);
679 }
680 else if (gimple_code (stmt) == GIMPLE_SWITCH)
681 {
682 gimple_switch_set_index
683 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
684 update_stmt (stmt);
685 }
686 else
687 {
688 unlink_stmt_vdef (stmt);
689 gsi_remove (&bsi, true);
690 release_defs (stmt);
691 continue;
692 }
2706a615 693 }
36875e8f 694 gsi_next (&bsi);
2706a615 695 }
dea61d92
SP
696 }
697
698 free (bbs);
dea61d92
SP
699}
700
d0582dc1 701/* Build the size argument for a memory operation call. */
3661e899 702
d0582dc1
RG
703static tree
704build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
3661e899 705{
d0582dc1
RG
706 tree size;
707 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
708 fold_convert_loc (loc, sizetype, nb_iter),
709 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
710 return fold_convert_loc (loc, size_type_node, size);
711}
712
713/* Build an address argument for a memory operation call. */
714
715static tree
716build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
717{
718 tree addr_base;
719
720 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
721 addr_base = fold_convert_loc (loc, sizetype, addr_base);
722
723 /* Test for a negative stride, iterating over every element. */
724 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
725 {
726 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
727 fold_convert_loc (loc, sizetype, nb_bytes));
728 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
729 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
730 }
731
732 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
3661e899
TB
733}
734
401f3a81
JJ
735/* If VAL memory representation contains the same value in all bytes,
736 return that value, otherwise return -1.
737 E.g. for 0x24242424 return 0x24, for IEEE double
738 747708026454360457216.0 return 0x44, etc. */
739
740static int
741const_with_all_bytes_same (tree val)
742{
743 unsigned char buf[64];
744 int i, len;
745
746 if (integer_zerop (val)
747 || real_zerop (val)
748 || (TREE_CODE (val) == CONSTRUCTOR
749 && !TREE_CLOBBER_P (val)
750 && CONSTRUCTOR_NELTS (val) == 0))
751 return 0;
752
753 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
754 return -1;
755
756 len = native_encode_expr (val, buf, sizeof (buf));
757 if (len == 0)
758 return -1;
759 for (i = 1; i < len; i++)
760 if (buf[i] != buf[0])
761 return -1;
762 return buf[0];
763}
764
30d55936 765/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 766
cfee318d 767static void
d0582dc1 768generate_memset_builtin (struct loop *loop, partition_t partition)
dea61d92 769{
30d55936
RG
770 gimple_stmt_iterator gsi;
771 gimple stmt, fn_call;
818625cf 772 tree mem, fn, nb_bytes;
30d55936 773 location_t loc;
b6dd5261 774 tree val;
30d55936 775
d0582dc1 776 stmt = DR_STMT (partition->main_dr);
30d55936 777 loc = gimple_location (stmt);
30d55936
RG
778
779 /* The new statements will be placed before LOOP. */
780 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 781
818625cf 782 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
d0582dc1
RG
783 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
784 false, GSI_CONTINUE_LINKING);
785 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
786 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
787 false, GSI_CONTINUE_LINKING);
dea61d92 788
b6dd5261
RG
789 /* This exactly matches the pattern recognition in classify_partition. */
790 val = gimple_assign_rhs1 (stmt);
401f3a81
JJ
791 /* Handle constants like 0x15151515 and similarly
792 floating point constants etc. where all bytes are the same. */
793 int bytev = const_with_all_bytes_same (val);
794 if (bytev != -1)
795 val = build_int_cst (integer_type_node, bytev);
796 else if (TREE_CODE (val) == INTEGER_CST)
797 val = fold_convert (integer_type_node, val);
798 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
b6dd5261 799 {
401f3a81
JJ
800 gimple cstmt;
801 tree tem = make_ssa_name (integer_type_node, NULL);
802 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
803 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
804 val = tem;
b6dd5261
RG
805 }
806
e79983f4 807 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
b6dd5261 808 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
d0582dc1 809 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
dea61d92
SP
810
811 if (dump_file && (dump_flags & TDF_DETAILS))
b6dd5261
RG
812 {
813 fprintf (dump_file, "generated memset");
401f3a81 814 if (bytev == 0)
b6dd5261 815 fprintf (dump_file, " zero\n");
b6dd5261
RG
816 else
817 fprintf (dump_file, "\n");
818 }
dea61d92
SP
819}
820
d0582dc1
RG
821/* Generate a call to memcpy for PARTITION in LOOP. */
822
823static void
824generate_memcpy_builtin (struct loop *loop, partition_t partition)
825{
826 gimple_stmt_iterator gsi;
827 gimple stmt, fn_call;
818625cf 828 tree dest, src, fn, nb_bytes;
d0582dc1
RG
829 location_t loc;
830 enum built_in_function kind;
831
832 stmt = DR_STMT (partition->main_dr);
833 loc = gimple_location (stmt);
d0582dc1
RG
834
835 /* The new statements will be placed before LOOP. */
836 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
837
818625cf 838 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
d0582dc1
RG
839 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
840 false, GSI_CONTINUE_LINKING);
841 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
842 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
843 if (ptr_derefs_may_alias_p (dest, src))
844 kind = BUILT_IN_MEMMOVE;
845 else
846 kind = BUILT_IN_MEMCPY;
847
848 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
849 false, GSI_CONTINUE_LINKING);
850 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
851 false, GSI_CONTINUE_LINKING);
852 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
853 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
854 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
855
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 {
858 if (kind == BUILT_IN_MEMCPY)
859 fprintf (dump_file, "generated memcpy\n");
860 else
861 fprintf (dump_file, "generated memmove\n");
862 }
863}
864
30d55936 865/* Remove and destroy the loop LOOP. */
dea61d92 866
30d55936
RG
867static void
868destroy_loop (struct loop *loop)
dea61d92 869{
30d55936
RG
870 unsigned nbbs = loop->num_nodes;
871 edge exit = single_exit (loop);
872 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 873 basic_block *bbs;
30d55936 874 unsigned i;
dea61d92
SP
875
876 bbs = get_loop_body_in_dom_order (loop);
877
30d55936
RG
878 redirect_edge_pred (exit, src);
879 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
880 exit->flags |= EDGE_FALLTHRU;
881 cancel_loop_tree (loop);
882 rescan_loop_exit (exit, false, true);
dea61d92 883
30d55936 884 for (i = 0; i < nbbs; i++)
c014f6f5
RG
885 {
886 /* We have made sure to not leave any dangling uses of SSA
887 names defined in the loop. With the exception of virtuals.
888 Make sure we replace all uses of virtual defs that will remain
889 outside of the loop with the bare symbol as delete_basic_block
890 will release them. */
891 gimple_stmt_iterator gsi;
892 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
893 {
894 gimple phi = gsi_stmt (gsi);
ea057359 895 if (virtual_operand_p (gimple_phi_result (phi)))
c014f6f5
RG
896 mark_virtual_phi_result_for_renaming (phi);
897 }
898 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
899 {
900 gimple stmt = gsi_stmt (gsi);
901 tree vdef = gimple_vdef (stmt);
902 if (vdef && TREE_CODE (vdef) == SSA_NAME)
903 mark_virtual_operand_for_renaming (vdef);
904 }
905 delete_basic_block (bbs[i]);
906 }
dea61d92 907 free (bbs);
30d55936
RG
908
909 set_immediate_dominator (CDI_DOMINATORS, dest,
910 recompute_dominator (CDI_DOMINATORS, dest));
dea61d92
SP
911}
912
30d55936 913/* Generates code for PARTITION. */
dea61d92 914
30d55936 915static void
d0582dc1 916generate_code_for_partition (struct loop *loop,
be6b029b 917 partition_t partition, bool copy_p)
dea61d92 918{
30d55936
RG
919 switch (partition->kind)
920 {
826a536d
RB
921 case PKIND_NORMAL:
922 /* Reductions all have to be in the last partition. */
923 gcc_assert (!partition_reduction_p (partition)
924 || !copy_p);
925 generate_loops_for_partition (loop, partition, copy_p);
926 return;
927
30d55936 928 case PKIND_MEMSET:
d0582dc1 929 generate_memset_builtin (loop, partition);
d0582dc1
RG
930 break;
931
932 case PKIND_MEMCPY:
933 generate_memcpy_builtin (loop, partition);
30d55936
RG
934 break;
935
936 default:
937 gcc_unreachable ();
938 }
dea61d92 939
826a536d
RB
940 /* Common tail for partitions we turn into a call. If this was the last
941 partition for which we generate code, we have to destroy the loop. */
942 if (!copy_p)
943 destroy_loop (loop);
dea61d92
SP
944}
945
dea61d92 946
24f161fd
RB
947/* Returns a partition with all the statements needed for computing
948 the vertex V of the RDG, also including the loop exit conditions. */
dea61d92 949
24f161fd
RB
950static partition_t
951build_rdg_partition_for_vertex (struct graph *rdg, int v)
dea61d92 952{
24f161fd 953 partition_t partition = partition_alloc (NULL, NULL);
07687835 954 stack_vec<int, 3> nodes;
24f161fd 955 unsigned i;
dea61d92
SP
956 int x;
957
174ec470 958 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
dea61d92 959
9771b263 960 FOR_EACH_VEC_ELT (nodes, i, x)
24f161fd
RB
961 {
962 bitmap_set_bit (partition->stmts, x);
963 bitmap_set_bit (partition->loops,
964 loop_containing_stmt (RDG_STMT (rdg, x))->num);
965 }
dea61d92 966
dea61d92
SP
967 return partition;
968}
969
30d55936
RG
970/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
971 For the moment we detect only the memset zero pattern. */
cfee318d 972
30d55936
RG
973static void
974classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
cfee318d 975{
cfee318d 976 bitmap_iterator bi;
30d55936
RG
977 unsigned i;
978 tree nb_iter;
d0582dc1 979 data_reference_p single_load, single_store;
b9fc0497 980 bool volatiles_p = false;
30d55936
RG
981
982 partition->kind = PKIND_NORMAL;
d0582dc1
RG
983 partition->main_dr = NULL;
984 partition->secondary_dr = NULL;
818625cf 985 partition->niter = NULL_TREE;
30d55936 986
c61f8985 987 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
30d55936
RG
988 {
989 gimple stmt = RDG_STMT (rdg, i);
990
991 if (gimple_has_volatile_ops (stmt))
b9fc0497 992 volatiles_p = true;
cfee318d 993
826a536d 994 /* If the stmt has uses outside of the loop mark it as reduction. */
9ca86fc3 995 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936 996 {
826a536d 997 partition->reduction_p = true;
30d55936
RG
998 return;
999 }
1000 }
1001
b9fc0497
RB
1002 /* Perform general partition disqualification for builtins. */
1003 if (volatiles_p
1004 || !flag_tree_loop_distribute_patterns)
1005 return;
1006
d0582dc1
RG
1007 /* Detect memset and memcpy. */
1008 single_load = NULL;
1009 single_store = NULL;
30d55936
RG
1010 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1011 {
1012 gimple stmt = RDG_STMT (rdg, i);
d0582dc1
RG
1013 data_reference_p dr;
1014 unsigned j;
30d55936
RG
1015
1016 if (gimple_code (stmt) == GIMPLE_PHI)
1017 continue;
1018
1019 /* Any scalar stmts are ok. */
1020 if (!gimple_vuse (stmt))
1021 continue;
1022
d0582dc1
RG
1023 /* Otherwise just regular loads/stores. */
1024 if (!gimple_assign_single_p (stmt))
1025 return;
1026
1027 /* But exactly one store and/or load. */
9771b263 1028 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
30d55936 1029 {
d0582dc1
RG
1030 if (DR_IS_READ (dr))
1031 {
1032 if (single_load != NULL)
1033 return;
1034 single_load = dr;
1035 }
1036 else
1037 {
1038 if (single_store != NULL)
1039 return;
1040 single_store = dr;
1041 }
30d55936 1042 }
30d55936
RG
1043 }
1044
818625cf
RB
1045 if (!single_store)
1046 return;
1047
1048 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1049 gimple_bb (DR_STMT (single_store))))
1050 nb_iter = number_of_latch_executions (loop);
1051 else
1052 nb_iter = number_of_exit_cond_executions (loop);
1053 if (!nb_iter || nb_iter == chrec_dont_know)
1054 return;
1055
d0582dc1
RG
1056 if (single_store && !single_load)
1057 {
1058 gimple stmt = DR_STMT (single_store);
1059 tree rhs = gimple_assign_rhs1 (stmt);
401f3a81
JJ
1060 if (const_with_all_bytes_same (rhs) == -1
1061 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1062 || (TYPE_MODE (TREE_TYPE (rhs))
1063 != TYPE_MODE (unsigned_char_type_node))))
d0582dc1
RG
1064 return;
1065 if (TREE_CODE (rhs) == SSA_NAME
1066 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1067 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1068 return;
ca406576
RB
1069 if (!adjacent_dr_p (single_store)
1070 || !dominated_by_p (CDI_DOMINATORS,
1071 loop->latch, gimple_bb (stmt)))
d0582dc1
RG
1072 return;
1073 partition->kind = PKIND_MEMSET;
1074 partition->main_dr = single_store;
818625cf 1075 partition->niter = nb_iter;
d0582dc1
RG
1076 }
1077 else if (single_store && single_load)
1078 {
1079 gimple store = DR_STMT (single_store);
1080 gimple load = DR_STMT (single_load);
1081 /* Direct aggregate copy or via an SSA name temporary. */
1082 if (load != store
1083 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1084 return;
1085 if (!adjacent_dr_p (single_store)
1086 || !adjacent_dr_p (single_load)
1087 || !operand_equal_p (DR_STEP (single_store),
ca406576
RB
1088 DR_STEP (single_load), 0)
1089 || !dominated_by_p (CDI_DOMINATORS,
1090 loop->latch, gimple_bb (store)))
d0582dc1 1091 return;
f20132e7
RG
1092 /* Now check that if there is a dependence this dependence is
1093 of a suitable form for memmove. */
6e1aa848 1094 vec<loop_p> loops = vNULL;
f20132e7 1095 ddr_p ddr;
9771b263 1096 loops.safe_push (loop);
f20132e7
RG
1097 ddr = initialize_data_dependence_relation (single_load, single_store,
1098 loops);
1099 compute_affine_dependence (ddr, loop);
f20132e7
RG
1100 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1101 {
1102 free_dependence_relation (ddr);
9771b263 1103 loops.release ();
f20132e7
RG
1104 return;
1105 }
1106 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1107 {
1108 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1109 {
1110 free_dependence_relation (ddr);
9771b263 1111 loops.release ();
f20132e7
RG
1112 return;
1113 }
1114 lambda_vector dist_v;
9771b263 1115 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
f20132e7
RG
1116 {
1117 int dist = dist_v[index_in_loop_nest (loop->num,
1118 DDR_LOOP_NEST (ddr))];
1119 if (dist > 0 && !DDR_REVERSED_P (ddr))
1120 {
1121 free_dependence_relation (ddr);
9771b263 1122 loops.release ();
f20132e7
RG
1123 return;
1124 }
1125 }
1126 }
b7ce70b3 1127 free_dependence_relation (ddr);
9771b263 1128 loops.release ();
d0582dc1
RG
1129 partition->kind = PKIND_MEMCPY;
1130 partition->main_dr = single_store;
1131 partition->secondary_dr = single_load;
818625cf 1132 partition->niter = nb_iter;
d0582dc1 1133 }
cfee318d
SP
1134}
1135
1fa0c180
RG
1136/* For a data reference REF, return the declaration of its base
1137 address or NULL_TREE if the base is not determined. */
1138
1139static tree
1140ref_base_address (data_reference_p dr)
1141{
1142 tree base_address = DR_BASE_ADDRESS (dr);
1143 if (base_address
1144 && TREE_CODE (base_address) == ADDR_EXPR)
1145 return TREE_OPERAND (base_address, 0);
1146
1147 return base_address;
1148}
1149
cfee318d
SP
1150/* Returns true when PARTITION1 and PARTITION2 have similar memory
1151 accesses in RDG. */
1152
1153static bool
c61f8985
RG
1154similar_memory_accesses (struct graph *rdg, partition_t partition1,
1155 partition_t partition2)
cfee318d 1156{
1fa0c180 1157 unsigned i, j, k, l;
cfee318d 1158 bitmap_iterator bi, bj;
1fa0c180
RG
1159 data_reference_p ref1, ref2;
1160
1161 /* First check whether in the intersection of the two partitions are
1162 any loads or stores. Common loads are the situation that happens
1163 most often. */
1164 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1165 if (RDG_MEM_WRITE_STMT (rdg, i)
1166 || RDG_MEM_READS_STMT (rdg, i))
1167 return true;
cfee318d 1168
1fa0c180 1169 /* Then check all data-references against each other. */
c61f8985 1170 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
cfee318d
SP
1171 if (RDG_MEM_WRITE_STMT (rdg, i)
1172 || RDG_MEM_READS_STMT (rdg, i))
c61f8985 1173 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
cfee318d
SP
1174 if (RDG_MEM_WRITE_STMT (rdg, j)
1175 || RDG_MEM_READS_STMT (rdg, j))
1fa0c180 1176 {
9771b263 1177 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1fa0c180
RG
1178 {
1179 tree base1 = ref_base_address (ref1);
1180 if (base1)
9771b263 1181 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1fa0c180
RG
1182 if (base1 == ref_base_address (ref2))
1183 return true;
1184 }
1185 }
cfee318d
SP
1186
1187 return false;
1188}
1189
dea61d92
SP
1190/* Aggregate several components into a useful partition that is
1191 registered in the PARTITIONS vector. Partitions will be
1192 distributed in different loops. */
1193
1194static void
83a95546 1195rdg_build_partitions (struct graph *rdg,
2fd5894f 1196 vec<gimple> starting_stmts,
83a95546 1197 vec<partition_t> *partitions)
dea61d92 1198{
83a95546 1199 bitmap processed = BITMAP_ALLOC (NULL);
2fd5894f
RB
1200 int i;
1201 gimple stmt;
dea61d92 1202
2fd5894f 1203 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
dea61d92 1204 {
2fd5894f
RB
1205 int v = rdg_vertex_for_stmt (rdg, stmt);
1206
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1208 fprintf (dump_file,
1209 "ldist asked to generate code for vertex %d\n", v);
b8698a0f 1210
24f161fd
RB
1211 /* If the vertex is already contained in another partition so
1212 is the partition rooted at it. */
dea61d92
SP
1213 if (bitmap_bit_p (processed, v))
1214 continue;
b8698a0f 1215
24f161fd
RB
1216 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1217 bitmap_ior_into (processed, partition->stmts);
dea61d92 1218
826a536d 1219 if (dump_file && (dump_flags & TDF_DETAILS))
dea61d92 1220 {
826a536d
RB
1221 fprintf (dump_file, "ldist useful partition:\n");
1222 dump_bitmap (dump_file, partition->stmts);
dea61d92 1223 }
826a536d
RB
1224
1225 partitions->safe_push (partition);
dea61d92
SP
1226 }
1227
83a95546
RB
1228 /* All vertices should have been assigned to at least one partition now,
1229 other than vertices belonging to dead code. */
dea61d92 1230
83a95546 1231 BITMAP_FREE (processed);
dea61d92
SP
1232}
1233
1234/* Dump to FILE the PARTITIONS. */
1235
1236static void
9771b263 1237dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
dea61d92
SP
1238{
1239 int i;
c61f8985 1240 partition_t partition;
dea61d92 1241
9771b263 1242 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1243 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
1244}
1245
1246/* Debug PARTITIONS. */
9771b263 1247extern void debug_rdg_partitions (vec<partition_t> );
dea61d92 1248
24e47c76 1249DEBUG_FUNCTION void
9771b263 1250debug_rdg_partitions (vec<partition_t> partitions)
dea61d92
SP
1251{
1252 dump_rdg_partitions (stderr, partitions);
1253}
1254
2b8aee8e
SP
1255/* Returns the number of read and write operations in the RDG. */
1256
1257static int
1258number_of_rw_in_rdg (struct graph *rdg)
1259{
1260 int i, res = 0;
1261
1262 for (i = 0; i < rdg->n_vertices; i++)
1263 {
1264 if (RDG_MEM_WRITE_STMT (rdg, i))
1265 ++res;
1266
1267 if (RDG_MEM_READS_STMT (rdg, i))
1268 ++res;
1269 }
1270
1271 return res;
1272}
1273
1274/* Returns the number of read and write operations in a PARTITION of
1275 the RDG. */
1276
1277static int
c61f8985 1278number_of_rw_in_partition (struct graph *rdg, partition_t partition)
2b8aee8e
SP
1279{
1280 int res = 0;
1281 unsigned i;
1282 bitmap_iterator ii;
1283
c61f8985 1284 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1285 {
1286 if (RDG_MEM_WRITE_STMT (rdg, i))
1287 ++res;
1288
1289 if (RDG_MEM_READS_STMT (rdg, i))
1290 ++res;
1291 }
1292
1293 return res;
1294}
1295
1296/* Returns true when one of the PARTITIONS contains all the read or
1297 write operations of RDG. */
1298
1299static bool
9771b263
DN
1300partition_contains_all_rw (struct graph *rdg,
1301 vec<partition_t> partitions)
2b8aee8e
SP
1302{
1303 int i;
c61f8985 1304 partition_t partition;
2b8aee8e
SP
1305 int nrw = number_of_rw_in_rdg (rdg);
1306
9771b263 1307 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1308 if (nrw == number_of_rw_in_partition (rdg, partition))
1309 return true;
1310
1311 return false;
1312}
1313
447f3223
RB
1314/* Compute partition dependence created by the data references in DRS1
1315 and DRS2 and modify and return DIR according to that. */
1316
1317static int
1318pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1319 vec<data_reference_p> drs1,
1320 vec<data_reference_p> drs2)
1321{
1322 data_reference_p dr1, dr2;
1323
1324 /* dependence direction - 0 is no dependence, -1 is back,
1325 1 is forth, 2 is both (we can stop then, merging will occur). */
1326 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1327 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1328 {
2cf19e26 1329 int this_dir = 1;
447f3223
RB
1330 ddr_p ddr;
1331 /* Re-shuffle data-refs to be in dominator order. */
1332 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1333 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1334 {
1335 data_reference_p tem = dr1;
1336 dr1 = dr2;
1337 dr2 = tem;
1338 this_dir = -this_dir;
1339 }
1340 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1341 compute_affine_dependence (ddr, loops[0]);
1342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1343 this_dir = 2;
1344 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1345 {
1346 if (DDR_REVERSED_P (ddr))
1347 {
1348 data_reference_p tem = dr1;
1349 dr1 = dr2;
1350 dr2 = tem;
1351 this_dir = -this_dir;
1352 }
1353 /* Known dependences can still be unordered througout the
1354 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
2cf19e26 1355 if (DDR_NUM_DIST_VECTS (ddr) != 1)
447f3223 1356 this_dir = 2;
2cf19e26
RB
1357 /* If the overlap is exact preserve stmt order. */
1358 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1359 ;
1360 else
1361 {
1362 /* Else as the distance vector is lexicographic positive
1363 swap the dependence direction. */
1364 this_dir = -this_dir;
1365 }
447f3223
RB
1366 }
1367 else
1368 this_dir = 0;
1369 free_dependence_relation (ddr);
1370 if (dir == 0)
1371 dir = this_dir;
1372 else if (dir != this_dir)
1373 return 2;
1374 }
1375 return dir;
1376}
1377
1378/* Compare postorder number of the partition graph vertices V1 and V2. */
1379
1380static int
1381pgcmp (const void *v1_, const void *v2_)
1382{
1383 const vertex *v1 = (const vertex *)v1_;
1384 const vertex *v2 = (const vertex *)v2_;
1385 return v2->post - v1->post;
1386}
2fd5894f
RB
1387
1388/* Distributes the code from LOOP in such a way that producer
1389 statements are placed before consumer statements. Tries to separate
1390 only the statements from STMTS into separate loops.
1391 Returns the number of distributed loops. */
dea61d92
SP
1392
1393static int
36875e8f 1394distribute_loop (struct loop *loop, vec<gimple> stmts,
826a536d 1395 control_dependences *cd, int *nb_calls)
dea61d92 1396{
2fd5894f 1397 struct graph *rdg;
9771b263 1398 vec<partition_t> partitions;
c61f8985 1399 partition_t partition;
be6b029b 1400 bool any_builtin;
2fd5894f 1401 int i, nbp;
447f3223
RB
1402 graph *pg = NULL;
1403 int num_sccs = 1;
dea61d92 1404
826a536d 1405 *nb_calls = 0;
07687835 1406 stack_vec<loop_p, 3> loop_nest;
2fd5894f 1407 if (!find_loop_nest (loop, &loop_nest))
07687835 1408 return 0;
2fd5894f 1409
36875e8f 1410 rdg = build_rdg (loop_nest, cd);
2fd5894f
RB
1411 if (!rdg)
1412 {
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file,
1415 "Loop %d not distributed: failed to build the RDG.\n",
1416 loop->num);
1417
2fd5894f
RB
1418 return 0;
1419 }
1420
1421 if (dump_file && (dump_flags & TDF_DETAILS))
1422 dump_rdg (dump_file, rdg);
1423
1424 partitions.create (3);
1425 rdg_build_partitions (rdg, stmts, &partitions);
dea61d92 1426
be6b029b 1427 any_builtin = false;
9771b263 1428 FOR_EACH_VEC_ELT (partitions, i, partition)
be6b029b
RG
1429 {
1430 classify_partition (loop, rdg, partition);
1431 any_builtin |= partition_builtin_p (partition);
1432 }
30d55936 1433
447f3223
RB
1434 /* If we are only distributing patterns but did not detect any,
1435 simply bail out. */
9fed7f3a
RB
1436 if (!flag_tree_loop_distribution
1437 && !any_builtin)
1438 {
1439 nbp = 0;
1440 goto ldist_done;
1441 }
1442
447f3223
RB
1443 /* If we are only distributing patterns fuse all partitions that
1444 were not classified as builtins. This also avoids chopping
1445 a loop into pieces, separated by builtin calls. That is, we
1446 only want no or a single loop body remaining. */
1447 partition_t into;
1448 if (!flag_tree_loop_distribution)
1449 {
1450 for (i = 0; partitions.iterate (i, &into); ++i)
1451 if (!partition_builtin_p (into))
1452 break;
1453 for (++i; partitions.iterate (i, &partition); ++i)
1454 if (!partition_builtin_p (partition))
1455 {
1456 if (dump_file && (dump_flags & TDF_DETAILS))
1457 {
1458 fprintf (dump_file, "fusing non-builtin partitions\n");
1459 dump_bitmap (dump_file, into->stmts);
1460 dump_bitmap (dump_file, partition->stmts);
1461 }
1462 partition_merge_into (into, partition);
1463 partitions.unordered_remove (i);
1464 partition_free (partition);
1465 i--;
1466 }
1467 }
1468
1469 /* Due to limitations in the transform phase we have to fuse all
1470 reduction partitions into the last partition so the existing
1471 loop will contain all loop-closed PHI nodes. */
1472 for (i = 0; partitions.iterate (i, &into); ++i)
1473 if (partition_reduction_p (into))
1474 break;
1475 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1476 if (partition_reduction_p (partition))
1477 {
1478 if (dump_file && (dump_flags & TDF_DETAILS))
1479 {
1480 fprintf (dump_file, "fusing partitions\n");
1481 dump_bitmap (dump_file, into->stmts);
1482 dump_bitmap (dump_file, partition->stmts);
1483 fprintf (dump_file, "because they have reductions\n");
1484 }
1485 partition_merge_into (into, partition);
1486 partitions.unordered_remove (i);
1487 partition_free (partition);
1488 i--;
1489 }
1490
9fed7f3a
RB
1491 /* Apply our simple cost model - fuse partitions with similar
1492 memory accesses. */
9fed7f3a
RB
1493 for (i = 0; partitions.iterate (i, &into); ++i)
1494 {
1495 if (partition_builtin_p (into))
1496 continue;
1497 for (int j = i + 1;
1498 partitions.iterate (j, &partition); ++j)
1499 {
1500 if (!partition_builtin_p (partition)
1501 && similar_memory_accesses (rdg, into, partition))
1502 {
1503 if (dump_file && (dump_flags & TDF_DETAILS))
1504 {
1505 fprintf (dump_file, "fusing partitions\n");
1506 dump_bitmap (dump_file, into->stmts);
1507 dump_bitmap (dump_file, partition->stmts);
1508 fprintf (dump_file, "because they have similar "
1509 "memory accesses\n");
1510 }
826a536d 1511 partition_merge_into (into, partition);
447f3223 1512 partitions.unordered_remove (j);
9fed7f3a
RB
1513 partition_free (partition);
1514 j--;
1515 }
1516 }
1517 }
1518
447f3223
RB
1519 /* Build the partition dependency graph. */
1520 if (partitions.length () > 1)
c014f6f5 1521 {
447f3223
RB
1522 pg = new_graph (partitions.length ());
1523 struct pgdata {
1524 partition_t partition;
1525 vec<data_reference_p> writes;
1526 vec<data_reference_p> reads;
1527 };
1528#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1529 for (i = 0; partitions.iterate (i, &partition); ++i)
1530 {
1531 vertex *v = &pg->vertices[i];
1532 pgdata *data = new pgdata;
1533 data_reference_p dr;
1534 /* FIXME - leaks. */
1535 v->data = data;
1536 bitmap_iterator bi;
1537 unsigned j;
1538 data->partition = partition;
1539 data->reads = vNULL;
1540 data->writes = vNULL;
1541 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1542 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1543 if (DR_IS_READ (dr))
1544 data->reads.safe_push (dr);
1545 else
1546 data->writes.safe_push (dr);
1547 }
1548 partition_t partition1, partition2;
1549 for (i = 0; partitions.iterate (i, &partition1); ++i)
1550 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1551 {
1552 /* dependence direction - 0 is no dependence, -1 is back,
1553 1 is forth, 2 is both (we can stop then, merging will occur). */
1554 int dir = 0;
1555 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1556 PGDATA(i)->writes,
1557 PGDATA(j)->reads);
1558 if (dir != 2)
1559 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1560 PGDATA(i)->reads,
1561 PGDATA(j)->writes);
1562 if (dir != 2)
1563 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1564 PGDATA(i)->writes,
1565 PGDATA(j)->writes);
1566 if (dir == 1 || dir == 2)
1567 add_edge (pg, i, j);
1568 if (dir == -1 || dir == 2)
1569 add_edge (pg, j, i);
1570 }
1571
1572 /* Add edges to the reduction partition (if any) to force it last. */
1573 unsigned j;
1574 for (j = 0; partitions.iterate (j, &partition); ++j)
1575 if (partition_reduction_p (partition))
1576 break;
1577 if (j < partitions.length ())
38ad2d07 1578 {
447f3223
RB
1579 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1580 if (i != j)
1581 add_edge (pg, i, j);
1582 }
1583
1584 /* Compute partitions we cannot separate and fuse them. */
1585 num_sccs = graphds_scc (pg, NULL);
1586 for (i = 0; i < num_sccs; ++i)
1587 {
1588 partition_t first;
1589 int j;
1590 for (j = 0; partitions.iterate (j, &first); ++j)
1591 if (pg->vertices[j].component == i)
38ad2d07 1592 break;
447f3223
RB
1593 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1594 if (pg->vertices[j].component == i)
38ad2d07 1595 {
447f3223
RB
1596 if (dump_file && (dump_flags & TDF_DETAILS))
1597 {
1598 fprintf (dump_file, "fusing partitions\n");
1599 dump_bitmap (dump_file, first->stmts);
1600 dump_bitmap (dump_file, partition->stmts);
1601 fprintf (dump_file, "because they are in the same "
1602 "dependence SCC\n");
1603 }
1604 partition_merge_into (first, partition);
1605 partitions[j] = NULL;
5eb010bc 1606 partition_free (partition);
447f3223 1607 PGDATA (j)->partition = NULL;
38ad2d07 1608 }
38ad2d07 1609 }
30d55936 1610
447f3223
RB
1611 /* Now order the remaining nodes in postorder. */
1612 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1613 partitions.truncate (0);
1614 for (i = 0; i < pg->n_vertices; ++i)
b9fc0497 1615 {
447f3223
RB
1616 pgdata *data = PGDATA (i);
1617 if (data->partition)
1618 partitions.safe_push (data->partition);
1619 data->reads.release ();
1620 data->writes.release ();
1621 delete data;
b9fc0497 1622 }
447f3223
RB
1623 gcc_assert (partitions.length () == (unsigned)num_sccs);
1624 free_graph (pg);
b9fc0497
RB
1625 }
1626
9771b263 1627 nbp = partitions.length ();
a4293fa6 1628 if (nbp == 0
9771b263
DN
1629 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1630 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
c014f6f5
RG
1631 {
1632 nbp = 0;
1633 goto ldist_done;
1634 }
dea61d92
SP
1635
1636 if (dump_file && (dump_flags & TDF_DETAILS))
1637 dump_rdg_partitions (dump_file, partitions);
1638
9771b263 1639 FOR_EACH_VEC_ELT (partitions, i, partition)
826a536d
RB
1640 {
1641 if (partition_builtin_p (partition))
1642 (*nb_calls)++;
1643 generate_code_for_partition (loop, partition, i < nbp - 1);
1644 }
dea61d92 1645
dea61d92
SP
1646 ldist_done:
1647
9771b263 1648 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1649 partition_free (partition);
9771b263 1650 partitions.release ();
dea61d92 1651
dea61d92 1652 free_rdg (rdg);
826a536d 1653 return nbp - *nb_calls;
dea61d92
SP
1654}
1655
1656/* Distribute all loops in the current function. */
1657
1658static unsigned int
1659tree_loop_distribution (void)
1660{
1661 struct loop *loop;
1662 loop_iterator li;
c014f6f5 1663 bool changed = false;
1fa0c180 1664 basic_block bb;
36875e8f 1665 control_dependences *cd = NULL;
1fa0c180
RG
1666
1667 FOR_ALL_BB (bb)
1668 {
1669 gimple_stmt_iterator gsi;
1670 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1671 gimple_set_uid (gsi_stmt (gsi), -1);
1672 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1673 gimple_set_uid (gsi_stmt (gsi), -1);
1674 }
dea61d92 1675
c014f6f5
RG
1676 /* We can at the moment only distribute non-nested loops, thus restrict
1677 walking to innermost loops. */
1678 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
dea61d92 1679 {
6e1aa848 1680 vec<gimple> work_list = vNULL;
be6b029b 1681 basic_block *bbs;
0e20c89f 1682 int num = loop->num;
be6b029b 1683 unsigned int i;
a3357f7d
RG
1684
1685 /* If the loop doesn't have a single exit we will fail anyway,
1686 so do that early. */
1687 if (!single_exit (loop))
1688 continue;
dea61d92 1689
f56f2d33
JH
1690 /* Only optimize hot loops. */
1691 if (!optimize_loop_for_speed_p (loop))
1692 continue;
1693
be6b029b
RG
1694 /* Initialize the worklist with stmts we seed the partitions with. */
1695 bbs = get_loop_body_in_dom_order (loop);
1696 for (i = 0; i < loop->num_nodes; ++i)
1697 {
1698 gimple_stmt_iterator gsi;
deb6c11a
RB
1699 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1700 {
1701 gimple phi = gsi_stmt (gsi);
1702 if (virtual_operand_p (gimple_phi_result (phi)))
1703 continue;
1704 /* Distribute stmts which have defs that are used outside of
1705 the loop. */
1706 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1707 continue;
1708 work_list.safe_push (phi);
1709 }
be6b029b
RG
1710 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1711 {
1712 gimple stmt = gsi_stmt (gsi);
83a95546
RB
1713
1714 /* If there is a stmt with side-effects bail out - we
1715 cannot and should not distribute this loop. */
1716 if (gimple_has_side_effects (stmt))
1717 {
1718 work_list.truncate (0);
1719 goto out;
1720 }
1721
b9fc0497
RB
1722 /* Distribute stmts which have defs that are used outside of
1723 the loop. */
1724 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1725 ;
1726 /* Otherwise only distribute stores for now. */
e179190c 1727 else if (!gimple_vdef (stmt))
be6b029b
RG
1728 continue;
1729
9771b263 1730 work_list.safe_push (stmt);
be6b029b
RG
1731 }
1732 }
83a95546 1733out:
be6b029b 1734 free (bbs);
c014f6f5 1735
826a536d
RB
1736 int nb_generated_loops = 0;
1737 int nb_generated_calls = 0;
1738 location_t loc = find_loop_location (loop);
9771b263 1739 if (work_list.length () > 0)
36875e8f
RB
1740 {
1741 if (!cd)
1742 {
ca406576 1743 calculate_dominance_info (CDI_DOMINATORS);
36875e8f
RB
1744 calculate_dominance_info (CDI_POST_DOMINATORS);
1745 cd = new control_dependences (create_edge_list ());
1746 free_dominance_info (CDI_POST_DOMINATORS);
1747 }
826a536d
RB
1748 nb_generated_loops = distribute_loop (loop, work_list, cd,
1749 &nb_generated_calls);
36875e8f 1750 }
c014f6f5 1751
826a536d 1752 if (nb_generated_loops + nb_generated_calls > 0)
dea61d92 1753 {
826a536d
RB
1754 changed = true;
1755 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1756 loc, "Loop %d distributed: split to %d loops "
1757 "and %d library calls.\n",
1758 num, nb_generated_loops, nb_generated_calls);
dea61d92 1759 }
826a536d
RB
1760 else if (dump_file && (dump_flags & TDF_DETAILS))
1761 fprintf (dump_file, "Loop %d is the same.\n", num);
dea61d92 1762
9771b263 1763 work_list.release ();
dea61d92
SP
1764 }
1765
36875e8f
RB
1766 if (cd)
1767 delete cd;
1768
c014f6f5
RG
1769 if (changed)
1770 {
525174a2 1771 mark_virtual_operands_for_renaming (cfun);
c014f6f5
RG
1772 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1773 }
1774
1775#ifdef ENABLE_CHECKING
1776 verify_loop_structure ();
1777#endif
1778
5006671f 1779 return 0;
dea61d92
SP
1780}
1781
1782static bool
1783gate_tree_loop_distribution (void)
1784{
20769d5e
SP
1785 return flag_tree_loop_distribution
1786 || flag_tree_loop_distribute_patterns;
dea61d92
SP
1787}
1788
27a4cd48
DM
1789namespace {
1790
1791const pass_data pass_data_loop_distribution =
dea61d92 1792{
27a4cd48
DM
1793 GIMPLE_PASS, /* type */
1794 "ldist", /* name */
1795 OPTGROUP_LOOP, /* optinfo_flags */
1796 true, /* has_gate */
1797 true, /* has_execute */
1798 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1799 ( PROP_cfg | PROP_ssa ), /* properties_required */
1800 0, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 TODO_verify_ssa, /* todo_flags_finish */
dea61d92 1804};
27a4cd48
DM
1805
1806class pass_loop_distribution : public gimple_opt_pass
1807{
1808public:
c3284718
RS
1809 pass_loop_distribution (gcc::context *ctxt)
1810 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
27a4cd48
DM
1811 {}
1812
1813 /* opt_pass methods: */
1814 bool gate () { return gate_tree_loop_distribution (); }
1815 unsigned int execute () { return tree_loop_distribution (); }
1816
1817}; // class pass_loop_distribution
1818
1819} // anon namespace
1820
1821gimple_opt_pass *
1822make_pass_loop_distribution (gcc::context *ctxt)
1823{
1824 return new pass_loop_distribution (ctxt);
1825}