gcc/tree-loop-distribution.c
/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |  A(I) = B(I) + C
   |  D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |  A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |  D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
43
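/* Editor's illustration (a sketch, not part of the original sources):
   the same idea applied to C, together with the pattern detection
   controlled by -ftree-loop-distribute-patterns, turns

   | for (i = 0; i < n; i++)
   |   {
   |     a[i] = 0;
   |     sum += b[i];
   |   }

   into a call to memset (a, 0, n * sizeof (*a)) for the store partition
   and a residual loop that keeps the SUM reduction; see
   classify_partition and generate_memset_builtin below.  */
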
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
4d648807 47#include "tree.h"
45b0be94 48#include "gimplify.h"
5be5c238 49#include "gimple-iterator.h"
50#include "gimple-ssa.h"
51#include "tree-cfg.h"
52#include "tree-phinodes.h"
53#include "ssa-iterators.h"
54#include "tree-ssanames.h"
e28030cf 55#include "tree-ssa-loop-manip.h"
56#include "tree-ssa-loop.h"
57#include "tree-into-ssa.h"
7a300452 58#include "tree-ssa.h"
dea61d92 59#include "cfgloop.h"
60#include "tree-chrec.h"
61#include "tree-data-ref.h"
62#include "tree-scalar-evolution.h"
63#include "tree-pass.h"
80ab0b19 64#include "gimple-pretty-print.h"
826a536d 65#include "tree-vectorizer.h"
66
67
68/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
69typedef struct rdg_vertex
70{
71 /* The statement represented by this vertex. */
72 gimple stmt;
73
74 /* Vector of data-references in this statement. */
75 vec<data_reference_p> datarefs;
76
77 /* True when the statement contains a write to memory. */
78 bool has_mem_write;
79
80 /* True when the statement contains a read from memory. */
81 bool has_mem_reads;
82} *rdg_vertex_p;
83
84#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
85#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
86#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
87#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
88#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
89#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
90#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
91#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
92
93/* Data dependence type. */
94
95enum rdg_dep_type
96{
97 /* Read After Write (RAW). */
98 flow_dd = 'f',
99
100 /* Control dependence (execute conditional on). */
101 control_dd = 'c'
102};
103
104/* Dependence information attached to an edge of the RDG. */
105
106typedef struct rdg_edge
107{
108 /* Type of the dependence. */
109 enum rdg_dep_type type;
110} *rdg_edge_p;
111
112#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
80ab0b19 113
114/* Dump vertex I in RDG to FILE. */
115
116static void
117dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
118{
119 struct vertex *v = &(rdg->vertices[i]);
120 struct graph_edge *e;
121
122 fprintf (file, "(vertex %d: (%s%s) (in:", i,
123 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
124 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
125
126 if (v->pred)
127 for (e = v->pred; e; e = e->pred_next)
128 fprintf (file, " %d", e->src);
129
130 fprintf (file, ") (out:");
131
132 if (v->succ)
133 for (e = v->succ; e; e = e->succ_next)
134 fprintf (file, " %d", e->dest);
135
136 fprintf (file, ")\n");
137 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
138 fprintf (file, ")\n");
139}
140
141/* Call dump_rdg_vertex on stderr. */
142
143DEBUG_FUNCTION void
144debug_rdg_vertex (struct graph *rdg, int i)
145{
146 dump_rdg_vertex (stderr, rdg, i);
147}
148
149/* Dump the reduced dependence graph RDG to FILE. */
150
151static void
152dump_rdg (FILE *file, struct graph *rdg)
153{
80ab0b19 154 fprintf (file, "(rdg\n");
155 for (int i = 0; i < rdg->n_vertices; i++)
156 dump_rdg_vertex (file, rdg, i);
80ab0b19 157 fprintf (file, ")\n");
158}
159
160/* Call dump_rdg on stderr. */
161
162DEBUG_FUNCTION void
163debug_rdg (struct graph *rdg)
164{
165 dump_rdg (stderr, rdg);
166}
167
168static void
169dot_rdg_1 (FILE *file, struct graph *rdg)
170{
171 int i;
172 pretty_printer buffer;
173 pp_needs_newline (&buffer) = false;
174 buffer.buffer->stream = file;
175
176 fprintf (file, "digraph RDG {\n");
177
178 for (i = 0; i < rdg->n_vertices; i++)
179 {
180 struct vertex *v = &(rdg->vertices[i]);
181 struct graph_edge *e;
182
183 fprintf (file, "%d [label=\"[%d] ", i, i);
184 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
185 pp_flush (&buffer);
186 fprintf (file, "\"]\n");
187
188 /* Highlight reads from memory. */
189 if (RDG_MEM_READS_STMT (rdg, i))
190 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
191
192 /* Highlight stores to memory. */
193 if (RDG_MEM_WRITE_STMT (rdg, i))
194 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
195
196 if (v->succ)
197 for (e = v->succ; e; e = e->succ_next)
198 switch (RDGE_TYPE (e))
199 {
200 case flow_dd:
201 /* These are the most common dependences: don't print these. */
202 fprintf (file, "%d -> %d \n", i, e->dest);
203 break;
204
205 case control_dd:
206 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
207 break;
208
209 default:
210 gcc_unreachable ();
211 }
212 }
213
214 fprintf (file, "}\n\n");
215}
216
217/* Display the Reduced Dependence Graph using dotty. */
218
219DEBUG_FUNCTION void
220dot_rdg (struct graph *rdg)
221{
222 /* When debugging, you may want to enable the following code. */
223#if 1
c3284718 224 FILE *file = popen ("dot -Tx11", "w");
225 if (!file)
226 return;
80ab0b19 227 dot_rdg_1 (file, rdg);
228 fflush (file);
229 close (fileno (file));
230 pclose (file);
231#else
232 dot_rdg_1 (stderr, rdg);
233#endif
234}
235
236/* Returns the index of STMT in RDG. */
237
238static int
239rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
240{
241 int index = gimple_uid (stmt);
242 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
243 return index;
244}
245
246/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
247 the index of DEF in RDG. */
248
249static void
250create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
251{
252 use_operand_p imm_use_p;
253 imm_use_iterator iterator;
254
255 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
256 {
257 struct graph_edge *e;
258 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
259
260 if (use < 0)
261 continue;
262
263 e = add_edge (rdg, idef, use);
264 e->data = XNEW (struct rdg_edge);
265 RDGE_TYPE (e) = flow_dd;
266 }
267}
268
269/* Creates an edge for the control dependences of BB to the vertex V. */
270
271static void
272create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
273 int v, control_dependences *cd)
274{
275 bitmap_iterator bi;
276 unsigned edge_n;
277 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
278 0, edge_n, bi)
279 {
280 basic_block cond_bb = cd->get_edge (edge_n)->src;
281 gimple stmt = last_stmt (cond_bb);
282 if (stmt && is_ctrl_stmt (stmt))
283 {
284 struct graph_edge *e;
285 int c = rdg_vertex_for_stmt (rdg, stmt);
286 if (c < 0)
287 continue;
288
289 e = add_edge (rdg, c, v);
290 e->data = XNEW (struct rdg_edge);
291 RDGE_TYPE (e) = control_dd;
292 }
293 }
294}
295
296/* Creates the flow dependence (def-use) edges of the reduced dependence graph RDG. */
297
298static void
447f3223 299create_rdg_flow_edges (struct graph *rdg)
300{
301 int i;
302 def_operand_p def_p;
303 ssa_op_iter iter;
304
305 for (i = 0; i < rdg->n_vertices; i++)
306 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
307 iter, SSA_OP_DEF)
308 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
447f3223 309}
36875e8f 310
311/* Creates the control dependence edges of the reduced dependence graph RDG. */
312
313static void
314create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
315{
316 int i;
317
318 for (i = 0; i < rdg->n_vertices; i++)
319 {
320 gimple stmt = RDG_STMT (rdg, i);
321 if (gimple_code (stmt) == GIMPLE_PHI)
322 {
323 edge_iterator ei;
324 edge e;
325 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
36875e8f 326 create_edge_for_control_dependence (rdg, e->src, i, cd);
327 }
328 else
329 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
330 }
331}
332
333/* Build the vertices of the reduced dependence graph RDG. Return false
334 if that failed. */
335
336static bool
337create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
338 vec<data_reference_p> *datarefs)
339{
340 int i;
341 gimple stmt;
342
343 FOR_EACH_VEC_ELT (stmts, i, stmt)
344 {
345 struct vertex *v = &(rdg->vertices[i]);
346
347 /* Record statement to vertex mapping. */
348 gimple_set_uid (stmt, i);
349
350 v->data = XNEW (struct rdg_vertex);
351 RDGV_STMT (v) = stmt;
352 RDGV_DATAREFS (v).create (0);
353 RDGV_HAS_MEM_WRITE (v) = false;
354 RDGV_HAS_MEM_READS (v) = false;
355 if (gimple_code (stmt) == GIMPLE_PHI)
356 continue;
357
358 unsigned drp = datarefs->length ();
359 if (!find_data_references_in_stmt (loop, stmt, datarefs))
360 return false;
361 for (unsigned j = drp; j < datarefs->length (); ++j)
362 {
363 data_reference_p dr = (*datarefs)[j];
364 if (DR_IS_READ (dr))
365 RDGV_HAS_MEM_READS (v) = true;
366 else
367 RDGV_HAS_MEM_WRITE (v) = true;
368 RDGV_DATAREFS (v).safe_push (dr);
369 }
370 }
371 return true;
372}
373
2fd5894f 374/* Initialize STMTS with all the statements of LOOP. The order in
375 which we discover statements is important as
376 generate_loops_for_partition is using the same traversal for
2fd5894f 377 identifying statements in loop copies. */
378
379static void
380stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
381{
382 unsigned int i;
383 basic_block *bbs = get_loop_body_in_dom_order (loop);
384
385 for (i = 0; i < loop->num_nodes; i++)
386 {
387 basic_block bb = bbs[i];
388 gimple_stmt_iterator bsi;
389 gimple stmt;
390
391 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
392 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
393 stmts->safe_push (gsi_stmt (bsi));
394
395 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
396 {
397 stmt = gsi_stmt (bsi);
398 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
399 stmts->safe_push (stmt);
400 }
401 }
402
403 free (bbs);
404}
405
406/* Free the reduced dependence graph RDG. */
407
408static void
409free_rdg (struct graph *rdg)
410{
411 int i;
412
413 for (i = 0; i < rdg->n_vertices; i++)
414 {
415 struct vertex *v = &(rdg->vertices[i]);
416 struct graph_edge *e;
417
418 for (e = v->succ; e; e = e->succ_next)
447f3223 419 free (e->data);
420
421 if (v->data)
422 {
423 gimple_set_uid (RDGV_STMT (v), -1);
424 free_data_refs (RDGV_DATAREFS (v));
425 free (v->data);
426 }
427 }
428
429 free_graph (rdg);
430}
431
432/* Build the Reduced Dependence Graph (RDG) with one vertex per
97463b2b 433 statement of the loop nest LOOP_NEST, and one edge per data dependence or
434 scalar dependence. */
435
436static struct graph *
36875e8f 437build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
438{
439 struct graph *rdg;
80ab0b19 440 vec<data_reference_p> datarefs;
80ab0b19 441
97463b2b 442 /* Create the RDG vertices from the stmts of the loop nest. */
07687835 443 stack_vec<gimple, 10> stmts;
97463b2b 444 stmts_from_loop (loop_nest[0], &stmts);
24f161fd 445 rdg = new_graph (stmts.length ());
80ab0b19 446 datarefs.create (10);
97463b2b 447 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
80ab0b19 448 {
97463b2b 449 datarefs.release ();
450 free_rdg (rdg);
451 return NULL;
452 }
453 stmts.release ();
97463b2b 454
455 create_rdg_flow_edges (rdg);
456 if (cd)
457 create_rdg_cd_edges (rdg, cd);
458
97463b2b 459 datarefs.release ();
460
461 return rdg;
462}
463
80ab0b19 464
dea61d92 465
b9fc0497 466enum partition_kind {
826a536d 467 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
b9fc0497 468};
30d55936 469
470typedef struct partition_s
471{
472 bitmap stmts;
80ab0b19 473 bitmap loops;
826a536d 474 bool reduction_p;
30d55936 475 enum partition_kind kind;
476 /* data-references a kind != PKIND_NORMAL partition is about. */
477 data_reference_p main_dr;
478 data_reference_p secondary_dr;
818625cf 479 tree niter;
480} *partition_t;
481
482
483/* Allocate and initialize a partition from BITMAP. */
484
485static partition_t
80ab0b19 486partition_alloc (bitmap stmts, bitmap loops)
487{
488 partition_t partition = XCNEW (struct partition_s);
489 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
80ab0b19 490 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
826a536d 491 partition->reduction_p = false;
30d55936 492 partition->kind = PKIND_NORMAL;
493 return partition;
494}
495
496/* Free PARTITION. */
497
498static void
499partition_free (partition_t partition)
500{
501 BITMAP_FREE (partition->stmts);
80ab0b19 502 BITMAP_FREE (partition->loops);
503 free (partition);
504}
505
506/* Returns true if the partition can be generated as a builtin. */
507
508static bool
509partition_builtin_p (partition_t partition)
510{
826a536d 511 return partition->kind != PKIND_NORMAL;
30d55936 512}
c61f8985 513
826a536d 514/* Returns true if the partition contains a reduction. */
515
516static bool
826a536d 517partition_reduction_p (partition_t partition)
7ad672e4 518{
826a536d 519 return partition->reduction_p;
520}
521
522/* Merge PARTITION into the partition DEST. */
523
524static void
525partition_merge_into (partition_t dest, partition_t partition)
526{
527 dest->kind = PKIND_NORMAL;
528 bitmap_ior_into (dest->stmts, partition->stmts);
529 if (partition_reduction_p (partition))
530 dest->reduction_p = true;
531}
532
533
534/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
535 the LOOP. */
536
537static bool
538ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
539{
540 imm_use_iterator imm_iter;
541 use_operand_p use_p;
542
543 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
544 {
545 gimple use_stmt = USE_STMT (use_p);
546 if (!is_gimple_debug (use_stmt)
547 && loop != loop_containing_stmt (use_stmt))
548 return true;
549 }
550
551 return false;
552}
553
554/* Returns true when STMT defines a scalar variable used after the
88af7c1a 555 loop LOOP. */
556
557static bool
88af7c1a 558stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
c07a8cb3 559{
560 def_operand_p def_p;
561 ssa_op_iter op_iter;
c07a8cb3 562
563 if (gimple_code (stmt) == GIMPLE_PHI)
564 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
565
566 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
567 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
568 return true;
c07a8cb3 569
88af7c1a 570 return false;
571}
572
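/* Editor's note (illustrative, not in the original sources): in

   | for (i = 0; i < n; i++)
   |   sum = sum + a[i];
   | use (sum);

   the statement computing SUM defines a scalar that is still used after
   the loop, so stmt_has_scalar_dependences_outside_loop returns true for
   it; classify_partition later marks such a partition as a reduction and
   the transform keeps it in the last generated loop.  */
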
573/* Return a copy of LOOP placed before LOOP. */
574
575static struct loop *
576copy_loop_before (struct loop *loop)
577{
578 struct loop *res;
579 edge preheader = loop_preheader_edge (loop);
580
581 initialize_original_copy_tables ();
582 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
30d55936 583 gcc_assert (res != NULL);
dea61d92 584 free_original_copy_tables ();
2cfc56b9 585 delete_update_ssa ();
586
587 return res;
588}
589
590/* Creates an empty basic block after LOOP. */
591
592static void
593create_bb_after_loop (struct loop *loop)
594{
595 edge exit = single_exit (loop);
596
597 if (!exit)
598 return;
599
600 split_edge (exit);
601}
602
603/* Generate code for PARTITION from the code in LOOP. The loop is
604 copied when COPY_P is true. All the statements not flagged in the
605 PARTITION bitmap are removed from the loop or from its copy. The
606 statements are indexed in sequence inside a basic block, and the
30d55936 607 basic blocks of a loop are taken in dom order. */
dea61d92 608
30d55936 609static void
610generate_loops_for_partition (struct loop *loop, partition_t partition,
611 bool copy_p)
dea61d92 612{
2fd5894f 613 unsigned i;
726a989a 614 gimple_stmt_iterator bsi;
615 basic_block *bbs;
616
617 if (copy_p)
618 {
619 loop = copy_loop_before (loop);
30d55936 620 gcc_assert (loop != NULL);
621 create_preheader (loop, CP_SIMPLE_PREHEADERS);
622 create_bb_after_loop (loop);
623 }
624
2fd5894f 625 /* Remove stmts not in the PARTITION bitmap. */
626 bbs = get_loop_body_in_dom_order (loop);
627
b03c3082 628 if (MAY_HAVE_DEBUG_STMTS)
2fd5894f 629 for (i = 0; i < loop->num_nodes; i++)
630 {
631 basic_block bb = bbs[i];
632
633 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
634 {
635 gimple phi = gsi_stmt (bsi);
636 if (!virtual_operand_p (gimple_phi_result (phi))
637 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
638 reset_debug_uses (phi);
639 }
b03c3082
JJ
640
641 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
642 {
643 gimple stmt = gsi_stmt (bsi);
644 if (gimple_code (stmt) != GIMPLE_LABEL
645 && !is_gimple_debug (stmt)
2fd5894f 646 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
647 reset_debug_uses (stmt);
648 }
649 }
650
2fd5894f 651 for (i = 0; i < loop->num_nodes; i++)
652 {
653 basic_block bb = bbs[i];
dea61d92 654
726a989a 655 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
656 {
657 gimple phi = gsi_stmt (bsi);
658 if (!virtual_operand_p (gimple_phi_result (phi))
659 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
2706a615 660 remove_phi_node (&bsi, true);
661 else
662 gsi_next (&bsi);
663 }
dea61d92 664
726a989a 665 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
666 {
667 gimple stmt = gsi_stmt (bsi);
668 if (gimple_code (stmt) != GIMPLE_LABEL
669 && !is_gimple_debug (stmt)
2fd5894f 670 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
2706a615 671 {
672 /* Choose an arbitrary path through the empty CFG part
673 that this unnecessary control stmt controls. */
674 if (gimple_code (stmt) == GIMPLE_COND)
675 {
676 gimple_cond_make_false (stmt);
677 update_stmt (stmt);
678 }
679 else if (gimple_code (stmt) == GIMPLE_SWITCH)
680 {
681 gimple_switch_set_index
682 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
683 update_stmt (stmt);
684 }
685 else
686 {
687 unlink_stmt_vdef (stmt);
688 gsi_remove (&bsi, true);
689 release_defs (stmt);
690 continue;
691 }
2706a615 692 }
36875e8f 693 gsi_next (&bsi);
2706a615 694 }
695 }
696
697 free (bbs);
698}
699
d0582dc1 700/* Build the size argument for a memory operation call. */
3661e899 701
702static tree
703build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
3661e899 704{
705 tree size;
706 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
707 fold_convert_loc (loc, sizetype, nb_iter),
708 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
709 return fold_convert_loc (loc, size_type_node, size);
710}
711
712/* Build an address argument for a memory operation call. */
713
714static tree
715build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
716{
717 tree addr_base;
718
719 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
720 addr_base = fold_convert_loc (loc, sizetype, addr_base);
721
722 /* Test for a negative stride, iterating over every element. */
723 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
724 {
725 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
726 fold_convert_loc (loc, sizetype, nb_bytes));
727 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
728 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
729 }
730
731 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
732}
733
734/* If VAL memory representation contains the same value in all bytes,
735 return that value, otherwise return -1.
736 E.g. for 0x24242424 return 0x24, for IEEE double
737 747708026454360457216.0 return 0x44, etc. */
738
739static int
740const_with_all_bytes_same (tree val)
741{
742 unsigned char buf[64];
743 int i, len;
744
745 if (integer_zerop (val)
746 || real_zerop (val)
747 || (TREE_CODE (val) == CONSTRUCTOR
748 && !TREE_CLOBBER_P (val)
749 && CONSTRUCTOR_NELTS (val) == 0))
750 return 0;
751
752 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
753 return -1;
754
755 len = native_encode_expr (val, buf, sizeof (buf));
756 if (len == 0)
757 return -1;
758 for (i = 1; i < len; i++)
759 if (buf[i] != buf[0])
760 return -1;
761 return buf[0];
762}
763
30d55936 764/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 765
cfee318d 766static void
d0582dc1 767generate_memset_builtin (struct loop *loop, partition_t partition)
dea61d92 768{
769 gimple_stmt_iterator gsi;
770 gimple stmt, fn_call;
818625cf 771 tree mem, fn, nb_bytes;
30d55936 772 location_t loc;
b6dd5261 773 tree val;
30d55936 774
d0582dc1 775 stmt = DR_STMT (partition->main_dr);
30d55936 776 loc = gimple_location (stmt);
777
778 /* The new statements will be placed before LOOP. */
779 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 780
818625cf 781 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
782 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
783 false, GSI_CONTINUE_LINKING);
784 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
785 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
786 false, GSI_CONTINUE_LINKING);
dea61d92 787
788 /* This exactly matches the pattern recognition in classify_partition. */
789 val = gimple_assign_rhs1 (stmt);
790 /* Handle constants like 0x15151515 and similarly
791 floating point constants etc. where all bytes are the same. */
792 int bytev = const_with_all_bytes_same (val);
793 if (bytev != -1)
794 val = build_int_cst (integer_type_node, bytev);
795 else if (TREE_CODE (val) == INTEGER_CST)
796 val = fold_convert (integer_type_node, val);
797 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
b6dd5261 798 {
799 gimple cstmt;
800 tree tem = make_ssa_name (integer_type_node, NULL);
801 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
802 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
803 val = tem;
804 }
805
e79983f4 806 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
b6dd5261 807 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
d0582dc1 808 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
809
810 if (dump_file && (dump_flags & TDF_DETAILS))
811 {
812 fprintf (dump_file, "generated memset");
401f3a81 813 if (bytev == 0)
b6dd5261 814 fprintf (dump_file, " zero\n");
815 else
816 fprintf (dump_file, "\n");
817 }
818}
819
820/* Generate a call to memcpy for PARTITION in LOOP. */
821
822static void
823generate_memcpy_builtin (struct loop *loop, partition_t partition)
824{
825 gimple_stmt_iterator gsi;
826 gimple stmt, fn_call;
818625cf 827 tree dest, src, fn, nb_bytes;
828 location_t loc;
829 enum built_in_function kind;
830
831 stmt = DR_STMT (partition->main_dr);
832 loc = gimple_location (stmt);
833
834 /* The new statements will be placed before LOOP. */
835 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
836
818625cf 837 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
838 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
839 false, GSI_CONTINUE_LINKING);
840 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
841 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
842 if (ptr_derefs_may_alias_p (dest, src))
843 kind = BUILT_IN_MEMMOVE;
844 else
845 kind = BUILT_IN_MEMCPY;
846
847 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
848 false, GSI_CONTINUE_LINKING);
849 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
850 false, GSI_CONTINUE_LINKING);
851 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
852 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
853 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
854
855 if (dump_file && (dump_flags & TDF_DETAILS))
856 {
857 if (kind == BUILT_IN_MEMCPY)
858 fprintf (dump_file, "generated memcpy\n");
859 else
860 fprintf (dump_file, "generated memmove\n");
861 }
862}
863
30d55936 864/* Remove and destroy the loop LOOP. */
dea61d92 865
866static void
867destroy_loop (struct loop *loop)
dea61d92 868{
869 unsigned nbbs = loop->num_nodes;
870 edge exit = single_exit (loop);
871 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 872 basic_block *bbs;
30d55936 873 unsigned i;
874
875 bbs = get_loop_body_in_dom_order (loop);
876
877 redirect_edge_pred (exit, src);
878 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
879 exit->flags |= EDGE_FALLTHRU;
880 cancel_loop_tree (loop);
881 rescan_loop_exit (exit, false, true);
dea61d92 882
30d55936 883 for (i = 0; i < nbbs; i++)
884 {
885 /* We have made sure to not leave any dangling uses of SSA
886 names defined in the loop. With the exception of virtuals.
887 Make sure we replace all uses of virtual defs that will remain
888 outside of the loop with the bare symbol as delete_basic_block
889 will release them. */
890 gimple_stmt_iterator gsi;
891 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
892 {
893 gimple phi = gsi_stmt (gsi);
ea057359 894 if (virtual_operand_p (gimple_phi_result (phi)))
895 mark_virtual_phi_result_for_renaming (phi);
896 }
897 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
898 {
899 gimple stmt = gsi_stmt (gsi);
900 tree vdef = gimple_vdef (stmt);
901 if (vdef && TREE_CODE (vdef) == SSA_NAME)
902 mark_virtual_operand_for_renaming (vdef);
903 }
904 delete_basic_block (bbs[i]);
905 }
dea61d92 906 free (bbs);
907
908 set_immediate_dominator (CDI_DOMINATORS, dest,
909 recompute_dominator (CDI_DOMINATORS, dest));
910}
911
30d55936 912/* Generates code for PARTITION. */
dea61d92 913
30d55936 914static void
d0582dc1 915generate_code_for_partition (struct loop *loop,
be6b029b 916 partition_t partition, bool copy_p)
dea61d92 917{
918 switch (partition->kind)
919 {
920 case PKIND_NORMAL:
921 /* Reductions all have to be in the last partition. */
922 gcc_assert (!partition_reduction_p (partition)
923 || !copy_p);
924 generate_loops_for_partition (loop, partition, copy_p);
925 return;
926
30d55936 927 case PKIND_MEMSET:
d0582dc1 928 generate_memset_builtin (loop, partition);
929 break;
930
931 case PKIND_MEMCPY:
932 generate_memcpy_builtin (loop, partition);
933 break;
934
935 default:
936 gcc_unreachable ();
937 }
dea61d92 938
939 /* Common tail for partitions we turn into a call. If this was the last
940 partition for which we generate code, we have to destroy the loop. */
941 if (!copy_p)
942 destroy_loop (loop);
943}
944
dea61d92 945
946/* Returns a partition with all the statements needed for computing
947 the vertex V of the RDG, also including the loop exit conditions. */
dea61d92 948
949static partition_t
950build_rdg_partition_for_vertex (struct graph *rdg, int v)
dea61d92 951{
24f161fd 952 partition_t partition = partition_alloc (NULL, NULL);
07687835 953 stack_vec<int, 3> nodes;
24f161fd 954 unsigned i;
955 int x;
956
174ec470 957 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
dea61d92 958
9771b263 959 FOR_EACH_VEC_ELT (nodes, i, x)
960 {
961 bitmap_set_bit (partition->stmts, x);
962 bitmap_set_bit (partition->loops,
963 loop_containing_stmt (RDG_STMT (rdg, x))->num);
964 }
dea61d92 965
966 return partition;
967}
968
969/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
970 For the moment we detect the memset and memcpy patterns. */
cfee318d 971
972static void
973classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
cfee318d 974{
cfee318d 975 bitmap_iterator bi;
976 unsigned i;
977 tree nb_iter;
d0582dc1 978 data_reference_p single_load, single_store;
b9fc0497 979 bool volatiles_p = false;
980
981 partition->kind = PKIND_NORMAL;
982 partition->main_dr = NULL;
983 partition->secondary_dr = NULL;
818625cf 984 partition->niter = NULL_TREE;
30d55936 985
c61f8985 986 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
987 {
988 gimple stmt = RDG_STMT (rdg, i);
989
990 if (gimple_has_volatile_ops (stmt))
b9fc0497 991 volatiles_p = true;
cfee318d 992
826a536d 993 /* If the stmt has uses outside of the loop mark it as reduction. */
9ca86fc3 994 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936 995 {
826a536d 996 partition->reduction_p = true;
997 return;
998 }
999 }
1000
1001 /* Perform general partition disqualification for builtins. */
1002 if (volatiles_p
1003 || !flag_tree_loop_distribute_patterns)
1004 return;
1005
1006 /* Detect memset and memcpy. */
1007 single_load = NULL;
1008 single_store = NULL;
1009 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1010 {
1011 gimple stmt = RDG_STMT (rdg, i);
1012 data_reference_p dr;
1013 unsigned j;
1014
1015 if (gimple_code (stmt) == GIMPLE_PHI)
1016 continue;
1017
1018 /* Any scalar stmts are ok. */
1019 if (!gimple_vuse (stmt))
1020 continue;
1021
1022 /* Otherwise just regular loads/stores. */
1023 if (!gimple_assign_single_p (stmt))
1024 return;
1025
1026 /* But exactly one store and/or load. */
9771b263 1027 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
30d55936 1028 {
1029 if (DR_IS_READ (dr))
1030 {
1031 if (single_load != NULL)
1032 return;
1033 single_load = dr;
1034 }
1035 else
1036 {
1037 if (single_store != NULL)
1038 return;
1039 single_store = dr;
1040 }
30d55936 1041 }
1042 }
1043
1044 if (!single_store)
1045 return;
1046
1047 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1048 gimple_bb (DR_STMT (single_store))))
1049 nb_iter = number_of_latch_executions (loop);
1050 else
1051 nb_iter = number_of_exit_cond_executions (loop);
1052 if (!nb_iter || nb_iter == chrec_dont_know)
1053 return;
1054
1055 if (single_store && !single_load)
1056 {
1057 gimple stmt = DR_STMT (single_store);
1058 tree rhs = gimple_assign_rhs1 (stmt);
1059 if (const_with_all_bytes_same (rhs) == -1
1060 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1061 || (TYPE_MODE (TREE_TYPE (rhs))
1062 != TYPE_MODE (unsigned_char_type_node))))
1063 return;
1064 if (TREE_CODE (rhs) == SSA_NAME
1065 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1066 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1067 return;
1068 if (!adjacent_dr_p (single_store)
1069 || !dominated_by_p (CDI_DOMINATORS,
1070 loop->latch, gimple_bb (stmt)))
1071 return;
1072 partition->kind = PKIND_MEMSET;
1073 partition->main_dr = single_store;
818625cf 1074 partition->niter = nb_iter;
1075 }
1076 else if (single_store && single_load)
1077 {
1078 gimple store = DR_STMT (single_store);
1079 gimple load = DR_STMT (single_load);
1080 /* Direct aggregate copy or via an SSA name temporary. */
1081 if (load != store
1082 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1083 return;
1084 if (!adjacent_dr_p (single_store)
1085 || !adjacent_dr_p (single_load)
1086 || !operand_equal_p (DR_STEP (single_store),
1087 DR_STEP (single_load), 0)
1088 || !dominated_by_p (CDI_DOMINATORS,
1089 loop->latch, gimple_bb (store)))
d0582dc1 1090 return;
1091 /* Now check that if there is a dependence this dependence is
1092 of a suitable form for memmove. */
6e1aa848 1093 vec<loop_p> loops = vNULL;
f20132e7 1094 ddr_p ddr;
9771b263 1095 loops.safe_push (loop);
1096 ddr = initialize_data_dependence_relation (single_load, single_store,
1097 loops);
1098 compute_affine_dependence (ddr, loop);
1099 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1100 {
1101 free_dependence_relation (ddr);
9771b263 1102 loops.release ();
1103 return;
1104 }
1105 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1106 {
1107 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1108 {
1109 free_dependence_relation (ddr);
9771b263 1110 loops.release ();
1111 return;
1112 }
1113 lambda_vector dist_v;
9771b263 1114 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1115 {
1116 int dist = dist_v[index_in_loop_nest (loop->num,
1117 DDR_LOOP_NEST (ddr))];
1118 if (dist > 0 && !DDR_REVERSED_P (ddr))
1119 {
1120 free_dependence_relation (ddr);
9771b263 1121 loops.release ();
1122 return;
1123 }
1124 }
1125 }
b7ce70b3 1126 free_dependence_relation (ddr);
9771b263 1127 loops.release ();
1128 partition->kind = PKIND_MEMCPY;
1129 partition->main_dr = single_store;
1130 partition->secondary_dr = single_load;
818625cf 1131 partition->niter = nb_iter;
d0582dc1 1132 }
1133}
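
/* Editor's illustration (a sketch, not part of the original sources) of
   the patterns classify_partition recognizes:

   | for (i = 0; i < n; i++)      | for (i = 0; i < n; i++)
   |   a[i] = 0;                  |   a[i] = b[i];

   The first loop has a single adjacent store of a value whose bytes are
   all the same (see const_with_all_bytes_same), so its partition becomes
   PKIND_MEMSET; the second has one adjacent load feeding one adjacent
   store with the same step and no forbidden dependence distance, so it
   becomes PKIND_MEMCPY (emitted as memmove if the two references may
   alias, see generate_memcpy_builtin).  */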
1134
1135/* For a data reference REF, return the declaration of its base
1136 address or NULL_TREE if the base is not determined. */
1137
1138static tree
1139ref_base_address (data_reference_p dr)
1140{
1141 tree base_address = DR_BASE_ADDRESS (dr);
1142 if (base_address
1143 && TREE_CODE (base_address) == ADDR_EXPR)
1144 return TREE_OPERAND (base_address, 0);
1145
1146 return base_address;
1147}
1148
1149/* Returns true when PARTITION1 and PARTITION2 have similar memory
1150 accesses in RDG. */
1151
1152static bool
1153similar_memory_accesses (struct graph *rdg, partition_t partition1,
1154 partition_t partition2)
cfee318d 1155{
1fa0c180 1156 unsigned i, j, k, l;
cfee318d 1157 bitmap_iterator bi, bj;
1158 data_reference_p ref1, ref2;
1159
1160 /* First check whether in the intersection of the two partitions are
1161 any loads or stores. Common loads are the situation that happens
1162 most often. */
1163 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1164 if (RDG_MEM_WRITE_STMT (rdg, i)
1165 || RDG_MEM_READS_STMT (rdg, i))
1166 return true;
cfee318d 1167
1fa0c180 1168 /* Then check all data-references against each other. */
c61f8985 1169 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1170 if (RDG_MEM_WRITE_STMT (rdg, i)
1171 || RDG_MEM_READS_STMT (rdg, i))
c61f8985 1172 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
cfee318d
SP
1173 if (RDG_MEM_WRITE_STMT (rdg, j)
1174 || RDG_MEM_READS_STMT (rdg, j))
1fa0c180 1175 {
9771b263 1176 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1177 {
1178 tree base1 = ref_base_address (ref1);
1179 if (base1)
9771b263 1180 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1181 if (base1 == ref_base_address (ref2))
1182 return true;
1183 }
1184 }
1185
1186 return false;
1187}
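
/* Editor's illustration (not in the original sources): in

   | for (i = 0; i < n; i++)
   |   {
   |     x[i] = a[i] + 1;
   |     y[i] = a[i] * 2;
   |   }

   the two seed partitions both read the array A, so
   similar_memory_accesses returns true for them and the cost model in
   distribute_loop fuses them rather than distributing the loop and
   reading A twice.  */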
1188
1189/* Aggregate several components into a useful partition that is
1190 registered in the PARTITIONS vector. Partitions will be
1191 distributed in different loops. */
1192
1193static void
83a95546 1194rdg_build_partitions (struct graph *rdg,
2fd5894f 1195 vec<gimple> starting_stmts,
83a95546 1196 vec<partition_t> *partitions)
dea61d92 1197{
83a95546 1198 bitmap processed = BITMAP_ALLOC (NULL);
1199 int i;
1200 gimple stmt;
dea61d92 1201
2fd5894f 1202 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
dea61d92 1203 {
1204 int v = rdg_vertex_for_stmt (rdg, stmt);
1205
1206 if (dump_file && (dump_flags & TDF_DETAILS))
1207 fprintf (dump_file,
1208 "ldist asked to generate code for vertex %d\n", v);
b8698a0f 1209
1210 /* If the vertex is already contained in another partition so
1211 is the partition rooted at it. */
dea61d92
SP
1212 if (bitmap_bit_p (processed, v))
1213 continue;
b8698a0f 1214
1215 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1216 bitmap_ior_into (processed, partition->stmts);
dea61d92 1217
826a536d 1218 if (dump_file && (dump_flags & TDF_DETAILS))
dea61d92 1219 {
1220 fprintf (dump_file, "ldist useful partition:\n");
1221 dump_bitmap (dump_file, partition->stmts);
dea61d92 1222 }
1223
1224 partitions->safe_push (partition);
dea61d92
SP
1225 }
1226
1227 /* All vertices should have been assigned to at least one partition now,
1228 other than vertices belonging to dead code. */
dea61d92 1229
83a95546 1230 BITMAP_FREE (processed);
1231}
1232
1233/* Dump to FILE the PARTITIONS. */
1234
1235static void
9771b263 1236dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1237{
1238 int i;
c61f8985 1239 partition_t partition;
dea61d92 1240
9771b263 1241 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1242 debug_bitmap_file (file, partition->stmts);
1243}
1244
1245/* Debug PARTITIONS. */
9771b263 1246extern void debug_rdg_partitions (vec<partition_t> );
dea61d92 1247
24e47c76 1248DEBUG_FUNCTION void
9771b263 1249debug_rdg_partitions (vec<partition_t> partitions)
1250{
1251 dump_rdg_partitions (stderr, partitions);
1252}
1253
1254/* Returns the number of read and write operations in the RDG. */
1255
1256static int
1257number_of_rw_in_rdg (struct graph *rdg)
1258{
1259 int i, res = 0;
1260
1261 for (i = 0; i < rdg->n_vertices; i++)
1262 {
1263 if (RDG_MEM_WRITE_STMT (rdg, i))
1264 ++res;
1265
1266 if (RDG_MEM_READS_STMT (rdg, i))
1267 ++res;
1268 }
1269
1270 return res;
1271}
1272
1273/* Returns the number of read and write operations in a PARTITION of
1274 the RDG. */
1275
1276static int
c61f8985 1277number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1278{
1279 int res = 0;
1280 unsigned i;
1281 bitmap_iterator ii;
1282
c61f8985 1283 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1284 {
1285 if (RDG_MEM_WRITE_STMT (rdg, i))
1286 ++res;
1287
1288 if (RDG_MEM_READS_STMT (rdg, i))
1289 ++res;
1290 }
1291
1292 return res;
1293}
1294
1295/* Returns true when one of the PARTITIONS contains all the read or
1296 write operations of RDG. */
1297
1298static bool
9771b263
DN
1299partition_contains_all_rw (struct graph *rdg,
1300 vec<partition_t> partitions)
2b8aee8e
SP
1301{
1302 int i;
c61f8985 1303 partition_t partition;
2b8aee8e
SP
1304 int nrw = number_of_rw_in_rdg (rdg);
1305
9771b263 1306 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1307 if (nrw == number_of_rw_in_partition (rdg, partition))
1308 return true;
1309
1310 return false;
1311}
1312
1313/* Compute partition dependence created by the data references in DRS1
1314 and DRS2 and modify and return DIR according to that. */
1315
1316static int
1317pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1318 vec<data_reference_p> drs1,
1319 vec<data_reference_p> drs2)
1320{
1321 data_reference_p dr1, dr2;
1322
1323 /* dependence direction - 0 is no dependence, -1 is back,
1324 1 is forth, 2 is both (we can stop then, merging will occur). */
1325 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1326 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1327 {
2cf19e26 1328 int this_dir = 1;
1329 ddr_p ddr;
1330 /* Re-shuffle data-refs to be in dominator order. */
1331 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1332 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1333 {
1334 data_reference_p tem = dr1;
1335 dr1 = dr2;
1336 dr2 = tem;
1337 this_dir = -this_dir;
1338 }
1339 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1340 compute_affine_dependence (ddr, loops[0]);
1341 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1342 this_dir = 2;
1343 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1344 {
1345 if (DDR_REVERSED_P (ddr))
1346 {
1347 data_reference_p tem = dr1;
1348 dr1 = dr2;
1349 dr2 = tem;
1350 this_dir = -this_dir;
1351 }
1352 /* Known dependences can still be unordered throughout the
1353 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
2cf19e26 1354 if (DDR_NUM_DIST_VECTS (ddr) != 1)
447f3223 1355 this_dir = 2;
1356 /* If the overlap is exact preserve stmt order. */
1357 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1358 ;
1359 else
1360 {
1361 /* Else as the distance vector is lexicographic positive
1362 swap the dependence direction. */
1363 this_dir = -this_dir;
1364 }
1365 }
1366 else
1367 this_dir = 0;
1368 free_dependence_relation (ddr);
1369 if (dir == 0)
1370 dir = this_dir;
1371 else if (dir != this_dir)
1372 return 2;
1373 }
1374 return dir;
1375}
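
/* Editor's note (illustrative, not in the original sources): if one
   partition writes A[i] and another one reads A[i] later in the body,
   pg_add_dependence_edges yields direction 1 ("forth"), distribute_loop
   then adds an edge from the writing to the reading partition, and the
   SCC computation plus the postorder sort below keep the producer
   partition in front of the consumer.  */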
1376
1377/* Compare postorder number of the partition graph vertices V1 and V2. */
1378
1379static int
1380pgcmp (const void *v1_, const void *v2_)
1381{
1382 const vertex *v1 = (const vertex *)v1_;
1383 const vertex *v2 = (const vertex *)v2_;
1384 return v2->post - v1->post;
1385}
1386
1387/* Distributes the code from LOOP in such a way that producer
1388 statements are placed before consumer statements. Tries to separate
1389 only the statements from STMTS into separate loops.
1390 Returns the number of distributed loops. */
dea61d92
SP
1391
1392static int
36875e8f 1393distribute_loop (struct loop *loop, vec<gimple> stmts,
826a536d 1394 control_dependences *cd, int *nb_calls)
dea61d92 1395{
2fd5894f 1396 struct graph *rdg;
9771b263 1397 vec<partition_t> partitions;
c61f8985 1398 partition_t partition;
be6b029b 1399 bool any_builtin;
2fd5894f 1400 int i, nbp;
447f3223
RB
1401 graph *pg = NULL;
1402 int num_sccs = 1;
dea61d92 1403
826a536d 1404 *nb_calls = 0;
07687835 1405 stack_vec<loop_p, 3> loop_nest;
2fd5894f 1406 if (!find_loop_nest (loop, &loop_nest))
07687835 1407 return 0;
2fd5894f 1408
36875e8f 1409 rdg = build_rdg (loop_nest, cd);
1410 if (!rdg)
1411 {
1412 if (dump_file && (dump_flags & TDF_DETAILS))
1413 fprintf (dump_file,
1414 "Loop %d not distributed: failed to build the RDG.\n",
1415 loop->num);
1416
1417 return 0;
1418 }
1419
1420 if (dump_file && (dump_flags & TDF_DETAILS))
1421 dump_rdg (dump_file, rdg);
1422
1423 partitions.create (3);
1424 rdg_build_partitions (rdg, stmts, &partitions);
dea61d92 1425
be6b029b 1426 any_builtin = false;
9771b263 1427 FOR_EACH_VEC_ELT (partitions, i, partition)
be6b029b
RG
1428 {
1429 classify_partition (loop, rdg, partition);
1430 any_builtin |= partition_builtin_p (partition);
1431 }
30d55936 1432
1433 /* If we are only distributing patterns but did not detect any,
1434 simply bail out. */
9fed7f3a
RB
1435 if (!flag_tree_loop_distribution
1436 && !any_builtin)
1437 {
1438 nbp = 0;
1439 goto ldist_done;
1440 }
1441
1442 /* If we are only distributing patterns fuse all partitions that
1443 were not classified as builtins. This also avoids chopping
1444 a loop into pieces, separated by builtin calls. That is, we
1445 only want no or a single loop body remaining. */
1446 partition_t into;
1447 if (!flag_tree_loop_distribution)
1448 {
1449 for (i = 0; partitions.iterate (i, &into); ++i)
1450 if (!partition_builtin_p (into))
1451 break;
1452 for (++i; partitions.iterate (i, &partition); ++i)
1453 if (!partition_builtin_p (partition))
1454 {
1455 if (dump_file && (dump_flags & TDF_DETAILS))
1456 {
1457 fprintf (dump_file, "fusing non-builtin partitions\n");
1458 dump_bitmap (dump_file, into->stmts);
1459 dump_bitmap (dump_file, partition->stmts);
1460 }
1461 partition_merge_into (into, partition);
1462 partitions.unordered_remove (i);
1463 partition_free (partition);
1464 i--;
1465 }
1466 }
1467
1468 /* Due to limitations in the transform phase we have to fuse all
1469 reduction partitions into the last partition so the existing
1470 loop will contain all loop-closed PHI nodes. */
1471 for (i = 0; partitions.iterate (i, &into); ++i)
1472 if (partition_reduction_p (into))
1473 break;
1474 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1475 if (partition_reduction_p (partition))
1476 {
1477 if (dump_file && (dump_flags & TDF_DETAILS))
1478 {
1479 fprintf (dump_file, "fusing partitions\n");
1480 dump_bitmap (dump_file, into->stmts);
1481 dump_bitmap (dump_file, partition->stmts);
1482 fprintf (dump_file, "because they have reductions\n");
1483 }
1484 partition_merge_into (into, partition);
1485 partitions.unordered_remove (i);
1486 partition_free (partition);
1487 i--;
1488 }
1489
1490 /* Apply our simple cost model - fuse partitions with similar
1491 memory accesses. */
1492 for (i = 0; partitions.iterate (i, &into); ++i)
1493 {
1494 if (partition_builtin_p (into))
1495 continue;
1496 for (int j = i + 1;
1497 partitions.iterate (j, &partition); ++j)
1498 {
1499 if (!partition_builtin_p (partition)
1500 && similar_memory_accesses (rdg, into, partition))
1501 {
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1503 {
1504 fprintf (dump_file, "fusing partitions\n");
1505 dump_bitmap (dump_file, into->stmts);
1506 dump_bitmap (dump_file, partition->stmts);
1507 fprintf (dump_file, "because they have similar "
1508 "memory accesses\n");
1509 }
826a536d 1510 partition_merge_into (into, partition);
447f3223 1511 partitions.unordered_remove (j);
1512 partition_free (partition);
1513 j--;
1514 }
1515 }
1516 }
1517
1518 /* Build the partition dependency graph. */
1519 if (partitions.length () > 1)
c014f6f5 1520 {
1521 pg = new_graph (partitions.length ());
1522 struct pgdata {
1523 partition_t partition;
1524 vec<data_reference_p> writes;
1525 vec<data_reference_p> reads;
1526 };
1527#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1528 for (i = 0; partitions.iterate (i, &partition); ++i)
1529 {
1530 vertex *v = &pg->vertices[i];
1531 pgdata *data = new pgdata;
1532 data_reference_p dr;
1533 /* FIXME - leaks. */
1534 v->data = data;
1535 bitmap_iterator bi;
1536 unsigned j;
1537 data->partition = partition;
1538 data->reads = vNULL;
1539 data->writes = vNULL;
1540 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1541 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1542 if (DR_IS_READ (dr))
1543 data->reads.safe_push (dr);
1544 else
1545 data->writes.safe_push (dr);
1546 }
1547 partition_t partition1, partition2;
1548 for (i = 0; partitions.iterate (i, &partition1); ++i)
1549 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1550 {
1551 /* dependence direction - 0 is no dependence, -1 is back,
1552 1 is forth, 2 is both (we can stop then, merging will occur). */
1553 int dir = 0;
1554 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1555 PGDATA(i)->writes,
1556 PGDATA(j)->reads);
1557 if (dir != 2)
1558 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1559 PGDATA(i)->reads,
1560 PGDATA(j)->writes);
1561 if (dir != 2)
1562 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1563 PGDATA(i)->writes,
1564 PGDATA(j)->writes);
1565 if (dir == 1 || dir == 2)
1566 add_edge (pg, i, j);
1567 if (dir == -1 || dir == 2)
1568 add_edge (pg, j, i);
1569 }
1570
1571 /* Add edges to the reduction partition (if any) to force it last. */
1572 unsigned j;
1573 for (j = 0; partitions.iterate (j, &partition); ++j)
1574 if (partition_reduction_p (partition))
1575 break;
1576 if (j < partitions.length ())
38ad2d07 1577 {
1578 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1579 if (i != j)
1580 add_edge (pg, i, j);
1581 }
1582
1583 /* Compute partitions we cannot separate and fuse them. */
1584 num_sccs = graphds_scc (pg, NULL);
1585 for (i = 0; i < num_sccs; ++i)
1586 {
1587 partition_t first;
1588 int j;
1589 for (j = 0; partitions.iterate (j, &first); ++j)
1590 if (pg->vertices[j].component == i)
38ad2d07 1591 break;
1592 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1593 if (pg->vertices[j].component == i)
38ad2d07 1594 {
1595 if (dump_file && (dump_flags & TDF_DETAILS))
1596 {
1597 fprintf (dump_file, "fusing partitions\n");
1598 dump_bitmap (dump_file, first->stmts);
1599 dump_bitmap (dump_file, partition->stmts);
1600 fprintf (dump_file, "because they are in the same "
1601 "dependence SCC\n");
1602 }
1603 partition_merge_into (first, partition);
1604 partitions[j] = NULL;
5eb010bc 1605 partition_free (partition);
447f3223 1606 PGDATA (j)->partition = NULL;
38ad2d07 1607 }
38ad2d07 1608 }
30d55936 1609
1610 /* Now order the remaining nodes in postorder. */
1611 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1612 partitions.truncate (0);
1613 for (i = 0; i < pg->n_vertices; ++i)
b9fc0497 1614 {
1615 pgdata *data = PGDATA (i);
1616 if (data->partition)
1617 partitions.safe_push (data->partition);
1618 data->reads.release ();
1619 data->writes.release ();
1620 delete data;
b9fc0497 1621 }
1622 gcc_assert (partitions.length () == (unsigned)num_sccs);
1623 free_graph (pg);
1624 }
1625
9771b263 1626 nbp = partitions.length ();
a4293fa6 1627 if (nbp == 0
1628 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1629 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1630 {
1631 nbp = 0;
1632 goto ldist_done;
1633 }
1634
1635 if (dump_file && (dump_flags & TDF_DETAILS))
1636 dump_rdg_partitions (dump_file, partitions);
1637
9771b263 1638 FOR_EACH_VEC_ELT (partitions, i, partition)
1639 {
1640 if (partition_builtin_p (partition))
1641 (*nb_calls)++;
1642 generate_code_for_partition (loop, partition, i < nbp - 1);
1643 }
dea61d92 1644
1645 ldist_done:
1646
9771b263 1647 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1648 partition_free (partition);
9771b263 1649 partitions.release ();
dea61d92 1650
dea61d92 1651 free_rdg (rdg);
826a536d 1652 return nbp - *nb_calls;
dea61d92
SP
1653}
1654
1655/* Distribute all loops in the current function. */
1656
1657static unsigned int
1658tree_loop_distribution (void)
1659{
1660 struct loop *loop;
1661 loop_iterator li;
c014f6f5 1662 bool changed = false;
1fa0c180 1663 basic_block bb;
36875e8f 1664 control_dependences *cd = NULL;
1665
1666 FOR_ALL_BB (bb)
1667 {
1668 gimple_stmt_iterator gsi;
1669 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1670 gimple_set_uid (gsi_stmt (gsi), -1);
1671 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1672 gimple_set_uid (gsi_stmt (gsi), -1);
1673 }
dea61d92 1674
1675 /* We can at the moment only distribute non-nested loops, thus restrict
1676 walking to innermost loops. */
1677 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
dea61d92 1678 {
6e1aa848 1679 vec<gimple> work_list = vNULL;
be6b029b 1680 basic_block *bbs;
0e20c89f 1681 int num = loop->num;
be6b029b 1682 unsigned int i;
a3357f7d
RG
1683
1684 /* If the loop doesn't have a single exit we will fail anyway,
1685 so do that early. */
1686 if (!single_exit (loop))
1687 continue;
dea61d92 1688
1689 /* Only optimize hot loops. */
1690 if (!optimize_loop_for_speed_p (loop))
1691 continue;
1692
be6b029b
RG
1693 /* Initialize the worklist with stmts we seed the partitions with. */
1694 bbs = get_loop_body_in_dom_order (loop);
1695 for (i = 0; i < loop->num_nodes; ++i)
1696 {
1697 gimple_stmt_iterator gsi;
deb6c11a
RB
1698 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1699 {
1700 gimple phi = gsi_stmt (gsi);
1701 if (virtual_operand_p (gimple_phi_result (phi)))
1702 continue;
1703 /* Distribute stmts which have defs that are used outside of
1704 the loop. */
1705 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1706 continue;
1707 work_list.safe_push (phi);
1708 }
1709 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1710 {
1711 gimple stmt = gsi_stmt (gsi);
1712
1713 /* If there is a stmt with side-effects bail out - we
1714 cannot and should not distribute this loop. */
1715 if (gimple_has_side_effects (stmt))
1716 {
1717 work_list.truncate (0);
1718 goto out;
1719 }
1720
1721 /* Distribute stmts which have defs that are used outside of
1722 the loop. */
1723 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1724 ;
1725 /* Otherwise only distribute stores for now. */
1726 else if (!gimple_assign_single_p (stmt)
1727 || is_gimple_reg (gimple_assign_lhs (stmt)))
1728 continue;
1729
9771b263 1730 work_list.safe_push (stmt);
1731 }
1732 }
83a95546 1733out:
be6b029b 1734 free (bbs);
c014f6f5 1735
1736 int nb_generated_loops = 0;
1737 int nb_generated_calls = 0;
1738 location_t loc = find_loop_location (loop);
9771b263 1739 if (work_list.length () > 0)
1740 {
1741 if (!cd)
1742 {
ca406576 1743 calculate_dominance_info (CDI_DOMINATORS);
1744 calculate_dominance_info (CDI_POST_DOMINATORS);
1745 cd = new control_dependences (create_edge_list ());
1746 free_dominance_info (CDI_POST_DOMINATORS);
1747 }
1748 nb_generated_loops = distribute_loop (loop, work_list, cd,
1749 &nb_generated_calls);
36875e8f 1750 }
c014f6f5 1751
826a536d 1752 if (nb_generated_loops + nb_generated_calls > 0)
dea61d92 1753 {
1754 changed = true;
1755 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1756 loc, "Loop %d distributed: split to %d loops "
1757 "and %d library calls.\n",
1758 num, nb_generated_loops, nb_generated_calls);
dea61d92 1759 }
1760 else if (dump_file && (dump_flags & TDF_DETAILS))
1761 fprintf (dump_file, "Loop %d is the same.\n", num);
dea61d92 1762
9771b263 1763 work_list.release ();
1764 }
1765
1766 if (cd)
1767 delete cd;
1768
1769 if (changed)
1770 {
525174a2 1771 mark_virtual_operands_for_renaming (cfun);
1772 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1773 }
1774
1775#ifdef ENABLE_CHECKING
1776 verify_loop_structure ();
1777#endif
1778
5006671f 1779 return 0;
1780}
1781
1782static bool
1783gate_tree_loop_distribution (void)
1784{
1785 return flag_tree_loop_distribution
1786 || flag_tree_loop_distribute_patterns;
1787}
1788
1789namespace {
1790
1791const pass_data pass_data_loop_distribution =
dea61d92 1792{
1793 GIMPLE_PASS, /* type */
1794 "ldist", /* name */
1795 OPTGROUP_LOOP, /* optinfo_flags */
1796 true, /* has_gate */
1797 true, /* has_execute */
1798 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1799 ( PROP_cfg | PROP_ssa ), /* properties_required */
1800 0, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 TODO_verify_ssa, /* todo_flags_finish */
dea61d92 1804};
1805
1806class pass_loop_distribution : public gimple_opt_pass
1807{
1808public:
1809 pass_loop_distribution (gcc::context *ctxt)
1810 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1811 {}
1812
1813 /* opt_pass methods: */
1814 bool gate () { return gate_tree_loop_distribution (); }
1815 unsigned int execute () { return tree_loop_distribution (); }
1816
1817}; // class pass_loop_distribution
1818
1819} // anon namespace
1820
1821gimple_opt_pass *
1822make_pass_loop_distribution (gcc::context *ctxt)
1823{
1824 return new pass_loop_distribution (ctxt);
1825}