/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |  A(I) = B(I) + C
   |  D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |  A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |  D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

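/* Illustrative sketch only (not used by the code below): the RDG for
   the example above has one vertex per statement and one edge per
   dependence,

       [0]  A(I) = B(I) + C      writes A(I)
       [1]  D(I) = A(I-1)*E      reads A(I-1)

       0 -> 1   flow (RAW) dependence on A, carried by the loop

   Since the only dependence between the two statements is loop
   carried (distance 1), each statement can go into its own partition
   and the loop can be split as shown.  */
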
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Write After Read (WAR).  */
  anti_dd = 'a',

  /* Write After Write (WAW).  */
  output_dd = 'o',

  /* Read After Read (RAR).  */
  input_dd = 'i',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

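/* For illustration only: given the statements

       S1: a[i] = b[i];
       S2: c[i] = a[i];
       S3: a[i] = d[i];

   S1 -> S2 is a flow (RAW) dependence on a[i], S2 -> S3 is an anti
   (WAR) dependence, S1 -> S3 is an output (WAW) dependence, and two
   reads of b[i] would form an input (RAR) dependence.  */
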
/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;

  /* Levels of the dependence: the depth of the loops that carry the
     dependence.  */
  unsigned level;

  /* Dependence relation between data dependences, NULL when one of
     the vertices is a scalar.  */
  ddr_p relation;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E)       ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E)    ((struct rdg_edge *) ((E)->data))->relation

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case input_dd:
              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
              break;

            case output_dd:
              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
              break;

            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case anti_dd:
              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#if 1
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates an edge in RDG for each distance vector from DDR.  The
   order that we keep track of in the RDG is the order in which
   statements have to be executed.  */

static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
  struct graph_edge *e;
  int va, vb;
  data_reference_p dra = DDR_A (ddr);
  data_reference_p drb = DDR_B (ddr);
  unsigned level = ddr_dependence_level (ddr);

  /* For non scalar dependences, when the dependence is REVERSED,
     statement B has to be executed before statement A.  */
  if (level > 0
      && !DDR_REVERSED_P (ddr))
    {
      data_reference_p tmp = dra;
      dra = drb;
      drb = tmp;
    }

  va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
  vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));

  if (va < 0 || vb < 0)
    return;

  e = add_edge (rdg, va, vb);
  e->data = XNEW (struct rdg_edge);

  RDGE_LEVEL (e) = level;
  RDGE_RELATION (e) = ddr;

  /* Determines the type of the data dependence.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = input_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = output_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = flow_dd;
  else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = anti_dd;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
      RDGE_RELATION (e) = NULL;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
          RDGE_RELATION (e) = NULL;
        }
    }
}

/* Creates the edges of the reduced dependence graph RDG.  */

static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
{
  int i;
  struct data_dependence_relation *ddr;
  def_operand_p def_p;
  ssa_op_iter iter;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
      create_rdg_edge_for_ddr (rdg, ddr);
    else
      free_dependence_relation (ddr);

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);

  if (cd)
    for (i = 0; i < rdg->n_vertices; i++)
      {
        gimple stmt = RDG_STMT (rdg, i);
        if (gimple_code (stmt) == GIMPLE_PHI)
          {
            edge_iterator ei;
            edge e;
            FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
              create_edge_for_control_dependence (rdg, e->src, i, cd);
          }
        else
          create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
      }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
          stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest, and one edge per data dependence or
   scalar dependence.  */

struct graph *
build_empty_rdg (int n_stmts)
{
  struct graph *rdg = new_graph (n_stmts);
  return rdg;
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        {
          free_dependence_relation (RDGE_RELATION (e));
          free (e->data);
        }

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<gimple> stmts;
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependence_relations;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stmts.create (10);
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = build_empty_rdg (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      stmts.release ();
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  /* Create the RDG edges from the data dependences in the loop nest.  */
  dependence_relations.create (100);
  if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
                                false)
      || !known_dependences_p (dependence_relations))
    {
      free_dependence_relations (dependence_relations);
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  create_rdg_edges (rdg, dependence_relations, cd);
  dependence_relations.release ();
  datarefs.release ();

  return rdg;
}


enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

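/* Illustrative note: for a loop like

       for (i = 0; i < n; i++)
         sum = sum + a[i];
       use (sum);

   the statement computing SUM has a scalar use after the loop, so the
   predicate above returns true for it; classify_partition below then
   marks its partition as a reduction, and such partitions are fused
   into the last generated loop.  */
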
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple phi = gsi_stmt (bsi);
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gimple phi = gsi_stmt (bsi);
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gimple_code (stmt) == GIMPLE_COND)
                {
                  gimple_cond_make_false (stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gimple_switch_set_index
                      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

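/* Illustrative example: for a store to A[i] walking downwards with
   DR_STEP == -4, DR_OFFSET/DR_INIT point at the highest accessed
   element.  With nb_bytes == n * 4 the adjustment above yields

       addr = &A[highest] - n*4 + 4 == &A[lowest]

   so the generated memset/memcpy always starts at the lowest address
   regardless of the direction of the original loop.  */
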
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

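/* Illustrative example: a partition whose only memory effect is

       for (i = 0; i < n; i++)
         a[i] = 0;

   is replaced by a single call emitted in the loop preheader,

       __builtin_memset (&a[0], 0, n * sizeof (a[0]));

   For non-zero stores the value byte is derived with
   const_with_all_bytes_same when possible.  */
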
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

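/* Illustrative example: a copy loop

       for (i = 0; i < n; i++)
         a[i] = b[i];

   becomes __builtin_memcpy (&a[0], &b[0], n * sizeof (a[0])) when the
   two references cannot alias, and __builtin_memmove otherwise.  */
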
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (partition->loops, loop->num);
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap processed)
{
  unsigned i;
  vec<int> nodes;
  nodes.create (3);
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    if (bitmap_set_bit (processed, x))
      rdg_flag_vertex (rdg, x, partition);

  nodes.release ();
}

/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  bitmap processed = BITMAP_ALLOC (NULL);
  rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
  BITMAP_FREE (processed);
  return partition;
}

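/* Illustrative note: seeding from the vertex of a store like
   "a[i] = t", the reverse DFS above pulls into the partition
   everything the store depends on: the statements computing T, the
   induction variable updates, and (via control dependence edges) the
   loop exit condition.  */
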
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                       gimple_bb (DR_STMT (single_store))))
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
    }
}

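/* Illustrative summary: a partition whose only memory effect is
   "a[i] = 0" (or any value whose bytes are all equal) is classified
   PKIND_MEMSET; a single-load/single-store copy "a[i] = b[i]" with a
   distribution-friendly dependence is classified PKIND_MEMCPY and
   later emitted as memcpy or memmove; everything else stays
   PKIND_NORMAL.  */
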
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;
  partition_t partition = partition_alloc (NULL, NULL);

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      partition_t np;
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (partition->stmts, np->stmts);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
      partition = partition_alloc (NULL, NULL);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  if (!bitmap_empty_p (partition->stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "remaining partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }
  else
    partition_free (partition);

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}


/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  vec<loop_p> loop_nest;
  vec<partition_t> partitions;
  partition_t partition;
  bool any_builtin;
  int i, nbp;

  *nb_calls = 0;
  loop_nest.create (3);
  if (!find_loop_nest (loop, &loop_nest))
    {
      loop_nest.release ();
      return 0;
    }

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      loop_nest.release ();
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  partitions.create (3);
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we did not detect any builtin but are not asked to apply
     regular loop distribution simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  partition_t into;
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.ordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* If we are only distributing patterns fuse all partitions that
     were not properly classified as builtins.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ???  Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; partitions.iterate (i, &into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; partitions.iterate (i, &partition); ++i)
            if (!partition_builtin_p (partition))
              {
                partition_merge_into (into, partition);
                partitions.ordered_remove (i);
                partition_free (partition);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < partitions.length ());
    }

  /* Fuse all reduction partitions into the last.  */
  if (partitions.length () > 1)
    {
      partition_t into = partitions.last ();
      for (i = partitions.length () - 2; i >= 0; --i)
        {
          partition_t what = partitions[i];
          if (partition_reduction_p (what))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, what->stmts);
                  fprintf (dump_file, "because the latter has reductions\n");
                }
              partition_merge_into (into, what);
              partitions.ordered_remove (i);
              partition_free (what);
            }
        }
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);
  partitions.release ();

  free_rdg (rdg);
  loop_nest.release ();
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple phi = gsi_stmt (gsi);
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_assign_single_p (stmt)
                       || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);

      work_list.release ();
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}