]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-loop-distribution.c
tree-loop-distribution.c (bb_top_order_index): New.
[thirdparty/gcc.git] / gcc / tree-loop-distribution.c
CommitLineData
/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   | A(I) = B(I) + C
   | D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   | A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   | D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
c7131fb2 47#include "backend.h"
40e23961 48#include "tree.h"
c7131fb2 49#include "gimple.h"
957060b5
AM
50#include "cfghooks.h"
51#include "tree-pass.h"
c7131fb2 52#include "ssa.h"
957060b5 53#include "gimple-pretty-print.h"
c7131fb2 54#include "fold-const.h"
60393bbc 55#include "cfganal.h"
5be5c238 56#include "gimple-iterator.h"
18f429e2 57#include "gimplify-me.h"
d8a2d370 58#include "stor-layout.h"
442b4905 59#include "tree-cfg.h"
e28030cf 60#include "tree-ssa-loop-manip.h"
442b4905
AM
61#include "tree-ssa-loop.h"
62#include "tree-into-ssa.h"
7a300452 63#include "tree-ssa.h"
dea61d92 64#include "cfgloop.h"
dea61d92 65#include "tree-scalar-evolution.h"
826a536d 66#include "tree-vectorizer.h"
80ab0b19
RB
67
68
69/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
526ceb68 70struct rdg_vertex
80ab0b19
RB
71{
72 /* The statement represented by this vertex. */
355fe088 73 gimple *stmt;
80ab0b19
RB
74
75 /* Vector of data-references in this statement. */
76 vec<data_reference_p> datarefs;
77
78 /* True when the statement contains a write to memory. */
79 bool has_mem_write;
80
81 /* True when the statement contains a read from memory. */
82 bool has_mem_reads;
526ceb68 83};
80ab0b19
RB
84
85#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
86#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
87#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
88#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
89#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
90#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
91#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
92#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
93
94/* Data dependence type. */
95
96enum rdg_dep_type
97{
98 /* Read After Write (RAW). */
99 flow_dd = 'f',
100
36875e8f
RB
101 /* Control dependence (execute conditional on). */
102 control_dd = 'c'
80ab0b19
RB
103};
104
105/* Dependence information attached to an edge of the RDG. */
106
526ceb68 107struct rdg_edge
80ab0b19
RB
108{
109 /* Type of the dependence. */
110 enum rdg_dep_type type;
526ceb68 111};
80ab0b19
RB
112
113#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
80ab0b19 114
80ab0b19
RB
115/* Dump vertex I in RDG to FILE. */
116
117static void
118dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
119{
120 struct vertex *v = &(rdg->vertices[i]);
121 struct graph_edge *e;
122
123 fprintf (file, "(vertex %d: (%s%s) (in:", i,
124 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
125 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
126
127 if (v->pred)
128 for (e = v->pred; e; e = e->pred_next)
129 fprintf (file, " %d", e->src);
130
131 fprintf (file, ") (out:");
132
133 if (v->succ)
134 for (e = v->succ; e; e = e->succ_next)
135 fprintf (file, " %d", e->dest);
136
137 fprintf (file, ")\n");
138 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
139 fprintf (file, ")\n");
140}
141
142/* Call dump_rdg_vertex on stderr. */
143
144DEBUG_FUNCTION void
145debug_rdg_vertex (struct graph *rdg, int i)
146{
147 dump_rdg_vertex (stderr, rdg, i);
148}
149
80ab0b19
RB
150/* Dump the reduced dependence graph RDG to FILE. */
151
152static void
153dump_rdg (FILE *file, struct graph *rdg)
154{
80ab0b19 155 fprintf (file, "(rdg\n");
2fd5894f
RB
156 for (int i = 0; i < rdg->n_vertices; i++)
157 dump_rdg_vertex (file, rdg, i);
80ab0b19 158 fprintf (file, ")\n");
80ab0b19
RB
159}
160
161/* Call dump_rdg on stderr. */
162
163DEBUG_FUNCTION void
164debug_rdg (struct graph *rdg)
165{
166 dump_rdg (stderr, rdg);
167}
168
169static void
170dot_rdg_1 (FILE *file, struct graph *rdg)
171{
172 int i;
174ec470
RB
173 pretty_printer buffer;
174 pp_needs_newline (&buffer) = false;
175 buffer.buffer->stream = file;
80ab0b19
RB
176
177 fprintf (file, "digraph RDG {\n");
178
179 for (i = 0; i < rdg->n_vertices; i++)
180 {
181 struct vertex *v = &(rdg->vertices[i]);
182 struct graph_edge *e;
183
174ec470
RB
184 fprintf (file, "%d [label=\"[%d] ", i, i);
185 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
186 pp_flush (&buffer);
187 fprintf (file, "\"]\n");
188
80ab0b19
RB
189 /* Highlight reads from memory. */
190 if (RDG_MEM_READS_STMT (rdg, i))
191 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
192
193 /* Highlight stores to memory. */
194 if (RDG_MEM_WRITE_STMT (rdg, i))
195 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
196
197 if (v->succ)
198 for (e = v->succ; e; e = e->succ_next)
199 switch (RDGE_TYPE (e))
200 {
80ab0b19
RB
201 case flow_dd:
202 /* These are the most common dependences: don't print these. */
203 fprintf (file, "%d -> %d \n", i, e->dest);
204 break;
205
36875e8f
RB
206 case control_dd:
207 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
208 break;
209
80ab0b19
RB
210 default:
211 gcc_unreachable ();
212 }
213 }
214
215 fprintf (file, "}\n\n");
216}
217
218/* Display the Reduced Dependence Graph using dotty. */
219
220DEBUG_FUNCTION void
221dot_rdg (struct graph *rdg)
222{
174ec470 223 /* When debugging, you may want to enable the following code. */
b6d94045 224#ifdef HAVE_POPEN
c3284718 225 FILE *file = popen ("dot -Tx11", "w");
174ec470
RB
226 if (!file)
227 return;
80ab0b19 228 dot_rdg_1 (file, rdg);
174ec470
RB
229 fflush (file);
230 close (fileno (file));
231 pclose (file);
80ab0b19
RB
232#else
233 dot_rdg_1 (stderr, rdg);
234#endif
235}
236
237/* Returns the index of STMT in RDG. */
238
239static int
355fe088 240rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
80ab0b19
RB
241{
242 int index = gimple_uid (stmt);
243 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
244 return index;
245}
246
80ab0b19
RB
247/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
248 the index of DEF in RDG. */
249
250static void
251create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
252{
253 use_operand_p imm_use_p;
254 imm_use_iterator iterator;
255
256 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
257 {
258 struct graph_edge *e;
259 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
260
261 if (use < 0)
262 continue;
263
264 e = add_edge (rdg, idef, use);
265 e->data = XNEW (struct rdg_edge);
266 RDGE_TYPE (e) = flow_dd;
80ab0b19
RB
267 }
268}
269
36875e8f
RB
270/* Creates an edge for the control dependences of BB to the vertex V. */
271
272static void
273create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
274 int v, control_dependences *cd)
275{
276 bitmap_iterator bi;
277 unsigned edge_n;
278 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
279 0, edge_n, bi)
280 {
30fd2977 281 basic_block cond_bb = cd->get_edge_src (edge_n);
355fe088 282 gimple *stmt = last_stmt (cond_bb);
36875e8f
RB
283 if (stmt && is_ctrl_stmt (stmt))
284 {
285 struct graph_edge *e;
286 int c = rdg_vertex_for_stmt (rdg, stmt);
287 if (c < 0)
288 continue;
289
290 e = add_edge (rdg, c, v);
291 e->data = XNEW (struct rdg_edge);
292 RDGE_TYPE (e) = control_dd;
36875e8f
RB
293 }
294 }
295}
296
80ab0b19
RB
297/* Creates the edges of the reduced dependence graph RDG. */
298
299static void
447f3223 300create_rdg_flow_edges (struct graph *rdg)
80ab0b19
RB
301{
302 int i;
80ab0b19
RB
303 def_operand_p def_p;
304 ssa_op_iter iter;
305
80ab0b19
RB
306 for (i = 0; i < rdg->n_vertices; i++)
307 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
308 iter, SSA_OP_DEF)
309 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
447f3223 310}
36875e8f 311
447f3223
RB
312/* Creates the edges of the reduced dependence graph RDG. */
313
314static void
b71b7a8e 315create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
447f3223
RB
316{
317 int i;
318
319 for (i = 0; i < rdg->n_vertices; i++)
320 {
355fe088 321 gimple *stmt = RDG_STMT (rdg, i);
447f3223
RB
322 if (gimple_code (stmt) == GIMPLE_PHI)
323 {
324 edge_iterator ei;
325 edge e;
326 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
b71b7a8e 327 if (flow_bb_inside_loop_p (loop, e->src))
36875e8f 328 create_edge_for_control_dependence (rdg, e->src, i, cd);
447f3223
RB
329 }
330 else
331 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
332 }
80ab0b19
RB
333}
334
335/* Build the vertices of the reduced dependence graph RDG. Return false
336 if that failed. */
337
338static bool
355fe088 339create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
80ab0b19
RB
340 vec<data_reference_p> *datarefs)
341{
342 int i;
355fe088 343 gimple *stmt;
80ab0b19
RB
344
345 FOR_EACH_VEC_ELT (stmts, i, stmt)
346 {
347 struct vertex *v = &(rdg->vertices[i]);
348
349 /* Record statement to vertex mapping. */
350 gimple_set_uid (stmt, i);
351
352 v->data = XNEW (struct rdg_vertex);
353 RDGV_STMT (v) = stmt;
354 RDGV_DATAREFS (v).create (0);
355 RDGV_HAS_MEM_WRITE (v) = false;
356 RDGV_HAS_MEM_READS (v) = false;
357 if (gimple_code (stmt) == GIMPLE_PHI)
358 continue;
359
360 unsigned drp = datarefs->length ();
361 if (!find_data_references_in_stmt (loop, stmt, datarefs))
362 return false;
363 for (unsigned j = drp; j < datarefs->length (); ++j)
364 {
365 data_reference_p dr = (*datarefs)[j];
366 if (DR_IS_READ (dr))
367 RDGV_HAS_MEM_READS (v) = true;
368 else
369 RDGV_HAS_MEM_WRITE (v) = true;
370 RDGV_DATAREFS (v).safe_push (dr);
371 }
372 }
373 return true;
374}
375
3be57c56
BC
376/* Array mapping basic block's index to its topological order. */
377static int *bb_top_order_index;
378/* And size of the array. */
379static int bb_top_order_index_size;
380
381/* If X has a smaller topological sort number than Y, returns -1;
382 if greater, returns 1. */
383
384static int
385bb_top_order_cmp (const void *x, const void *y)
386{
387 basic_block bb1 = *(const basic_block *) x;
388 basic_block bb2 = *(const basic_block *) y;
389
390 gcc_assert (bb1->index < bb_top_order_index_size
391 && bb2->index < bb_top_order_index_size);
392 gcc_assert (bb1 == bb2
393 || bb_top_order_index[bb1->index]
394 != bb_top_order_index[bb2->index]);
395
396 return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
397}
398
399/* Initialize STMTS with all the statements of LOOP. We use topological
400 order to discover all statements. The order is important because
401 generate_loops_for_partition is using the same traversal for identifying
402 statements in loop copies. */
80ab0b19
RB
403
404static void
355fe088 405stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
80ab0b19
RB
406{
407 unsigned int i;
3be57c56 408 basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
80ab0b19
RB
409
410 for (i = 0; i < loop->num_nodes; i++)
411 {
412 basic_block bb = bbs[i];
80ab0b19 413
538dd0b7
DM
414 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
415 gsi_next (&bsi))
416 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
417 stmts->safe_push (bsi.phi ());
80ab0b19 418
538dd0b7
DM
419 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
420 gsi_next (&bsi))
80ab0b19 421 {
355fe088 422 gimple *stmt = gsi_stmt (bsi);
80ab0b19
RB
423 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
424 stmts->safe_push (stmt);
425 }
426 }
427
428 free (bbs);
429}
430
80ab0b19
RB
431/* Free the reduced dependence graph RDG. */
432
433static void
434free_rdg (struct graph *rdg)
435{
436 int i;
437
438 for (i = 0; i < rdg->n_vertices; i++)
439 {
440 struct vertex *v = &(rdg->vertices[i]);
441 struct graph_edge *e;
442
443 for (e = v->succ; e; e = e->succ_next)
447f3223 444 free (e->data);
80ab0b19
RB
445
446 if (v->data)
447 {
448 gimple_set_uid (RDGV_STMT (v), -1);
449 free_data_refs (RDGV_DATAREFS (v));
450 free (v->data);
451 }
452 }
453
454 free_graph (rdg);
455}
456
457/* Build the Reduced Dependence Graph (RDG) with one vertex per
97463b2b 458 statement of the loop nest LOOP_NEST, and one edge per data dependence or
80ab0b19
RB
459 scalar dependence. */
460
461static struct graph *
36875e8f 462build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
80ab0b19
RB
463{
464 struct graph *rdg;
80ab0b19 465 vec<data_reference_p> datarefs;
80ab0b19 466
97463b2b 467 /* Create the RDG vertices from the stmts of the loop nest. */
355fe088 468 auto_vec<gimple *, 10> stmts;
97463b2b 469 stmts_from_loop (loop_nest[0], &stmts);
24f161fd 470 rdg = new_graph (stmts.length ());
80ab0b19 471 datarefs.create (10);
97463b2b 472 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
80ab0b19 473 {
97463b2b 474 datarefs.release ();
80ab0b19
RB
475 free_rdg (rdg);
476 return NULL;
477 }
478 stmts.release ();
97463b2b 479
447f3223
RB
480 create_rdg_flow_edges (rdg);
481 if (cd)
b71b7a8e 482 create_rdg_cd_edges (rdg, cd, loop_nest[0]);
447f3223 483
97463b2b 484 datarefs.release ();
80ab0b19
RB
485
486 return rdg;
487}
488
80ab0b19 489
dea61d92 490
b9fc0497 491enum partition_kind {
510d73a0 492 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
b9fc0497 493};
30d55936 494
526ceb68 495struct partition
c61f8985
RG
496{
497 bitmap stmts;
80ab0b19 498 bitmap loops;
826a536d 499 bool reduction_p;
34e82342 500 bool plus_one;
30d55936 501 enum partition_kind kind;
d0582dc1
RG
502 /* data-references a kind != PKIND_NORMAL partition is about. */
503 data_reference_p main_dr;
504 data_reference_p secondary_dr;
818625cf 505 tree niter;
526ceb68 506};
c61f8985 507
c61f8985
RG
508
509/* Allocate and initialize a partition from BITMAP. */
510
526ceb68 511static partition *
80ab0b19 512partition_alloc (bitmap stmts, bitmap loops)
c61f8985 513{
526ceb68 514 partition *partition = XCNEW (struct partition);
c61f8985 515 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
80ab0b19 516 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
826a536d 517 partition->reduction_p = false;
30d55936 518 partition->kind = PKIND_NORMAL;
c61f8985
RG
519 return partition;
520}
521
522/* Free PARTITION. */
523
524static void
526ceb68 525partition_free (partition *partition)
c61f8985
RG
526{
527 BITMAP_FREE (partition->stmts);
80ab0b19 528 BITMAP_FREE (partition->loops);
c61f8985
RG
529 free (partition);
530}
531
30d55936
RG
532/* Returns true if the partition can be generated as a builtin. */
533
534static bool
526ceb68 535partition_builtin_p (partition *partition)
30d55936 536{
826a536d 537 return partition->kind != PKIND_NORMAL;
30d55936 538}
c61f8985 539
826a536d 540/* Returns true if the partition contains a reduction. */
7ad672e4
RG
541
542static bool
526ceb68 543partition_reduction_p (partition *partition)
7ad672e4 544{
826a536d 545 return partition->reduction_p;
7ad672e4
RG
546}
547
826a536d
RB
548/* Merge PARTITION into the partition DEST. */
549
550static void
526ceb68 551partition_merge_into (partition *dest, partition *partition)
826a536d
RB
552{
553 dest->kind = PKIND_NORMAL;
554 bitmap_ior_into (dest->stmts, partition->stmts);
555 if (partition_reduction_p (partition))
556 dest->reduction_p = true;
557}
558
559
c07a8cb3
RG
560/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
561 the LOOP. */
562
563static bool
564ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
565{
566 imm_use_iterator imm_iter;
567 use_operand_p use_p;
568
569 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
e665269a 570 {
355fe088 571 gimple *use_stmt = USE_STMT (use_p);
e665269a
RG
572 if (!is_gimple_debug (use_stmt)
573 && loop != loop_containing_stmt (use_stmt))
574 return true;
575 }
c07a8cb3
RG
576
577 return false;
578}
579
580/* Returns true when STMT defines a scalar variable used after the
88af7c1a 581 loop LOOP. */
c07a8cb3
RG
582
583static bool
355fe088 584stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
c07a8cb3 585{
88af7c1a
RG
586 def_operand_p def_p;
587 ssa_op_iter op_iter;
c07a8cb3 588
9ca86fc3
RG
589 if (gimple_code (stmt) == GIMPLE_PHI)
590 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
591
88af7c1a
RG
592 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
593 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
594 return true;
c07a8cb3 595
88af7c1a 596 return false;
c07a8cb3
RG
597}
598
dea61d92
SP
599/* Return a copy of LOOP placed before LOOP. */
600
601static struct loop *
602copy_loop_before (struct loop *loop)
603{
604 struct loop *res;
605 edge preheader = loop_preheader_edge (loop);
606
dea61d92 607 initialize_original_copy_tables ();
5ce9450f 608 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
30d55936 609 gcc_assert (res != NULL);
dea61d92 610 free_original_copy_tables ();
2cfc56b9 611 delete_update_ssa ();
dea61d92
SP
612
613 return res;
614}
615
616/* Creates an empty basic block after LOOP. */
617
618static void
619create_bb_after_loop (struct loop *loop)
620{
621 edge exit = single_exit (loop);
622
623 if (!exit)
624 return;
625
626 split_edge (exit);
627}
628
629/* Generate code for PARTITION from the code in LOOP. The loop is
630 copied when COPY_P is true. All the statements not flagged in the
631 PARTITION bitmap are removed from the loop or from its copy. The
632 statements are indexed in sequence inside a basic block, and the
30d55936 633 basic blocks of a loop are taken in dom order. */
dea61d92 634
30d55936 635static void
526ceb68 636generate_loops_for_partition (struct loop *loop, partition *partition,
c61f8985 637 bool copy_p)
dea61d92 638{
2fd5894f 639 unsigned i;
dea61d92
SP
640 basic_block *bbs;
641
642 if (copy_p)
643 {
644 loop = copy_loop_before (loop);
30d55936 645 gcc_assert (loop != NULL);
dea61d92
SP
646 create_preheader (loop, CP_SIMPLE_PREHEADERS);
647 create_bb_after_loop (loop);
648 }
649
2fd5894f 650 /* Remove stmts not in the PARTITION bitmap. */
dea61d92
SP
651 bbs = get_loop_body_in_dom_order (loop);
652
b03c3082 653 if (MAY_HAVE_DEBUG_STMTS)
2fd5894f 654 for (i = 0; i < loop->num_nodes; i++)
b03c3082
JJ
655 {
656 basic_block bb = bbs[i];
657
538dd0b7
DM
658 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
659 gsi_next (&bsi))
2fd5894f 660 {
538dd0b7 661 gphi *phi = bsi.phi ();
2fd5894f
RB
662 if (!virtual_operand_p (gimple_phi_result (phi))
663 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
664 reset_debug_uses (phi);
665 }
b03c3082 666
538dd0b7 667 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
b03c3082 668 {
355fe088 669 gimple *stmt = gsi_stmt (bsi);
b03c3082
JJ
670 if (gimple_code (stmt) != GIMPLE_LABEL
671 && !is_gimple_debug (stmt)
2fd5894f 672 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
b03c3082
JJ
673 reset_debug_uses (stmt);
674 }
675 }
676
2fd5894f 677 for (i = 0; i < loop->num_nodes; i++)
dea61d92
SP
678 {
679 basic_block bb = bbs[i];
dea61d92 680
538dd0b7 681 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
2fd5894f 682 {
538dd0b7 683 gphi *phi = bsi.phi ();
2fd5894f
RB
684 if (!virtual_operand_p (gimple_phi_result (phi))
685 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
2706a615 686 remove_phi_node (&bsi, true);
2fd5894f
RB
687 else
688 gsi_next (&bsi);
689 }
dea61d92 690
538dd0b7 691 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
2706a615 692 {
355fe088 693 gimple *stmt = gsi_stmt (bsi);
b03c3082
JJ
694 if (gimple_code (stmt) != GIMPLE_LABEL
695 && !is_gimple_debug (stmt)
2fd5894f 696 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
2706a615 697 {
36875e8f
RB
698 /* Choose an arbitrary path through the empty CFG part
699 that this unnecessary control stmt controls. */
538dd0b7 700 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
36875e8f 701 {
538dd0b7 702 gimple_cond_make_false (cond_stmt);
36875e8f
RB
703 update_stmt (stmt);
704 }
705 else if (gimple_code (stmt) == GIMPLE_SWITCH)
706 {
538dd0b7 707 gswitch *switch_stmt = as_a <gswitch *> (stmt);
36875e8f 708 gimple_switch_set_index
538dd0b7 709 (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
36875e8f
RB
710 update_stmt (stmt);
711 }
712 else
713 {
714 unlink_stmt_vdef (stmt);
715 gsi_remove (&bsi, true);
716 release_defs (stmt);
717 continue;
718 }
2706a615 719 }
36875e8f 720 gsi_next (&bsi);
2706a615 721 }
dea61d92
SP
722 }
723
724 free (bbs);
dea61d92
SP
725}
726
d0582dc1 727/* Build the size argument for a memory operation call. */
3661e899 728
d0582dc1 729static tree
d995e887
RB
730build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
731 bool plus_one)
3661e899 732{
d995e887
RB
733 tree size = fold_convert_loc (loc, sizetype, nb_iter);
734 if (plus_one)
735 size = size_binop (PLUS_EXPR, size, size_one_node);
736 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
d0582dc1 737 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
d995e887
RB
738 size = fold_convert_loc (loc, size_type_node, size);
739 return size;
d0582dc1
RG
740}
741
742/* Build an address argument for a memory operation call. */
743
744static tree
745build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
746{
747 tree addr_base;
748
749 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
750 addr_base = fold_convert_loc (loc, sizetype, addr_base);
751
752 /* Test for a negative stride, iterating over every element. */
753 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
754 {
755 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
756 fold_convert_loc (loc, sizetype, nb_bytes));
757 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
758 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
759 }
760
761 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
3661e899
TB
762}
763
401f3a81
JJ
764/* If VAL memory representation contains the same value in all bytes,
765 return that value, otherwise return -1.
766 E.g. for 0x24242424 return 0x24, for IEEE double
767 747708026454360457216.0 return 0x44, etc. */
768
769static int
770const_with_all_bytes_same (tree val)
771{
772 unsigned char buf[64];
773 int i, len;
774
775 if (integer_zerop (val)
401f3a81
JJ
776 || (TREE_CODE (val) == CONSTRUCTOR
777 && !TREE_CLOBBER_P (val)
778 && CONSTRUCTOR_NELTS (val) == 0))
779 return 0;
780
9e207d6f
JJ
781 if (real_zerop (val))
782 {
783 /* Only return 0 for +0.0, not for -0.0, which doesn't have
784 an all bytes same memory representation. Don't transform
785 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */
786 switch (TREE_CODE (val))
787 {
788 case REAL_CST:
789 if (!real_isneg (TREE_REAL_CST_PTR (val)))
790 return 0;
791 break;
792 case COMPLEX_CST:
793 if (!const_with_all_bytes_same (TREE_REALPART (val))
794 && !const_with_all_bytes_same (TREE_IMAGPART (val)))
795 return 0;
796 break;
797 case VECTOR_CST:
798 unsigned int j;
799 for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
66088065 800 if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
9e207d6f
JJ
801 break;
802 if (j == VECTOR_CST_NELTS (val))
803 return 0;
804 break;
805 default:
806 break;
807 }
808 }
809
401f3a81
JJ
810 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
811 return -1;
812
813 len = native_encode_expr (val, buf, sizeof (buf));
814 if (len == 0)
815 return -1;
816 for (i = 1; i < len; i++)
817 if (buf[i] != buf[0])
818 return -1;
819 return buf[0];
820}
821
30d55936 822/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 823
cfee318d 824static void
526ceb68 825generate_memset_builtin (struct loop *loop, partition *partition)
dea61d92 826{
30d55936 827 gimple_stmt_iterator gsi;
355fe088 828 gimple *stmt, *fn_call;
818625cf 829 tree mem, fn, nb_bytes;
30d55936 830 location_t loc;
b6dd5261 831 tree val;
30d55936 832
d0582dc1 833 stmt = DR_STMT (partition->main_dr);
30d55936 834 loc = gimple_location (stmt);
30d55936
RG
835
836 /* The new statements will be placed before LOOP. */
837 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 838
d995e887
RB
839 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
840 partition->plus_one);
d0582dc1
RG
841 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
842 false, GSI_CONTINUE_LINKING);
843 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
844 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
845 false, GSI_CONTINUE_LINKING);
dea61d92 846
b6dd5261
RG
847 /* This exactly matches the pattern recognition in classify_partition. */
848 val = gimple_assign_rhs1 (stmt);
401f3a81
JJ
849 /* Handle constants like 0x15151515 and similarly
850 floating point constants etc. where all bytes are the same. */
851 int bytev = const_with_all_bytes_same (val);
852 if (bytev != -1)
853 val = build_int_cst (integer_type_node, bytev);
854 else if (TREE_CODE (val) == INTEGER_CST)
855 val = fold_convert (integer_type_node, val);
856 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
b6dd5261 857 {
b731b390 858 tree tem = make_ssa_name (integer_type_node);
355fe088 859 gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
401f3a81
JJ
860 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
861 val = tem;
b6dd5261
RG
862 }
863
e79983f4 864 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
b6dd5261 865 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
d0582dc1 866 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
dea61d92
SP
867
868 if (dump_file && (dump_flags & TDF_DETAILS))
b6dd5261
RG
869 {
870 fprintf (dump_file, "generated memset");
401f3a81 871 if (bytev == 0)
b6dd5261 872 fprintf (dump_file, " zero\n");
b6dd5261
RG
873 else
874 fprintf (dump_file, "\n");
875 }
dea61d92
SP
876}
877
d0582dc1
RG
878/* Generate a call to memcpy for PARTITION in LOOP. */
879
880static void
526ceb68 881generate_memcpy_builtin (struct loop *loop, partition *partition)
d0582dc1
RG
882{
883 gimple_stmt_iterator gsi;
355fe088 884 gimple *stmt, *fn_call;
818625cf 885 tree dest, src, fn, nb_bytes;
d0582dc1
RG
886 location_t loc;
887 enum built_in_function kind;
888
889 stmt = DR_STMT (partition->main_dr);
890 loc = gimple_location (stmt);
d0582dc1
RG
891
892 /* The new statements will be placed before LOOP. */
893 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
894
d995e887
RB
895 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
896 partition->plus_one);
d0582dc1
RG
897 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
898 false, GSI_CONTINUE_LINKING);
899 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
900 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
510d73a0
RB
901 if (partition->kind == PKIND_MEMCPY
902 || ! ptr_derefs_may_alias_p (dest, src))
d0582dc1 903 kind = BUILT_IN_MEMCPY;
510d73a0
RB
904 else
905 kind = BUILT_IN_MEMMOVE;
d0582dc1
RG
906
907 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
908 false, GSI_CONTINUE_LINKING);
909 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
910 false, GSI_CONTINUE_LINKING);
911 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
912 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
913 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
914
915 if (dump_file && (dump_flags & TDF_DETAILS))
916 {
917 if (kind == BUILT_IN_MEMCPY)
918 fprintf (dump_file, "generated memcpy\n");
919 else
920 fprintf (dump_file, "generated memmove\n");
921 }
922}
923
30d55936 924/* Remove and destroy the loop LOOP. */
dea61d92 925
30d55936
RG
926static void
927destroy_loop (struct loop *loop)
dea61d92 928{
30d55936
RG
929 unsigned nbbs = loop->num_nodes;
930 edge exit = single_exit (loop);
931 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 932 basic_block *bbs;
30d55936 933 unsigned i;
dea61d92
SP
934
935 bbs = get_loop_body_in_dom_order (loop);
936
30d55936
RG
937 redirect_edge_pred (exit, src);
938 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
939 exit->flags |= EDGE_FALLTHRU;
940 cancel_loop_tree (loop);
941 rescan_loop_exit (exit, false, true);
dea61d92 942
b9aba0a0
RB
943 i = nbbs;
944 do
c014f6f5
RG
945 {
946 /* We have made sure to not leave any dangling uses of SSA
947 names defined in the loop. With the exception of virtuals.
948 Make sure we replace all uses of virtual defs that will remain
949 outside of the loop with the bare symbol as delete_basic_block
950 will release them. */
b9aba0a0 951 --i;
538dd0b7
DM
952 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
953 gsi_next (&gsi))
c014f6f5 954 {
538dd0b7 955 gphi *phi = gsi.phi ();
ea057359 956 if (virtual_operand_p (gimple_phi_result (phi)))
c014f6f5
RG
957 mark_virtual_phi_result_for_renaming (phi);
958 }
538dd0b7
DM
959 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
960 gsi_next (&gsi))
c014f6f5 961 {
355fe088 962 gimple *stmt = gsi_stmt (gsi);
c014f6f5
RG
963 tree vdef = gimple_vdef (stmt);
964 if (vdef && TREE_CODE (vdef) == SSA_NAME)
965 mark_virtual_operand_for_renaming (vdef);
966 }
967 delete_basic_block (bbs[i]);
968 }
b9aba0a0
RB
969 while (i != 0);
970
dea61d92 971 free (bbs);
30d55936
RG
972
973 set_immediate_dominator (CDI_DOMINATORS, dest,
974 recompute_dominator (CDI_DOMINATORS, dest));
dea61d92
SP
975}
976
b71b7a8e 977/* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
dea61d92 978
b71b7a8e 979static bool
d0582dc1 980generate_code_for_partition (struct loop *loop,
526ceb68 981 partition *partition, bool copy_p)
dea61d92 982{
30d55936
RG
983 switch (partition->kind)
984 {
826a536d
RB
985 case PKIND_NORMAL:
986 /* Reductions all have to be in the last partition. */
987 gcc_assert (!partition_reduction_p (partition)
988 || !copy_p);
989 generate_loops_for_partition (loop, partition, copy_p);
b71b7a8e 990 return false;
826a536d 991
30d55936 992 case PKIND_MEMSET:
d0582dc1 993 generate_memset_builtin (loop, partition);
d0582dc1
RG
994 break;
995
996 case PKIND_MEMCPY:
510d73a0 997 case PKIND_MEMMOVE:
d0582dc1 998 generate_memcpy_builtin (loop, partition);
30d55936
RG
999 break;
1000
1001 default:
1002 gcc_unreachable ();
1003 }
dea61d92 1004
826a536d
RB
1005 /* Common tail for partitions we turn into a call. If this was the last
1006 partition for which we generate code, we have to destroy the loop. */
1007 if (!copy_p)
b71b7a8e
RB
1008 return true;
1009 return false;
dea61d92
SP
1010}
1011
dea61d92 1012
24f161fd
RB
1013/* Returns a partition with all the statements needed for computing
1014 the vertex V of the RDG, also including the loop exit conditions. */
dea61d92 1015
526ceb68 1016static partition *
24f161fd 1017build_rdg_partition_for_vertex (struct graph *rdg, int v)
dea61d92 1018{
526ceb68 1019 partition *partition = partition_alloc (NULL, NULL);
00f96dc9 1020 auto_vec<int, 3> nodes;
24f161fd 1021 unsigned i;
dea61d92
SP
1022 int x;
1023
174ec470 1024 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
dea61d92 1025
9771b263 1026 FOR_EACH_VEC_ELT (nodes, i, x)
24f161fd
RB
1027 {
1028 bitmap_set_bit (partition->stmts, x);
1029 bitmap_set_bit (partition->loops,
1030 loop_containing_stmt (RDG_STMT (rdg, x))->num);
1031 }
dea61d92 1032
dea61d92
SP
1033 return partition;
1034}
1035
30d55936
RG
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   Detects the memset, memcpy and memmove patterns; everything else stays
   PKIND_NORMAL.  Also marks PARTITION as a reduction when it contains a
   statement whose result is used outside of LOOP.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  /* Reset classification state; previous passes over the partition may
     have left stale values.  */
  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  The partition must contain exactly one
     store and at most one load for a builtin replacement to be valid.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  tree type = TREE_TYPE (DR_REF (dr));

	  /* The memset, memcpy and memmove library calls are only
	     able to deal with generic address space.  */
	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
	    return;

	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  /* We need a known iteration count to size the builtin call.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* If the store executes on the path to the single exit it runs one
     more time than the latch does; remember that for code generation.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Store without load: candidate for memset.  */
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      /* The stored value must be expressible as a repeated single byte
	 (a constant with all bytes equal, or a char-mode value).  */
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* The value must be invariant in the loop.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      /* The store must hit consecutive memory and execute every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      /* Store plus load: candidate for memcpy/memmove.  */
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both references must be consecutive, advance in lock-step, and
	 the store must execute every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A forward (positive, non-reversed) dependence means the
		 regions overlap in a way neither memcpy nor memmove
		 handles; give up.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	  partition->kind = PKIND_MEMMOVE;
	}
      else
	partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1213
1fa0c180
RG
1214/* For a data reference REF, return the declaration of its base
1215 address or NULL_TREE if the base is not determined. */
1216
1217static tree
1218ref_base_address (data_reference_p dr)
1219{
1220 tree base_address = DR_BASE_ADDRESS (dr);
1221 if (base_address
1222 && TREE_CODE (base_address) == ADDR_EXPR)
1223 return TREE_OPERAND (base_address, 0);
1224
1225 return base_address;
1226}
1227
cfee318d
SP
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  Two partitions are considered similar when they
   share a memory-accessing statement, or when two of their data
   references have the same base address.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  Only statement
     pairs where both statements read or write memory are interesting;
     a match on the reference base address suffices.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		/* An undetermined base (NULL_TREE) never matches.  */
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
1267
dea61d92
SP
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  Each statement in STARTING_STMTS
   seeds at most one partition; statements already swallowed by an
   earlier partition are skipped.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  /* Statements already assigned to some partition, by RDG vertex id.  */
  auto_bitmap processed;
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */
}
1309
1310/* Dump to FILE the PARTITIONS. */
1311
1312static void
526ceb68 1313dump_rdg_partitions (FILE *file, vec<partition *> partitions)
dea61d92
SP
1314{
1315 int i;
526ceb68 1316 partition *partition;
dea61d92 1317
9771b263 1318 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1319 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
1320}
1321
1322/* Debug PARTITIONS. */
526ceb68 1323extern void debug_rdg_partitions (vec<partition *> );
dea61d92 1324
24e47c76 1325DEBUG_FUNCTION void
526ceb68 1326debug_rdg_partitions (vec<partition *> partitions)
dea61d92
SP
1327{
1328 dump_rdg_partitions (stderr, partitions);
1329}
1330
2b8aee8e
SP
1331/* Returns the number of read and write operations in the RDG. */
1332
1333static int
1334number_of_rw_in_rdg (struct graph *rdg)
1335{
1336 int i, res = 0;
1337
1338 for (i = 0; i < rdg->n_vertices; i++)
1339 {
1340 if (RDG_MEM_WRITE_STMT (rdg, i))
1341 ++res;
1342
1343 if (RDG_MEM_READS_STMT (rdg, i))
1344 ++res;
1345 }
1346
1347 return res;
1348}
1349
1350/* Returns the number of read and write operations in a PARTITION of
1351 the RDG. */
1352
1353static int
526ceb68 1354number_of_rw_in_partition (struct graph *rdg, partition *partition)
2b8aee8e
SP
1355{
1356 int res = 0;
1357 unsigned i;
1358 bitmap_iterator ii;
1359
c61f8985 1360 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1361 {
1362 if (RDG_MEM_WRITE_STMT (rdg, i))
1363 ++res;
1364
1365 if (RDG_MEM_READS_STMT (rdg, i))
1366 ++res;
1367 }
1368
1369 return res;
1370}
1371
1372/* Returns true when one of the PARTITIONS contains all the read or
1373 write operations of RDG. */
1374
1375static bool
9771b263 1376partition_contains_all_rw (struct graph *rdg,
526ceb68 1377 vec<partition *> partitions)
2b8aee8e
SP
1378{
1379 int i;
526ceb68 1380 partition *partition;
2b8aee8e
SP
1381 int nrw = number_of_rw_in_rdg (rdg);
1382
9771b263 1383 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1384 if (nrw == number_of_rw_in_partition (rdg, partition))
1385 return true;
1386
1387 return false;
1388}
1389
447f3223
RB
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR encodes
   the dependence direction between the two partitions: 0 no dependence,
   -1 backward, 1 forward, 2 both directions (caller can stop scanning,
   the partitions will have to be merged).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	/* dr1 may be swapped below; keep the original to restore for
	   the next iteration of the outer loop.  */
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Unknown dependence: must assume both directions.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* Proven independent: contributes no direction.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (this_dir == 2)
	  return 2;
	else if (dir == 0)
	  dir = this_dir;
	else if (this_dir != 0 && dir != this_dir)
	  /* Conflicting directions from different reference pairs also
	     force a merge.  */
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
1454
1455/* Compare postorder number of the partition graph vertices V1 and V2. */
1456
1457static int
1458pgcmp (const void *v1_, const void *v2_)
1459{
1460 const vertex *v1 = (const vertex *)v1_;
1461 const vertex *v2 = (const vertex *)v2_;
1462 return v2->post - v1->post;
1463}
2fd5894f
RB
1464
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  CD holds the control dependences used
   to build the RDG; *NB_CALLS is set to the number of partitions that
   became library calls.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Seed one partition per statement in STMTS, then classify each as a
     builtin candidate (memset/memcpy/memmove) or normal code.  */
  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  Each vertex carries its
     partition plus the partition's read and write data references.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	struct partition *partition;
	vec<data_reference_p> writes;
	vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add dependence edges between every pair of partitions based on
	 write-read, read-write and write-write reference pairs.  */
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  Partitions
	 in the same dependence SCC must stay together.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Nothing to do if distribution degenerated to a single normal
     partition, or if some partition still covers all memory ops.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
1739
/* Distribute all loops in the current function.  */

namespace {

/* Pass descriptor for the "ldist" GIMPLE pass.  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when either full distribution or pattern-only distribution
     (memset/memcpy recognition) is enabled.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	  || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
1774
/* Entry point of the loop distribution pass.  Walks all innermost
   loops of FUN, seeds distribution worklists from statements whose
   results are used outside the loop and from stores, and calls
   distribute_loop on each candidate.  Always returns 0.  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* Compute topological order for basic blocks.  Topological order is
     needed because data dependence is computed for data references in
     lexicographical order.  */
  if (bb_top_order_index == NULL)
    {
      int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));

      bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
      bb_top_order_index_size
	= pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
      for (int i = 0; i < bb_top_order_index_size; i++)
	bb_top_order_index[rpo[i]] = i;

      free (rpo);
    }

  /* Invalidate all statement uids; the RDG code assigns fresh ones.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once, and shared by
	     all loops processed in this invocation.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  /* Destruction is deferred; see comment near the loop below.  */
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (bb_top_order_index != NULL)
    {
      free (bb_top_order_index);
      bb_top_order_index = NULL;
      bb_top_order_index_size = 0;
    }

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up refering to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}
1936
27a4cd48
DM
1937} // anon namespace
1938
/* Factory used by the pass manager to create the loop distribution
   pass for context CTXT.  */

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}