/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses a Reduced Dependence Graph (RDG) built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

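/* An illustrative sketch (example only, not part of the original sources):
   in C terms, assuming arrays a, b and c of n elements, a loop like

     for (i = 1; i < n; ++i)
       {
         a[i] = 0;
         b[i] = a[i - 1] + c[i];
       }

   can be distributed into two loops, and with
   -ftree-loop-distribute-patterns the first partition is additionally
   recognized as a builtin and emitted as a single
   memset (&a[1], 0, (n - 1) * sizeof (a[0])) call while the second
   partition remains a loop (see classify_partition and
   generate_memset_builtin below).  */
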
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the scalar flow dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            if (flow_bb_inside_loop_p (loop, e->src))
              create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
};

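/* Illustrative sketches of the builtin kinds (simplified; the exact
   legality checks live in classify_partition below), for int arrays
   a and b:

     for (i = 0; i < n; ++i) a[i] = 0;         becomes PKIND_MEMSET
     for (i = 0; i < n; ++i) a[i] = b[i];      becomes PKIND_MEMCPY
     for (i = 0; i < n; ++i) a[i] = a[i + 1];  becomes PKIND_MEMMOVE

   For such partitions MAIN_DR is the single store data reference,
   SECONDARY_DR the single load (if any), and NITER the number of latch
   executions used to compute the byte count.  */
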
/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple *stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */
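/* For instance (illustration only): for a store of 4-byte elements with
   NB_ITER = 99 latch executions and PLUS_ONE set, the size computed
   below is (99 + 1) * 4 = 400 bytes.  */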

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */
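/* Illustration (not from the original sources): for a negative stride,
   if the first access is at address A, NB_BYTES bytes are touched and
   the element size is S, the region accessed is [A - NB_BYTES + S, A + S),
   so the call must be given A - NB_BYTES + S, which is the adjustment
   the negative-stride branch below computes.  */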

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
         an all bytes same memory representation.  Don't transform
         -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
        {
        case REAL_CST:
          if (!real_isneg (TREE_REAL_CST_PTR (val)))
            return 0;
          break;
        case COMPLEX_CST:
          if (!const_with_all_bytes_same (TREE_REALPART (val))
              && !const_with_all_bytes_same (TREE_IMAGPART (val)))
            return 0;
          break;
        case VECTOR_CST:
          unsigned int j;
          for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
            if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
              break;
          if (j == VECTOR_CST_NELTS (val))
            return 0;
          break;
        default:
          break;
        }
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
                             partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          tree type = TREE_TYPE (DR_REF (dr));

          /* The memset, memcpy and memmove library calls are only
             able to deal with generic address space.  */
          if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
            return;

          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
          partition->kind = PKIND_MEMMOVE;
        }
      else
        partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
                         partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple *> starting_stmts,
                      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            std::swap (dr1, dr2);
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                std::swap (dr1, dr2);
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (this_dir == 2)
          return 2;
        else if (dir == 0)
          dir = this_dir;
        else if (this_dir != 0 && dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
                 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
              changed = true;
            }
        }
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
         accesses when 1 and 2 have similar accesses but not 0 and 1
         then in the next iteration we will fail to consider merging
         1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
        i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          struct partition *partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          struct partition *first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences ();
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          bool destroy_p;
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls,
                                                &destroy_p);
          if (destroy_p)
            loops_to_be_destroyed.safe_push (loop);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
         otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
        destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}