/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |  A(I) = B(I) + C
   |  D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |  A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |  D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
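
/* A note on the example above: the only dependence between the two
   statements is the loop-carried read-after-write on A, i.e. A(I) is
   written in one iteration and read back as A(I-1) in the next.
   Emitting the producer loop before the consumer loop preserves that
   dependence, and since neither new loop carries a dependence cycle of
   its own, both can become DOALL loops.  */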

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-vectorizer.h"

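/* Upper bound on the number of data references in a single loop for
   which we still attempt dependence analysis; distribute_loop gives up
   on loops with more memory references than this (controlled by
   PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS).  */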
#define MAX_DATAREFS_NUM \
  ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))

/* Hashtable helpers.  */

struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
{
  static inline hashval_t hash (const data_dependence_relation *);
  static inline bool equal (const data_dependence_relation *,
			    const data_dependence_relation *);
};

/* Hash function for data dependence.  */

inline hashval_t
ddr_hasher::hash (const data_dependence_relation *ddr)
{
  inchash::hash h;
  h.add_ptr (DDR_A (ddr));
  h.add_ptr (DDR_B (ddr));
  return h.end ();
}

/* Hash table equality function for data dependence.  */

inline bool
ddr_hasher::equal (const data_dependence_relation *ddr1,
		   const data_dependence_relation *ddr2)
{
  return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
}

/* The loop (nest) to be distributed.  */
static vec<loop_p> loop_nest;

/* Vector of data references in the loop to be distributed.  */
static vec<data_reference_p> datarefs_vec;

/* Store index of data reference in aux field.  */
#define DR_INDEX(dr)      ((uintptr_t) (dr)->aux)

/* Hash table for data dependence relation in the loop to be distributed.  */
static hash_table<ddr_hasher> ddrs_table (389);


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)		     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)	     ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V)	     ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V)	     ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)	     RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)	     RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I)   RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I)   RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)		    ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG from the control dependence info CD, for statements of LOOP.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs_vec.length ();
      if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
	return false;
      for (unsigned j = drp; j < datarefs_vec.length (); ++j)
	{
	  data_reference_p dr = datarefs_vec[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Array mapping a basic block's index to its topological order.  */
static int *bb_top_order_index;
/* And size of the array.  */
static int bb_top_order_index_size;

/* If X has a smaller topological sort number than Y, returns -1;
   if greater, returns 1.  */

static int
bb_top_order_cmp (const void *x, const void *y)
{
  basic_block bb1 = *(const basic_block *) x;
  basic_block bb2 = *(const basic_block *) y;

  gcc_assert (bb1->index < bb_top_order_index_size
	      && bb2->index < bb_top_order_index_size);
  gcc_assert (bb1 == bb2
	      || bb_top_order_index[bb1->index]
		 != bb_top_order_index[bb2->index]);

  return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
}

/* Initialize STMTS with all the statements of LOOP.  We use topological
   order to discover all statements.  The order is important because
   generate_loops_for_partition uses the same traversal for identifying
   statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  (RDGV_DATAREFS (v)).release ();
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
   LOOP, and one edge per flow dependence or control dependence from control
   dependence CD.  While visiting each statement, data references are also
   collected and recorded in the global vector DATAREFS_VEC.  */

static struct graph *
build_rdg (struct loop *loop, control_dependences *cd)
{
  struct graph *rdg;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop, &stmts);
  rdg = new_graph (stmts.length ());
  if (!create_rdg_vertices (rdg, stmts, loop))
    {
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop);

  return rdg;
}


/* Kind of distributed loop.  */
enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

/* Type of distributed loop.  */
enum partition_type {
  /* The distributed loop can be executed in parallel.  */
  PTYPE_PARALLEL = 0,
  /* The distributed loop has to be executed sequentially.  */
  PTYPE_SEQUENTIAL
};

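/* For instance, in the example at the top of this file each of the two
   DOALL loops would form a PTYPE_PARALLEL partition, whereas a partition
   containing both statements would be PTYPE_SEQUENTIAL because of the
   loop-carried dependence on A (a distance-1 dependence cycle, see
   data_dep_in_cycle_p below).  */
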
/* Partition for loop distribution.  */
struct partition
{
  /* Statements of the partition.  */
  bitmap stmts;
  /* Loops of the partition.  */
  bitmap loops;
  /* True if the partition defines a variable which is used outside of
     the loop.  */
  bool reduction_p;
  /* For a builtin partition, true if it executes one more iteration
     than the number of loop (latch) iterations.  */
  bool plus_one;
  enum partition_kind kind;
  enum partition_type type;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Number of loop (latch) iterations.  */
  tree niter;
  /* Data references in the partition.  */
  bitmap datarefs;
};


/* Allocate and initialize a partition.  */

static partition *
partition_alloc (void)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = BITMAP_ALLOC (NULL);
  partition->loops = BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  partition->datarefs = BITMAP_ALLOC (NULL);
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  BITMAP_FREE (partition->datarefs);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Partitions are fused for different reasons.  */
enum fuse_type
{
  FUSE_NON_BUILTIN = 0,
  FUSE_REDUCTION = 1,
  FUSE_SHARE_REF = 2,
  FUSE_SAME_SCC = 3,
  FUSE_FINALIZE = 4
};

/* Descriptions of the different fusing reasons.  */
static const char *fuse_message[] = {
  "they are non-builtins",
  "they have reductions",
  "they have shared memory refs",
  "they are in the same dependence scc",
  "there is no point to distribute loop"};
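
/* Note: fuse_message is indexed by values of enum fuse_type above (see
   partition_merge_into); keep the enum and the array in sync.  */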

static void
update_type_for_merge (struct graph *, partition *, partition *);

/* Merge PARTITION into the partition DEST.  RDG is the reduced dependence
   graph and, if it is non-NULL, we update the type of the result
   partition.  */

static void
partition_merge_into (struct graph *rdg, partition *dest,
		      partition *partition, enum fuse_type ft)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
      fprintf (dump_file, "  Part 1: ");
      dump_bitmap (dump_file, dest->stmts);
      fprintf (dump_file, "  Part 2: ");
      dump_bitmap (dump_file, partition->stmts);
    }

  dest->kind = PKIND_NORMAL;
  if (dest->type == PTYPE_PARALLEL)
    dest->type = partition->type;

  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;

  /* Further check if any data dependence prevents us from executing the
     new partition in parallel.  */
  if (dest->type == PTYPE_PARALLEL && rdg != NULL)
    update_type_for_merge (rdg, dest, partition);

  bitmap_ior_into (dest->datarefs, partition->datarefs);
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt,
		     CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be
   destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}

/* Return the data dependence relation for data references A and B.  The two
   data references must be in lexicographic order with respect to the reduced
   dependence graph RDG.  We first try to find the ddr in the global ddr hash
   table; if it isn't there, compute the ddr and cache it.  */

static data_dependence_relation *
get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
{
  struct data_dependence_relation ent, **slot;
  struct data_dependence_relation *ddr;

  gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
  gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a))
	      <= rdg_vertex_for_stmt (rdg, DR_STMT (b)));
  ent.a = a;
  ent.b = b;
  slot = ddrs_table.find_slot (&ent, INSERT);
  if (*slot == NULL)
    {
      ddr = initialize_data_dependence_relation (a, b, loop_nest);
      compute_affine_dependence (ddr, loop_nest[0]);
      *slot = ddr;
    }

  return *slot;
}

/* In the reduced dependence graph RDG for loop distribution, return true if
   the dependence between references DR1 and DR2 leads to a dependence cycle
   and that cycle can't be resolved by a runtime alias check.  */

static bool
data_dep_in_cycle_p (struct graph *rdg,
		     data_reference_p dr1, data_reference_p dr2)
{
  struct data_dependence_relation *ddr;

  /* Re-shuffle data-refs to be in topological order.  */
  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
      > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
    std::swap (dr1, dr2);

  ddr = get_data_dependence (rdg, dr1, dr2);

  /* In case of no data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;
  /* For an unknown data dependence, or a known data dependence that can't
     be expressed in a classic distance vector, check whether it can be
     resolved by a runtime alias check.  If so, treat the dependence as one
     that won't introduce a dependence cycle.  */
  else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
	   || DDR_NUM_DIST_VECTS (ddr) == 0)
    return !runtime_alias_check_p (ddr, NULL, true);
  else if (DDR_NUM_DIST_VECTS (ddr) > 1)
    return true;
  else if (DDR_REVERSED_P (ddr)
	   || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
    return false;

  return true;
}

/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
   PARTITION1's type after merging PARTITION2 into PARTITION1.  */

static void
update_type_for_merge (struct graph *rdg,
		       partition *partition1, partition *partition2)
{
  unsigned i, j;
  bitmap_iterator bi, bj;
  data_reference_p dr1, dr2;

  EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
    {
      unsigned start = (partition1 == partition2) ? i + 1 : 0;

      dr1 = datarefs_vec[i];
      EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj)
	{
	  dr2 = datarefs_vec[j];
	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
	    continue;

	  /* Partition can only be executed sequentially if there is any
	     data dependence cycle.  */
	  if (data_dep_in_cycle_p (rdg, dr1, dr2))
	    {
	      partition1->type = PTYPE_SEQUENTIAL;
	      return;
	    }
	}
    }
}

/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc ();
  auto_vec<int, 3> nodes;
  unsigned i, j;
  int x;
  data_reference_p dr;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);

      for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j)
	{
	  unsigned idx = (unsigned) DR_INDEX (dr);
	  gcc_assert (idx < datarefs_vec.length ());

	  /* Partition can only be executed sequentially if there is any
	     unknown data reference.  */
	  if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr)
	      || !DR_INIT (dr) || !DR_STEP (dr))
	    partition->type = PTYPE_SEQUENTIAL;

	  bitmap_set_bit (partition->datarefs, idx);
	}
    }

  if (partition->type == PTYPE_SEQUENTIAL)
    return partition;

  /* Further check if any data dependence prevents us from executing the
     partition in parallel.  */
  update_type_for_merge (rdg, partition, partition);

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  Bitmap
   STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.  */

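/* For example, a partition containing only a store loop of the form

     for (i = 0; i < n; ++i)
       a[i] = 0;

   would be classified as PKIND_MEMSET and later emitted as a single
   call to memset by generate_memset_builtin.  */
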
30d55936 1260static void
4a52eb19
BC
1261classify_partition (loop_p loop, struct graph *rdg, partition *partition,
1262 bitmap stmt_in_all_partitions)
cfee318d 1263{
cfee318d 1264 bitmap_iterator bi;
30d55936
RG
1265 unsigned i;
1266 tree nb_iter;
d0582dc1 1267 data_reference_p single_load, single_store;
4a52eb19 1268 bool volatiles_p = false, plus_one = false, has_reduction = false;
30d55936
RG
1269
1270 partition->kind = PKIND_NORMAL;
d0582dc1
RG
1271 partition->main_dr = NULL;
1272 partition->secondary_dr = NULL;
818625cf 1273 partition->niter = NULL_TREE;
d995e887 1274 partition->plus_one = false;
30d55936 1275
c61f8985 1276 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
30d55936 1277 {
355fe088 1278 gimple *stmt = RDG_STMT (rdg, i);
30d55936
RG
1279
1280 if (gimple_has_volatile_ops (stmt))
b9fc0497 1281 volatiles_p = true;
cfee318d 1282
4a52eb19
BC
1283 /* If the stmt is not included by all partitions and there is uses
1284 outside of the loop, then mark the partition as reduction. */
9ca86fc3 1285 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936 1286 {
4a52eb19
BC
1287 /* Due to limitation in the transform phase we have to fuse all
1288 reduction partitions. As a result, this could cancel valid
1289 loop distribution especially for loop that induction variable
1290 is used outside of loop. To workaround this issue, we skip
1291 marking partition as reudction if the reduction stmt belongs
1292 to all partitions. In such case, reduction will be computed
1293 correctly no matter how partitions are fused/distributed. */
1294 if (!bitmap_bit_p (stmt_in_all_partitions, i))
1295 {
1296 partition->reduction_p = true;
1297 return;
1298 }
1299 has_reduction = true;
30d55936
RG
1300 }
1301 }
1302
b9fc0497
RB
1303 /* Perform general partition disqualification for builtins. */
1304 if (volatiles_p
4a52eb19
BC
1305 /* Simple workaround to prevent classifying the partition as builtin
1306 if it contains any use outside of loop. */
1307 || has_reduction
b9fc0497
RB
1308 || !flag_tree_loop_distribute_patterns)
1309 return;
1310
d0582dc1
RG
1311 /* Detect memset and memcpy. */
1312 single_load = NULL;
1313 single_store = NULL;
30d55936
RG
1314 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1315 {
355fe088 1316 gimple *stmt = RDG_STMT (rdg, i);
d0582dc1
RG
1317 data_reference_p dr;
1318 unsigned j;
30d55936
RG
1319
1320 if (gimple_code (stmt) == GIMPLE_PHI)
1321 continue;
1322
1323 /* Any scalar stmts are ok. */
1324 if (!gimple_vuse (stmt))
1325 continue;
1326
d0582dc1
RG
1327 /* Otherwise just regular loads/stores. */
1328 if (!gimple_assign_single_p (stmt))
1329 return;
1330
1331 /* But exactly one store and/or load. */
9771b263 1332 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
30d55936 1333 {
d002d099
JJ
1334 tree type = TREE_TYPE (DR_REF (dr));
1335
1336 /* The memset, memcpy and memmove library calls are only
1337 able to deal with generic address space. */
1338 if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
1339 return;
1340
d0582dc1
RG
1341 if (DR_IS_READ (dr))
1342 {
1343 if (single_load != NULL)
1344 return;
1345 single_load = dr;
1346 }
1347 else
1348 {
1349 if (single_store != NULL)
1350 return;
1351 single_store = dr;
1352 }
30d55936 1353 }
30d55936
RG
1354 }
1355
818625cf
RB
1356 if (!single_store)
1357 return;
1358
d995e887 1359 nb_iter = number_of_latch_executions (loop);
818625cf
RB
1360 if (!nb_iter || nb_iter == chrec_dont_know)
1361 return;
d995e887
RB
1362 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1363 gimple_bb (DR_STMT (single_store))))
1364 plus_one = true;
818625cf 1365
d0582dc1
RG
1366 if (single_store && !single_load)
1367 {
355fe088 1368 gimple *stmt = DR_STMT (single_store);
d0582dc1 1369 tree rhs = gimple_assign_rhs1 (stmt);
401f3a81
JJ
1370 if (const_with_all_bytes_same (rhs) == -1
1371 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1372 || (TYPE_MODE (TREE_TYPE (rhs))
1373 != TYPE_MODE (unsigned_char_type_node))))
d0582dc1
RG
1374 return;
1375 if (TREE_CODE (rhs) == SSA_NAME
1376 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1377 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1378 return;
ca406576
RB
1379 if (!adjacent_dr_p (single_store)
1380 || !dominated_by_p (CDI_DOMINATORS,
1381 loop->latch, gimple_bb (stmt)))
d0582dc1
RG
1382 return;
1383 partition->kind = PKIND_MEMSET;
1384 partition->main_dr = single_store;
818625cf 1385 partition->niter = nb_iter;
d995e887 1386 partition->plus_one = plus_one;
d0582dc1
RG
1387 }
1388 else if (single_store && single_load)
1389 {
355fe088
TS
1390 gimple *store = DR_STMT (single_store);
1391 gimple *load = DR_STMT (single_load);
d0582dc1
RG
1392 /* Direct aggregate copy or via an SSA name temporary. */
1393 if (load != store
1394 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1395 return;
1396 if (!adjacent_dr_p (single_store)
1397 || !adjacent_dr_p (single_load)
1398 || !operand_equal_p (DR_STEP (single_store),
ca406576
RB
1399 DR_STEP (single_load), 0)
1400 || !dominated_by_p (CDI_DOMINATORS,
1401 loop->latch, gimple_bb (store)))
d0582dc1 1402 return;
f20132e7
RG
1403 /* Now check that if there is a dependence this dependence is
1404 of a suitable form for memmove. */
17c5cbdf 1405 ddr_p ddr = get_data_dependence (rdg, single_load, single_store);
f20132e7 1406 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
17c5cbdf
BC
1407 return;
1408
f20132e7
RG
1409 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1410 {
1411 if (DDR_NUM_DIST_VECTS (ddr) == 0)
17c5cbdf
BC
1412 return;
1413
f20132e7 1414 lambda_vector dist_v;
9771b263 1415 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
f20132e7
RG
1416 {
1417 int dist = dist_v[index_in_loop_nest (loop->num,
1418 DDR_LOOP_NEST (ddr))];
1419 if (dist > 0 && !DDR_REVERSED_P (ddr))
17c5cbdf 1420 return;
f20132e7 1421 }
510d73a0 1422 partition->kind = PKIND_MEMMOVE;
f20132e7 1423 }
510d73a0
RB
1424 else
1425 partition->kind = PKIND_MEMCPY;
d0582dc1
RG
1426 partition->main_dr = single_store;
1427 partition->secondary_dr = single_load;
818625cf 1428 partition->niter = nb_iter;
d995e887 1429 partition->plus_one = plus_one;
d0582dc1 1430 }
cfee318d
SP
1431}
1432
95f7d11b
BC
1433/* Returns true when PARTITION1 and PARTITION2 access the same memory
1434 object in RDG. */
cfee318d
SP
1435
1436static bool
95f7d11b
BC
1437share_memory_accesses (struct graph *rdg,
1438 partition *partition1, partition *partition2)
cfee318d 1439{
95f7d11b 1440 unsigned i, j;
cfee318d 1441 bitmap_iterator bi, bj;
95f7d11b 1442 data_reference_p dr1, dr2;
1fa0c180
RG
1443
1444 /* First check whether in the intersection of the two partitions are
1445 any loads or stores. Common loads are the situation that happens
1446 most often. */
1447 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1448 if (RDG_MEM_WRITE_STMT (rdg, i)
1449 || RDG_MEM_READS_STMT (rdg, i))
1450 return true;
cfee318d 1451
95f7d11b
BC
1452 /* Then check whether the two partitions access the same memory object. */
1453 EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi)
1454 {
1455 dr1 = datarefs_vec[i];
1456
1457 if (!DR_BASE_ADDRESS (dr1)
1458 || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1))
1459 continue;
1460
1461 EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj)
1462 {
1463 dr2 = datarefs_vec[j];
1464
1465 if (!DR_BASE_ADDRESS (dr2)
1466 || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2))
1467 continue;
1468
1469 if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
1470 && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
1471 && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
1472 && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
1473 return true;
1474 }
1475 }
cfee318d
SP
1476
1477 return false;
1478}
1479
4a52eb19
BC
1480/* For each seed statement in STARTING_STMTS, this function builds
1481 partition for it by adding depended statements according to RDG.
1482 All partitions are recorded in PARTITIONS. */
dea61d92
SP
1483
1484static void
83a95546 1485rdg_build_partitions (struct graph *rdg,
355fe088 1486 vec<gimple *> starting_stmts,
526ceb68 1487 vec<partition *> *partitions)
dea61d92 1488{
0e3de1d4 1489 auto_bitmap processed;
2fd5894f 1490 int i;
355fe088 1491 gimple *stmt;
dea61d92 1492
2fd5894f 1493 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
dea61d92 1494 {
2fd5894f
RB
1495 int v = rdg_vertex_for_stmt (rdg, stmt);
1496
1497 if (dump_file && (dump_flags & TDF_DETAILS))
1498 fprintf (dump_file,
1499 "ldist asked to generate code for vertex %d\n", v);
b8698a0f 1500
24f161fd
RB
1501 /* If the vertex is already contained in another partition so
1502 is the partition rooted at it. */
dea61d92
SP
1503 if (bitmap_bit_p (processed, v))
1504 continue;
b8698a0f 1505
526ceb68 1506 partition *partition = build_rdg_partition_for_vertex (rdg, v);
24f161fd 1507 bitmap_ior_into (processed, partition->stmts);
dea61d92 1508
826a536d 1509 if (dump_file && (dump_flags & TDF_DETAILS))
dea61d92 1510 {
f1eb4621
BC
1511 fprintf (dump_file, "ldist creates useful %s partition:\n",
1512 partition->type == PTYPE_PARALLEL ? "parallel" : "sequent");
1513 bitmap_print (dump_file, partition->stmts, " ", "\n");
dea61d92 1514 }
826a536d
RB
1515
1516 partitions->safe_push (partition);
dea61d92
SP
1517 }
1518
83a95546
RB
1519 /* All vertices should have been assigned to at least one partition now,
1520 other than vertices belonging to dead code. */
dea61d92
SP
1521}
1522
1523/* Dump to FILE the PARTITIONS. */
1524
1525static void
526ceb68 1526dump_rdg_partitions (FILE *file, vec<partition *> partitions)
dea61d92
SP
1527{
1528 int i;
526ceb68 1529 partition *partition;
dea61d92 1530
9771b263 1531 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1532 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
1533}
1534
1535/* Debug PARTITIONS. */
526ceb68 1536extern void debug_rdg_partitions (vec<partition *> );
dea61d92 1537
24e47c76 1538DEBUG_FUNCTION void
526ceb68 1539debug_rdg_partitions (vec<partition *> partitions)
dea61d92
SP
1540{
1541 dump_rdg_partitions (stderr, partitions);
1542}
1543
2b8aee8e
SP
1544/* Returns the number of read and write operations in the RDG. */
1545
1546static int
1547number_of_rw_in_rdg (struct graph *rdg)
1548{
1549 int i, res = 0;
1550
1551 for (i = 0; i < rdg->n_vertices; i++)
1552 {
1553 if (RDG_MEM_WRITE_STMT (rdg, i))
1554 ++res;
1555
1556 if (RDG_MEM_READS_STMT (rdg, i))
1557 ++res;
1558 }
1559
1560 return res;
1561}
1562
1563/* Returns the number of read and write operations in a PARTITION of
1564 the RDG. */
1565
1566static int
526ceb68 1567number_of_rw_in_partition (struct graph *rdg, partition *partition)
2b8aee8e
SP
1568{
1569 int res = 0;
1570 unsigned i;
1571 bitmap_iterator ii;
1572
c61f8985 1573 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1574 {
1575 if (RDG_MEM_WRITE_STMT (rdg, i))
1576 ++res;
1577
1578 if (RDG_MEM_READS_STMT (rdg, i))
1579 ++res;
1580 }
1581
1582 return res;
1583}
1584
1585/* Returns true when one of the PARTITIONS contains all the read or
1586 write operations of RDG. */
1587
1588static bool
9771b263 1589partition_contains_all_rw (struct graph *rdg,
526ceb68 1590 vec<partition *> partitions)
2b8aee8e
SP
1591{
1592 int i;
526ceb68 1593 partition *partition;
2b8aee8e
SP
1594 int nrw = number_of_rw_in_rdg (rdg);
1595
9771b263 1596 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1597 if (nrw == number_of_rw_in_partition (rdg, partition))
1598 return true;
1599
1600 return false;
1601}
1602
447f3223
RB
1603/* Compute partition dependence created by the data references in DRS1
1604 and DRS2 and modify and return DIR according to that. */
1605
1606static int
4084ea5f 1607pg_add_dependence_edges (struct graph *rdg, int dir,
a7a44c07 1608 bitmap drs1, bitmap drs2)
447f3223 1609{
a7a44c07
BC
1610 unsigned i, j;
1611 bitmap_iterator bi, bj;
1612 data_reference_p dr1, dr2, saved_dr1;
447f3223
RB
1613
1614 /* dependence direction - 0 is no dependence, -1 is back,
1615 1 is forth, 2 is both (we can stop then, merging will occur). */
a7a44c07
BC
1616 EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi)
1617 {
1618 dr1 = datarefs_vec[i];
1619
1620 EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj)
1621 {
1622 dr2 = datarefs_vec[j];
1623
1624 /* Skip all <read, read> data dependence. */
1625 if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
1626 continue;
1627
1628 saved_dr1 = dr1;
1629 int this_dir = 1;
1630 ddr_p ddr;
1631 /* Re-shuffle data-refs to be in dominator order. */
1632 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1633 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1634 {
1635 std::swap (dr1, dr2);
1636 this_dir = -this_dir;
1637 }
17c5cbdf 1638 ddr = get_data_dependence (rdg, dr1, dr2);
a7a44c07
BC
1639 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1640 this_dir = 2;
1641 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1642 {
1643 if (DDR_REVERSED_P (ddr))
1644 {
1645 std::swap (dr1, dr2);
1646 this_dir = -this_dir;
1647 }
1648 /* Known dependences can still be unordered througout the
1649 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1650 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1651 this_dir = 2;
1652 /* If the overlap is exact preserve stmt order. */
1653 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1654 ;
1655 else
1656 {
1657 /* Else as the distance vector is lexicographic positive
1658 swap the dependence direction. */
1659 this_dir = -this_dir;
1660 }
1661 }
1662 else
1663 this_dir = 0;
a7a44c07
BC
1664 if (this_dir == 2)
1665 return 2;
1666 else if (dir == 0)
1667 dir = this_dir;
1668 else if (this_dir != 0 && dir != this_dir)
1669 return 2;
1670 /* Shuffle "back" dr1. */
1671 dr1 = saved_dr1;
1672 }
1673 }
447f3223
RB
1674 return dir;
1675}
1676
1677/* Compare postorder number of the partition graph vertices V1 and V2. */
1678
1679static int
1680pgcmp (const void *v1_, const void *v2_)
1681{
1682 const vertex *v1 = (const vertex *)v1_;
1683 const vertex *v2 = (const vertex *)v2_;
1684 return v2->post - v1->post;
1685}
2fd5894f
RB
1686
1687/* Distributes the code from LOOP in such a way that producer
1688 statements are placed before consumer statements. Tries to separate
1689 only the statements from STMTS into separate loops.
b71b7a8e
RB
1690 Returns the number of distributed loops. Set *DESTROY_P to whether
1691 LOOP needs to be destroyed. */
dea61d92
SP
1692
1693static int
355fe088 1694distribute_loop (struct loop *loop, vec<gimple *> stmts,
b71b7a8e 1695 control_dependences *cd, int *nb_calls, bool *destroy_p)
dea61d92 1696{
2fd5894f 1697 struct graph *rdg;
526ceb68 1698 partition *partition;
be6b029b 1699 bool any_builtin;
2fd5894f 1700 int i, nbp;
447f3223
RB
1701 graph *pg = NULL;
1702 int num_sccs = 1;
dea61d92 1703
c9326aef 1704 *destroy_p = false;
826a536d 1705 *nb_calls = 0;
4084ea5f 1706 loop_nest.create (0);
2fd5894f 1707 if (!find_loop_nest (loop, &loop_nest))
4084ea5f
BC
1708 {
1709 loop_nest.release ();
1710 return 0;
1711 }
2fd5894f 1712
9fafb14a 1713 datarefs_vec.create (20);
4084ea5f 1714 rdg = build_rdg (loop, cd);
2fd5894f
RB
1715 if (!rdg)
1716 {
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file,
1719 "Loop %d not distributed: failed to build the RDG.\n",
1720 loop->num);
1721
4084ea5f 1722 loop_nest.release ();
9fafb14a
BC
1723 free_data_refs (datarefs_vec);
1724 return 0;
1725 }
1726
1727 if (datarefs_vec.length () > MAX_DATAREFS_NUM)
1728 {
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file,
1731 "Loop %d not distributed: too many memory references.\n",
1732 loop->num);
1733
1734 free_rdg (rdg);
1735 loop_nest.release ();
1736 free_data_refs (datarefs_vec);
2fd5894f
RB
1737 return 0;
1738 }

  data_reference_p dref;
  for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
    dref->aux = (void *) (uintptr_t) i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  auto_bitmap stmt_in_all_partitions;
  bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
  for (i = 1; partitions.iterate (i, &partition); ++i)
    bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
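
  /* stmt_in_all_partitions now holds the intersection of every
     partition's stmt bitmap.  For instance, if partition 0 contains
     stmts {1, 2, 3} and partition 1 contains stmts {2, 3, 4}, the
     intersection is {2, 3}: stmts that every distributed loop would
     have to execute.  */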

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition, stmt_in_all_partitions);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	partition_merge_into (rdg, into, partition, FUSE_REDUCTION);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }
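
  /* A minimal sketch (the code below is illustrative, not from this
     file) of why reduction partitions must stay with the retained loop:

       | sum = 0;
       | for (i = 0; i < n; ++i)
       |   {
       |     a[i] = 0;
       |     sum = sum + b[i];
       |   }
       | ... = sum;

     The final value of sum leaves the loop through a loop-closed PHI
     node on the loop exit, and per the comment above the transform
     phase keeps such PHIs only in the existing loop, so the sum
     partition is fused rather than given a new loop copy.  */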

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (share_memory_accesses (rdg, into, partition))
	    {
	      partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* Suppose partitions 0 1 2 were fused to 0,2 1 because 0 and 2
	 have similar accesses while 0 and 1 do not.  If 1 and 2 also
	 have similar accesses, the scan above has already passed 1 by
	 and would fail to merge it into 0,2.  So try again whenever we
	 merged anything into partition I.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  /* FIXME - leaks.  */
	  v->data = data;
	  data->partition = partition;
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* Dependence direction: 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will
	       occur).  */
	    int dir = pg_add_dependence_edges (rdg, 0,
					       partition1->datarefs,
					       partition2->datarefs);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		partition_merge_into (NULL, first,
				      partition, FUSE_SAME_SCC);
		first->type = PTYPE_SEQUENTIAL;
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}
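
      /* For example, if pg_add_dependence_edges returned 2 for some pair
	 (the dependence runs both back and forth), edges were added in
	 both directions above, so the two vertices form one strongly
	 connected component and their partitions have just been fused
	 with FUSE_SAME_SCC.  */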

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:
  loop_nest.release ();
  free_data_refs (datarefs_vec);
  for (hash_table<ddr_hasher>::iterator iter = ddrs_table.begin ();
       iter != ddrs_table.end (); ++iter)
    {
      free_dependence_relation (*iter);
      *iter = NULL;
    }
  ddrs_table.empty ();

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* Compute topological order for basic blocks.  Topological order is
     needed because data dependence is computed for data references in
     lexicographical order.  */
  if (bb_top_order_index == NULL)
    {
      int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));

      bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
      bb_top_order_index_size
	= pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
      for (int i = 0; i < bb_top_order_index_size; i++)
	bb_top_order_index[rpo[i]] = i;

      free (rpo);
    }
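
  /* A small worked example of the inversion above: if the reverse post
     order visits basic blocks 2, 4, 3, then rpo[] is {2, 4, 3} and the
     loop sets bb_top_order_index[2] = 0, bb_top_order_index[4] = 1 and
     bb_top_order_index[3] = 2, i.e. it maps a block's index to the
     block's position in topological order.  */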

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so check for that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
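
      /* For instance, a plain store a[i] = b[i] + c in the loop body
	 (an illustrative statement, not from this file) has a virtual
	 definition but no scalar uses outside the loop, so it is pushed
	 on the worklist by the store case above and seeds a partition.  */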
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (bb_top_order_index != NULL)
    {
      free (bb_top_order_index);
      bb_top_order_index = NULL;
      bb_top_order_index_size = 0;
    }

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control
	 dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions may now refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}