]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-loop-distribution.c
re PR tree-optimization/58473 (FAIL: ext/random/normal_mv_distribution/cons/default...
[thirdparty/gcc.git] / gcc / tree-loop-distribution.c
CommitLineData
dea61d92 1/* Loop distribution.
d1e082c2 2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
dea61d92
SP
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6This file is part of GCC.
b8698a0f 7
dea61d92
SP
8GCC is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
b8698a0f 12
dea61d92
SP
13GCC is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
b8698a0f 17
dea61d92
SP
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22/* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
b8698a0f 29 is transformed to
dea61d92
SP
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
7a300452 47#include "tree-ssa.h"
dea61d92 48#include "cfgloop.h"
dea61d92
SP
49#include "tree-chrec.h"
50#include "tree-data-ref.h"
51#include "tree-scalar-evolution.h"
52#include "tree-pass.h"
80ab0b19
RB
53#include "gimple-pretty-print.h"
54
55
56/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
57typedef struct rdg_vertex
58{
59 /* The statement represented by this vertex. */
60 gimple stmt;
61
62 /* Vector of data-references in this statement. */
63 vec<data_reference_p> datarefs;
64
65 /* True when the statement contains a write to memory. */
66 bool has_mem_write;
67
68 /* True when the statement contains a read from memory. */
69 bool has_mem_reads;
70} *rdg_vertex_p;
71
72#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
73#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
74#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
75#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
76#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
77#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
78#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
79#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
80
81/* Data dependence type. */
82
83enum rdg_dep_type
84{
85 /* Read After Write (RAW). */
86 flow_dd = 'f',
87
88 /* Write After Read (WAR). */
89 anti_dd = 'a',
90
91 /* Write After Write (WAW). */
92 output_dd = 'o',
93
94 /* Read After Read (RAR). */
36875e8f
RB
95 input_dd = 'i',
96
97 /* Control dependence (execute conditional on). */
98 control_dd = 'c'
80ab0b19
RB
99};
100
101/* Dependence information attached to an edge of the RDG. */
102
103typedef struct rdg_edge
104{
105 /* Type of the dependence. */
106 enum rdg_dep_type type;
107
108 /* Levels of the dependence: the depth of the loops that carry the
109 dependence. */
110 unsigned level;
111
112 /* Dependence relation between data dependences, NULL when one of
113 the vertices is a scalar. */
114 ddr_p relation;
115} *rdg_edge_p;
116
117#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
118#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
119#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
120
80ab0b19
RB
121/* Dump vertex I in RDG to FILE. */
122
123static void
124dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
125{
126 struct vertex *v = &(rdg->vertices[i]);
127 struct graph_edge *e;
128
129 fprintf (file, "(vertex %d: (%s%s) (in:", i,
130 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
131 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
132
133 if (v->pred)
134 for (e = v->pred; e; e = e->pred_next)
135 fprintf (file, " %d", e->src);
136
137 fprintf (file, ") (out:");
138
139 if (v->succ)
140 for (e = v->succ; e; e = e->succ_next)
141 fprintf (file, " %d", e->dest);
142
143 fprintf (file, ")\n");
144 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
145 fprintf (file, ")\n");
146}
147
148/* Call dump_rdg_vertex on stderr. */
149
150DEBUG_FUNCTION void
151debug_rdg_vertex (struct graph *rdg, int i)
152{
153 dump_rdg_vertex (stderr, rdg, i);
154}
155
80ab0b19
RB
156/* Dump the reduced dependence graph RDG to FILE. */
157
158static void
159dump_rdg (FILE *file, struct graph *rdg)
160{
80ab0b19 161 fprintf (file, "(rdg\n");
2fd5894f
RB
162 for (int i = 0; i < rdg->n_vertices; i++)
163 dump_rdg_vertex (file, rdg, i);
80ab0b19 164 fprintf (file, ")\n");
80ab0b19
RB
165}
166
167/* Call dump_rdg on stderr. */
168
169DEBUG_FUNCTION void
170debug_rdg (struct graph *rdg)
171{
172 dump_rdg (stderr, rdg);
173}
174
175static void
176dot_rdg_1 (FILE *file, struct graph *rdg)
177{
178 int i;
174ec470
RB
179 pretty_printer buffer;
180 pp_needs_newline (&buffer) = false;
181 buffer.buffer->stream = file;
80ab0b19
RB
182
183 fprintf (file, "digraph RDG {\n");
184
185 for (i = 0; i < rdg->n_vertices; i++)
186 {
187 struct vertex *v = &(rdg->vertices[i]);
188 struct graph_edge *e;
189
174ec470
RB
190 fprintf (file, "%d [label=\"[%d] ", i, i);
191 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
192 pp_flush (&buffer);
193 fprintf (file, "\"]\n");
194
80ab0b19
RB
195 /* Highlight reads from memory. */
196 if (RDG_MEM_READS_STMT (rdg, i))
197 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
198
199 /* Highlight stores to memory. */
200 if (RDG_MEM_WRITE_STMT (rdg, i))
201 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
202
203 if (v->succ)
204 for (e = v->succ; e; e = e->succ_next)
205 switch (RDGE_TYPE (e))
206 {
207 case input_dd:
208 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
209 break;
210
211 case output_dd:
212 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
213 break;
214
215 case flow_dd:
216 /* These are the most common dependences: don't print these. */
217 fprintf (file, "%d -> %d \n", i, e->dest);
218 break;
219
220 case anti_dd:
221 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
222 break;
223
36875e8f
RB
224 case control_dd:
225 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
226 break;
227
80ab0b19
RB
228 default:
229 gcc_unreachable ();
230 }
231 }
232
233 fprintf (file, "}\n\n");
234}
235
236/* Display the Reduced Dependence Graph using dotty. */
237
238DEBUG_FUNCTION void
239dot_rdg (struct graph *rdg)
240{
174ec470
RB
241 /* When debugging, you may want to enable the following code. */
242#if 1
243 FILE *file = popen("dot -Tx11", "w");
244 if (!file)
245 return;
80ab0b19 246 dot_rdg_1 (file, rdg);
174ec470
RB
247 fflush (file);
248 close (fileno (file));
249 pclose (file);
80ab0b19
RB
250#else
251 dot_rdg_1 (stderr, rdg);
252#endif
253}
254
255/* Returns the index of STMT in RDG. */
256
257static int
258rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
259{
260 int index = gimple_uid (stmt);
261 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
262 return index;
263}
264
265/* Creates an edge in RDG for each distance vector from DDR. The
266 order that we keep track of in the RDG is the order in which
267 statements have to be executed. */
268
269static void
270create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
271{
272 struct graph_edge *e;
273 int va, vb;
274 data_reference_p dra = DDR_A (ddr);
275 data_reference_p drb = DDR_B (ddr);
276 unsigned level = ddr_dependence_level (ddr);
277
278 /* For non scalar dependences, when the dependence is REVERSED,
279 statement B has to be executed before statement A. */
280 if (level > 0
281 && !DDR_REVERSED_P (ddr))
282 {
283 data_reference_p tmp = dra;
284 dra = drb;
285 drb = tmp;
286 }
287
288 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
289 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
290
291 if (va < 0 || vb < 0)
292 return;
293
294 e = add_edge (rdg, va, vb);
295 e->data = XNEW (struct rdg_edge);
296
297 RDGE_LEVEL (e) = level;
298 RDGE_RELATION (e) = ddr;
299
300 /* Determines the type of the data dependence. */
301 if (DR_IS_READ (dra) && DR_IS_READ (drb))
302 RDGE_TYPE (e) = input_dd;
303 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
304 RDGE_TYPE (e) = output_dd;
305 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
306 RDGE_TYPE (e) = flow_dd;
307 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
308 RDGE_TYPE (e) = anti_dd;
309}
310
311/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
312 the index of DEF in RDG. */
313
314static void
315create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
316{
317 use_operand_p imm_use_p;
318 imm_use_iterator iterator;
319
320 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
321 {
322 struct graph_edge *e;
323 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
324
325 if (use < 0)
326 continue;
327
328 e = add_edge (rdg, idef, use);
329 e->data = XNEW (struct rdg_edge);
330 RDGE_TYPE (e) = flow_dd;
331 RDGE_RELATION (e) = NULL;
332 }
333}
334
36875e8f
RB
335/* Creates an edge for the control dependences of BB to the vertex V. */
336
337static void
338create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
339 int v, control_dependences *cd)
340{
341 bitmap_iterator bi;
342 unsigned edge_n;
343 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
344 0, edge_n, bi)
345 {
346 basic_block cond_bb = cd->get_edge (edge_n)->src;
347 gimple stmt = last_stmt (cond_bb);
348 if (stmt && is_ctrl_stmt (stmt))
349 {
350 struct graph_edge *e;
351 int c = rdg_vertex_for_stmt (rdg, stmt);
352 if (c < 0)
353 continue;
354
355 e = add_edge (rdg, c, v);
356 e->data = XNEW (struct rdg_edge);
357 RDGE_TYPE (e) = control_dd;
358 RDGE_RELATION (e) = NULL;
359 }
360 }
361}
362
80ab0b19
RB
363/* Creates the edges of the reduced dependence graph RDG. */
364
365static void
36875e8f 366create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
80ab0b19
RB
367{
368 int i;
369 struct data_dependence_relation *ddr;
370 def_operand_p def_p;
371 ssa_op_iter iter;
372
373 FOR_EACH_VEC_ELT (ddrs, i, ddr)
374 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
375 create_rdg_edge_for_ddr (rdg, ddr);
97463b2b
RB
376 else
377 free_dependence_relation (ddr);
80ab0b19
RB
378
379 for (i = 0; i < rdg->n_vertices; i++)
380 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
381 iter, SSA_OP_DEF)
382 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
36875e8f
RB
383
384 if (cd)
385 for (i = 0; i < rdg->n_vertices; i++)
386 {
387 gimple stmt = RDG_STMT (rdg, i);
388 if (gimple_code (stmt) == GIMPLE_PHI)
389 {
390 edge_iterator ei;
391 edge e;
392 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
393 create_edge_for_control_dependence (rdg, e->src, i, cd);
394 }
395 else
396 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
397 }
80ab0b19
RB
398}
399
400/* Build the vertices of the reduced dependence graph RDG. Return false
401 if that failed. */
402
403static bool
404create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
405 vec<data_reference_p> *datarefs)
406{
407 int i;
408 gimple stmt;
409
410 FOR_EACH_VEC_ELT (stmts, i, stmt)
411 {
412 struct vertex *v = &(rdg->vertices[i]);
413
414 /* Record statement to vertex mapping. */
415 gimple_set_uid (stmt, i);
416
417 v->data = XNEW (struct rdg_vertex);
418 RDGV_STMT (v) = stmt;
419 RDGV_DATAREFS (v).create (0);
420 RDGV_HAS_MEM_WRITE (v) = false;
421 RDGV_HAS_MEM_READS (v) = false;
422 if (gimple_code (stmt) == GIMPLE_PHI)
423 continue;
424
425 unsigned drp = datarefs->length ();
426 if (!find_data_references_in_stmt (loop, stmt, datarefs))
427 return false;
428 for (unsigned j = drp; j < datarefs->length (); ++j)
429 {
430 data_reference_p dr = (*datarefs)[j];
431 if (DR_IS_READ (dr))
432 RDGV_HAS_MEM_READS (v) = true;
433 else
434 RDGV_HAS_MEM_WRITE (v) = true;
435 RDGV_DATAREFS (v).safe_push (dr);
436 }
437 }
438 return true;
439}
440
2fd5894f 441/* Initialize STMTS with all the statements of LOOP. The order in
80ab0b19
RB
442 which we discover statements is important as
443 generate_loops_for_partition is using the same traversal for
2fd5894f 444 identifying statements in loop copies. */
80ab0b19
RB
445
446static void
447stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
448{
449 unsigned int i;
450 basic_block *bbs = get_loop_body_in_dom_order (loop);
451
452 for (i = 0; i < loop->num_nodes; i++)
453 {
454 basic_block bb = bbs[i];
455 gimple_stmt_iterator bsi;
456 gimple stmt;
457
458 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
83a95546
RB
459 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
460 stmts->safe_push (gsi_stmt (bsi));
80ab0b19
RB
461
462 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
463 {
464 stmt = gsi_stmt (bsi);
465 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
466 stmts->safe_push (stmt);
467 }
468 }
469
470 free (bbs);
471}
472
80ab0b19
RB
473/* Build the Reduced Dependence Graph (RDG) with one vertex per
474 statement of the loop nest, and one edge per data dependence or
475 scalar dependence. */
476
477struct graph *
478build_empty_rdg (int n_stmts)
479{
480 struct graph *rdg = new_graph (n_stmts);
481 return rdg;
482}
483
484/* Free the reduced dependence graph RDG. */
485
486static void
487free_rdg (struct graph *rdg)
488{
489 int i;
490
491 for (i = 0; i < rdg->n_vertices; i++)
492 {
493 struct vertex *v = &(rdg->vertices[i]);
494 struct graph_edge *e;
495
496 for (e = v->succ; e; e = e->succ_next)
497 {
498 free_dependence_relation (RDGE_RELATION (e));
499 free (e->data);
500 }
501
502 if (v->data)
503 {
504 gimple_set_uid (RDGV_STMT (v), -1);
505 free_data_refs (RDGV_DATAREFS (v));
506 free (v->data);
507 }
508 }
509
510 free_graph (rdg);
511}
512
513/* Build the Reduced Dependence Graph (RDG) with one vertex per
97463b2b 514 statement of the loop nest LOOP_NEST, and one edge per data dependence or
80ab0b19
RB
515 scalar dependence. */
516
517static struct graph *
36875e8f 518build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
80ab0b19
RB
519{
520 struct graph *rdg;
80ab0b19
RB
521 vec<gimple> stmts;
522 vec<data_reference_p> datarefs;
523 vec<ddr_p> dependence_relations;
524
97463b2b 525 /* Create the RDG vertices from the stmts of the loop nest. */
80ab0b19 526 stmts.create (10);
97463b2b 527 stmts_from_loop (loop_nest[0], &stmts);
80ab0b19
RB
528 rdg = build_empty_rdg (stmts.length ());
529 datarefs.create (10);
97463b2b 530 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
80ab0b19
RB
531 {
532 stmts.release ();
97463b2b 533 datarefs.release ();
80ab0b19
RB
534 free_rdg (rdg);
535 return NULL;
536 }
537 stmts.release ();
97463b2b
RB
538
539 /* Create the RDG edges from the data dependences in the loop nest. */
80ab0b19
RB
540 dependence_relations.create (100);
541 if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
542 false)
543 || !known_dependences_p (dependence_relations))
544 {
97463b2b 545 free_dependence_relations (dependence_relations);
80ab0b19 546 datarefs.release ();
80ab0b19
RB
547 free_rdg (rdg);
548 return NULL;
549 }
36875e8f 550 create_rdg_edges (rdg, dependence_relations, cd);
80ab0b19 551 dependence_relations.release ();
97463b2b 552 datarefs.release ();
80ab0b19
RB
553
554 return rdg;
555}
556
80ab0b19 557
dea61d92 558
b9fc0497
RB
559enum partition_kind {
560 PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
561};
30d55936 562
c61f8985
RG
563typedef struct partition_s
564{
565 bitmap stmts;
80ab0b19 566 bitmap loops;
7ad672e4 567 bool has_writes;
30d55936 568 enum partition_kind kind;
d0582dc1
RG
569 /* data-references a kind != PKIND_NORMAL partition is about. */
570 data_reference_p main_dr;
571 data_reference_p secondary_dr;
c61f8985
RG
572} *partition_t;
573
c61f8985
RG
574
575/* Allocate and initialize a partition from BITMAP. */
576
577static partition_t
80ab0b19 578partition_alloc (bitmap stmts, bitmap loops)
c61f8985
RG
579{
580 partition_t partition = XCNEW (struct partition_s);
581 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
80ab0b19 582 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
7ad672e4 583 partition->has_writes = false;
30d55936 584 partition->kind = PKIND_NORMAL;
c61f8985
RG
585 return partition;
586}
587
588/* Free PARTITION. */
589
590static void
591partition_free (partition_t partition)
592{
593 BITMAP_FREE (partition->stmts);
80ab0b19 594 BITMAP_FREE (partition->loops);
c61f8985
RG
595 free (partition);
596}
597
30d55936
RG
598/* Returns true if the partition can be generated as a builtin. */
599
600static bool
601partition_builtin_p (partition_t partition)
602{
b9fc0497 603 return partition->kind > PKIND_REDUCTION;
30d55936 604}
c61f8985 605
7ad672e4
RG
606/* Returns true if the partition has an writes. */
607
608static bool
609partition_has_writes (partition_t partition)
610{
611 return partition->has_writes;
612}
613
c07a8cb3
RG
614/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
615 the LOOP. */
616
617static bool
618ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
619{
620 imm_use_iterator imm_iter;
621 use_operand_p use_p;
622
623 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
e665269a
RG
624 {
625 gimple use_stmt = USE_STMT (use_p);
626 if (!is_gimple_debug (use_stmt)
627 && loop != loop_containing_stmt (use_stmt))
628 return true;
629 }
c07a8cb3
RG
630
631 return false;
632}
633
634/* Returns true when STMT defines a scalar variable used after the
88af7c1a 635 loop LOOP. */
c07a8cb3
RG
636
637static bool
88af7c1a 638stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
c07a8cb3 639{
88af7c1a
RG
640 def_operand_p def_p;
641 ssa_op_iter op_iter;
c07a8cb3 642
9ca86fc3
RG
643 if (gimple_code (stmt) == GIMPLE_PHI)
644 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
645
88af7c1a
RG
646 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
647 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
648 return true;
c07a8cb3 649
88af7c1a 650 return false;
c07a8cb3
RG
651}
652
dea61d92
SP
653/* Return a copy of LOOP placed before LOOP. */
654
655static struct loop *
656copy_loop_before (struct loop *loop)
657{
658 struct loop *res;
659 edge preheader = loop_preheader_edge (loop);
660
dea61d92
SP
661 initialize_original_copy_tables ();
662 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
30d55936 663 gcc_assert (res != NULL);
dea61d92 664 free_original_copy_tables ();
2cfc56b9 665 delete_update_ssa ();
dea61d92
SP
666
667 return res;
668}
669
670/* Creates an empty basic block after LOOP. */
671
672static void
673create_bb_after_loop (struct loop *loop)
674{
675 edge exit = single_exit (loop);
676
677 if (!exit)
678 return;
679
680 split_edge (exit);
681}
682
683/* Generate code for PARTITION from the code in LOOP. The loop is
684 copied when COPY_P is true. All the statements not flagged in the
685 PARTITION bitmap are removed from the loop or from its copy. The
686 statements are indexed in sequence inside a basic block, and the
30d55936 687 basic blocks of a loop are taken in dom order. */
dea61d92 688
30d55936 689static void
c61f8985
RG
690generate_loops_for_partition (struct loop *loop, partition_t partition,
691 bool copy_p)
dea61d92 692{
2fd5894f 693 unsigned i;
726a989a 694 gimple_stmt_iterator bsi;
dea61d92
SP
695 basic_block *bbs;
696
697 if (copy_p)
698 {
699 loop = copy_loop_before (loop);
30d55936 700 gcc_assert (loop != NULL);
dea61d92
SP
701 create_preheader (loop, CP_SIMPLE_PREHEADERS);
702 create_bb_after_loop (loop);
703 }
704
2fd5894f 705 /* Remove stmts not in the PARTITION bitmap. */
dea61d92
SP
706 bbs = get_loop_body_in_dom_order (loop);
707
b03c3082 708 if (MAY_HAVE_DEBUG_STMTS)
2fd5894f 709 for (i = 0; i < loop->num_nodes; i++)
b03c3082
JJ
710 {
711 basic_block bb = bbs[i];
712
713 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2fd5894f
RB
714 {
715 gimple phi = gsi_stmt (bsi);
716 if (!virtual_operand_p (gimple_phi_result (phi))
717 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
718 reset_debug_uses (phi);
719 }
b03c3082
JJ
720
721 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
722 {
723 gimple stmt = gsi_stmt (bsi);
724 if (gimple_code (stmt) != GIMPLE_LABEL
725 && !is_gimple_debug (stmt)
2fd5894f 726 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
b03c3082
JJ
727 reset_debug_uses (stmt);
728 }
729 }
730
2fd5894f 731 for (i = 0; i < loop->num_nodes; i++)
dea61d92
SP
732 {
733 basic_block bb = bbs[i];
dea61d92 734
726a989a 735 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
2fd5894f
RB
736 {
737 gimple phi = gsi_stmt (bsi);
738 if (!virtual_operand_p (gimple_phi_result (phi))
739 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
2706a615 740 remove_phi_node (&bsi, true);
2fd5894f
RB
741 else
742 gsi_next (&bsi);
743 }
dea61d92 744
726a989a 745 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
2706a615
RG
746 {
747 gimple stmt = gsi_stmt (bsi);
b03c3082
JJ
748 if (gimple_code (stmt) != GIMPLE_LABEL
749 && !is_gimple_debug (stmt)
2fd5894f 750 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
2706a615 751 {
36875e8f
RB
752 /* Choose an arbitrary path through the empty CFG part
753 that this unnecessary control stmt controls. */
754 if (gimple_code (stmt) == GIMPLE_COND)
755 {
756 gimple_cond_make_false (stmt);
757 update_stmt (stmt);
758 }
759 else if (gimple_code (stmt) == GIMPLE_SWITCH)
760 {
761 gimple_switch_set_index
762 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
763 update_stmt (stmt);
764 }
765 else
766 {
767 unlink_stmt_vdef (stmt);
768 gsi_remove (&bsi, true);
769 release_defs (stmt);
770 continue;
771 }
2706a615 772 }
36875e8f 773 gsi_next (&bsi);
2706a615 774 }
dea61d92
SP
775 }
776
777 free (bbs);
dea61d92
SP
778}
779
d0582dc1 780/* Build the size argument for a memory operation call. */
3661e899 781
d0582dc1
RG
782static tree
783build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
3661e899 784{
d0582dc1
RG
785 tree size;
786 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
787 fold_convert_loc (loc, sizetype, nb_iter),
788 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
789 return fold_convert_loc (loc, size_type_node, size);
790}
791
792/* Build an address argument for a memory operation call. */
793
794static tree
795build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
796{
797 tree addr_base;
798
799 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
800 addr_base = fold_convert_loc (loc, sizetype, addr_base);
801
802 /* Test for a negative stride, iterating over every element. */
803 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
804 {
805 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
806 fold_convert_loc (loc, sizetype, nb_bytes));
807 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
808 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
809 }
810
811 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
3661e899
TB
812}
813
401f3a81
JJ
814/* If VAL memory representation contains the same value in all bytes,
815 return that value, otherwise return -1.
816 E.g. for 0x24242424 return 0x24, for IEEE double
817 747708026454360457216.0 return 0x44, etc. */
818
819static int
820const_with_all_bytes_same (tree val)
821{
822 unsigned char buf[64];
823 int i, len;
824
825 if (integer_zerop (val)
826 || real_zerop (val)
827 || (TREE_CODE (val) == CONSTRUCTOR
828 && !TREE_CLOBBER_P (val)
829 && CONSTRUCTOR_NELTS (val) == 0))
830 return 0;
831
832 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
833 return -1;
834
835 len = native_encode_expr (val, buf, sizeof (buf));
836 if (len == 0)
837 return -1;
838 for (i = 1; i < len; i++)
839 if (buf[i] != buf[0])
840 return -1;
841 return buf[0];
842}
843
30d55936 844/* Generate a call to memset for PARTITION in LOOP. */
dea61d92 845
cfee318d 846static void
d0582dc1 847generate_memset_builtin (struct loop *loop, partition_t partition)
dea61d92 848{
30d55936
RG
849 gimple_stmt_iterator gsi;
850 gimple stmt, fn_call;
d0582dc1 851 tree nb_iter, mem, fn, nb_bytes;
30d55936 852 location_t loc;
b6dd5261 853 tree val;
30d55936 854
d0582dc1 855 stmt = DR_STMT (partition->main_dr);
30d55936 856 loc = gimple_location (stmt);
30d55936
RG
857 if (gimple_bb (stmt) == loop->latch)
858 nb_iter = number_of_latch_executions (loop);
859 else
860 nb_iter = number_of_exit_cond_executions (loop);
861
862 /* The new statements will be placed before LOOP. */
863 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dea61d92 864
d0582dc1
RG
865 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
866 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
867 false, GSI_CONTINUE_LINKING);
868 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
869 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
870 false, GSI_CONTINUE_LINKING);
dea61d92 871
b6dd5261
RG
872 /* This exactly matches the pattern recognition in classify_partition. */
873 val = gimple_assign_rhs1 (stmt);
401f3a81
JJ
874 /* Handle constants like 0x15151515 and similarly
875 floating point constants etc. where all bytes are the same. */
876 int bytev = const_with_all_bytes_same (val);
877 if (bytev != -1)
878 val = build_int_cst (integer_type_node, bytev);
879 else if (TREE_CODE (val) == INTEGER_CST)
880 val = fold_convert (integer_type_node, val);
881 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
b6dd5261 882 {
401f3a81
JJ
883 gimple cstmt;
884 tree tem = make_ssa_name (integer_type_node, NULL);
885 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
886 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
887 val = tem;
b6dd5261
RG
888 }
889
e79983f4 890 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
b6dd5261 891 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
d0582dc1 892 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
dea61d92
SP
893
894 if (dump_file && (dump_flags & TDF_DETAILS))
b6dd5261
RG
895 {
896 fprintf (dump_file, "generated memset");
401f3a81 897 if (bytev == 0)
b6dd5261 898 fprintf (dump_file, " zero\n");
b6dd5261
RG
899 else
900 fprintf (dump_file, "\n");
901 }
dea61d92
SP
902}
903
d0582dc1
RG
904/* Generate a call to memcpy for PARTITION in LOOP. */
905
906static void
907generate_memcpy_builtin (struct loop *loop, partition_t partition)
908{
909 gimple_stmt_iterator gsi;
910 gimple stmt, fn_call;
911 tree nb_iter, dest, src, fn, nb_bytes;
912 location_t loc;
913 enum built_in_function kind;
914
915 stmt = DR_STMT (partition->main_dr);
916 loc = gimple_location (stmt);
917 if (gimple_bb (stmt) == loop->latch)
918 nb_iter = number_of_latch_executions (loop);
919 else
920 nb_iter = number_of_exit_cond_executions (loop);
921
922 /* The new statements will be placed before LOOP. */
923 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
924
925 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
926 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
927 false, GSI_CONTINUE_LINKING);
928 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
929 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
930 if (ptr_derefs_may_alias_p (dest, src))
931 kind = BUILT_IN_MEMMOVE;
932 else
933 kind = BUILT_IN_MEMCPY;
934
935 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
936 false, GSI_CONTINUE_LINKING);
937 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
938 false, GSI_CONTINUE_LINKING);
939 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
940 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
941 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
942
943 if (dump_file && (dump_flags & TDF_DETAILS))
944 {
945 if (kind == BUILT_IN_MEMCPY)
946 fprintf (dump_file, "generated memcpy\n");
947 else
948 fprintf (dump_file, "generated memmove\n");
949 }
950}
951
30d55936 952/* Remove and destroy the loop LOOP. */
dea61d92 953
30d55936
RG
954static void
955destroy_loop (struct loop *loop)
dea61d92 956{
30d55936
RG
957 unsigned nbbs = loop->num_nodes;
958 edge exit = single_exit (loop);
959 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
dea61d92 960 basic_block *bbs;
30d55936 961 unsigned i;
dea61d92
SP
962
963 bbs = get_loop_body_in_dom_order (loop);
964
30d55936
RG
965 redirect_edge_pred (exit, src);
966 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
967 exit->flags |= EDGE_FALLTHRU;
968 cancel_loop_tree (loop);
969 rescan_loop_exit (exit, false, true);
dea61d92 970
30d55936 971 for (i = 0; i < nbbs; i++)
c014f6f5
RG
972 {
973 /* We have made sure to not leave any dangling uses of SSA
974 names defined in the loop. With the exception of virtuals.
975 Make sure we replace all uses of virtual defs that will remain
976 outside of the loop with the bare symbol as delete_basic_block
977 will release them. */
978 gimple_stmt_iterator gsi;
979 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
980 {
981 gimple phi = gsi_stmt (gsi);
ea057359 982 if (virtual_operand_p (gimple_phi_result (phi)))
c014f6f5
RG
983 mark_virtual_phi_result_for_renaming (phi);
984 }
985 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
986 {
987 gimple stmt = gsi_stmt (gsi);
988 tree vdef = gimple_vdef (stmt);
989 if (vdef && TREE_CODE (vdef) == SSA_NAME)
990 mark_virtual_operand_for_renaming (vdef);
991 }
992 delete_basic_block (bbs[i]);
993 }
dea61d92 994 free (bbs);
30d55936
RG
995
996 set_immediate_dominator (CDI_DOMINATORS, dest,
997 recompute_dominator (CDI_DOMINATORS, dest));
dea61d92
SP
998}
999
30d55936 1000/* Generates code for PARTITION. */
dea61d92 1001
30d55936 1002static void
d0582dc1 1003generate_code_for_partition (struct loop *loop,
be6b029b 1004 partition_t partition, bool copy_p)
dea61d92 1005{
30d55936
RG
1006 switch (partition->kind)
1007 {
1008 case PKIND_MEMSET:
d0582dc1
RG
1009 generate_memset_builtin (loop, partition);
1010 /* If this is the last partition for which we generate code, we have
1011 to destroy the loop. */
1012 if (!copy_p)
1013 destroy_loop (loop);
1014 break;
1015
1016 case PKIND_MEMCPY:
1017 generate_memcpy_builtin (loop, partition);
30d55936
RG
1018 /* If this is the last partition for which we generate code, we have
1019 to destroy the loop. */
1020 if (!copy_p)
1021 destroy_loop (loop);
1022 break;
1023
b9fc0497
RB
1024 case PKIND_REDUCTION:
1025 /* Reductions all have to be in the last partition. */
1026 gcc_assert (!copy_p);
30d55936
RG
1027 case PKIND_NORMAL:
1028 generate_loops_for_partition (loop, partition, copy_p);
1029 break;
1030
1031 default:
1032 gcc_unreachable ();
1033 }
dea61d92
SP
1034}
1035
1036
1037/* Returns true if the node V of RDG cannot be recomputed. */
1038
1039static bool
1040rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
1041{
1042 if (RDG_MEM_WRITE_STMT (rdg, v))
1043 return true;
1044
1045 return false;
1046}
1047
1048/* Returns true when the vertex V has already been generated in the
1049 current partition (V is in PROCESSED), or when V belongs to another
1050 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
1051
1052static inline bool
1053already_processed_vertex_p (bitmap processed, int v)
1054{
174ec470 1055 return bitmap_bit_p (processed, v);
dea61d92
SP
1056}
1057
c61f8985 1058static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
80ab0b19 1059 bitmap);
dea61d92 1060
dea61d92
SP
1061/* Flag V from RDG as part of PARTITION, and also flag its loop number
1062 in LOOPS. */
1063
1064static void
80ab0b19 1065rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
dea61d92
SP
1066{
1067 struct loop *loop;
1068
c61f8985 1069 if (!bitmap_set_bit (partition->stmts, v))
dea61d92
SP
1070 return;
1071
1072 loop = loop_containing_stmt (RDG_STMT (rdg, v));
80ab0b19 1073 bitmap_set_bit (partition->loops, loop->num);
dea61d92
SP
1074
1075 if (rdg_cannot_recompute_vertex_p (rdg, v))
174ec470 1076 partition->has_writes = true;
dea61d92
SP
1077}
1078
1079/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
fa10beec 1080 Also flag their loop number in LOOPS. */
dea61d92
SP
1081
1082static void
c61f8985 1083rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
80ab0b19 1084 bitmap processed)
dea61d92
SP
1085{
1086 unsigned i;
9771b263
DN
1087 vec<int> nodes;
1088 nodes.create (3);
dea61d92
SP
1089 int x;
1090
174ec470 1091 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
dea61d92 1092
9771b263 1093 FOR_EACH_VEC_ELT (nodes, i, x)
174ec470
RB
1094 if (bitmap_set_bit (processed, x))
1095 rdg_flag_vertex (rdg, x, partition);
dea61d92 1096
9771b263 1097 nodes.release ();
dea61d92
SP
1098}
1099
83a95546
RB
1100/* Returns a partition with all the statements needed for computing
1101 the vertex V of the RDG, also including the loop exit conditions. */
dea61d92 1102
c61f8985 1103static partition_t
83a95546 1104build_rdg_partition_for_vertex (struct graph *rdg, int v)
dea61d92 1105{
80ab0b19 1106 partition_t partition = partition_alloc (NULL, NULL);
dea61d92 1107 bitmap processed = BITMAP_ALLOC (NULL);
83a95546 1108 rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
dea61d92 1109 BITMAP_FREE (processed);
dea61d92
SP
1110 return partition;
1111}
1112
30d55936
RG
1113/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1114 For the moment we detect only the memset zero pattern. */
cfee318d 1115
30d55936
RG
1116static void
1117classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
cfee318d 1118{
cfee318d 1119 bitmap_iterator bi;
30d55936
RG
1120 unsigned i;
1121 tree nb_iter;
d0582dc1 1122 data_reference_p single_load, single_store;
b9fc0497 1123 bool volatiles_p = false;
30d55936
RG
1124
1125 partition->kind = PKIND_NORMAL;
d0582dc1
RG
1126 partition->main_dr = NULL;
1127 partition->secondary_dr = NULL;
30d55936 1128
c61f8985 1129 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
30d55936
RG
1130 {
1131 gimple stmt = RDG_STMT (rdg, i);
1132
1133 if (gimple_has_volatile_ops (stmt))
b9fc0497 1134 volatiles_p = true;
cfee318d 1135
30d55936
RG
1136 /* If the stmt has uses outside of the loop fail.
1137 ??? If the stmt is generated in another partition that
1138 is not created as builtin we can ignore this. */
9ca86fc3 1139 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
30d55936
RG
1140 {
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1142 fprintf (dump_file, "not generating builtin, partition has "
1143 "scalar uses outside of the loop\n");
b9fc0497 1144 partition->kind = PKIND_REDUCTION;
30d55936
RG
1145 return;
1146 }
1147 }
1148
b9fc0497
RB
1149 /* Perform general partition disqualification for builtins. */
1150 if (volatiles_p
1151 || !flag_tree_loop_distribute_patterns)
1152 return;
1153
1154 nb_iter = number_of_exit_cond_executions (loop);
1155 if (!nb_iter || nb_iter == chrec_dont_know)
1156 return;
1157
d0582dc1
RG
1158 /* Detect memset and memcpy. */
1159 single_load = NULL;
1160 single_store = NULL;
30d55936
RG
1161 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1162 {
1163 gimple stmt = RDG_STMT (rdg, i);
d0582dc1
RG
1164 data_reference_p dr;
1165 unsigned j;
30d55936
RG
1166
1167 if (gimple_code (stmt) == GIMPLE_PHI)
1168 continue;
1169
1170 /* Any scalar stmts are ok. */
1171 if (!gimple_vuse (stmt))
1172 continue;
1173
d0582dc1
RG
1174 /* Otherwise just regular loads/stores. */
1175 if (!gimple_assign_single_p (stmt))
1176 return;
1177
1178 /* But exactly one store and/or load. */
9771b263 1179 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
30d55936 1180 {
d0582dc1
RG
1181 if (DR_IS_READ (dr))
1182 {
1183 if (single_load != NULL)
1184 return;
1185 single_load = dr;
1186 }
1187 else
1188 {
1189 if (single_store != NULL)
1190 return;
1191 single_store = dr;
1192 }
30d55936 1193 }
30d55936
RG
1194 }
1195
d0582dc1
RG
1196 if (single_store && !single_load)
1197 {
1198 gimple stmt = DR_STMT (single_store);
1199 tree rhs = gimple_assign_rhs1 (stmt);
401f3a81
JJ
1200 if (const_with_all_bytes_same (rhs) == -1
1201 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1202 || (TYPE_MODE (TREE_TYPE (rhs))
1203 != TYPE_MODE (unsigned_char_type_node))))
d0582dc1
RG
1204 return;
1205 if (TREE_CODE (rhs) == SSA_NAME
1206 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1207 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1208 return;
1209 if (!adjacent_dr_p (single_store))
1210 return;
1211 partition->kind = PKIND_MEMSET;
1212 partition->main_dr = single_store;
1213 }
1214 else if (single_store && single_load)
1215 {
1216 gimple store = DR_STMT (single_store);
1217 gimple load = DR_STMT (single_load);
1218 /* Direct aggregate copy or via an SSA name temporary. */
1219 if (load != store
1220 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1221 return;
1222 if (!adjacent_dr_p (single_store)
1223 || !adjacent_dr_p (single_load)
1224 || !operand_equal_p (DR_STEP (single_store),
1225 DR_STEP (single_load), 0))
1226 return;
f20132e7
RG
1227 /* Now check that if there is a dependence this dependence is
1228 of a suitable form for memmove. */
6e1aa848 1229 vec<loop_p> loops = vNULL;
f20132e7 1230 ddr_p ddr;
9771b263 1231 loops.safe_push (loop);
f20132e7
RG
1232 ddr = initialize_data_dependence_relation (single_load, single_store,
1233 loops);
1234 compute_affine_dependence (ddr, loop);
f20132e7
RG
1235 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1236 {
1237 free_dependence_relation (ddr);
9771b263 1238 loops.release ();
f20132e7
RG
1239 return;
1240 }
1241 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1242 {
1243 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1244 {
1245 free_dependence_relation (ddr);
9771b263 1246 loops.release ();
f20132e7
RG
1247 return;
1248 }
1249 lambda_vector dist_v;
9771b263 1250 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
f20132e7
RG
1251 {
1252 int dist = dist_v[index_in_loop_nest (loop->num,
1253 DDR_LOOP_NEST (ddr))];
1254 if (dist > 0 && !DDR_REVERSED_P (ddr))
1255 {
1256 free_dependence_relation (ddr);
9771b263 1257 loops.release ();
f20132e7
RG
1258 return;
1259 }
1260 }
1261 }
b7ce70b3 1262 free_dependence_relation (ddr);
9771b263 1263 loops.release ();
d0582dc1
RG
1264 partition->kind = PKIND_MEMCPY;
1265 partition->main_dr = single_store;
1266 partition->secondary_dr = single_load;
1267 }
cfee318d
SP
1268}
1269
1fa0c180
RG
1270/* For a data reference REF, return the declaration of its base
1271 address or NULL_TREE if the base is not determined. */
1272
1273static tree
1274ref_base_address (data_reference_p dr)
1275{
1276 tree base_address = DR_BASE_ADDRESS (dr);
1277 if (base_address
1278 && TREE_CODE (base_address) == ADDR_EXPR)
1279 return TREE_OPERAND (base_address, 0);
1280
1281 return base_address;
1282}
1283
cfee318d
SP
1284/* Returns true when PARTITION1 and PARTITION2 have similar memory
1285 accesses in RDG. */
1286
1287static bool
c61f8985
RG
1288similar_memory_accesses (struct graph *rdg, partition_t partition1,
1289 partition_t partition2)
cfee318d 1290{
1fa0c180 1291 unsigned i, j, k, l;
cfee318d 1292 bitmap_iterator bi, bj;
1fa0c180
RG
1293 data_reference_p ref1, ref2;
1294
1295 /* First check whether in the intersection of the two partitions are
1296 any loads or stores. Common loads are the situation that happens
1297 most often. */
1298 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1299 if (RDG_MEM_WRITE_STMT (rdg, i)
1300 || RDG_MEM_READS_STMT (rdg, i))
1301 return true;
cfee318d 1302
1fa0c180 1303 /* Then check all data-references against each other. */
c61f8985 1304 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
cfee318d
SP
1305 if (RDG_MEM_WRITE_STMT (rdg, i)
1306 || RDG_MEM_READS_STMT (rdg, i))
c61f8985 1307 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
cfee318d
SP
1308 if (RDG_MEM_WRITE_STMT (rdg, j)
1309 || RDG_MEM_READS_STMT (rdg, j))
1fa0c180 1310 {
9771b263 1311 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1fa0c180
RG
1312 {
1313 tree base1 = ref_base_address (ref1);
1314 if (base1)
9771b263 1315 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1fa0c180
RG
1316 if (base1 == ref_base_address (ref2))
1317 return true;
1318 }
1319 }
cfee318d
SP
1320
1321 return false;
1322}
1323
dea61d92
SP
1324/* Aggregate several components into a useful partition that is
1325 registered in the PARTITIONS vector. Partitions will be
1326 distributed in different loops. */
1327
1328static void
83a95546 1329rdg_build_partitions (struct graph *rdg,
2fd5894f 1330 vec<gimple> starting_stmts,
83a95546 1331 vec<partition_t> *partitions)
dea61d92 1332{
83a95546 1333 bitmap processed = BITMAP_ALLOC (NULL);
2fd5894f
RB
1334 int i;
1335 gimple stmt;
80ab0b19 1336 partition_t partition = partition_alloc (NULL, NULL);
dea61d92 1337
2fd5894f 1338 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
dea61d92 1339 {
c61f8985 1340 partition_t np;
2fd5894f
RB
1341 int v = rdg_vertex_for_stmt (rdg, stmt);
1342
1343 if (dump_file && (dump_flags & TDF_DETAILS))
1344 fprintf (dump_file,
1345 "ldist asked to generate code for vertex %d\n", v);
b8698a0f 1346
dea61d92
SP
1347 if (bitmap_bit_p (processed, v))
1348 continue;
b8698a0f 1349
83a95546 1350 np = build_rdg_partition_for_vertex (rdg, v);
c61f8985 1351 bitmap_ior_into (partition->stmts, np->stmts);
7ad672e4 1352 partition->has_writes = partition_has_writes (np);
c61f8985
RG
1353 bitmap_ior_into (processed, np->stmts);
1354 partition_free (np);
dea61d92 1355
7ad672e4 1356 if (partition_has_writes (partition))
dea61d92
SP
1357 {
1358 if (dump_file && (dump_flags & TDF_DETAILS))
1359 {
1360 fprintf (dump_file, "ldist useful partition:\n");
c61f8985 1361 dump_bitmap (dump_file, partition->stmts);
dea61d92
SP
1362 }
1363
9771b263 1364 partitions->safe_push (partition);
80ab0b19 1365 partition = partition_alloc (NULL, NULL);
dea61d92
SP
1366 }
1367 }
1368
83a95546
RB
1369 /* All vertices should have been assigned to at least one partition now,
1370 other than vertices belonging to dead code. */
dea61d92 1371
83a95546
RB
1372 if (!bitmap_empty_p (partition->stmts))
1373 {
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1375 {
1376 fprintf (dump_file, "remaining partition:\n");
1377 dump_bitmap (dump_file, partition->stmts);
1378 }
dea61d92 1379
83a95546 1380 partitions->safe_push (partition);
dea61d92 1381 }
dea61d92 1382 else
c61f8985 1383 partition_free (partition);
83a95546
RB
1384
1385 BITMAP_FREE (processed);
dea61d92
SP
1386}
1387
1388/* Dump to FILE the PARTITIONS. */
1389
1390static void
9771b263 1391dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
dea61d92
SP
1392{
1393 int i;
c61f8985 1394 partition_t partition;
dea61d92 1395
9771b263 1396 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1397 debug_bitmap_file (file, partition->stmts);
dea61d92
SP
1398}
1399
1400/* Debug PARTITIONS. */
9771b263 1401extern void debug_rdg_partitions (vec<partition_t> );
dea61d92 1402
24e47c76 1403DEBUG_FUNCTION void
9771b263 1404debug_rdg_partitions (vec<partition_t> partitions)
dea61d92
SP
1405{
1406 dump_rdg_partitions (stderr, partitions);
1407}
1408
2b8aee8e
SP
1409/* Returns the number of read and write operations in the RDG. */
1410
1411static int
1412number_of_rw_in_rdg (struct graph *rdg)
1413{
1414 int i, res = 0;
1415
1416 for (i = 0; i < rdg->n_vertices; i++)
1417 {
1418 if (RDG_MEM_WRITE_STMT (rdg, i))
1419 ++res;
1420
1421 if (RDG_MEM_READS_STMT (rdg, i))
1422 ++res;
1423 }
1424
1425 return res;
1426}
1427
1428/* Returns the number of read and write operations in a PARTITION of
1429 the RDG. */
1430
1431static int
c61f8985 1432number_of_rw_in_partition (struct graph *rdg, partition_t partition)
2b8aee8e
SP
1433{
1434 int res = 0;
1435 unsigned i;
1436 bitmap_iterator ii;
1437
c61f8985 1438 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
2b8aee8e
SP
1439 {
1440 if (RDG_MEM_WRITE_STMT (rdg, i))
1441 ++res;
1442
1443 if (RDG_MEM_READS_STMT (rdg, i))
1444 ++res;
1445 }
1446
1447 return res;
1448}
1449
1450/* Returns true when one of the PARTITIONS contains all the read or
1451 write operations of RDG. */
1452
1453static bool
9771b263
DN
1454partition_contains_all_rw (struct graph *rdg,
1455 vec<partition_t> partitions)
2b8aee8e
SP
1456{
1457 int i;
c61f8985 1458 partition_t partition;
2b8aee8e
SP
1459 int nrw = number_of_rw_in_rdg (rdg);
1460
9771b263 1461 FOR_EACH_VEC_ELT (partitions, i, partition)
2b8aee8e
SP
1462 if (nrw == number_of_rw_in_partition (rdg, partition))
1463 return true;
1464
1465 return false;
1466}
1467
2fd5894f
RB
1468
1469/* Distributes the code from LOOP in such a way that producer
1470 statements are placed before consumer statements. Tries to separate
1471 only the statements from STMTS into separate loops.
1472 Returns the number of distributed loops. */
dea61d92
SP
1473
1474static int
36875e8f
RB
1475distribute_loop (struct loop *loop, vec<gimple> stmts,
1476 control_dependences *cd)
dea61d92 1477{
2fd5894f
RB
1478 struct graph *rdg;
1479 vec<loop_p> loop_nest;
9771b263 1480 vec<partition_t> partitions;
c61f8985 1481 partition_t partition;
be6b029b 1482 bool any_builtin;
2fd5894f 1483 int i, nbp;
dea61d92 1484
2fd5894f
RB
1485 loop_nest.create (3);
1486 if (!find_loop_nest (loop, &loop_nest))
1487 {
1488 loop_nest.release ();
1489 return 0;
1490 }
1491
36875e8f 1492 rdg = build_rdg (loop_nest, cd);
2fd5894f
RB
1493 if (!rdg)
1494 {
1495 if (dump_file && (dump_flags & TDF_DETAILS))
1496 fprintf (dump_file,
1497 "Loop %d not distributed: failed to build the RDG.\n",
1498 loop->num);
1499
1500 loop_nest.release ();
1501 return 0;
1502 }
1503
1504 if (dump_file && (dump_flags & TDF_DETAILS))
1505 dump_rdg (dump_file, rdg);
1506
1507 partitions.create (3);
1508 rdg_build_partitions (rdg, stmts, &partitions);
dea61d92 1509
be6b029b 1510 any_builtin = false;
9771b263 1511 FOR_EACH_VEC_ELT (partitions, i, partition)
be6b029b
RG
1512 {
1513 classify_partition (loop, rdg, partition);
1514 any_builtin |= partition_builtin_p (partition);
1515 }
30d55936 1516
c014f6f5
RG
1517 /* If we are only distributing patterns fuse all partitions that
1518 were not properly classified as builtins. Else fuse partitions
1519 with similar memory accesses. */
1520 if (!flag_tree_loop_distribution)
1521 {
1522 partition_t into;
be6b029b
RG
1523 /* If we did not detect any builtin simply bail out. */
1524 if (!any_builtin)
1525 {
1526 nbp = 0;
1527 goto ldist_done;
1528 }
38ad2d07
RG
1529 /* Only fuse adjacent non-builtin partitions, see PR53616.
1530 ??? Use dependence information to improve partition ordering. */
1531 i = 0;
1532 do
1533 {
9771b263 1534 for (; partitions.iterate (i, &into); ++i)
38ad2d07
RG
1535 if (!partition_builtin_p (into))
1536 break;
9771b263 1537 for (++i; partitions.iterate (i, &partition); ++i)
38ad2d07
RG
1538 if (!partition_builtin_p (partition))
1539 {
1540 bitmap_ior_into (into->stmts, partition->stmts);
b9fc0497
RB
1541 if (partition->kind == PKIND_REDUCTION)
1542 into->kind = PKIND_REDUCTION;
9771b263 1543 partitions.ordered_remove (i);
5eb010bc 1544 partition_free (partition);
38ad2d07
RG
1545 i--;
1546 }
1547 else
1548 break;
1549 }
9771b263 1550 while ((unsigned) i < partitions.length ());
c014f6f5
RG
1551 }
1552 else
1553 {
1554 partition_t into;
1555 int j;
9771b263 1556 for (i = 0; partitions.iterate (i, &into); ++i)
c014f6f5
RG
1557 {
1558 if (partition_builtin_p (into))
1559 continue;
1560 for (j = i + 1;
9771b263 1561 partitions.iterate (j, &partition); ++j)
c014f6f5
RG
1562 {
1563 if (!partition_builtin_p (partition)
c014f6f5
RG
1564 && similar_memory_accesses (rdg, into, partition))
1565 {
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 {
1568 fprintf (dump_file, "fusing partitions\n");
1569 dump_bitmap (dump_file, into->stmts);
1570 dump_bitmap (dump_file, partition->stmts);
1571 fprintf (dump_file, "because they have similar "
1572 "memory accesses\n");
1573 }
1574 bitmap_ior_into (into->stmts, partition->stmts);
b9fc0497
RB
1575 if (partition->kind == PKIND_REDUCTION)
1576 into->kind = PKIND_REDUCTION;
9771b263 1577 partitions.ordered_remove (j);
5eb010bc 1578 partition_free (partition);
c014f6f5
RG
1579 j--;
1580 }
1581 }
1582 }
1583 }
30d55936 1584
b9fc0497
RB
1585 /* Fuse all reduction partitions into the last. */
1586 if (partitions.length () > 1)
1587 {
1588 partition_t into = partitions.last ();
1589 for (i = partitions.length () - 2; i >= 0; --i)
1590 {
1591 partition_t what = partitions[i];
1592 if (what->kind == PKIND_REDUCTION)
1593 {
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1595 {
1596 fprintf (dump_file, "fusing partitions\n");
1597 dump_bitmap (dump_file, into->stmts);
1598 dump_bitmap (dump_file, what->stmts);
1599 fprintf (dump_file, "because the latter has reductions\n");
1600 }
1601 bitmap_ior_into (into->stmts, what->stmts);
1602 into->kind = PKIND_REDUCTION;
1603 partitions.ordered_remove (i);
5eb010bc 1604 partition_free (what);
b9fc0497
RB
1605 }
1606 }
1607 }
1608
9771b263 1609 nbp = partitions.length ();
a4293fa6 1610 if (nbp == 0
9771b263
DN
1611 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1612 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
c014f6f5
RG
1613 {
1614 nbp = 0;
1615 goto ldist_done;
1616 }
dea61d92
SP
1617
1618 if (dump_file && (dump_flags & TDF_DETAILS))
1619 dump_rdg_partitions (dump_file, partitions);
1620
9771b263 1621 FOR_EACH_VEC_ELT (partitions, i, partition)
d0582dc1 1622 generate_code_for_partition (loop, partition, i < nbp - 1);
dea61d92 1623
dea61d92
SP
1624 ldist_done:
1625
9771b263 1626 FOR_EACH_VEC_ELT (partitions, i, partition)
c61f8985 1627 partition_free (partition);
9771b263 1628 partitions.release ();
dea61d92 1629
dea61d92 1630 free_rdg (rdg);
97463b2b 1631 loop_nest.release ();
2fd5894f 1632 return nbp;
dea61d92
SP
1633}
1634
1635/* Distribute all loops in the current function. */
1636
1637static unsigned int
1638tree_loop_distribution (void)
1639{
1640 struct loop *loop;
1641 loop_iterator li;
c014f6f5 1642 bool changed = false;
1fa0c180 1643 basic_block bb;
36875e8f 1644 control_dependences *cd = NULL;
1fa0c180
RG
1645
1646 FOR_ALL_BB (bb)
1647 {
1648 gimple_stmt_iterator gsi;
1649 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1650 gimple_set_uid (gsi_stmt (gsi), -1);
1651 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1652 gimple_set_uid (gsi_stmt (gsi), -1);
1653 }
dea61d92 1654
c014f6f5
RG
1655 /* We can at the moment only distribute non-nested loops, thus restrict
1656 walking to innermost loops. */
1657 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
dea61d92 1658 {
6e1aa848 1659 vec<gimple> work_list = vNULL;
be6b029b 1660 basic_block *bbs;
0e20c89f 1661 int num = loop->num;
c014f6f5 1662 int nb_generated_loops = 0;
be6b029b 1663 unsigned int i;
a3357f7d
RG
1664
1665 /* If the loop doesn't have a single exit we will fail anyway,
1666 so do that early. */
1667 if (!single_exit (loop))
1668 continue;
dea61d92 1669
f56f2d33
JH
1670 /* Only optimize hot loops. */
1671 if (!optimize_loop_for_speed_p (loop))
1672 continue;
1673
be6b029b
RG
1674 /* Initialize the worklist with stmts we seed the partitions with. */
1675 bbs = get_loop_body_in_dom_order (loop);
1676 for (i = 0; i < loop->num_nodes; ++i)
1677 {
1678 gimple_stmt_iterator gsi;
deb6c11a
RB
1679 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1680 {
1681 gimple phi = gsi_stmt (gsi);
1682 if (virtual_operand_p (gimple_phi_result (phi)))
1683 continue;
1684 /* Distribute stmts which have defs that are used outside of
1685 the loop. */
1686 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1687 continue;
1688 work_list.safe_push (phi);
1689 }
be6b029b
RG
1690 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1691 {
1692 gimple stmt = gsi_stmt (gsi);
83a95546
RB
1693
1694 /* If there is a stmt with side-effects bail out - we
1695 cannot and should not distribute this loop. */
1696 if (gimple_has_side_effects (stmt))
1697 {
1698 work_list.truncate (0);
1699 goto out;
1700 }
1701
b9fc0497
RB
1702 /* Distribute stmts which have defs that are used outside of
1703 the loop. */
1704 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1705 ;
1706 /* Otherwise only distribute stores for now. */
1707 else if (!gimple_assign_single_p (stmt)
1708 || is_gimple_reg (gimple_assign_lhs (stmt)))
be6b029b
RG
1709 continue;
1710
9771b263 1711 work_list.safe_push (stmt);
be6b029b
RG
1712 }
1713 }
83a95546 1714out:
be6b029b 1715 free (bbs);
c014f6f5 1716
9771b263 1717 if (work_list.length () > 0)
36875e8f
RB
1718 {
1719 if (!cd)
1720 {
1721 calculate_dominance_info (CDI_POST_DOMINATORS);
1722 cd = new control_dependences (create_edge_list ());
1723 free_dominance_info (CDI_POST_DOMINATORS);
1724 }
1725 nb_generated_loops = distribute_loop (loop, work_list, cd);
1726 }
c014f6f5
RG
1727
1728 if (nb_generated_loops > 0)
1729 changed = true;
dea61d92
SP
1730
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1732 {
1733 if (nb_generated_loops > 1)
1734 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
0e20c89f 1735 num, nb_generated_loops);
dea61d92 1736 else
0e20c89f 1737 fprintf (dump_file, "Loop %d is the same.\n", num);
dea61d92
SP
1738 }
1739
9771b263 1740 work_list.release ();
dea61d92
SP
1741 }
1742
36875e8f
RB
1743 if (cd)
1744 delete cd;
1745
c014f6f5
RG
1746 if (changed)
1747 {
525174a2 1748 mark_virtual_operands_for_renaming (cfun);
c014f6f5
RG
1749 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1750 }
1751
1752#ifdef ENABLE_CHECKING
1753 verify_loop_structure ();
1754#endif
1755
5006671f 1756 return 0;
dea61d92
SP
1757}
1758
1759static bool
1760gate_tree_loop_distribution (void)
1761{
20769d5e
SP
1762 return flag_tree_loop_distribution
1763 || flag_tree_loop_distribute_patterns;
dea61d92
SP
1764}
1765
27a4cd48
DM
1766namespace {
1767
1768const pass_data pass_data_loop_distribution =
dea61d92 1769{
27a4cd48
DM
1770 GIMPLE_PASS, /* type */
1771 "ldist", /* name */
1772 OPTGROUP_LOOP, /* optinfo_flags */
1773 true, /* has_gate */
1774 true, /* has_execute */
1775 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1776 ( PROP_cfg | PROP_ssa ), /* properties_required */
1777 0, /* properties_provided */
1778 0, /* properties_destroyed */
1779 0, /* todo_flags_start */
1780 TODO_verify_ssa, /* todo_flags_finish */
dea61d92 1781};
27a4cd48
DM
1782
1783class pass_loop_distribution : public gimple_opt_pass
1784{
1785public:
1786 pass_loop_distribution(gcc::context *ctxt)
1787 : gimple_opt_pass(pass_data_loop_distribution, ctxt)
1788 {}
1789
1790 /* opt_pass methods: */
1791 bool gate () { return gate_tree_loop_distribution (); }
1792 unsigned int execute () { return tree_loop_distribution (); }
1793
1794}; // class pass_loop_distribution
1795
1796} // anon namespace
1797
1798gimple_opt_pass *
1799make_pass_loop_distribution (gcc::context *ctxt)
1800{
1801 return new pass_loop_distribution (ctxt);
1802}