/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, a Reduced Dependence Graph built on top of
   the data dependence relations.  The RDG is then topologically sorted
   to obtain a map of information producers/consumers, based on which
   it generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)          ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)      ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)           RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)       RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Write After Read (WAR).  */
  anti_dd = 'a',

  /* Write After Write (WAW).  */
  output_dd = 'o',

  /* Read After Read (RAR).  */
  input_dd = 'i',

  /* Control dependence (the destination statement executes only under
     a condition tested in the source statement).  */
  control_dd = 'c'
};
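
/* As an illustration of the dependence kinds above (an example only, not
   taken from a particular testcase), for the straight-line sequence

   | S1: A(I) = ...
   | S2: ...  = A(I)
   | S3: A(I) = ...

   the RDG carries a flow (RAW) edge S1 -> S2, an anti (WAR) edge
   S2 -> S3, and an output (WAW) edge S1 -> S3.  */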

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;

  /* Levels of the dependence: the depth of the loops that carry the
     dependence.  */
  unsigned level;

  /* Data dependence relation between the two data references, NULL
     when one of the vertices is a scalar.  */
  ddr_p relation;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E)       ((struct rdg_edge *) ((E)->data))->level
#define RDGE_RELATION(E)    ((struct rdg_edge *) ((E)->data))->relation

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case input_dd:
              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
              break;

            case output_dd:
              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
              break;

            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case anti_dd:
              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to switch this "#if 1" to "#if 0" so
     the RDG is dumped to stderr instead of being piped to dot.  */
#if 1
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
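
/* Note: the DEBUG_FUNCTION entry points above are intended to be called
   from a debugger session rather than from the pass itself, e.g. from gdb
   (assuming RDG points to a live graph in the current frame):

     (gdb) call debug_rdg (rdg)
     (gdb) call dot_rdg (rdg)  */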

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates an edge in RDG for each distance vector from DDR.  The
   order that we keep track of in the RDG is the order in which
   statements have to be executed.  */

static void
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
{
  struct graph_edge *e;
  int va, vb;
  data_reference_p dra = DDR_A (ddr);
  data_reference_p drb = DDR_B (ddr);
  unsigned level = ddr_dependence_level (ddr);

  /* For non-scalar dependences, when the dependence is REVERSED,
     statement B has to be executed before statement A.  */
  if (level > 0
      && !DDR_REVERSED_P (ddr))
    {
      data_reference_p tmp = dra;
      dra = drb;
      drb = tmp;
    }

  va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
  vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));

  if (va < 0 || vb < 0)
    return;

  e = add_edge (rdg, va, vb);
  e->data = XNEW (struct rdg_edge);

  RDGE_LEVEL (e) = level;
  RDGE_RELATION (e) = ddr;

  /* Determines the type of the data dependence.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = input_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = output_dd;
  else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
    RDGE_TYPE (e) = flow_dd;
  else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
    RDGE_TYPE (e) = anti_dd;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of the statement defining DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
      RDGE_RELATION (e) = NULL;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
          RDGE_RELATION (e) = NULL;
        }
    }
}

/* Creates the edges of the reduced dependence graph RDG.  */

static void
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
{
  int i;
  struct data_dependence_relation *ddr;
  def_operand_p def_p;
  ssa_op_iter iter;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
      create_rdg_edge_for_ddr (rdg, ddr);
    else
      free_dependence_relation (ddr);

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);

  if (cd)
    for (i = 0; i < rdg->n_vertices; i++)
      {
        gimple stmt = RDG_STMT (rdg, i);
        if (gimple_code (stmt) == GIMPLE_PHI)
          {
            edge_iterator ei;
            edge e;
            FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
              create_edge_for_control_dependence (rdg, e->src, i, cd);
          }
        else
          create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
      }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition uses the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
          stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        {
          free_dependence_relation (RDGE_RELATION (e));
          free (e->data);
        }

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<gimple> stmts;
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependence_relations;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stmts.create (10);
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      stmts.release ();
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  /* Create the RDG edges from the data dependences in the loop nest.  */
  dependence_relations.create (100);
  if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
                                false)
      || !known_dependences_p (dependence_relations))
    {
      free_dependence_relations (dependence_relations);
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  create_rdg_edges (rdg, dependence_relations, cd);
  dependence_relations.release ();
  datarefs.release ();

  return rdg;
}


enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  /* Statements of the partition, as a bitmap over RDG vertex indices.  */
  bitmap stmts;
  /* Loops containing the statements, as a bitmap over loop numbers.  */
  bitmap loops;
  /* True if the partition defines scalars that are used after the loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Number of iterations the builtin covers (see classify_partition).  */
  tree niter;
} *partition_t;


/* Allocate and initialize a partition from the bitmaps STMTS and LOOPS.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used
   outside of the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple phi = gsi_stmt (bsi);
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gimple phi = gsi_stmt (bsi);
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gimple_code (stmt) == GIMPLE_COND)
                {
                  gimple_cond_make_false (stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gimple_switch_set_index
                      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}
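
/* As a worked example (assuming a target where 'int' is 4 bytes wide),
   for a store to an 'int' array and NB_ITER == n, the expression built
   above folds to (size_t) (n * 4).  */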

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similar floating point
     constants where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA names
         defined in the loop, with the exception of virtuals.  Make sure
         we replace all uses of virtual defs that will remain outside of
         the loop with the bare symbol as delete_basic_block will release
         them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  vec<int> nodes;
  unsigned i;
  int x;

  nodes.create (3);
  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  nodes.release ();
  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset, memcpy and memmove patterns.  */
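
/* For example (an illustration, not lifted from a testcase), assuming a
   simple counted loop with a trip count known to scalar evolution, the
   stores below would be classified as PKIND_MEMSET and PKIND_MEMCPY
   respectively:

   | for (i = 0; i < n; i++)        | for (i = 0; i < n; i++)
   |   a[i] = 0;                    |   a[i] = b[i];  */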

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark the partition as
         containing a reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                       gimple_bb (DR_STMT (single_store))))
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions contains
     any loads or stores.  Common loads are the most frequent case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
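
/* As an illustration (example only), if the two stores below seed two
   separate partitions, both partitions read from the same base object
   a[], so distributing them would traverse a[] twice:

   | for (i = 0; i < n; i++)
   |   {
   |     b[i] = a[i];
   |     c[i] = a[i] + 1;
   |   }

   The simple cost model in distribute_loop therefore fuses partitions
   for which this predicate returns true.  */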

/* Aggregate several components into useful partitions that are
   registered in the PARTITIONS vector.  Partitions will be
   distributed into different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t>);

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}


/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.  Returns the
   number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  vec<loop_p> loop_nest;
  vec<partition_t> partitions;
  partition_t partition;
  bool any_builtin;
  int i, nbp;

  *nb_calls = 0;
  loop_nest.create (3);
  if (!find_loop_nest (loop, &loop_nest))
    {
      loop_nest.release ();
      return 0;
    }

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      loop_nest.release ();
      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  partitions.create (3);
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we did not detect any builtin but are not asked to apply
     regular loop distribution, simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* Apply our simple cost model: fuse partitions with similar
     memory accesses.  */
  partition_t into;
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.ordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* If we are only distributing patterns, fuse all partitions that
     were not properly classified as builtins.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ???  Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; partitions.iterate (i, &into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; partitions.iterate (i, &partition); ++i)
            if (!partition_builtin_p (partition))
              {
                partition_merge_into (into, partition);
                partitions.ordered_remove (i);
                partition_free (partition);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < partitions.length ());
    }

  /* Fuse all reduction partitions into the last.  */
  if (partitions.length () > 1)
    {
      partition_t into = partitions.last ();
      for (i = partitions.length () - 2; i >= 0; --i)
        {
          partition_t what = partitions[i];
          if (partition_reduction_p (what))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, what->stmts);
                  fprintf (dump_file, "because the latter has reductions\n");
                }
              partition_merge_into (into, what);
              partitions.ordered_remove (i);
              partition_free (what);
            }
        }
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);
  partitions.release ();

  free_rdg (rdg);
  loop_nest.release ();
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so check for that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple phi = gsi_stmt (gsi);
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects, bail out: we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_assign_single_p (stmt)
                       || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);

      work_list.release ();
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}