gcc/tree-loop-distribution.c
1 /* Loop distribution.
2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
29 is transformed to
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, a Reduced Dependence Graph built on top of the
40 data dependence relations.  The RDG is then topologically sorted to
41 obtain a map of information producers/consumers, based on which it
42 generates the new loops.  */
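
/* For instance, with -ftree-loop-distribute-patterns a partition whose
   only effect is an adjacent copy can be replaced by a library call.
   A hedged sketch of the intended effect (not literal pass output):

   |for (i = 0; i < n; i++)
   |  a[i] = b[i];

   becomes

   |__builtin_memcpy (a, b, n * sizeof (a[0]));

   See classify_partition and generate_memcpy_builtin below.  */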
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "backend.h"
48 #include "tree.h"
49 #include "gimple.h"
50 #include "cfghooks.h"
51 #include "tree-pass.h"
52 #include "ssa.h"
53 #include "gimple-pretty-print.h"
54 #include "fold-const.h"
55 #include "cfganal.h"
56 #include "gimple-iterator.h"
57 #include "gimplify-me.h"
58 #include "stor-layout.h"
59 #include "tree-cfg.h"
60 #include "tree-ssa-loop-manip.h"
61 #include "tree-ssa-loop.h"
62 #include "tree-into-ssa.h"
63 #include "tree-ssa.h"
64 #include "cfgloop.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-vectorizer.h"
67
68
69 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
70 struct rdg_vertex
71 {
72 /* The statement represented by this vertex. */
73 gimple *stmt;
74
75 /* Vector of data-references in this statement. */
76 vec<data_reference_p> datarefs;
77
78 /* True when the statement contains a write to memory. */
79 bool has_mem_write;
80
81 /* True when the statement contains a read from memory. */
82 bool has_mem_reads;
83 };
84
85 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
86 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
87 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
88 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
89 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
90 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
91 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
92 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
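
/* A minimal usage sketch for these accessors, assuming RDG is a graph
   built by build_rdg below and I a valid vertex index:

   |gimple *stmt = RDG_STMT (rdg, i);
   |if (RDG_MEM_WRITE_STMT (rdg, i))
   |  handle_store (stmt);

   where handle_store stands for a hypothetical consumer.  */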
93
94 /* Type of a dependence represented by an RDG edge.  */
95
96 enum rdg_dep_type
97 {
98 /* Read After Write (RAW). */
99 flow_dd = 'f',
100
101 /* Control dependence (execute conditional on). */
102 control_dd = 'c'
103 };
104
105 /* Dependence information attached to an edge of the RDG. */
106
107 struct rdg_edge
108 {
109 /* Type of the dependence. */
110 enum rdg_dep_type type;
111 };
112
113 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
114
115 /* Dump vertex I in RDG to FILE. */
116
117 static void
118 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
119 {
120 struct vertex *v = &(rdg->vertices[i]);
121 struct graph_edge *e;
122
123 fprintf (file, "(vertex %d: (%s%s) (in:", i,
124 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
125 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
126
127 if (v->pred)
128 for (e = v->pred; e; e = e->pred_next)
129 fprintf (file, " %d", e->src);
130
131 fprintf (file, ") (out:");
132
133 if (v->succ)
134 for (e = v->succ; e; e = e->succ_next)
135 fprintf (file, " %d", e->dest);
136
137 fprintf (file, ")\n");
138 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
139 fprintf (file, ")\n");
140 }
141
142 /* Call dump_rdg_vertex on stderr. */
143
144 DEBUG_FUNCTION void
145 debug_rdg_vertex (struct graph *rdg, int i)
146 {
147 dump_rdg_vertex (stderr, rdg, i);
148 }
149
150 /* Dump the reduced dependence graph RDG to FILE. */
151
152 static void
153 dump_rdg (FILE *file, struct graph *rdg)
154 {
155 fprintf (file, "(rdg\n");
156 for (int i = 0; i < rdg->n_vertices; i++)
157 dump_rdg_vertex (file, rdg, i);
158 fprintf (file, ")\n");
159 }
160
161 /* Call dump_rdg on stderr. */
162
163 DEBUG_FUNCTION void
164 debug_rdg (struct graph *rdg)
165 {
166 dump_rdg (stderr, rdg);
167 }
168
169 static void
170 dot_rdg_1 (FILE *file, struct graph *rdg)
171 {
172 int i;
173 pretty_printer buffer;
174 pp_needs_newline (&buffer) = false;
175 buffer.buffer->stream = file;
176
177 fprintf (file, "digraph RDG {\n");
178
179 for (i = 0; i < rdg->n_vertices; i++)
180 {
181 struct vertex *v = &(rdg->vertices[i]);
182 struct graph_edge *e;
183
184 fprintf (file, "%d [label=\"[%d] ", i, i);
185 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
186 pp_flush (&buffer);
187 fprintf (file, "\"]\n");
188
189 /* Highlight reads from memory. */
190 if (RDG_MEM_READS_STMT (rdg, i))
191 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
192
193 /* Highlight stores to memory. */
194 if (RDG_MEM_WRITE_STMT (rdg, i))
195 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
196
197 if (v->succ)
198 for (e = v->succ; e; e = e->succ_next)
199 switch (RDGE_TYPE (e))
200 {
201 case flow_dd:
202 /* These are the most common dependences; print them without a label.  */
203 fprintf (file, "%d -> %d \n", i, e->dest);
204 break;
205
206 case control_dd:
207 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
208 break;
209
210 default:
211 gcc_unreachable ();
212 }
213 }
214
215 fprintf (file, "}\n\n");
216 }
217
218 /* Display the Reduced Dependence Graph using dotty. */
219
220 DEBUG_FUNCTION void
221 dot_rdg (struct graph *rdg)
222 {
223 /* When debugging, you may want to enable the following code. */
224 #ifdef HAVE_POPEN
225 FILE *file = popen ("dot -Tx11", "w");
226 if (!file)
227 return;
228 dot_rdg_1 (file, rdg);
229 fflush (file);
230 close (fileno (file));
231 pclose (file);
232 #else
233 dot_rdg_1 (stderr, rdg);
234 #endif
235 }
236
237 /* Returns the index of STMT in RDG. */
238
239 static int
240 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
241 {
242 int index = gimple_uid (stmt);
243 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
244 return index;
245 }
246
247 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
248 the index of DEF in RDG. */
249
250 static void
251 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
252 {
253 use_operand_p imm_use_p;
254 imm_use_iterator iterator;
255
256 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
257 {
258 struct graph_edge *e;
259 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
260
261 if (use < 0)
262 continue;
263
264 e = add_edge (rdg, idef, use);
265 e->data = XNEW (struct rdg_edge);
266 RDGE_TYPE (e) = flow_dd;
267 }
268 }
269
270 /* Creates an edge for the control dependences of BB to the vertex V. */
271
272 static void
273 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
274 int v, control_dependences *cd)
275 {
276 bitmap_iterator bi;
277 unsigned edge_n;
278 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
279 0, edge_n, bi)
280 {
281 basic_block cond_bb = cd->get_edge_src (edge_n);
282 gimple *stmt = last_stmt (cond_bb);
283 if (stmt && is_ctrl_stmt (stmt))
284 {
285 struct graph_edge *e;
286 int c = rdg_vertex_for_stmt (rdg, stmt);
287 if (c < 0)
288 continue;
289
290 e = add_edge (rdg, c, v);
291 e->data = XNEW (struct rdg_edge);
292 RDGE_TYPE (e) = control_dd;
293 }
294 }
295 }
296
297 /* Creates the scalar flow dependence edges of the RDG.  */
298
299 static void
300 create_rdg_flow_edges (struct graph *rdg)
301 {
302 int i;
303 def_operand_p def_p;
304 ssa_op_iter iter;
305
306 for (i = 0; i < rdg->n_vertices; i++)
307 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
308 iter, SSA_OP_DEF)
309 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
310 }
311
312 /* Creates the control dependence edges of the RDG.  */
313
314 static void
315 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
316 {
317 int i;
318
319 for (i = 0; i < rdg->n_vertices; i++)
320 {
321 gimple *stmt = RDG_STMT (rdg, i);
322 if (gimple_code (stmt) == GIMPLE_PHI)
323 {
324 edge_iterator ei;
325 edge e;
326 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
327 if (flow_bb_inside_loop_p (loop, e->src))
328 create_edge_for_control_dependence (rdg, e->src, i, cd);
329 }
330 else
331 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
332 }
333 }
334
335 /* Build the vertices of the reduced dependence graph RDG. Return false
336 if that failed. */
337
338 static bool
339 create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
340 vec<data_reference_p> *datarefs)
341 {
342 int i;
343 gimple *stmt;
344
345 FOR_EACH_VEC_ELT (stmts, i, stmt)
346 {
347 struct vertex *v = &(rdg->vertices[i]);
348
349 /* Record statement to vertex mapping. */
350 gimple_set_uid (stmt, i);
351
352 v->data = XNEW (struct rdg_vertex);
353 RDGV_STMT (v) = stmt;
354 RDGV_DATAREFS (v).create (0);
355 RDGV_HAS_MEM_WRITE (v) = false;
356 RDGV_HAS_MEM_READS (v) = false;
357 if (gimple_code (stmt) == GIMPLE_PHI)
358 continue;
359
360 unsigned drp = datarefs->length ();
361 if (!find_data_references_in_stmt (loop, stmt, datarefs))
362 return false;
363 for (unsigned j = drp; j < datarefs->length (); ++j)
364 {
365 data_reference_p dr = (*datarefs)[j];
366 if (DR_IS_READ (dr))
367 RDGV_HAS_MEM_READS (v) = true;
368 else
369 RDGV_HAS_MEM_WRITE (v) = true;
370 RDGV_DATAREFS (v).safe_push (dr);
371 }
372 }
373 return true;
374 }
375
376 /* Initialize STMTS with all the statements of LOOP.  The order in
377 which we discover statements is important, as
378 generate_loops_for_partition uses the same traversal to
379 identify statements in loop copies.  */
380
381 static void
382 stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
383 {
384 unsigned int i;
385 basic_block *bbs = get_loop_body_in_dom_order (loop);
386
387 for (i = 0; i < loop->num_nodes; i++)
388 {
389 basic_block bb = bbs[i];
390
391 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
392 gsi_next (&bsi))
393 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
394 stmts->safe_push (bsi.phi ());
395
396 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
397 gsi_next (&bsi))
398 {
399 gimple *stmt = gsi_stmt (bsi);
400 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
401 stmts->safe_push (stmt);
402 }
403 }
404
405 free (bbs);
406 }
407
408 /* Free the reduced dependence graph RDG. */
409
410 static void
411 free_rdg (struct graph *rdg)
412 {
413 int i;
414
415 for (i = 0; i < rdg->n_vertices; i++)
416 {
417 struct vertex *v = &(rdg->vertices[i]);
418 struct graph_edge *e;
419
420 for (e = v->succ; e; e = e->succ_next)
421 free (e->data);
422
423 if (v->data)
424 {
425 gimple_set_uid (RDGV_STMT (v), -1);
426 free_data_refs (RDGV_DATAREFS (v));
427 free (v->data);
428 }
429 }
430
431 free_graph (rdg);
432 }
433
434 /* Build the Reduced Dependence Graph (RDG) with one vertex per
435 statement of the loop nest LOOP_NEST, and one edge per data dependence or
436 scalar dependence. */
437
438 static struct graph *
439 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
440 {
441 struct graph *rdg;
442 vec<data_reference_p> datarefs;
443
444 /* Create the RDG vertices from the stmts of the loop nest. */
445 auto_vec<gimple *, 10> stmts;
446 stmts_from_loop (loop_nest[0], &stmts);
447 rdg = new_graph (stmts.length ());
448 datarefs.create (10);
449 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
450 {
451 datarefs.release ();
452 free_rdg (rdg);
453 return NULL;
454 }
455 stmts.release ();
456
457 create_rdg_flow_edges (rdg);
458 if (cd)
459 create_rdg_cd_edges (rdg, cd, loop_nest[0]);
460
461 datarefs.release ();
462
463 return rdg;
464 }
465
466
467
468 enum partition_kind {
469 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
470 };
471
472 struct partition
473 {
474 bitmap stmts;
475 bitmap loops;
476 bool reduction_p;
477 enum partition_kind kind;
478 /* Data-references a kind != PKIND_NORMAL partition is about.  */
479 data_reference_p main_dr;
480 data_reference_p secondary_dr;
481 tree niter;     /* Number of latch executions of the loop.  */
482 bool plus_one;  /* True when the builtin covers niter + 1 elements.  */
483 };
484
485
486 /* Allocate and initialize a partition from the STMTS and LOOPS bitmaps.  */
487
488 static partition *
489 partition_alloc (bitmap stmts, bitmap loops)
490 {
491 partition *partition = XCNEW (struct partition);
492 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
493 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
494 partition->reduction_p = false;
495 partition->kind = PKIND_NORMAL;
496 return partition;
497 }
498
499 /* Free PARTITION. */
500
501 static void
502 partition_free (partition *partition)
503 {
504 BITMAP_FREE (partition->stmts);
505 BITMAP_FREE (partition->loops);
506 free (partition);
507 }
508
509 /* Returns true if the partition can be generated as a builtin. */
510
511 static bool
512 partition_builtin_p (partition *partition)
513 {
514 return partition->kind != PKIND_NORMAL;
515 }
516
517 /* Returns true if the partition contains a reduction. */
518
519 static bool
520 partition_reduction_p (partition *partition)
521 {
522 return partition->reduction_p;
523 }
524
525 /* Merge PARTITION into the partition DEST. */
526
527 static void
528 partition_merge_into (partition *dest, partition *partition)
529 {
530 dest->kind = PKIND_NORMAL;
531 bitmap_ior_into (dest->stmts, partition->stmts);
532 if (partition_reduction_p (partition))
533 dest->reduction_p = true;
534 }
535
536
537 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
538 the LOOP. */
539
540 static bool
541 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
542 {
543 imm_use_iterator imm_iter;
544 use_operand_p use_p;
545
546 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
547 {
548 gimple *use_stmt = USE_STMT (use_p);
549 if (!is_gimple_debug (use_stmt)
550 && loop != loop_containing_stmt (use_stmt))
551 return true;
552 }
553
554 return false;
555 }
556
557 /* Returns true when STMT defines a scalar variable used after the
558 loop LOOP. */
559
560 static bool
561 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
562 {
563 def_operand_p def_p;
564 ssa_op_iter op_iter;
565
566 if (gimple_code (stmt) == GIMPLE_PHI)
567 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
568
569 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
570 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
571 return true;
572
573 return false;
574 }
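
/* Illustration: in the loop below the definition of SUM is used after
   the loop, so stmt_has_scalar_dependences_outside_loop returns true
   for its defining statement (a hedged sketch):

   |for (i = 0; i < n; i++)
   |  sum = sum + a[i];
   |use (sum);

   classify_partition then flags the partition as a reduction, which
   forces it to stay in the last generated loop.  */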
575
576 /* Return a copy of LOOP, placed on its preheader edge before LOOP itself.  */
577
578 static struct loop *
579 copy_loop_before (struct loop *loop)
580 {
581 struct loop *res;
582 edge preheader = loop_preheader_edge (loop);
583
584 initialize_original_copy_tables ();
585 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
586 gcc_assert (res != NULL);
587 free_original_copy_tables ();
588 delete_update_ssa ();
589
590 return res;
591 }
592
593 /* Creates an empty basic block after LOOP. */
594
595 static void
596 create_bb_after_loop (struct loop *loop)
597 {
598 edge exit = single_exit (loop);
599
600 if (!exit)
601 return;
602
603 split_edge (exit);
604 }
605
606 /* Generate code for PARTITION from the code in LOOP. The loop is
607 copied when COPY_P is true. All the statements not flagged in the
608 PARTITION bitmap are removed from the loop or from its copy. The
609 statements are indexed in sequence inside a basic block, and the
610 basic blocks of a loop are taken in dom order. */
611
612 static void
613 generate_loops_for_partition (struct loop *loop, partition *partition,
614 bool copy_p)
615 {
616 unsigned i;
617 basic_block *bbs;
618
619 if (copy_p)
620 {
621 loop = copy_loop_before (loop);
622 gcc_assert (loop != NULL);
623 create_preheader (loop, CP_SIMPLE_PREHEADERS);
624 create_bb_after_loop (loop);
625 }
626
627 /* Remove stmts not in the PARTITION bitmap. */
628 bbs = get_loop_body_in_dom_order (loop);
629
630 if (MAY_HAVE_DEBUG_STMTS)
631 for (i = 0; i < loop->num_nodes; i++)
632 {
633 basic_block bb = bbs[i];
634
635 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
636 gsi_next (&bsi))
637 {
638 gphi *phi = bsi.phi ();
639 if (!virtual_operand_p (gimple_phi_result (phi))
640 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
641 reset_debug_uses (phi);
642 }
643
644 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
645 {
646 gimple *stmt = gsi_stmt (bsi);
647 if (gimple_code (stmt) != GIMPLE_LABEL
648 && !is_gimple_debug (stmt)
649 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
650 reset_debug_uses (stmt);
651 }
652 }
653
654 for (i = 0; i < loop->num_nodes; i++)
655 {
656 basic_block bb = bbs[i];
657
658 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
659 {
660 gphi *phi = bsi.phi ();
661 if (!virtual_operand_p (gimple_phi_result (phi))
662 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
663 remove_phi_node (&bsi, true);
664 else
665 gsi_next (&bsi);
666 }
667
668 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
669 {
670 gimple *stmt = gsi_stmt (bsi);
671 if (gimple_code (stmt) != GIMPLE_LABEL
672 && !is_gimple_debug (stmt)
673 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
674 {
675 /* Choose an arbitrary path through the empty CFG part
676 that this unnecessary control stmt controls. */
677 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
678 {
679 gimple_cond_make_false (cond_stmt);
680 update_stmt (stmt);
681 }
682 else if (gimple_code (stmt) == GIMPLE_SWITCH)
683 {
684 gswitch *switch_stmt = as_a <gswitch *> (stmt);
685 gimple_switch_set_index
686 (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
687 update_stmt (stmt);
688 }
689 else
690 {
691 unlink_stmt_vdef (stmt);
692 gsi_remove (&bsi, true);
693 release_defs (stmt);
694 continue;
695 }
696 }
697 gsi_next (&bsi);
698 }
699 }
700
701 free (bbs);
702 }
703
704 /* Build the size argument for a memory operation call. */
705
706 static tree
707 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
708 bool plus_one)
709 {
710 tree size = fold_convert_loc (loc, sizetype, nb_iter);
711 if (plus_one)
712 size = size_binop (PLUS_EXPR, size, size_one_node);
713 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
714 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
715 size = fold_convert_loc (loc, size_type_node, size);
716 return size;
717 }
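
/* Worked example with assumed values: for a store of 4-byte ints where
   NB_ITER is the latch execution count n - 1 and PLUS_ONE is true, the
   result is (n - 1 + 1) * 4 = 4 * n bytes.  */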
718
719 /* Build an address argument for a memory operation call. */
720
721 static tree
722 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
723 {
724 tree addr_base;
725
726 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
727 addr_base = fold_convert_loc (loc, sizetype, addr_base);
728
729 /* Test for a negative stride, iterating over every element. */
730 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
731 {
732 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
733 fold_convert_loc (loc, sizetype, nb_bytes));
734 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
735 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
736 }
737
738 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
739 }
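
/* Worked example with assumed values: for 4-byte stores with DR_STEP
   -4 walking a[n-1] down to a[0], DR_OFFSET + DR_INIT names the first
   accessed element a[n-1]; with NB_BYTES = 4 * n the adjustment above
   computes 4 * (n - 1) - 4 * n + 4 = 0, i.e. the address of a[0], the
   lowest byte touched by the loop.  */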
740
741 /* If VAL's memory representation consists of the same byte repeated,
742 return that byte value, otherwise return -1.
743 E.g. for 0x24242424 return 0x24, for the IEEE double
744 747708026454360457216.0 return 0x44, etc.  */
745
746 static int
747 const_with_all_bytes_same (tree val)
748 {
749 unsigned char buf[64];
750 int i, len;
751
752 if (integer_zerop (val)
753 || (TREE_CODE (val) == CONSTRUCTOR
754 && !TREE_CLOBBER_P (val)
755 && CONSTRUCTOR_NELTS (val) == 0))
756 return 0;
757
758 if (real_zerop (val))
759 {
760 /* Only return 0 for +0.0, not for -0.0, which does not have
761 an all-bytes-equal memory representation.  Don't transform
762 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
763 switch (TREE_CODE (val))
764 {
765 case REAL_CST:
766 if (!real_isneg (TREE_REAL_CST_PTR (val)))
767 return 0;
768 break;
769 case COMPLEX_CST:
770 if (!const_with_all_bytes_same (TREE_REALPART (val))
771 && !const_with_all_bytes_same (TREE_IMAGPART (val)))
772 return 0;
773 break;
774 case VECTOR_CST:
775 unsigned int j;
776 for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
777 if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
778 break;
779 if (j == VECTOR_CST_NELTS (val))
780 return 0;
781 break;
782 default:
783 break;
784 }
785 }
786
787 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
788 return -1;
789
790 len = native_encode_expr (val, buf, sizeof (buf));
791 if (len == 0)
792 return -1;
793 for (i = 1; i < len; i++)
794 if (buf[i] != buf[0])
795 return -1;
796 return buf[0];
797 }
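
/* E.g. 0x24242424 native-encodes as four 0x24 bytes independently of
   endianness, so 0x24 is returned; 0x12345678 encodes as four distinct
   bytes, so the loop above returns -1.  */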
798
799 /* Generate a call to memset for PARTITION in LOOP. */
800
801 static void
802 generate_memset_builtin (struct loop *loop, partition *partition)
803 {
804 gimple_stmt_iterator gsi;
805 gimple *stmt, *fn_call;
806 tree mem, fn, nb_bytes;
807 location_t loc;
808 tree val;
809
810 stmt = DR_STMT (partition->main_dr);
811 loc = gimple_location (stmt);
812
813 /* The new statements will be placed before LOOP. */
814 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
815
816 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
817 partition->plus_one);
818 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
819 false, GSI_CONTINUE_LINKING);
820 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
821 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
822 false, GSI_CONTINUE_LINKING);
823
824 /* This exactly matches the pattern recognition in classify_partition. */
825 val = gimple_assign_rhs1 (stmt);
826 /* Handle constants like 0x15151515 and similarly
827 floating point constants etc. where all bytes are the same. */
828 int bytev = const_with_all_bytes_same (val);
829 if (bytev != -1)
830 val = build_int_cst (integer_type_node, bytev);
831 else if (TREE_CODE (val) == INTEGER_CST)
832 val = fold_convert (integer_type_node, val);
833 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
834 {
835 tree tem = make_ssa_name (integer_type_node);
836 gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
837 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
838 val = tem;
839 }
840
841 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
842 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
843 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
844
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 {
847 fprintf (dump_file, "generated memset");
848 if (bytev == 0)
849 fprintf (dump_file, " zero\n");
850 else
851 fprintf (dump_file, "\n");
852 }
853 }
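
/* Sketch of the effect (illustrative only), for a char array A:

   |for (i = 0; i < n; i++)
   |  a[i] = c;

   is replaced by a call placed in the loop preheader:

   |__builtin_memset (&a[0], (int) c, n);  */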
854
855 /* Generate a call to memcpy for PARTITION in LOOP. */
856
857 static void
858 generate_memcpy_builtin (struct loop *loop, partition *partition)
859 {
860 gimple_stmt_iterator gsi;
861 gimple *stmt, *fn_call;
862 tree dest, src, fn, nb_bytes;
863 location_t loc;
864 enum built_in_function kind;
865
866 stmt = DR_STMT (partition->main_dr);
867 loc = gimple_location (stmt);
868
869 /* The new statements will be placed before LOOP. */
870 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
871
872 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
873 partition->plus_one);
874 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
875 false, GSI_CONTINUE_LINKING);
876 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
877 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
878 if (partition->kind == PKIND_MEMCPY
879 || ! ptr_derefs_may_alias_p (dest, src))
880 kind = BUILT_IN_MEMCPY;
881 else
882 kind = BUILT_IN_MEMMOVE;
883
884 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
885 false, GSI_CONTINUE_LINKING);
886 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
887 false, GSI_CONTINUE_LINKING);
888 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
889 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
890 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
891
892 if (dump_file && (dump_flags & TDF_DETAILS))
893 {
894 if (kind == BUILT_IN_MEMCPY)
895 fprintf (dump_file, "generated memcpy\n");
896 else
897 fprintf (dump_file, "generated memmove\n");
898 }
899 }
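
/* The memcpy/memmove choice matters for overlapping self-copies.
   A hedged sketch:

   |for (i = 0; i < n; i++)
   |  a[i] = a[i + k];

   Here DEST and SRC point into the same object, so unless the pointers
   provably do not alias, __builtin_memmove is emitted rather than
   __builtin_memcpy.  */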
900
901 /* Remove and destroy the loop LOOP. */
902
903 static void
904 destroy_loop (struct loop *loop)
905 {
906 unsigned nbbs = loop->num_nodes;
907 edge exit = single_exit (loop);
908 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
909 basic_block *bbs;
910 unsigned i;
911
912 bbs = get_loop_body_in_dom_order (loop);
913
914 redirect_edge_pred (exit, src);
915 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
916 exit->flags |= EDGE_FALLTHRU;
917 cancel_loop_tree (loop);
918 rescan_loop_exit (exit, false, true);
919
920 i = nbbs;
921 do
922 {
923 /* We have made sure not to leave any dangling uses of SSA
924 names defined in the loop, with the exception of virtuals.
925 Make sure we replace all uses of virtual defs that will remain
926 outside of the loop with the bare symbol, as delete_basic_block
927 will release them.  */
928 --i;
929 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
930 gsi_next (&gsi))
931 {
932 gphi *phi = gsi.phi ();
933 if (virtual_operand_p (gimple_phi_result (phi)))
934 mark_virtual_phi_result_for_renaming (phi);
935 }
936 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
937 gsi_next (&gsi))
938 {
939 gimple *stmt = gsi_stmt (gsi);
940 tree vdef = gimple_vdef (stmt);
941 if (vdef && TREE_CODE (vdef) == SSA_NAME)
942 mark_virtual_operand_for_renaming (vdef);
943 }
944 delete_basic_block (bbs[i]);
945 }
946 while (i != 0);
947
948 free (bbs);
949
950 set_immediate_dominator (CDI_DOMINATORS, dest,
951 recompute_dominator (CDI_DOMINATORS, dest));
952 }
953
954 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
955
956 static bool
957 generate_code_for_partition (struct loop *loop,
958 partition *partition, bool copy_p)
959 {
960 switch (partition->kind)
961 {
962 case PKIND_NORMAL:
963 /* Reductions all have to be in the last partition. */
964 gcc_assert (!partition_reduction_p (partition)
965 || !copy_p);
966 generate_loops_for_partition (loop, partition, copy_p);
967 return false;
968
969 case PKIND_MEMSET:
970 generate_memset_builtin (loop, partition);
971 break;
972
973 case PKIND_MEMCPY:
974 case PKIND_MEMMOVE:
975 generate_memcpy_builtin (loop, partition);
976 break;
977
978 default:
979 gcc_unreachable ();
980 }
981
982 /* Common tail for partitions we turn into a call. If this was the last
983 partition for which we generate code, we have to destroy the loop. */
984 if (!copy_p)
985 return true;
986 return false;
987 }
988
989
990 /* Returns a partition with all the statements needed for computing
991 the vertex V of the RDG, also including the loop exit conditions. */
992
993 static partition *
994 build_rdg_partition_for_vertex (struct graph *rdg, int v)
995 {
996 partition *partition = partition_alloc (NULL, NULL);
997 auto_vec<int, 3> nodes;
998 unsigned i;
999 int x;
1000
1001 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
1002
1003 FOR_EACH_VEC_ELT (nodes, i, x)
1004 {
1005 bitmap_set_bit (partition->stmts, x);
1006 bitmap_set_bit (partition->loops,
1007 loop_containing_stmt (RDG_STMT (rdg, x))->num);
1008 }
1009
1010 return partition;
1011 }
1012
1013 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1014 For the moment we detect memset, memcpy and memmove patterns.  */
1015
1016 static void
1017 classify_partition (loop_p loop, struct graph *rdg, partition *partition)
1018 {
1019 bitmap_iterator bi;
1020 unsigned i;
1021 tree nb_iter;
1022 data_reference_p single_load, single_store;
1023 bool volatiles_p = false;
1024 bool plus_one = false;
1025
1026 partition->kind = PKIND_NORMAL;
1027 partition->main_dr = NULL;
1028 partition->secondary_dr = NULL;
1029 partition->niter = NULL_TREE;
1030 partition->plus_one = false;
1031
1032 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1033 {
1034 gimple *stmt = RDG_STMT (rdg, i);
1035
1036 if (gimple_has_volatile_ops (stmt))
1037 volatiles_p = true;
1038
1039 /* If the stmt has uses outside of the loop mark the partition as a reduction.  */
1040 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1041 {
1042 partition->reduction_p = true;
1043 return;
1044 }
1045 }
1046
1047 /* Perform general partition disqualification for builtins. */
1048 if (volatiles_p
1049 || !flag_tree_loop_distribute_patterns)
1050 return;
1051
1052 /* Detect memset and memcpy. */
1053 single_load = NULL;
1054 single_store = NULL;
1055 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1056 {
1057 gimple *stmt = RDG_STMT (rdg, i);
1058 data_reference_p dr;
1059 unsigned j;
1060
1061 if (gimple_code (stmt) == GIMPLE_PHI)
1062 continue;
1063
1064 /* Any scalar stmts are ok. */
1065 if (!gimple_vuse (stmt))
1066 continue;
1067
1068 /* Otherwise just regular loads/stores. */
1069 if (!gimple_assign_single_p (stmt))
1070 return;
1071
1072 /* But at most one load and at most one store.  */
1073 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1074 {
1075 if (DR_IS_READ (dr))
1076 {
1077 if (single_load != NULL)
1078 return;
1079 single_load = dr;
1080 }
1081 else
1082 {
1083 if (single_store != NULL)
1084 return;
1085 single_store = dr;
1086 }
1087 }
1088 }
1089
1090 if (!single_store)
1091 return;
1092
1093 nb_iter = number_of_latch_executions (loop);
1094 if (!nb_iter || nb_iter == chrec_dont_know)
1095 return;
1096 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1097 gimple_bb (DR_STMT (single_store))))
1098 plus_one = true;
1099
1100 if (single_store && !single_load)
1101 {
1102 gimple *stmt = DR_STMT (single_store);
1103 tree rhs = gimple_assign_rhs1 (stmt);
1104 if (const_with_all_bytes_same (rhs) == -1
1105 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1106 || (TYPE_MODE (TREE_TYPE (rhs))
1107 != TYPE_MODE (unsigned_char_type_node))))
1108 return;
1109 if (TREE_CODE (rhs) == SSA_NAME
1110 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1111 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1112 return;
1113 if (!adjacent_dr_p (single_store)
1114 || !dominated_by_p (CDI_DOMINATORS,
1115 loop->latch, gimple_bb (stmt)))
1116 return;
1117 partition->kind = PKIND_MEMSET;
1118 partition->main_dr = single_store;
1119 partition->niter = nb_iter;
1120 partition->plus_one = plus_one;
1121 }
1122 else if (single_store && single_load)
1123 {
1124 gimple *store = DR_STMT (single_store);
1125 gimple *load = DR_STMT (single_load);
1126 /* Direct aggregate copy or via an SSA name temporary. */
1127 if (load != store
1128 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1129 return;
1130 if (!adjacent_dr_p (single_store)
1131 || !adjacent_dr_p (single_load)
1132 || !operand_equal_p (DR_STEP (single_store),
1133 DR_STEP (single_load), 0)
1134 || !dominated_by_p (CDI_DOMINATORS,
1135 loop->latch, gimple_bb (store)))
1136 return;
1137 /* Now check that, if there is a dependence, it is of a form
1138 suitable for memmove.  */
1139 vec<loop_p> loops = vNULL;
1140 ddr_p ddr;
1141 loops.safe_push (loop);
1142 ddr = initialize_data_dependence_relation (single_load, single_store,
1143 loops);
1144 compute_affine_dependence (ddr, loop);
1145 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1146 {
1147 free_dependence_relation (ddr);
1148 loops.release ();
1149 return;
1150 }
1151 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1152 {
1153 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1154 {
1155 free_dependence_relation (ddr);
1156 loops.release ();
1157 return;
1158 }
1159 lambda_vector dist_v;
1160 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1161 {
1162 int dist = dist_v[index_in_loop_nest (loop->num,
1163 DDR_LOOP_NEST (ddr))];
1164 if (dist > 0 && !DDR_REVERSED_P (ddr))
1165 {
1166 free_dependence_relation (ddr);
1167 loops.release ();
1168 return;
1169 }
1170 }
1171 partition->kind = PKIND_MEMMOVE;
1172 }
1173 else
1174 partition->kind = PKIND_MEMCPY;
1175 free_dependence_relation (ddr);
1176 loops.release ();
1177 partition->main_dr = single_store;
1178 partition->secondary_dr = single_load;
1179 partition->niter = nb_iter;
1180 partition->plus_one = plus_one;
1181 }
1182 }
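
/* Illustrative classifications (a sketch; the outcome also depends on
   the niter analysis and adjacency checks above):

   |for (i = 0; i < n; i++)   single store of an invariant value:
   |  a[i] = 0;               PKIND_MEMSET

   |for (i = 0; i < n; i++)   single load/store pair, no dependence:
   |  a[i] = b[i];            PKIND_MEMCPY

   |for (i = 0; i < n; i++)   known dependence of a suitable form:
   |  a[i] = a[i + 3];        PKIND_MEMMOVE  */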
1183
1184 /* For a data reference DR, return the declaration of its base
1185 address or NULL_TREE if the base is not determined.  */
1186
1187 static tree
1188 ref_base_address (data_reference_p dr)
1189 {
1190 tree base_address = DR_BASE_ADDRESS (dr);
1191 if (base_address
1192 && TREE_CODE (base_address) == ADDR_EXPR)
1193 return TREE_OPERAND (base_address, 0);
1194
1195 return base_address;
1196 }
1197
1198 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1199 accesses in RDG. */
1200
1201 static bool
1202 similar_memory_accesses (struct graph *rdg, partition *partition1,
1203 partition *partition2)
1204 {
1205 unsigned i, j, k, l;
1206 bitmap_iterator bi, bj;
1207 data_reference_p ref1, ref2;
1208
1209 /* First check whether the intersection of the two partitions contains
1210 any loads or stores.  Shared loads are the situation that happens
1211 most often.  */
1212 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1213 if (RDG_MEM_WRITE_STMT (rdg, i)
1214 || RDG_MEM_READS_STMT (rdg, i))
1215 return true;
1216
1217 /* Then check all data-references against each other. */
1218 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1219 if (RDG_MEM_WRITE_STMT (rdg, i)
1220 || RDG_MEM_READS_STMT (rdg, i))
1221 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1222 if (RDG_MEM_WRITE_STMT (rdg, j)
1223 || RDG_MEM_READS_STMT (rdg, j))
1224 {
1225 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1226 {
1227 tree base1 = ref_base_address (ref1);
1228 if (base1)
1229 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1230 if (base1 == ref_base_address (ref2))
1231 return true;
1232 }
1233 }
1234
1235 return false;
1236 }
1237
1238 /* Aggregate several components into a useful partition that is
1239 registered in the PARTITIONS vector. Partitions will be
1240 distributed in different loops. */
1241
1242 static void
1243 rdg_build_partitions (struct graph *rdg,
1244 vec<gimple *> starting_stmts,
1245 vec<partition *> *partitions)
1246 {
1247 bitmap processed = BITMAP_ALLOC (NULL);
1248 int i;
1249 gimple *stmt;
1250
1251 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1252 {
1253 int v = rdg_vertex_for_stmt (rdg, stmt);
1254
1255 if (dump_file && (dump_flags & TDF_DETAILS))
1256 fprintf (dump_file,
1257 "ldist asked to generate code for vertex %d\n", v);
1258
1259 /* If the vertex is already contained in another partition, so
1260 is the partition rooted at it.  */
1261 if (bitmap_bit_p (processed, v))
1262 continue;
1263
1264 partition *partition = build_rdg_partition_for_vertex (rdg, v);
1265 bitmap_ior_into (processed, partition->stmts);
1266
1267 if (dump_file && (dump_flags & TDF_DETAILS))
1268 {
1269 fprintf (dump_file, "ldist useful partition:\n");
1270 dump_bitmap (dump_file, partition->stmts);
1271 }
1272
1273 partitions->safe_push (partition);
1274 }
1275
1276 /* All vertices should have been assigned to at least one partition now,
1277 other than vertices belonging to dead code. */
1278
1279 BITMAP_FREE (processed);
1280 }
1281
1282 /* Dump to FILE the PARTITIONS. */
1283
1284 static void
1285 dump_rdg_partitions (FILE *file, vec<partition *> partitions)
1286 {
1287 int i;
1288 partition *partition;
1289
1290 FOR_EACH_VEC_ELT (partitions, i, partition)
1291 debug_bitmap_file (file, partition->stmts);
1292 }
1293
1294 /* Debug PARTITIONS. */
1295 extern void debug_rdg_partitions (vec<partition *> );
1296
1297 DEBUG_FUNCTION void
1298 debug_rdg_partitions (vec<partition *> partitions)
1299 {
1300 dump_rdg_partitions (stderr, partitions);
1301 }
1302
1303 /* Returns the number of read and write operations in the RDG. */
1304
1305 static int
1306 number_of_rw_in_rdg (struct graph *rdg)
1307 {
1308 int i, res = 0;
1309
1310 for (i = 0; i < rdg->n_vertices; i++)
1311 {
1312 if (RDG_MEM_WRITE_STMT (rdg, i))
1313 ++res;
1314
1315 if (RDG_MEM_READS_STMT (rdg, i))
1316 ++res;
1317 }
1318
1319 return res;
1320 }
1321
1322 /* Returns the number of read and write operations in a PARTITION of
1323 the RDG. */
1324
1325 static int
1326 number_of_rw_in_partition (struct graph *rdg, partition *partition)
1327 {
1328 int res = 0;
1329 unsigned i;
1330 bitmap_iterator ii;
1331
1332 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1333 {
1334 if (RDG_MEM_WRITE_STMT (rdg, i))
1335 ++res;
1336
1337 if (RDG_MEM_READS_STMT (rdg, i))
1338 ++res;
1339 }
1340
1341 return res;
1342 }
1343
1344 /* Returns true when one of the PARTITIONS contains all the read or
1345 write operations of RDG. */
1346
1347 static bool
1348 partition_contains_all_rw (struct graph *rdg,
1349 vec<partition *> partitions)
1350 {
1351 int i;
1352 partition *partition;
1353 int nrw = number_of_rw_in_rdg (rdg);
1354
1355 FOR_EACH_VEC_ELT (partitions, i, partition)
1356 if (nrw == number_of_rw_in_partition (rdg, partition))
1357 return true;
1358
1359 return false;
1360 }
1361
1362 /* Compute the partition dependence created by the data references in
1363 DRS1 and DRS2, update DIR accordingly and return it.  */
1364
1365 static int
1366 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1367 vec<data_reference_p> drs1,
1368 vec<data_reference_p> drs2)
1369 {
1370 data_reference_p dr1, dr2;
1371
1372 /* Dependence direction: 0 is no dependence, -1 is backward,
1373 1 is forward, 2 is both (we can stop then, merging will occur).  */
1374 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1375 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1376 {
1377 data_reference_p saved_dr1 = dr1;
1378 int this_dir = 1;
1379 ddr_p ddr;
1380 /* Re-shuffle data-refs to be in dominator order. */
1381 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1382 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1383 {
1384 std::swap (dr1, dr2);
1385 this_dir = -this_dir;
1386 }
1387 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1388 compute_affine_dependence (ddr, loops[0]);
1389 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1390 this_dir = 2;
1391 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1392 {
1393 if (DDR_REVERSED_P (ddr))
1394 {
1395 std::swap (dr1, dr2);
1396 this_dir = -this_dir;
1397 }
1398 /* Known dependences can still be unordered throughout the
1399 iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
1400 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1401 this_dir = 2;
1402 /* If the overlap is exact preserve stmt order. */
1403 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1404 ;
1405 else
1406 {
1407 /* Otherwise, as the distance vector is lexicographically
1408 positive, swap the dependence direction.  */
1409 this_dir = -this_dir;
1410 }
1411 }
1412 else
1413 this_dir = 0;
1414 free_dependence_relation (ddr);
1415 if (this_dir == 2)
1416 return 2;
1417 else if (dir == 0)
1418 dir = this_dir;
1419 else if (this_dir != 0 && dir != this_dir)
1420 return 2;
1421 /* Shuffle "back" dr1. */
1422 dr1 = saved_dr1;
1423 }
1424 return dir;
1425 }
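
/* Worked example of how directions combine (a hedged sketch): if one
   dependence requires partition 1 to execute before partition 2, the
   pair contributes 1; if another dependence requires the opposite
   order, it contributes -1; the combination is then 2, the caller adds
   edges both ways, and the two partitions fall into one SCC and are
   fused.  */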
1426
1427 /* Compare postorder number of the partition graph vertices V1 and V2. */
1428
1429 static int
1430 pgcmp (const void *v1_, const void *v2_)
1431 {
1432 const vertex *v1 = (const vertex *)v1_;
1433 const vertex *v2 = (const vertex *)v2_;
1434 return v2->post - v1->post;
1435 }
1436
1437 /* Distributes the code from LOOP in such a way that producer
1438 statements are placed before consumer statements. Tries to separate
1439 only the statements from STMTS into separate loops.
1440 Returns the number of distributed loops. Set *DESTROY_P to whether
1441 LOOP needs to be destroyed. */
1442
1443 static int
1444 distribute_loop (struct loop *loop, vec<gimple *> stmts,
1445 control_dependences *cd, int *nb_calls, bool *destroy_p)
1446 {
1447 struct graph *rdg;
1448 partition *partition;
1449 bool any_builtin;
1450 int i, nbp;
1451 graph *pg = NULL;
1452 int num_sccs = 1;
1453
1454 *destroy_p = false;
1455 *nb_calls = 0;
1456 auto_vec<loop_p, 3> loop_nest;
1457 if (!find_loop_nest (loop, &loop_nest))
1458 return 0;
1459
1460 rdg = build_rdg (loop_nest, cd);
1461 if (!rdg)
1462 {
1463 if (dump_file && (dump_flags & TDF_DETAILS))
1464 fprintf (dump_file,
1465 "Loop %d not distributed: failed to build the RDG.\n",
1466 loop->num);
1467
1468 return 0;
1469 }
1470
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1472 dump_rdg (dump_file, rdg);
1473
1474 auto_vec<struct partition *, 3> partitions;
1475 rdg_build_partitions (rdg, stmts, &partitions);
1476
1477 any_builtin = false;
1478 FOR_EACH_VEC_ELT (partitions, i, partition)
1479 {
1480 classify_partition (loop, rdg, partition);
1481 any_builtin |= partition_builtin_p (partition);
1482 }
1483
1484 /* If we are only distributing patterns but did not detect any,
1485 simply bail out. */
1486 if (!flag_tree_loop_distribution
1487 && !any_builtin)
1488 {
1489 nbp = 0;
1490 goto ldist_done;
1491 }
1492
1493 /* If we are only distributing patterns fuse all partitions that
1494 were not classified as builtins. This also avoids chopping
1495 a loop into pieces, separated by builtin calls. That is, we
1496 only want no or a single loop body remaining. */
1497 struct partition *into;
1498 if (!flag_tree_loop_distribution)
1499 {
1500 for (i = 0; partitions.iterate (i, &into); ++i)
1501 if (!partition_builtin_p (into))
1502 break;
1503 for (++i; partitions.iterate (i, &partition); ++i)
1504 if (!partition_builtin_p (partition))
1505 {
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1507 {
1508 fprintf (dump_file, "fusing non-builtin partitions\n");
1509 dump_bitmap (dump_file, into->stmts);
1510 dump_bitmap (dump_file, partition->stmts);
1511 }
1512 partition_merge_into (into, partition);
1513 partitions.unordered_remove (i);
1514 partition_free (partition);
1515 i--;
1516 }
1517 }
1518
1519 /* Due to limitations in the transform phase we have to fuse all
1520 reduction partitions into the last partition so the existing
1521 loop will contain all loop-closed PHI nodes. */
1522 for (i = 0; partitions.iterate (i, &into); ++i)
1523 if (partition_reduction_p (into))
1524 break;
1525 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1526 if (partition_reduction_p (partition))
1527 {
1528 if (dump_file && (dump_flags & TDF_DETAILS))
1529 {
1530 fprintf (dump_file, "fusing partitions\n");
1531 dump_bitmap (dump_file, into->stmts);
1532 dump_bitmap (dump_file, partition->stmts);
1533 fprintf (dump_file, "because they have reductions\n");
1534 }
1535 partition_merge_into (into, partition);
1536 partitions.unordered_remove (i);
1537 partition_free (partition);
1538 i--;
1539 }
1540
1541 /* Apply our simple cost model - fuse partitions with similar
1542 memory accesses. */
1543 for (i = 0; partitions.iterate (i, &into); ++i)
1544 {
1545 bool changed = false;
1546 if (partition_builtin_p (into))
1547 continue;
1548 for (int j = i + 1;
1549 partitions.iterate (j, &partition); ++j)
1550 {
1551 if (similar_memory_accesses (rdg, into, partition))
1552 {
1553 if (dump_file && (dump_flags & TDF_DETAILS))
1554 {
1555 fprintf (dump_file, "fusing partitions\n");
1556 dump_bitmap (dump_file, into->stmts);
1557 dump_bitmap (dump_file, partition->stmts);
1558 fprintf (dump_file, "because they have similar "
1559 "memory accesses\n");
1560 }
1561 partition_merge_into (into, partition);
1562 partitions.unordered_remove (j);
1563 partition_free (partition);
1564 j--;
1565 changed = true;
1566 }
1567 }
1568 /* Suppose 0 and 2 have similar accesses and were fused into 0, while
1569 1 and 2 are similar but 0 and 1 are not.  The following iterations
1570 would then fail to consider merging 1 into the fused 0,2 partition.
1571 So try the current partition again if we merged anything into it.  */
1572 if (changed)
1573 i--;
1574 }
1575
1576 /* Build the partition dependency graph. */
1577 if (partitions.length () > 1)
1578 {
1579 pg = new_graph (partitions.length ());
1580 struct pgdata {
1581 struct partition *partition;
1582 vec<data_reference_p> writes;
1583 vec<data_reference_p> reads;
1584 };
1585 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1586 for (i = 0; partitions.iterate (i, &partition); ++i)
1587 {
1588 vertex *v = &pg->vertices[i];
1589 pgdata *data = new pgdata;
1590 data_reference_p dr;
1591 /* FIXME - leaks. */
1592 v->data = data;
1593 bitmap_iterator bi;
1594 unsigned j;
1595 data->partition = partition;
1596 data->reads = vNULL;
1597 data->writes = vNULL;
1598 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1599 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1600 if (DR_IS_READ (dr))
1601 data->reads.safe_push (dr);
1602 else
1603 data->writes.safe_push (dr);
1604 }
1605 struct partition *partition1, *partition2;
1606 for (i = 0; partitions.iterate (i, &partition1); ++i)
1607 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1608 {
1609 /* Dependence direction: 0 is no dependence, -1 is backward,
1610 1 is forward, 2 is both (we can stop then, merging will occur).  */
1611 int dir = 0;
1612 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1613 PGDATA(i)->writes,
1614 PGDATA(j)->reads);
1615 if (dir != 2)
1616 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1617 PGDATA(i)->reads,
1618 PGDATA(j)->writes);
1619 if (dir != 2)
1620 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1621 PGDATA(i)->writes,
1622 PGDATA(j)->writes);
1623 if (dir == 1 || dir == 2)
1624 add_edge (pg, i, j);
1625 if (dir == -1 || dir == 2)
1626 add_edge (pg, j, i);
1627 }
1628
1629 /* Add edges to the reduction partition (if any) to force it last. */
1630 unsigned j;
1631 for (j = 0; partitions.iterate (j, &partition); ++j)
1632 if (partition_reduction_p (partition))
1633 break;
1634 if (j < partitions.length ())
1635 {
1636 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1637 if (i != j)
1638 add_edge (pg, i, j);
1639 }
1640
1641 /* Compute partitions we cannot separate and fuse them. */
1642 num_sccs = graphds_scc (pg, NULL);
1643 for (i = 0; i < num_sccs; ++i)
1644 {
1645 struct partition *first;
1646 int j;
1647 for (j = 0; partitions.iterate (j, &first); ++j)
1648 if (pg->vertices[j].component == i)
1649 break;
1650 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1651 if (pg->vertices[j].component == i)
1652 {
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1654 {
1655 fprintf (dump_file, "fusing partitions\n");
1656 dump_bitmap (dump_file, first->stmts);
1657 dump_bitmap (dump_file, partition->stmts);
1658 fprintf (dump_file, "because they are in the same "
1659 "dependence SCC\n");
1660 }
1661 partition_merge_into (first, partition);
1662 partitions[j] = NULL;
1663 partition_free (partition);
1664 PGDATA (j)->partition = NULL;
1665 }
1666 }
1667
1668 /* Now order the remaining nodes in postorder. */
1669 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1670 partitions.truncate (0);
1671 for (i = 0; i < pg->n_vertices; ++i)
1672 {
1673 pgdata *data = PGDATA (i);
1674 if (data->partition)
1675 partitions.safe_push (data->partition);
1676 data->reads.release ();
1677 data->writes.release ();
1678 delete data;
1679 }
1680 gcc_assert (partitions.length () == (unsigned)num_sccs);
1681 free_graph (pg);
1682 }
1683
1684 nbp = partitions.length ();
1685 if (nbp == 0
1686 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1687 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1688 {
1689 nbp = 0;
1690 goto ldist_done;
1691 }
1692
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 dump_rdg_partitions (dump_file, partitions);
1695
1696 FOR_EACH_VEC_ELT (partitions, i, partition)
1697 {
1698 if (partition_builtin_p (partition))
1699 (*nb_calls)++;
1700 *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
1701 }
1702
1703 ldist_done:
1704
1705 FOR_EACH_VEC_ELT (partitions, i, partition)
1706 partition_free (partition);
1707
1708 free_rdg (rdg);
1709 return nbp - *nb_calls;
1710 }
1711
1712 /* Distribute all loops in the current function. */
1713
1714 namespace {
1715
1716 const pass_data pass_data_loop_distribution =
1717 {
1718 GIMPLE_PASS, /* type */
1719 "ldist", /* name */
1720 OPTGROUP_LOOP, /* optinfo_flags */
1721 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1722 ( PROP_cfg | PROP_ssa ), /* properties_required */
1723 0, /* properties_provided */
1724 0, /* properties_destroyed */
1725 0, /* todo_flags_start */
1726 0, /* todo_flags_finish */
1727 };
1728
1729 class pass_loop_distribution : public gimple_opt_pass
1730 {
1731 public:
1732 pass_loop_distribution (gcc::context *ctxt)
1733 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1734 {}
1735
1736 /* opt_pass methods: */
1737 virtual bool gate (function *)
1738 {
1739 return flag_tree_loop_distribution
1740 || flag_tree_loop_distribute_patterns;
1741 }
1742
1743 virtual unsigned int execute (function *);
1744
1745 }; // class pass_loop_distribution
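
/* Usage sketch: the pass is gated on -ftree-loop-distribution (full
   distribution) or -ftree-loop-distribute-patterns (builtin patterns
   only; the latter is enabled by default at -O3).  For example:

   |gcc -O2 -ftree-loop-distribution -fdump-tree-ldist-details t.c

   dumps the partitioning and fusing decisions made above.  */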
1746
1747 unsigned int
1748 pass_loop_distribution::execute (function *fun)
1749 {
1750 struct loop *loop;
1751 bool changed = false;
1752 basic_block bb;
1753 control_dependences *cd = NULL;
1754 auto_vec<loop_p> loops_to_be_destroyed;
1755
1756 FOR_ALL_BB_FN (bb, fun)
1757 {
1758 gimple_stmt_iterator gsi;
1759 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1760 gimple_set_uid (gsi_stmt (gsi), -1);
1761 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1762 gimple_set_uid (gsi_stmt (gsi), -1);
1763 }
1764
1765 /* We can at the moment only distribute non-nested loops, thus restrict
1766 walking to innermost loops. */
1767 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1768 {
1769 auto_vec<gimple *> work_list;
1770 basic_block *bbs;
1771 int num = loop->num;
1772 unsigned int i;
1773
1774 /* If the loop doesn't have a single exit we will fail anyway,
1775 so bail out early.  */
1776 if (!single_exit (loop))
1777 continue;
1778
1779 /* Only optimize hot loops. */
1780 if (!optimize_loop_for_speed_p (loop))
1781 continue;
1782
1783 /* Initialize the worklist with stmts we seed the partitions with. */
1784 bbs = get_loop_body_in_dom_order (loop);
1785 for (i = 0; i < loop->num_nodes; ++i)
1786 {
1787 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
1788 !gsi_end_p (gsi);
1789 gsi_next (&gsi))
1790 {
1791 gphi *phi = gsi.phi ();
1792 if (virtual_operand_p (gimple_phi_result (phi)))
1793 continue;
1794 /* Distribute stmts which have defs that are used outside of
1795 the loop. */
1796 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1797 continue;
1798 work_list.safe_push (phi);
1799 }
1800 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1801 !gsi_end_p (gsi);
1802 gsi_next (&gsi))
1803 {
1804 gimple *stmt = gsi_stmt (gsi);
1805
1806 /* If there is a stmt with side-effects bail out - we
1807 cannot and should not distribute this loop. */
1808 if (gimple_has_side_effects (stmt))
1809 {
1810 work_list.truncate (0);
1811 goto out;
1812 }
1813
1814 /* Distribute stmts which have defs that are used outside of
1815 the loop. */
1816 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1817 ;
1818 /* Otherwise only distribute stores for now. */
1819 else if (!gimple_vdef (stmt))
1820 continue;
1821
1822 work_list.safe_push (stmt);
1823 }
1824 }
1825 out:
1826 free (bbs);
1827
1828 int nb_generated_loops = 0;
1829 int nb_generated_calls = 0;
1830 location_t loc = find_loop_location (loop);
1831 if (work_list.length () > 0)
1832 {
1833 if (!cd)
1834 {
1835 calculate_dominance_info (CDI_DOMINATORS);
1836 calculate_dominance_info (CDI_POST_DOMINATORS);
1837 cd = new control_dependences ();
1838 free_dominance_info (CDI_POST_DOMINATORS);
1839 }
1840 bool destroy_p;
1841 nb_generated_loops = distribute_loop (loop, work_list, cd,
1842 &nb_generated_calls,
1843 &destroy_p);
1844 if (destroy_p)
1845 loops_to_be_destroyed.safe_push (loop);
1846 }
1847
1848 if (nb_generated_loops + nb_generated_calls > 0)
1849 {
1850 changed = true;
1851 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1852 loc, "Loop %d distributed: split to %d loops "
1853 "and %d library calls.\n",
1854 num, nb_generated_loops, nb_generated_calls);
1855 }
1856 else if (dump_file && (dump_flags & TDF_DETAILS))
1857 fprintf (dump_file, "Loop %d is the same.\n", num);
1858 }
1859
1860 if (cd)
1861 delete cd;
1862
1863 if (changed)
1864 {
1865 /* Destroy loop bodies that could not be reused. Do this late as we
1866 otherwise can end up referring to stale data in control dependences.  */
1867 unsigned i;
1868 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
1869 destroy_loop (loop);
1870
1871 /* Cached scalar evolutions now may refer to wrong or non-existing
1872 loops. */
1873 scev_reset_htab ();
1874 mark_virtual_operands_for_renaming (fun);
1875 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1876 }
1877
1878 checking_verify_loop_structure ();
1879
1880 return 0;
1881 }
1882
1883 } // anon namespace
1884
1885 gimple_opt_pass *
1886 make_pass_loop_distribution (gcc::context *ctxt)
1887 {
1888 return new pass_loop_distribution (ctxt);
1889 }