/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, a Reduced Dependence Graph built on top of
   the data dependence relations.  The RDG is then topologically sorted
   to obtain a map of information producers/consumers based on which it
   generates the new loops.  */
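
/* As a concrete C-level example of the pattern path: with
   -ftree-loop-distribute-patterns, a loop like

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   is classified by classify_partition as PKIND_MEMSET and replaced by a
   single call, conceptually

   |__builtin_memset (a, 0, n * sizeof (a[0]));

   (a sketch of the generated call, not the exact GIMPLE emitted).  */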

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alias.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "hard-reg-set.h"
#include "ssa.h"
#include "options.h"
#include "fold-const.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (the target statement of the edge executes
     conditionally on its source statement).  */
  control_dd = 'c'
};
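
/* For instance, in

   |  x_1 = a[i];
   |  if (x_1 > 0)
   |    b[i] = x_1;

   there are flow_dd edges from the load of a[i] to the condition and to
   the store (both use x_1), and a control_dd edge from the GIMPLE_COND
   to the store it guards (an illustrative sketch, not dumped output).  */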

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: print them without
                 a label.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
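
/* A typical use is from the debugger while stopped inside this pass,
   e.g. "(gdb) call dot_rdg (rdg)"; this assumes graphviz's dot and an
   X11 display are available.  */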

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data references that a partition of kind != PKIND_NORMAL operates on.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt,
                     CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
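
/* For instance, for a store of a 4-byte element in a loop whose latch
   executes N - 1 times and whose store also executes on the exit
   iteration (PLUS_ONE set), this computes (N - 1 + 1) * 4 bytes; a
   worked reading of the folds above.  */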

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for the IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect the memset and memcpy patterns.  */
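
/* For instance,

   |for (i = 0; i < n; ++i)
   |  a[i] = c;

   can be classified as PKIND_MEMSET when the bytes of C's in-memory
   representation are all equal, and

   |for (i = 0; i < n; ++i)
   |  a[i] = b[i];

   as PKIND_MEMCPY; illustrative shapes of what the checks below accept,
   not an exhaustive description.  */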

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop, mark it as a
         reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that, if there is a dependence, this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions contains
     any loads or stores.  Common loads are the most frequent case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition,
         then so is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* Dependence direction: 0 is no dependence, -1 is backward,
     1 is forward, 2 is both (we can stop then, merging will occur).  */
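  /* For instance, an exact overlap (a zero distance vector) preserves
     the statement order, so THIS_DIR keeps the value set by the
     dominator-order shuffle below, while a positive distance flips it;
     an informal reading of the classification that follows.  */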
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            std::swap (dr1, dr2);
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                std::swap (dr1, dr2);
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Otherwise, as the distance vector is lexicographically
                   positive, swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* Dependence direction: 0 is no dependence, -1 is backward,
               1 is forward, 2 is both (we can stop then, merging will
               occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}