/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
46 #include "coretypes.h"
51 #include "hard-reg-set.h"
54 #include "fold-const.h"
56 #include "internal-fn.h"
57 #include "gimple-iterator.h"
58 #include "gimplify-me.h"
59 #include "stor-layout.h"
61 #include "tree-ssa-loop-manip.h"
62 #include "tree-ssa-loop.h"
63 #include "tree-into-ssa.h"
66 #include "tree-chrec.h"
67 #include "tree-data-ref.h"
68 #include "tree-scalar-evolution.h"
69 #include "tree-pass.h"
70 #include "gimple-pretty-print.h"
71 #include "tree-vectorizer.h"
74 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
75 typedef struct rdg_vertex
77 /* The statement represented by this vertex. */
80 /* Vector of data-references in this statement. */
81 vec
<data_reference_p
> datarefs
;
83 /* True when the statement contains a write to memory. */
86 /* True when the statement contains a read from memory. */
90 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
91 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
92 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
93 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
94 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
95 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
96 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
97 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
120 /* Dump vertex I in RDG to FILE. */
123 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
125 struct vertex
*v
= &(rdg
->vertices
[i
]);
126 struct graph_edge
*e
;
128 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
129 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
130 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
133 for (e
= v
->pred
; e
; e
= e
->pred_next
)
134 fprintf (file
, " %d", e
->src
);
136 fprintf (file
, ") (out:");
139 for (e
= v
->succ
; e
; e
= e
->succ_next
)
140 fprintf (file
, " %d", e
->dest
);
142 fprintf (file
, ")\n");
143 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
144 fprintf (file
, ")\n");
147 /* Call dump_rdg_vertex on stderr. */
150 debug_rdg_vertex (struct graph
*rdg
, int i
)
152 dump_rdg_vertex (stderr
, rdg
, i
);
155 /* Dump the reduced dependence graph RDG to FILE. */
158 dump_rdg (FILE *file
, struct graph
*rdg
)
160 fprintf (file
, "(rdg\n");
161 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
162 dump_rdg_vertex (file
, rdg
, i
);
163 fprintf (file
, ")\n");
166 /* Call dump_rdg on stderr. */
169 debug_rdg (struct graph
*rdg
)
171 dump_rdg (stderr
, rdg
);
175 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
178 pretty_printer buffer
;
179 pp_needs_newline (&buffer
) = false;
180 buffer
.buffer
->stream
= file
;
182 fprintf (file
, "digraph RDG {\n");
184 for (i
= 0; i
< rdg
->n_vertices
; i
++)
186 struct vertex
*v
= &(rdg
->vertices
[i
]);
187 struct graph_edge
*e
;
189 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
190 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
192 fprintf (file
, "\"]\n");
194 /* Highlight reads from memory. */
195 if (RDG_MEM_READS_STMT (rdg
, i
))
196 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
198 /* Highlight stores to memory. */
199 if (RDG_MEM_WRITE_STMT (rdg
, i
))
200 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
203 for (e
= v
->succ
; e
; e
= e
->succ_next
)
204 switch (RDGE_TYPE (e
))
207 /* These are the most common dependences: don't print these. */
208 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
212 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
220 fprintf (file
, "}\n\n");
223 /* Display the Reduced Dependence Graph using dotty. */
226 dot_rdg (struct graph
*rdg
)
228 /* When debugging, you may want to enable the following code. */
230 FILE *file
= popen ("dot -Tx11", "w");
233 dot_rdg_1 (file
, rdg
);
235 close (fileno (file
));
238 dot_rdg_1 (stderr
, rdg
);
242 /* Returns the index of STMT in RDG. */
245 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple stmt
)
247 int index
= gimple_uid (stmt
);
248 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
252 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
253 the index of DEF in RDG. */
256 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
258 use_operand_p imm_use_p
;
259 imm_use_iterator iterator
;
261 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
263 struct graph_edge
*e
;
264 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
269 e
= add_edge (rdg
, idef
, use
);
270 e
->data
= XNEW (struct rdg_edge
);
271 RDGE_TYPE (e
) = flow_dd
;
275 /* Creates an edge for the control dependences of BB to the vertex V. */
278 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
279 int v
, control_dependences
*cd
)
283 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
286 basic_block cond_bb
= cd
->get_edge (edge_n
)->src
;
287 gimple stmt
= last_stmt (cond_bb
);
288 if (stmt
&& is_ctrl_stmt (stmt
))
290 struct graph_edge
*e
;
291 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
295 e
= add_edge (rdg
, c
, v
);
296 e
->data
= XNEW (struct rdg_edge
);
297 RDGE_TYPE (e
) = control_dd
;
302 /* Creates the edges of the reduced dependence graph RDG. */
305 create_rdg_flow_edges (struct graph
*rdg
)
311 for (i
= 0; i
< rdg
->n_vertices
; i
++)
312 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
314 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
317 /* Creates the edges of the reduced dependence graph RDG. */
320 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
)
324 for (i
= 0; i
< rdg
->n_vertices
; i
++)
326 gimple stmt
= RDG_STMT (rdg
, i
);
327 if (gimple_code (stmt
) == GIMPLE_PHI
)
331 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
332 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
335 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
339 /* Build the vertices of the reduced dependence graph RDG. Return false
343 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
> stmts
, loop_p loop
,
344 vec
<data_reference_p
> *datarefs
)
349 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
351 struct vertex
*v
= &(rdg
->vertices
[i
]);
353 /* Record statement to vertex mapping. */
354 gimple_set_uid (stmt
, i
);
356 v
->data
= XNEW (struct rdg_vertex
);
357 RDGV_STMT (v
) = stmt
;
358 RDGV_DATAREFS (v
).create (0);
359 RDGV_HAS_MEM_WRITE (v
) = false;
360 RDGV_HAS_MEM_READS (v
) = false;
361 if (gimple_code (stmt
) == GIMPLE_PHI
)
364 unsigned drp
= datarefs
->length ();
365 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
367 for (unsigned j
= drp
; j
< datarefs
->length (); ++j
)
369 data_reference_p dr
= (*datarefs
)[j
];
371 RDGV_HAS_MEM_READS (v
) = true;
373 RDGV_HAS_MEM_WRITE (v
) = true;
374 RDGV_DATAREFS (v
).safe_push (dr
);
380 /* Initialize STMTS with all the statements of LOOP. The order in
381 which we discover statements is important as
382 generate_loops_for_partition is using the same traversal for
383 identifying statements in loop copies. */
386 stmts_from_loop (struct loop
*loop
, vec
<gimple
> *stmts
)
389 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
391 for (i
= 0; i
< loop
->num_nodes
; i
++)
393 basic_block bb
= bbs
[i
];
395 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
397 if (!virtual_operand_p (gimple_phi_result (bsi
.phi ())))
398 stmts
->safe_push (bsi
.phi ());
400 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
403 gimple stmt
= gsi_stmt (bsi
);
404 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
405 stmts
->safe_push (stmt
);
412 /* Free the reduced dependence graph RDG. */
415 free_rdg (struct graph
*rdg
)
419 for (i
= 0; i
< rdg
->n_vertices
; i
++)
421 struct vertex
*v
= &(rdg
->vertices
[i
]);
422 struct graph_edge
*e
;
424 for (e
= v
->succ
; e
; e
= e
->succ_next
)
429 gimple_set_uid (RDGV_STMT (v
), -1);
430 free_data_refs (RDGV_DATAREFS (v
));
438 /* Build the Reduced Dependence Graph (RDG) with one vertex per
439 statement of the loop nest LOOP_NEST, and one edge per data dependence or
440 scalar dependence. */
442 static struct graph
*
443 build_rdg (vec
<loop_p
> loop_nest
, control_dependences
*cd
)
446 vec
<data_reference_p
> datarefs
;
448 /* Create the RDG vertices from the stmts of the loop nest. */
449 auto_vec
<gimple
, 10> stmts
;
450 stmts_from_loop (loop_nest
[0], &stmts
);
451 rdg
= new_graph (stmts
.length ());
452 datarefs
.create (10);
453 if (!create_rdg_vertices (rdg
, stmts
, loop_nest
[0], &datarefs
))
461 create_rdg_flow_edges (rdg
);
463 create_rdg_cd_edges (rdg
, cd
);
472 enum partition_kind
{
473 PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
476 typedef struct partition_s
481 enum partition_kind kind
;
482 /* data-references a kind != PKIND_NORMAL partition is about. */
483 data_reference_p main_dr
;
484 data_reference_p secondary_dr
;
490 /* Allocate and initialize a partition from BITMAP. */
493 partition_alloc (bitmap stmts
, bitmap loops
)
495 partition_t partition
= XCNEW (struct partition_s
);
496 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
497 partition
->loops
= loops
? loops
: BITMAP_ALLOC (NULL
);
498 partition
->reduction_p
= false;
499 partition
->kind
= PKIND_NORMAL
;
503 /* Free PARTITION. */
506 partition_free (partition_t partition
)
508 BITMAP_FREE (partition
->stmts
);
509 BITMAP_FREE (partition
->loops
);
513 /* Returns true if the partition can be generated as a builtin. */
516 partition_builtin_p (partition_t partition
)
518 return partition
->kind
!= PKIND_NORMAL
;
521 /* Returns true if the partition contains a reduction. */
524 partition_reduction_p (partition_t partition
)
526 return partition
->reduction_p
;
529 /* Merge PARTITION into the partition DEST. */
532 partition_merge_into (partition_t dest
, partition_t partition
)
534 dest
->kind
= PKIND_NORMAL
;
535 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
536 if (partition_reduction_p (partition
))
537 dest
->reduction_p
= true;
541 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
545 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
547 imm_use_iterator imm_iter
;
550 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
552 gimple use_stmt
= USE_STMT (use_p
);
553 if (!is_gimple_debug (use_stmt
)
554 && loop
!= loop_containing_stmt (use_stmt
))
561 /* Returns true when STMT defines a scalar variable used after the
565 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
570 if (gimple_code (stmt
) == GIMPLE_PHI
)
571 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
573 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
574 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
580 /* Return a copy of LOOP placed before LOOP. */
583 copy_loop_before (struct loop
*loop
)
586 edge preheader
= loop_preheader_edge (loop
);
588 initialize_original_copy_tables ();
589 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, NULL
, preheader
);
590 gcc_assert (res
!= NULL
);
591 free_original_copy_tables ();
592 delete_update_ssa ();
597 /* Creates an empty basic block after LOOP. */
600 create_bb_after_loop (struct loop
*loop
)
602 edge exit
= single_exit (loop
);
610 /* Generate code for PARTITION from the code in LOOP. The loop is
611 copied when COPY_P is true. All the statements not flagged in the
612 PARTITION bitmap are removed from the loop or from its copy. The
613 statements are indexed in sequence inside a basic block, and the
614 basic blocks of a loop are taken in dom order. */
617 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
625 loop
= copy_loop_before (loop
);
626 gcc_assert (loop
!= NULL
);
627 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
628 create_bb_after_loop (loop
);
631 /* Remove stmts not in the PARTITION bitmap. */
632 bbs
= get_loop_body_in_dom_order (loop
);
634 if (MAY_HAVE_DEBUG_STMTS
)
635 for (i
= 0; i
< loop
->num_nodes
; i
++)
637 basic_block bb
= bbs
[i
];
639 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
642 gphi
*phi
= bsi
.phi ();
643 if (!virtual_operand_p (gimple_phi_result (phi
))
644 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
645 reset_debug_uses (phi
);
648 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
650 gimple stmt
= gsi_stmt (bsi
);
651 if (gimple_code (stmt
) != GIMPLE_LABEL
652 && !is_gimple_debug (stmt
)
653 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
654 reset_debug_uses (stmt
);
658 for (i
= 0; i
< loop
->num_nodes
; i
++)
660 basic_block bb
= bbs
[i
];
662 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
664 gphi
*phi
= bsi
.phi ();
665 if (!virtual_operand_p (gimple_phi_result (phi
))
666 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
667 remove_phi_node (&bsi
, true);
672 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
674 gimple stmt
= gsi_stmt (bsi
);
675 if (gimple_code (stmt
) != GIMPLE_LABEL
676 && !is_gimple_debug (stmt
)
677 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
679 /* Choose an arbitrary path through the empty CFG part
680 that this unnecessary control stmt controls. */
681 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
683 gimple_cond_make_false (cond_stmt
);
686 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
688 gswitch
*switch_stmt
= as_a
<gswitch
*> (stmt
);
689 gimple_switch_set_index
690 (switch_stmt
, CASE_LOW (gimple_switch_label (switch_stmt
, 1)));
695 unlink_stmt_vdef (stmt
);
696 gsi_remove (&bsi
, true);
708 /* Build the size argument for a memory operation call. */
711 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
,
714 tree size
= fold_convert_loc (loc
, sizetype
, nb_iter
);
716 size
= size_binop (PLUS_EXPR
, size
, size_one_node
);
717 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
, size
,
718 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
719 size
= fold_convert_loc (loc
, size_type_node
, size
);
723 /* Build an address argument for a memory operation call. */
726 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
730 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
731 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
733 /* Test for a negative stride, iterating over every element. */
734 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
736 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
737 fold_convert_loc (loc
, sizetype
, nb_bytes
));
738 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
739 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
742 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
745 /* If VAL memory representation contains the same value in all bytes,
746 return that value, otherwise return -1.
747 E.g. for 0x24242424 return 0x24, for IEEE double
748 747708026454360457216.0 return 0x44, etc. */
751 const_with_all_bytes_same (tree val
)
753 unsigned char buf
[64];
756 if (integer_zerop (val
)
758 || (TREE_CODE (val
) == CONSTRUCTOR
759 && !TREE_CLOBBER_P (val
)
760 && CONSTRUCTOR_NELTS (val
) == 0))
763 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
766 len
= native_encode_expr (val
, buf
, sizeof (buf
));
769 for (i
= 1; i
< len
; i
++)
770 if (buf
[i
] != buf
[0])
775 /* Generate a call to memset for PARTITION in LOOP. */
778 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
780 gimple_stmt_iterator gsi
;
781 gimple stmt
, fn_call
;
782 tree mem
, fn
, nb_bytes
;
786 stmt
= DR_STMT (partition
->main_dr
);
787 loc
= gimple_location (stmt
);
789 /* The new statements will be placed before LOOP. */
790 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
792 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
793 partition
->plus_one
);
794 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
795 false, GSI_CONTINUE_LINKING
);
796 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
797 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
798 false, GSI_CONTINUE_LINKING
);
800 /* This exactly matches the pattern recognition in classify_partition. */
801 val
= gimple_assign_rhs1 (stmt
);
802 /* Handle constants like 0x15151515 and similarly
803 floating point constants etc. where all bytes are the same. */
804 int bytev
= const_with_all_bytes_same (val
);
806 val
= build_int_cst (integer_type_node
, bytev
);
807 else if (TREE_CODE (val
) == INTEGER_CST
)
808 val
= fold_convert (integer_type_node
, val
);
809 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
811 tree tem
= make_ssa_name (integer_type_node
);
812 gimple cstmt
= gimple_build_assign (tem
, NOP_EXPR
, val
);
813 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
817 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
818 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
819 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
823 fprintf (dump_file
, "generated memset");
825 fprintf (dump_file
, " zero\n");
827 fprintf (dump_file
, "\n");
831 /* Generate a call to memcpy for PARTITION in LOOP. */
834 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
836 gimple_stmt_iterator gsi
;
837 gimple stmt
, fn_call
;
838 tree dest
, src
, fn
, nb_bytes
;
840 enum built_in_function kind
;
842 stmt
= DR_STMT (partition
->main_dr
);
843 loc
= gimple_location (stmt
);
845 /* The new statements will be placed before LOOP. */
846 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
848 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
849 partition
->plus_one
);
850 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
851 false, GSI_CONTINUE_LINKING
);
852 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
853 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
854 if (ptr_derefs_may_alias_p (dest
, src
))
855 kind
= BUILT_IN_MEMMOVE
;
857 kind
= BUILT_IN_MEMCPY
;
859 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
860 false, GSI_CONTINUE_LINKING
);
861 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
862 false, GSI_CONTINUE_LINKING
);
863 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
864 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
865 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
867 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
869 if (kind
== BUILT_IN_MEMCPY
)
870 fprintf (dump_file
, "generated memcpy\n");
872 fprintf (dump_file
, "generated memmove\n");
876 /* Remove and destroy the loop LOOP. */
879 destroy_loop (struct loop
*loop
)
881 unsigned nbbs
= loop
->num_nodes
;
882 edge exit
= single_exit (loop
);
883 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
887 bbs
= get_loop_body_in_dom_order (loop
);
889 redirect_edge_pred (exit
, src
);
890 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
891 exit
->flags
|= EDGE_FALLTHRU
;
892 cancel_loop_tree (loop
);
893 rescan_loop_exit (exit
, false, true);
895 for (i
= 0; i
< nbbs
; i
++)
897 /* We have made sure to not leave any dangling uses of SSA
898 names defined in the loop. With the exception of virtuals.
899 Make sure we replace all uses of virtual defs that will remain
900 outside of the loop with the bare symbol as delete_basic_block
901 will release them. */
902 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
);
905 gphi
*phi
= gsi
.phi ();
906 if (virtual_operand_p (gimple_phi_result (phi
)))
907 mark_virtual_phi_result_for_renaming (phi
);
909 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
);
912 gimple stmt
= gsi_stmt (gsi
);
913 tree vdef
= gimple_vdef (stmt
);
914 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
915 mark_virtual_operand_for_renaming (vdef
);
917 delete_basic_block (bbs
[i
]);
921 set_immediate_dominator (CDI_DOMINATORS
, dest
,
922 recompute_dominator (CDI_DOMINATORS
, dest
));
925 /* Generates code for PARTITION. */
928 generate_code_for_partition (struct loop
*loop
,
929 partition_t partition
, bool copy_p
)
931 switch (partition
->kind
)
934 /* Reductions all have to be in the last partition. */
935 gcc_assert (!partition_reduction_p (partition
)
937 generate_loops_for_partition (loop
, partition
, copy_p
);
941 generate_memset_builtin (loop
, partition
);
945 generate_memcpy_builtin (loop
, partition
);
952 /* Common tail for partitions we turn into a call. If this was the last
953 partition for which we generate code, we have to destroy the loop. */
959 /* Returns a partition with all the statements needed for computing
960 the vertex V of the RDG, also including the loop exit conditions. */
963 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
965 partition_t partition
= partition_alloc (NULL
, NULL
);
966 auto_vec
<int, 3> nodes
;
970 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
972 FOR_EACH_VEC_ELT (nodes
, i
, x
)
974 bitmap_set_bit (partition
->stmts
, x
);
975 bitmap_set_bit (partition
->loops
,
976 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
982 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
983 For the moment we detect only the memset zero pattern. */
986 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
991 data_reference_p single_load
, single_store
;
992 bool volatiles_p
= false;
993 bool plus_one
= false;
995 partition
->kind
= PKIND_NORMAL
;
996 partition
->main_dr
= NULL
;
997 partition
->secondary_dr
= NULL
;
998 partition
->niter
= NULL_TREE
;
999 partition
->plus_one
= false;
1001 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1003 gimple stmt
= RDG_STMT (rdg
, i
);
1005 if (gimple_has_volatile_ops (stmt
))
1008 /* If the stmt has uses outside of the loop mark it as reduction. */
1009 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1011 partition
->reduction_p
= true;
1016 /* Perform general partition disqualification for builtins. */
1018 || !flag_tree_loop_distribute_patterns
)
1021 /* Detect memset and memcpy. */
1023 single_store
= NULL
;
1024 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1026 gimple stmt
= RDG_STMT (rdg
, i
);
1027 data_reference_p dr
;
1030 if (gimple_code (stmt
) == GIMPLE_PHI
)
1033 /* Any scalar stmts are ok. */
1034 if (!gimple_vuse (stmt
))
1037 /* Otherwise just regular loads/stores. */
1038 if (!gimple_assign_single_p (stmt
))
1041 /* But exactly one store and/or load. */
1042 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1044 if (DR_IS_READ (dr
))
1046 if (single_load
!= NULL
)
1052 if (single_store
!= NULL
)
1062 nb_iter
= number_of_latch_executions (loop
);
1063 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1065 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1066 gimple_bb (DR_STMT (single_store
))))
1069 if (single_store
&& !single_load
)
1071 gimple stmt
= DR_STMT (single_store
);
1072 tree rhs
= gimple_assign_rhs1 (stmt
);
1073 if (const_with_all_bytes_same (rhs
) == -1
1074 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1075 || (TYPE_MODE (TREE_TYPE (rhs
))
1076 != TYPE_MODE (unsigned_char_type_node
))))
1078 if (TREE_CODE (rhs
) == SSA_NAME
1079 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1080 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1082 if (!adjacent_dr_p (single_store
)
1083 || !dominated_by_p (CDI_DOMINATORS
,
1084 loop
->latch
, gimple_bb (stmt
)))
1086 partition
->kind
= PKIND_MEMSET
;
1087 partition
->main_dr
= single_store
;
1088 partition
->niter
= nb_iter
;
1089 partition
->plus_one
= plus_one
;
1091 else if (single_store
&& single_load
)
1093 gimple store
= DR_STMT (single_store
);
1094 gimple load
= DR_STMT (single_load
);
1095 /* Direct aggregate copy or via an SSA name temporary. */
1097 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1099 if (!adjacent_dr_p (single_store
)
1100 || !adjacent_dr_p (single_load
)
1101 || !operand_equal_p (DR_STEP (single_store
),
1102 DR_STEP (single_load
), 0)
1103 || !dominated_by_p (CDI_DOMINATORS
,
1104 loop
->latch
, gimple_bb (store
)))
1106 /* Now check that if there is a dependence this dependence is
1107 of a suitable form for memmove. */
1108 vec
<loop_p
> loops
= vNULL
;
1110 loops
.safe_push (loop
);
1111 ddr
= initialize_data_dependence_relation (single_load
, single_store
,
1113 compute_affine_dependence (ddr
, loop
);
1114 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1116 free_dependence_relation (ddr
);
1120 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1122 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1124 free_dependence_relation (ddr
);
1128 lambda_vector dist_v
;
1129 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1131 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1132 DDR_LOOP_NEST (ddr
))];
1133 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1135 free_dependence_relation (ddr
);
1141 free_dependence_relation (ddr
);
1143 partition
->kind
= PKIND_MEMCPY
;
1144 partition
->main_dr
= single_store
;
1145 partition
->secondary_dr
= single_load
;
1146 partition
->niter
= nb_iter
;
1147 partition
->plus_one
= plus_one
;
1151 /* For a data reference REF, return the declaration of its base
1152 address or NULL_TREE if the base is not determined. */
1155 ref_base_address (data_reference_p dr
)
1157 tree base_address
= DR_BASE_ADDRESS (dr
);
1159 && TREE_CODE (base_address
) == ADDR_EXPR
)
1160 return TREE_OPERAND (base_address
, 0);
1162 return base_address
;
1165 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1169 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1170 partition_t partition2
)
1172 unsigned i
, j
, k
, l
;
1173 bitmap_iterator bi
, bj
;
1174 data_reference_p ref1
, ref2
;
1176 /* First check whether in the intersection of the two partitions are
1177 any loads or stores. Common loads are the situation that happens
1179 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1180 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1181 || RDG_MEM_READS_STMT (rdg
, i
))
1184 /* Then check all data-references against each other. */
1185 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1186 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1187 || RDG_MEM_READS_STMT (rdg
, i
))
1188 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1189 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1190 || RDG_MEM_READS_STMT (rdg
, j
))
1192 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, i
), k
, ref1
)
1194 tree base1
= ref_base_address (ref1
);
1196 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg
, j
), l
, ref2
)
1197 if (base1
== ref_base_address (ref2
))
1205 /* Aggregate several components into a useful partition that is
1206 registered in the PARTITIONS vector. Partitions will be
1207 distributed in different loops. */
1210 rdg_build_partitions (struct graph
*rdg
,
1211 vec
<gimple
> starting_stmts
,
1212 vec
<partition_t
> *partitions
)
1214 bitmap processed
= BITMAP_ALLOC (NULL
);
1218 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1220 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1222 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1224 "ldist asked to generate code for vertex %d\n", v
);
1226 /* If the vertex is already contained in another partition so
1227 is the partition rooted at it. */
1228 if (bitmap_bit_p (processed
, v
))
1231 partition_t partition
= build_rdg_partition_for_vertex (rdg
, v
);
1232 bitmap_ior_into (processed
, partition
->stmts
);
1234 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1236 fprintf (dump_file
, "ldist useful partition:\n");
1237 dump_bitmap (dump_file
, partition
->stmts
);
1240 partitions
->safe_push (partition
);
1243 /* All vertices should have been assigned to at least one partition now,
1244 other than vertices belonging to dead code. */
1246 BITMAP_FREE (processed
);
1249 /* Dump to FILE the PARTITIONS. */
1252 dump_rdg_partitions (FILE *file
, vec
<partition_t
> partitions
)
1255 partition_t partition
;
1257 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1258 debug_bitmap_file (file
, partition
->stmts
);
1261 /* Debug PARTITIONS. */
1262 extern void debug_rdg_partitions (vec
<partition_t
> );
1265 debug_rdg_partitions (vec
<partition_t
> partitions
)
1267 dump_rdg_partitions (stderr
, partitions
);
1270 /* Returns the number of read and write operations in the RDG. */
1273 number_of_rw_in_rdg (struct graph
*rdg
)
1277 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1279 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1282 if (RDG_MEM_READS_STMT (rdg
, i
))
1289 /* Returns the number of read and write operations in a PARTITION of
1293 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1299 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1301 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1304 if (RDG_MEM_READS_STMT (rdg
, i
))
1311 /* Returns true when one of the PARTITIONS contains all the read or
1312 write operations of RDG. */
1315 partition_contains_all_rw (struct graph
*rdg
,
1316 vec
<partition_t
> partitions
)
1319 partition_t partition
;
1320 int nrw
= number_of_rw_in_rdg (rdg
);
1322 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1323 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1329 /* Compute partition dependence created by the data references in DRS1
1330 and DRS2 and modify and return DIR according to that. */
/* NOTE(review): mangled extraction -- the return type, braces and several
   statements (including the assignments that merge THIS_DIR into DIR) are
   missing; only comments are added here.  */
1333 pg_add_dependence_edges (struct graph
*rdg
, vec
<loop_p
> loops
, int dir
,
1334 vec
<data_reference_p
> drs1
,
1335 vec
<data_reference_p
> drs2
)
1337 data_reference_p dr1
, dr2
;
1339 /* dependence direction - 0 is no dependence, -1 is back,
1340 1 is forth, 2 is both (we can stop then, merging will occur). */
/* Consider every pair of data references, one from each set.  */
1341 for (int ii
= 0; drs1
.iterate (ii
, &dr1
); ++ii
)
1342 for (int jj
= 0; drs2
.iterate (jj
, &dr2
); ++jj
)
/* Remember DR1 so it can be restored after a possible swap below.  */
1344 data_reference_p saved_dr1
= dr1
;
1347 /* Re-shuffle data-refs to be in dominator order. */
1348 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1349 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1351 std::swap (dr1
, dr2
);
/* Swapping the two refs reverses the dependence direction.  */
1352 this_dir
= -this_dir
;
1354 ddr
= initialize_data_dependence_relation (dr1
, dr2
, loops
);
1355 compute_affine_dependence (ddr
, loops
[0]);
/* Unknown dependence -- must be handled conservatively.  */
1356 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
/* NULL_TREE here means a known dependence exists; inspect its shape.  */
1358 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1360 if (DDR_REVERSED_P (ddr
))
1362 std::swap (dr1
, dr2
);
1363 this_dir
= -this_dir
;
1365 /* Known dependences can still be unordered throughout the
1366 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1367 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1369 /* If the overlap is exact preserve stmt order. */
1370 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1374 /* Else as the distance vector is lexicographic positive
1375 swap the dependence direction. */
1376 this_dir
= -this_dir
;
1381 free_dependence_relation (ddr
);
/* Differing directions across pairs presumably merge into "both" (2);
   the assignment line is missing from this extraction -- confirm.  */
1384 else if (dir
!= this_dir
)
1386 /* Shuffle "back" dr1. */
1392 /* Compare postorder number of the partition graph vertices V1 and V2. */
/* qsort comparator over graph vertices.  NOTE(review): mangled
   extraction -- the return type line and braces are missing.  */
1395 pgcmp (const void *v1_
, const void *v2_
)
1397 const vertex
*v1
= (const vertex
*)v1_
;
1398 const vertex
*v2
= (const vertex
*)v2_
;
/* Negative when V1 has the larger postorder number, so vertices sort in
   decreasing ->post order.  */
1399 return v2
->post
- v1
->post
;
1402 /* Distributes the code from LOOP in such a way that producer
1403 statements are placed before consumer statements. Tries to separate
1404 only the statements from STMTS into separate loops.
1405 Returns the number of distributed loops. */
/* NOTE(review): this whole chunk is a mangled extraction -- each original
   source line is split across several physical lines and many lines
   (return type, braces, declarations, gotos) are missing entirely; only
   comments are added here.  */
1408 distribute_loop (struct loop
*loop
, vec
<gimple
> stmts
,
1409 control_dependences
*cd
, int *nb_calls
)
1412 partition_t partition
;
/* Loop distribution needs the full loop nest around LOOP.  */
1419 auto_vec
<loop_p
, 3> loop_nest
;
1420 if (!find_loop_nest (loop
, &loop_nest
))
/* Build the reduced dependence graph for the loop nest.  */
1423 rdg
= build_rdg (loop_nest
, cd
)
;
1426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1428 "Loop %d not distributed: failed to build the RDG.\n",
1434 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1435 dump_rdg (dump_file
, rdg
);
/* Seed one partition per statement in STMTS.  */
1437 auto_vec
<partition_t
, 3> partitions
;
1438 rdg_build_partitions (rdg
, stmts
, &partitions
);
/* Classify each partition and record whether any was recognized as a
   builtin pattern.  */
1440 any_builtin
= false;
1441 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1443 classify_partition (loop
, rdg
, partition
);
1444 any_builtin
|= partition_builtin_p (partition
);
1447 /* If we are only distributing patterns but did not detect any,
1449 if (!flag_tree_loop_distribution
1456 /* If we are only distributing patterns fuse all partitions that
1457 were not classified as builtins. This also avoids chopping
1458 a loop into pieces, separated by builtin calls. That is, we
1459 only want no or a single loop body remaining. */
1461 if (!flag_tree_loop_distribution
)
1463 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1464 if (!partition_builtin_p (into
))
1466 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
1467 if (!partition_builtin_p (partition
))
1469 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1471 fprintf (dump_file
, "fusing non-builtin partitions\n");
1472 dump_bitmap (dump_file
, into
->stmts
);
1473 dump_bitmap (dump_file
, partition
->stmts
);
1475 partition_merge_into (into
, partition
);
1476 partitions
.unordered_remove (i
);
1477 partition_free (partition
);
1482 /* Due to limitations in the transform phase we have to fuse all
1483 reduction partitions into the last partition so the existing
1484 loop will contain all loop-closed PHI nodes. */
1485 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1486 if (partition_reduction_p (into
))
1488 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
1489 if (partition_reduction_p (partition
))
1491 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1493 fprintf (dump_file
, "fusing partitions\n");
1494 dump_bitmap (dump_file
, into
->stmts
);
1495 dump_bitmap (dump_file
, partition
->stmts
);
1496 fprintf (dump_file
, "because they have reductions\n");
1498 partition_merge_into (into
, partition
);
1499 partitions
.unordered_remove (i
);
1500 partition_free (partition
);
1504 /* Apply our simple cost model - fuse partitions with similar
1506 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
1508 if (partition_builtin_p (into
))
1511 partitions
.iterate (j
, &partition
); ++j
)
1513 if (!partition_builtin_p (partition
)
1514 && similar_memory_accesses (rdg
, into
, partition
))
1516 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1518 fprintf (dump_file
, "fusing partitions\n");
1519 dump_bitmap (dump_file
, into
->stmts
);
1520 dump_bitmap (dump_file
, partition
->stmts
);
1521 fprintf (dump_file
, "because they have similar "
1522 "memory accesses\n");
1524 partition_merge_into (into
, partition
);
1525 partitions
.unordered_remove (j
);
1526 partition_free (partition
);
1532 /* Build the partition dependency graph. */
1533 if (partitions
.length () > 1)
/* One graph vertex per partition; per-vertex pgdata collects the
   partition's read and write data references.  */
1535 pg
= new_graph (partitions
.length ());
1537 partition_t partition
;
1538 vec
<data_reference_p
> writes
;
1539 vec
<data_reference_p
> reads
;
1541 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1542 for (i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1544 vertex
*v
= &pg
->vertices
[i
];
1545 pgdata
*data
= new pgdata
;
1546 data_reference_p dr
;
1547 /* FIXME - leaks. */
1551 data
->partition
= partition
;
1552 data
->reads
= vNULL
;
1553 data
->writes
= vNULL
;
1554 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, j
, bi
)
1555 for (int k
= 0; RDG_DATAREFS (rdg
, j
).iterate (k
, &dr
); ++k
)
1556 if (DR_IS_READ (dr
))
1557 data
->reads
.safe_push (dr
);
1559 data
->writes
.safe_push (dr
);
/* Add dependence edges between every pair of distinct partitions.  */
1561 partition_t partition1
, partition2
;
1562 for (i
= 0; partitions
.iterate (i
, &partition1
); ++i
)
1563 for (int j
= i
+ 1; partitions
.iterate (j
, &partition2
); ++j
)
1565 /* dependence direction - 0 is no dependence, -1 is back,
1566 1 is forth, 2 is both (we can stop then, merging will occur). */
/* Three calls -- presumably write/write, write/read and read/write
   pairs; the argument lines are missing from this extraction.  */
1568 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1572 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1576 dir
= pg_add_dependence_edges (rdg
, loop_nest
, dir
,
1579 if (dir
== 1 || dir
== 2)
1580 add_edge (pg
, i
, j
);
1581 if (dir
== -1 || dir
== 2)
1582 add_edge (pg
, j
, i
);
1585 /* Add edges to the reduction partition (if any) to force it last. */
1587 for (j
= 0; partitions
.iterate (j
, &partition
); ++j
)
1588 if (partition_reduction_p (partition
))
1590 if (j
< partitions
.length ())
1592 for (unsigned i
= 0; partitions
.iterate (i
, &partition
); ++i
)
1594 add_edge (pg
, i
, j
);
1597 /* Compute partitions we cannot separate and fuse them. */
1598 num_sccs
= graphds_scc (pg
, NULL
);
1599 for (i
= 0; i
< num_sccs
; ++i
)
1603 for (j
= 0; partitions
.iterate (j
, &first
); ++j
)
1604 if (pg
->vertices
[j
].component
== i
)
1606 for (j
= j
+ 1; partitions
.iterate (j
, &partition
); ++j
)
1607 if (pg
->vertices
[j
].component
== i
)
1609 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1611 fprintf (dump_file
, "fusing partitions\n");
1612 dump_bitmap (dump_file
, first
->stmts
);
1613 dump_bitmap (dump_file
, partition
->stmts
);
1614 fprintf (dump_file
, "because they are in the same "
1615 "dependence SCC\n");
1617 partition_merge_into (first
, partition
);
1618 partitions
[j
] = NULL
;
1619 partition_free (partition
);
1620 PGDATA (j
)->partition
= NULL
;
1624 /* Now order the remaining nodes in postorder. */
1625 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
/* Rebuild the partitions vector in the sorted order, dropping fused
   (NULL) entries, and release the per-vertex dataref vectors.  */
1626 partitions
.truncate (0);
1627 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1629 pgdata
*data
= PGDATA (i
);
1630 if (data
->partition
)
1631 partitions
.safe_push (data
->partition
);
1632 data
->reads
.release ();
1633 data
->writes
.release ();
1636 gcc_assert (partitions
.length () == (unsigned)num_sccs
);
/* Nothing to distribute when a single non-builtin partition remains or
   when one partition contains all reads/writes (the surrounding control
   flow is missing from this extraction).  */
1640 nbp
= partitions
.length ();
1642 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
1643 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
1649 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1650 dump_rdg_partitions (dump_file
, partitions
);
/* Generate code for each remaining partition in order.  */
1652 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1654 if (partition_builtin_p (partition
))
1656 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
/* Release all partitions.  */
1661 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1662 partition_free (partition
);
/* Distributed-loop count excludes the generated library calls.  */
1665 return nbp
- *nb_calls
;
1668 /* Distribute all loops in the current function. */
/* Pass metadata for the loop-distribution GIMPLE pass; requires CFG and
   SSA form.  NOTE(review): mangled extraction -- the braces and the pass
   name entry are missing from this chunk.  */
1672 const pass_data pass_data_loop_distribution
=
1674 GIMPLE_PASS
, /* type */
1676 OPTGROUP_LOOP
, /* optinfo_flags */
1677 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1678 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1679 0, /* properties_provided */
1680 0, /* properties_destroyed */
1681 0, /* todo_flags_start */
1682 0, /* todo_flags_finish */
/* Pass wrapper class.  NOTE(review): mangled extraction -- braces and the
   access specifier lines are missing from this chunk.  */
1685 class pass_loop_distribution
: public gimple_opt_pass
1688 pass_loop_distribution (gcc::context
*ctxt
)
1689 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
1692 /* opt_pass methods: */
/* Run the pass when either distribution flag is enabled (presumably
   -ftree-loop-distribution / -ftree-loop-distribute-patterns -- confirm
   against common.opt).  */
1693 virtual bool gate (function
*)
1695 return flag_tree_loop_distribution
1696 || flag_tree_loop_distribute_patterns
;
1699 virtual unsigned int execute (function
*);
1701 }; // class pass_loop_distribution
/* Main pass entry point: walk all innermost loops of FUN and try to
   distribute each.  NOTE(review): mangled extraction -- the return type,
   braces, several declarations and the final return statement are
   missing; only comments are added here.  */
1704 pass_loop_distribution::execute (function
*fun
)
1707 bool changed
= false;
1709 control_dependences
*cd
= NULL
;
/* Invalidate statement UIDs (set to -1) in every basic block, including
   PHI nodes.  */
1711 FOR_ALL_BB_FN (bb
, fun
)
1713 gimple_stmt_iterator gsi
;
1714 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1715 gimple_set_uid (gsi_stmt (gsi
), -1);
1716 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1717 gimple_set_uid (gsi_stmt (gsi
), -1);
1720 /* We can at the moment only distribute non-nested loops, thus restrict
1721 walking to innermost loops. */
1722 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
1724 auto_vec
<gimple
> work_list
;
1726 int num
= loop
->num
;
1729 /* If the loop doesn't have a single exit we will fail anyway,
1730 so do that early. */
1731 if (!single_exit (loop
))
1734 /* Only optimize hot loops. */
1735 if (!optimize_loop_for_speed_p (loop
))
1738 /* Initialize the worklist with stmts we seed the partitions with. */
1739 bbs
= get_loop_body_in_dom_order (loop
);
1740 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1742 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]);
1746 gphi
*phi
= gsi
.phi ();
/* Virtual PHIs carry no scalar value; skip them.  */
1747 if (virtual_operand_p (gimple_phi_result (phi
)))
1749 /* Distribute stmts which have defs that are used outside of
1751 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
1753 work_list
.safe_push (phi
);
1755 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
1759 gimple stmt
= gsi_stmt (gsi
);
1761 /* If there is a stmt with side-effects bail out - we
1762 cannot and should not distribute this loop. */
1763 if (gimple_has_side_effects (stmt
))
1765 work_list
.truncate (0);
1769 /* Distribute stmts which have defs that are used outside of
1771 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1773 /* Otherwise only distribute stores for now. */
1774 else if (!gimple_vdef (stmt
))
1777 work_list
.safe_push (stmt
);
1783 int nb_generated_loops
= 0;
1784 int nb_generated_calls
= 0;
1785 location_t loc
= find_loop_location (loop
);
1786 if (work_list
.length () > 0)
/* Compute dominators and control dependences needed by
   distribution; post-dominators are only needed transiently.  */
1790 calculate_dominance_info (CDI_DOMINATORS
);
1791 calculate_dominance_info (CDI_POST_DOMINATORS
);
1792 cd
= new control_dependences (create_edge_list ());
1793 free_dominance_info (CDI_POST_DOMINATORS
);
1795 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
1796 &nb_generated_calls
);
/* Report the outcome for this loop.  */
1799 if (nb_generated_loops
+ nb_generated_calls
> 0)
1802 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
1803 loc
, "Loop %d distributed: split to %d loops "
1804 "and %d library calls.\n",
1805 num
, nb_generated_loops
, nb_generated_calls
);
1807 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1808 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1816 /* Cached scalar evolutions now may refer to wrong or non-existing
1819 mark_virtual_operands_for_renaming (fun
);
1820 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1823 #ifdef ENABLE_CHECKING
1824 verify_loop_structure ();
1833 make_pass_loop_distribution (gcc::context
*ctxt
)
1835 return new pass_loop_distribution (ctxt
);