/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution.

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
47 #include "coretypes.h"
48 #include "tree-flow.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
55 enum partition_kind
{ PKIND_NORMAL
, PKIND_MEMSET
, PKIND_MEMCPY
};
57 typedef struct partition_s
61 enum partition_kind kind
;
62 /* data-references a kind != PKIND_NORMAL partition is about. */
63 data_reference_p main_dr
;
64 data_reference_p secondary_dr
;
67 DEF_VEC_P (partition_t
);
68 DEF_VEC_ALLOC_P (partition_t
, heap
);
70 /* Allocate and initialize a partition from BITMAP. */
73 partition_alloc (bitmap stmts
)
75 partition_t partition
= XCNEW (struct partition_s
);
76 partition
->stmts
= stmts
? stmts
: BITMAP_ALLOC (NULL
);
77 partition
->has_writes
= false;
78 partition
->kind
= PKIND_NORMAL
;
85 partition_free (partition_t partition
)
87 BITMAP_FREE (partition
->stmts
);
91 /* Returns true if the partition can be generated as a builtin. */
94 partition_builtin_p (partition_t partition
)
96 return partition
->kind
!= PKIND_NORMAL
;
99 /* Returns true if the partition has an writes. */
102 partition_has_writes (partition_t partition
)
104 return partition
->has_writes
;
107 /* If bit I is not set, it means that this node represents an
108 operation that has already been performed, and that should not be
109 performed again. This is the subgraph of remaining important
110 computations that is passed to the DFS algorithm for avoiding to
111 include several times the same stores in different loops. */
112 static bitmap remaining_stmts
;
114 /* A node of the RDG is marked in this bitmap when it has as a
115 predecessor a node that writes to memory. */
116 static bitmap upstream_mem_writes
;
118 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
122 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
124 imm_use_iterator imm_iter
;
127 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
128 if (loop
!= loop_containing_stmt (USE_STMT (use_p
)))
134 /* Returns true when STMT defines a scalar variable used after the
138 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple stmt
)
143 if (gimple_code (stmt
) == GIMPLE_PHI
)
144 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
146 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
147 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
153 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
157 update_phis_for_loop_copy (struct loop
*orig_loop
, struct loop
*new_loop
)
160 gimple_stmt_iterator si_new
, si_orig
;
161 edge orig_loop_latch
= loop_latch_edge (orig_loop
);
162 edge orig_entry_e
= loop_preheader_edge (orig_loop
);
163 edge new_loop_entry_e
= loop_preheader_edge (new_loop
);
165 /* Scan the phis in the headers of the old and new loops
166 (they are organized in exactly the same order). */
167 for (si_new
= gsi_start_phis (new_loop
->header
),
168 si_orig
= gsi_start_phis (orig_loop
->header
);
169 !gsi_end_p (si_new
) && !gsi_end_p (si_orig
);
170 gsi_next (&si_new
), gsi_next (&si_orig
))
173 source_location locus
;
175 gimple phi_new
= gsi_stmt (si_new
);
176 gimple phi_orig
= gsi_stmt (si_orig
);
178 /* Add the first phi argument for the phi in NEW_LOOP (the one
179 associated with the entry of NEW_LOOP) */
180 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, orig_entry_e
);
181 locus
= gimple_phi_arg_location_from_edge (phi_orig
, orig_entry_e
);
182 block
= gimple_phi_arg_block_from_edge (phi_orig
, orig_entry_e
);
183 add_phi_arg (phi_new
, def
, new_loop_entry_e
, locus
, block
);
185 /* Add the second phi argument for the phi in NEW_LOOP (the one
186 associated with the latch of NEW_LOOP) */
187 def
= PHI_ARG_DEF_FROM_EDGE (phi_orig
, orig_loop_latch
);
188 locus
= gimple_phi_arg_location_from_edge (phi_orig
, orig_loop_latch
);
189 block
= gimple_phi_arg_block_from_edge (phi_orig
, orig_loop_latch
);
191 if (TREE_CODE (def
) == SSA_NAME
)
193 new_ssa_name
= get_current_def (def
);
196 /* This only happens if there are no definitions inside the
197 loop. Use the the invariant in the new loop as is. */
201 /* Could be an integer. */
204 add_phi_arg (phi_new
, new_ssa_name
, loop_latch_edge (new_loop
), locus
,
209 /* Return a copy of LOOP placed before LOOP. */
212 copy_loop_before (struct loop
*loop
)
215 edge preheader
= loop_preheader_edge (loop
);
217 initialize_original_copy_tables ();
218 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, preheader
);
219 gcc_assert (res
!= NULL
);
220 free_original_copy_tables ();
222 update_phis_for_loop_copy (loop
, res
);
223 rename_variables_in_loop (res
);
228 /* Creates an empty basic block after LOOP. */
231 create_bb_after_loop (struct loop
*loop
)
233 edge exit
= single_exit (loop
);
241 /* Generate code for PARTITION from the code in LOOP. The loop is
242 copied when COPY_P is true. All the statements not flagged in the
243 PARTITION bitmap are removed from the loop or from its copy. The
244 statements are indexed in sequence inside a basic block, and the
245 basic blocks of a loop are taken in dom order. */
248 generate_loops_for_partition (struct loop
*loop
, partition_t partition
,
252 gimple_stmt_iterator bsi
;
257 loop
= copy_loop_before (loop
);
258 gcc_assert (loop
!= NULL
);
259 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
260 create_bb_after_loop (loop
);
263 /* Remove stmts not in the PARTITION bitmap. The order in which we
264 visit the phi nodes and the statements is exactly as in
266 bbs
= get_loop_body_in_dom_order (loop
);
268 if (MAY_HAVE_DEBUG_STMTS
)
269 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
271 basic_block bb
= bbs
[i
];
273 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
274 if (!bitmap_bit_p (partition
->stmts
, x
++))
275 reset_debug_uses (gsi_stmt (bsi
));
277 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
279 gimple stmt
= gsi_stmt (bsi
);
280 if (gimple_code (stmt
) != GIMPLE_LABEL
281 && !is_gimple_debug (stmt
)
282 && !bitmap_bit_p (partition
->stmts
, x
++))
283 reset_debug_uses (stmt
);
287 for (x
= 0, i
= 0; i
< loop
->num_nodes
; i
++)
289 basic_block bb
= bbs
[i
];
291 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
292 if (!bitmap_bit_p (partition
->stmts
, x
++))
294 gimple phi
= gsi_stmt (bsi
);
295 if (!is_gimple_reg (gimple_phi_result (phi
)))
296 mark_virtual_phi_result_for_renaming (phi
);
297 remove_phi_node (&bsi
, true);
302 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
304 gimple stmt
= gsi_stmt (bsi
);
305 if (gimple_code (stmt
) != GIMPLE_LABEL
306 && !is_gimple_debug (stmt
)
307 && !bitmap_bit_p (partition
->stmts
, x
++))
309 unlink_stmt_vdef (stmt
);
310 gsi_remove (&bsi
, true);
321 /* Build the size argument for a memory operation call. */
324 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
)
327 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
,
328 fold_convert_loc (loc
, sizetype
, nb_iter
),
329 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
330 return fold_convert_loc (loc
, size_type_node
, size
);
333 /* Build an address argument for a memory operation call. */
336 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
340 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
341 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
343 /* Test for a negative stride, iterating over every element. */
344 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
346 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
347 fold_convert_loc (loc
, sizetype
, nb_bytes
));
348 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
349 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
352 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
355 /* Generate a call to memset for PARTITION in LOOP. */
358 generate_memset_builtin (struct loop
*loop
, partition_t partition
)
360 gimple_stmt_iterator gsi
;
361 gimple stmt
, fn_call
;
362 tree nb_iter
, mem
, fn
, nb_bytes
;
366 stmt
= DR_STMT (partition
->main_dr
);
367 loc
= gimple_location (stmt
);
368 if (gimple_bb (stmt
) == loop
->latch
)
369 nb_iter
= number_of_latch_executions (loop
);
371 nb_iter
= number_of_exit_cond_executions (loop
);
373 /* The new statements will be placed before LOOP. */
374 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
376 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
377 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
378 false, GSI_CONTINUE_LINKING
);
379 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
380 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
381 false, GSI_CONTINUE_LINKING
);
383 /* This exactly matches the pattern recognition in classify_partition. */
384 val
= gimple_assign_rhs1 (stmt
);
385 if (integer_zerop (val
)
387 || TREE_CODE (val
) == CONSTRUCTOR
)
388 val
= integer_zero_node
;
389 else if (integer_all_onesp (val
))
390 val
= build_int_cst (integer_type_node
, -1);
393 if (TREE_CODE (val
) == INTEGER_CST
)
394 val
= fold_convert (integer_type_node
, val
);
395 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
398 tree tem
= create_tmp_reg (integer_type_node
, NULL
);
399 tem
= make_ssa_name (tem
, NULL
);
400 cstmt
= gimple_build_assign_with_ops (NOP_EXPR
, tem
, val
, NULL_TREE
);
401 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
406 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
407 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
408 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
410 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
412 fprintf (dump_file
, "generated memset");
413 if (integer_zerop (val
))
414 fprintf (dump_file
, " zero\n");
415 else if (integer_all_onesp (val
))
416 fprintf (dump_file
, " minus one\n");
418 fprintf (dump_file
, "\n");
422 /* Generate a call to memcpy for PARTITION in LOOP. */
425 generate_memcpy_builtin (struct loop
*loop
, partition_t partition
)
427 gimple_stmt_iterator gsi
;
428 gimple stmt
, fn_call
;
429 tree nb_iter
, dest
, src
, fn
, nb_bytes
;
431 enum built_in_function kind
;
433 stmt
= DR_STMT (partition
->main_dr
);
434 loc
= gimple_location (stmt
);
435 if (gimple_bb (stmt
) == loop
->latch
)
436 nb_iter
= number_of_latch_executions (loop
);
438 nb_iter
= number_of_exit_cond_executions (loop
);
440 /* The new statements will be placed before LOOP. */
441 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
443 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, nb_iter
);
444 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
445 false, GSI_CONTINUE_LINKING
);
446 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
447 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
448 if (ptr_derefs_may_alias_p (dest
, src
))
449 kind
= BUILT_IN_MEMMOVE
;
451 kind
= BUILT_IN_MEMCPY
;
453 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
454 false, GSI_CONTINUE_LINKING
);
455 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
456 false, GSI_CONTINUE_LINKING
);
457 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
458 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
459 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
461 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
463 if (kind
== BUILT_IN_MEMCPY
)
464 fprintf (dump_file
, "generated memcpy\n");
466 fprintf (dump_file
, "generated memmove\n");
470 /* Remove and destroy the loop LOOP. */
473 destroy_loop (struct loop
*loop
)
475 unsigned nbbs
= loop
->num_nodes
;
476 edge exit
= single_exit (loop
);
477 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
481 bbs
= get_loop_body_in_dom_order (loop
);
483 redirect_edge_pred (exit
, src
);
484 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
485 exit
->flags
|= EDGE_FALLTHRU
;
486 cancel_loop_tree (loop
);
487 rescan_loop_exit (exit
, false, true);
489 for (i
= 0; i
< nbbs
; i
++)
491 /* We have made sure to not leave any dangling uses of SSA
492 names defined in the loop. With the exception of virtuals.
493 Make sure we replace all uses of virtual defs that will remain
494 outside of the loop with the bare symbol as delete_basic_block
495 will release them. */
496 gimple_stmt_iterator gsi
;
497 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
499 gimple phi
= gsi_stmt (gsi
);
500 if (!is_gimple_reg (gimple_phi_result (phi
)))
501 mark_virtual_phi_result_for_renaming (phi
);
503 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
505 gimple stmt
= gsi_stmt (gsi
);
506 tree vdef
= gimple_vdef (stmt
);
507 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
508 mark_virtual_operand_for_renaming (vdef
);
510 delete_basic_block (bbs
[i
]);
514 set_immediate_dominator (CDI_DOMINATORS
, dest
,
515 recompute_dominator (CDI_DOMINATORS
, dest
));
518 /* Generates code for PARTITION. */
521 generate_code_for_partition (struct loop
*loop
,
522 partition_t partition
, bool copy_p
)
524 switch (partition
->kind
)
527 generate_memset_builtin (loop
, partition
);
528 /* If this is the last partition for which we generate code, we have
529 to destroy the loop. */
535 generate_memcpy_builtin (loop
, partition
);
536 /* If this is the last partition for which we generate code, we have
537 to destroy the loop. */
543 generate_loops_for_partition (loop
, partition
, copy_p
);
552 /* Returns true if the node V of RDG cannot be recomputed. */
555 rdg_cannot_recompute_vertex_p (struct graph
*rdg
, int v
)
557 if (RDG_MEM_WRITE_STMT (rdg
, v
))
563 /* Returns true when the vertex V has already been generated in the
564 current partition (V is in PROCESSED), or when V belongs to another
565 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
568 already_processed_vertex_p (bitmap processed
, int v
)
570 return (bitmap_bit_p (processed
, v
)
571 || !bitmap_bit_p (remaining_stmts
, v
));
574 /* Returns NULL when there is no anti-dependence among the successors
575 of vertex V, otherwise returns the edge with the anti-dep. */
577 static struct graph_edge
*
578 has_anti_dependence (struct vertex
*v
)
580 struct graph_edge
*e
;
583 for (e
= v
->succ
; e
; e
= e
->succ_next
)
584 if (RDGE_TYPE (e
) == anti_dd
)
590 /* Returns true when V has an anti-dependence edge among its successors. */
593 predecessor_has_mem_write (struct graph
*rdg
, struct vertex
*v
)
595 struct graph_edge
*e
;
598 for (e
= v
->pred
; e
; e
= e
->pred_next
)
599 if (bitmap_bit_p (upstream_mem_writes
, e
->src
)
600 /* Don't consider flow channels: a write to memory followed
601 by a read from memory. These channels allow the split of
602 the RDG in different partitions. */
603 && !RDG_MEM_WRITE_STMT (rdg
, e
->src
))
609 /* Initializes the upstream_mem_writes bitmap following the
610 information from RDG. */
613 mark_nodes_having_upstream_mem_writes (struct graph
*rdg
)
616 bitmap seen
= BITMAP_ALLOC (NULL
);
618 for (v
= rdg
->n_vertices
- 1; v
>= 0; v
--)
619 if (!bitmap_bit_p (seen
, v
))
622 VEC (int, heap
) *nodes
= VEC_alloc (int, heap
, 3);
624 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
626 FOR_EACH_VEC_ELT (int, nodes
, i
, x
)
628 if (!bitmap_set_bit (seen
, x
))
631 if (RDG_MEM_WRITE_STMT (rdg
, x
)
632 || predecessor_has_mem_write (rdg
, &(rdg
->vertices
[x
]))
633 /* In anti dependences the read should occur before
634 the write, this is why both the read and the write
635 should be placed in the same partition. */
636 || has_anti_dependence (&(rdg
->vertices
[x
])))
638 bitmap_set_bit (upstream_mem_writes
, x
);
642 VEC_free (int, heap
, nodes
);
646 /* Returns true when vertex u has a memory write node as a predecessor
650 has_upstream_mem_writes (int u
)
652 return bitmap_bit_p (upstream_mem_writes
, u
);
655 static void rdg_flag_vertex_and_dependent (struct graph
*, int, partition_t
,
658 /* Flag the uses of U stopping following the information from
659 upstream_mem_writes. */
662 rdg_flag_uses (struct graph
*rdg
, int u
, partition_t partition
, bitmap loops
,
666 struct vertex
*x
= &(rdg
->vertices
[u
]);
667 gimple stmt
= RDGV_STMT (x
);
668 struct graph_edge
*anti_dep
= has_anti_dependence (x
);
670 /* Keep in the same partition the destination of an antidependence,
671 because this is a store to the exact same location. Putting this
672 in another partition is bad for cache locality. */
675 int v
= anti_dep
->dest
;
677 if (!already_processed_vertex_p (processed
, v
))
678 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
682 if (gimple_code (stmt
) != GIMPLE_PHI
)
684 if ((use_p
= gimple_vuse_op (stmt
)) != NULL_USE_OPERAND_P
)
686 tree use
= USE_FROM_PTR (use_p
);
688 if (TREE_CODE (use
) == SSA_NAME
)
690 gimple def_stmt
= SSA_NAME_DEF_STMT (use
);
691 int v
= rdg_vertex_for_stmt (rdg
, def_stmt
);
694 && !already_processed_vertex_p (processed
, v
))
695 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
701 if (is_gimple_assign (stmt
) && has_upstream_mem_writes (u
))
703 tree op0
= gimple_assign_lhs (stmt
);
705 /* Scalar channels don't have enough space for transmitting data
706 between tasks, unless we add more storage by privatizing. */
707 if (is_gimple_reg (op0
))
710 imm_use_iterator iter
;
712 FOR_EACH_IMM_USE_FAST (use_p
, iter
, op0
)
714 int v
= rdg_vertex_for_stmt (rdg
, USE_STMT (use_p
));
716 if (!already_processed_vertex_p (processed
, v
))
717 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
,
724 /* Flag V from RDG as part of PARTITION, and also flag its loop number
728 rdg_flag_vertex (struct graph
*rdg
, int v
, partition_t partition
, bitmap loops
)
732 if (!bitmap_set_bit (partition
->stmts
, v
))
735 loop
= loop_containing_stmt (RDG_STMT (rdg
, v
));
736 bitmap_set_bit (loops
, loop
->num
);
738 if (rdg_cannot_recompute_vertex_p (rdg
, v
))
740 partition
->has_writes
= true;
741 bitmap_clear_bit (remaining_stmts
, v
);
745 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
746 Also flag their loop number in LOOPS. */
749 rdg_flag_vertex_and_dependent (struct graph
*rdg
, int v
, partition_t partition
,
750 bitmap loops
, bitmap processed
)
753 VEC (int, heap
) *nodes
= VEC_alloc (int, heap
, 3);
756 bitmap_set_bit (processed
, v
);
757 rdg_flag_uses (rdg
, v
, partition
, loops
, processed
);
758 graphds_dfs (rdg
, &v
, 1, &nodes
, false, remaining_stmts
);
759 rdg_flag_vertex (rdg
, v
, partition
, loops
);
761 FOR_EACH_VEC_ELT (int, nodes
, i
, x
)
762 if (!already_processed_vertex_p (processed
, x
))
763 rdg_flag_vertex_and_dependent (rdg
, x
, partition
, loops
, processed
);
765 VEC_free (int, heap
, nodes
);
768 /* Initialize CONDS with all the condition statements from the basic
772 collect_condition_stmts (struct loop
*loop
, VEC (gimple
, heap
) **conds
)
776 VEC (edge
, heap
) *exits
= get_loop_exit_edges (loop
);
778 FOR_EACH_VEC_ELT (edge
, exits
, i
, e
)
780 gimple cond
= last_stmt (e
->src
);
783 VEC_safe_push (gimple
, heap
, *conds
, cond
);
786 VEC_free (edge
, heap
, exits
);
789 /* Add to PARTITION all the exit condition statements for LOOPS
790 together with all their dependent statements determined from
794 rdg_flag_loop_exits (struct graph
*rdg
, bitmap loops
, partition_t partition
,
799 VEC (gimple
, heap
) *conds
= VEC_alloc (gimple
, heap
, 3);
801 EXECUTE_IF_SET_IN_BITMAP (loops
, 0, i
, bi
)
802 collect_condition_stmts (get_loop (i
), &conds
);
804 while (!VEC_empty (gimple
, conds
))
806 gimple cond
= VEC_pop (gimple
, conds
);
807 int v
= rdg_vertex_for_stmt (rdg
, cond
);
808 bitmap new_loops
= BITMAP_ALLOC (NULL
);
810 if (!already_processed_vertex_p (processed
, v
))
811 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, new_loops
, processed
);
813 EXECUTE_IF_SET_IN_BITMAP (new_loops
, 0, i
, bi
)
814 if (bitmap_set_bit (loops
, i
))
815 collect_condition_stmts (get_loop (i
), &conds
);
817 BITMAP_FREE (new_loops
);
820 VEC_free (gimple
, heap
, conds
);
823 /* Returns a bitmap in which all the statements needed for computing
824 the strongly connected component C of the RDG are flagged, also
825 including the loop exit conditions. */
828 build_rdg_partition_for_component (struct graph
*rdg
, rdgc c
)
831 partition_t partition
= partition_alloc (NULL
);
832 bitmap loops
= BITMAP_ALLOC (NULL
);
833 bitmap processed
= BITMAP_ALLOC (NULL
);
835 FOR_EACH_VEC_ELT (int, c
->vertices
, i
, v
)
836 if (!already_processed_vertex_p (processed
, v
))
837 rdg_flag_vertex_and_dependent (rdg
, v
, partition
, loops
, processed
);
839 rdg_flag_loop_exits (rdg
, loops
, partition
, processed
);
841 BITMAP_FREE (processed
);
846 /* Free memory for COMPONENTS. */
849 free_rdg_components (VEC (rdgc
, heap
) *components
)
854 FOR_EACH_VEC_ELT (rdgc
, components
, i
, x
)
856 VEC_free (int, heap
, x
->vertices
);
860 VEC_free (rdgc
, heap
, components
);
863 /* Build the COMPONENTS vector with the strongly connected components
864 of RDG in which the STARTING_VERTICES occur. */
867 rdg_build_components (struct graph
*rdg
, VEC (int, heap
) *starting_vertices
,
868 VEC (rdgc
, heap
) **components
)
871 bitmap saved_components
= BITMAP_ALLOC (NULL
);
872 int n_components
= graphds_scc (rdg
, NULL
);
873 VEC (int, heap
) **all_components
= XNEWVEC (VEC (int, heap
) *, n_components
);
875 for (i
= 0; i
< n_components
; i
++)
876 all_components
[i
] = VEC_alloc (int, heap
, 3);
878 for (i
= 0; i
< rdg
->n_vertices
; i
++)
879 VEC_safe_push (int, heap
, all_components
[rdg
->vertices
[i
].component
], i
);
881 FOR_EACH_VEC_ELT (int, starting_vertices
, i
, v
)
883 int c
= rdg
->vertices
[v
].component
;
885 if (bitmap_set_bit (saved_components
, c
))
887 rdgc x
= XCNEW (struct rdg_component
);
889 x
->vertices
= all_components
[c
];
891 VEC_safe_push (rdgc
, heap
, *components
, x
);
895 for (i
= 0; i
< n_components
; i
++)
896 if (!bitmap_bit_p (saved_components
, i
))
897 VEC_free (int, heap
, all_components
[i
]);
899 free (all_components
);
900 BITMAP_FREE (saved_components
);
903 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
904 For the moment we detect only the memset zero pattern. */
907 classify_partition (loop_p loop
, struct graph
*rdg
, partition_t partition
)
912 data_reference_p single_load
, single_store
;
914 partition
->kind
= PKIND_NORMAL
;
915 partition
->main_dr
= NULL
;
916 partition
->secondary_dr
= NULL
;
918 if (!flag_tree_loop_distribute_patterns
)
921 /* Perform general partition disqualification for builtins. */
922 nb_iter
= number_of_exit_cond_executions (loop
);
923 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
926 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
928 gimple stmt
= RDG_STMT (rdg
, i
);
930 if (gimple_has_volatile_ops (stmt
))
933 /* If the stmt has uses outside of the loop fail.
934 ??? If the stmt is generated in another partition that
935 is not created as builtin we can ignore this. */
936 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
938 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
939 fprintf (dump_file
, "not generating builtin, partition has "
940 "scalar uses outside of the loop\n");
945 /* Detect memset and memcpy. */
948 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
950 gimple stmt
= RDG_STMT (rdg
, i
);
954 if (gimple_code (stmt
) == GIMPLE_PHI
)
957 /* Any scalar stmts are ok. */
958 if (!gimple_vuse (stmt
))
961 /* Otherwise just regular loads/stores. */
962 if (!gimple_assign_single_p (stmt
))
965 /* But exactly one store and/or load. */
967 VEC_iterate (data_reference_p
, RDG_DATAREFS (rdg
, i
), j
, dr
); ++j
)
971 if (single_load
!= NULL
)
977 if (single_store
!= NULL
)
984 if (single_store
&& !single_load
)
986 gimple stmt
= DR_STMT (single_store
);
987 tree rhs
= gimple_assign_rhs1 (stmt
);
988 if (!(integer_zerop (rhs
)
989 || integer_all_onesp (rhs
)
991 || (TREE_CODE (rhs
) == CONSTRUCTOR
992 && !TREE_CLOBBER_P (rhs
))
993 || (INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
994 && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt
)))
995 == TYPE_MODE (unsigned_char_type_node
)))))
997 if (TREE_CODE (rhs
) == SSA_NAME
998 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
999 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1001 if (!adjacent_dr_p (single_store
))
1003 partition
->kind
= PKIND_MEMSET
;
1004 partition
->main_dr
= single_store
;
1006 else if (single_store
&& single_load
)
1008 gimple store
= DR_STMT (single_store
);
1009 gimple load
= DR_STMT (single_load
);
1010 /* Direct aggregate copy or via an SSA name temporary. */
1012 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1014 if (!adjacent_dr_p (single_store
)
1015 || !adjacent_dr_p (single_load
)
1016 || !operand_equal_p (DR_STEP (single_store
),
1017 DR_STEP (single_load
), 0))
1019 partition
->kind
= PKIND_MEMCPY
;
1020 partition
->main_dr
= single_store
;
1021 partition
->secondary_dr
= single_load
;
1025 /* For a data reference REF, return the declaration of its base
1026 address or NULL_TREE if the base is not determined. */
1029 ref_base_address (data_reference_p dr
)
1031 tree base_address
= DR_BASE_ADDRESS (dr
);
1033 && TREE_CODE (base_address
) == ADDR_EXPR
)
1034 return TREE_OPERAND (base_address
, 0);
1036 return base_address
;
1039 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1043 similar_memory_accesses (struct graph
*rdg
, partition_t partition1
,
1044 partition_t partition2
)
1046 unsigned i
, j
, k
, l
;
1047 bitmap_iterator bi
, bj
;
1048 data_reference_p ref1
, ref2
;
1050 /* First check whether in the intersection of the two partitions are
1051 any loads or stores. Common loads are the situation that happens
1053 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1054 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1055 || RDG_MEM_READS_STMT (rdg
, i
))
1058 /* Then check all data-references against each other. */
1059 EXECUTE_IF_SET_IN_BITMAP (partition1
->stmts
, 0, i
, bi
)
1060 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1061 || RDG_MEM_READS_STMT (rdg
, i
))
1062 EXECUTE_IF_SET_IN_BITMAP (partition2
->stmts
, 0, j
, bj
)
1063 if (RDG_MEM_WRITE_STMT (rdg
, j
)
1064 || RDG_MEM_READS_STMT (rdg
, j
))
1066 FOR_EACH_VEC_ELT (data_reference_p
, RDG_DATAREFS (rdg
, i
), k
, ref1
)
1068 tree base1
= ref_base_address (ref1
);
1070 FOR_EACH_VEC_ELT (data_reference_p
,
1071 RDG_DATAREFS (rdg
, j
), l
, ref2
)
1072 if (base1
== ref_base_address (ref2
))
1080 /* Aggregate several components into a useful partition that is
1081 registered in the PARTITIONS vector. Partitions will be
1082 distributed in different loops. */
1085 rdg_build_partitions (struct graph
*rdg
, VEC (rdgc
, heap
) *components
,
1086 VEC (int, heap
) **other_stores
,
1087 VEC (partition_t
, heap
) **partitions
, bitmap processed
)
1091 partition_t partition
= partition_alloc (NULL
);
1093 FOR_EACH_VEC_ELT (rdgc
, components
, i
, x
)
1096 int v
= VEC_index (int, x
->vertices
, 0);
1098 if (bitmap_bit_p (processed
, v
))
1101 np
= build_rdg_partition_for_component (rdg
, x
);
1102 bitmap_ior_into (partition
->stmts
, np
->stmts
);
1103 partition
->has_writes
= partition_has_writes (np
);
1104 bitmap_ior_into (processed
, np
->stmts
);
1105 partition_free (np
);
1107 if (partition_has_writes (partition
))
1109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1111 fprintf (dump_file
, "ldist useful partition:\n");
1112 dump_bitmap (dump_file
, partition
->stmts
);
1115 VEC_safe_push (partition_t
, heap
, *partitions
, partition
);
1116 partition
= partition_alloc (NULL
);
1120 /* Add the nodes from the RDG that were not marked as processed, and
1121 that are used outside the current loop. These are scalar
1122 computations that are not yet part of previous partitions. */
1123 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1124 if (!bitmap_bit_p (processed
, i
)
1125 && rdg_defs_used_in_other_loops_p (rdg
, i
))
1126 VEC_safe_push (int, heap
, *other_stores
, i
);
1128 /* If there are still statements left in the OTHER_STORES array,
1129 create other components and partitions with these stores and
1130 their dependences. */
1131 if (VEC_length (int, *other_stores
) > 0)
1133 VEC (rdgc
, heap
) *comps
= VEC_alloc (rdgc
, heap
, 3);
1134 VEC (int, heap
) *foo
= VEC_alloc (int, heap
, 3);
1136 rdg_build_components (rdg
, *other_stores
, &comps
);
1137 rdg_build_partitions (rdg
, comps
, &foo
, partitions
, processed
);
1139 VEC_free (int, heap
, foo
);
1140 free_rdg_components (comps
);
1143 /* If there is something left in the last partition, save it. */
1144 if (bitmap_count_bits (partition
->stmts
) > 0)
1145 VEC_safe_push (partition_t
, heap
, *partitions
, partition
);
1147 partition_free (partition
);
1150 /* Dump to FILE the PARTITIONS. */
1153 dump_rdg_partitions (FILE *file
, VEC (partition_t
, heap
) *partitions
)
1156 partition_t partition
;
1158 FOR_EACH_VEC_ELT (partition_t
, partitions
, i
, partition
)
1159 debug_bitmap_file (file
, partition
->stmts
);
1162 /* Debug PARTITIONS. */
1163 extern void debug_rdg_partitions (VEC (partition_t
, heap
) *);
1166 debug_rdg_partitions (VEC (partition_t
, heap
) *partitions
)
1168 dump_rdg_partitions (stderr
, partitions
);
1171 /* Returns the number of read and write operations in the RDG. */
1174 number_of_rw_in_rdg (struct graph
*rdg
)
1178 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1180 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1183 if (RDG_MEM_READS_STMT (rdg
, i
))
1190 /* Returns the number of read and write operations in a PARTITION of
1194 number_of_rw_in_partition (struct graph
*rdg
, partition_t partition
)
1200 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1202 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1205 if (RDG_MEM_READS_STMT (rdg
, i
))
1212 /* Returns true when one of the PARTITIONS contains all the read or
1213 write operations of RDG. */
1216 partition_contains_all_rw (struct graph
*rdg
, VEC (partition_t
, heap
) *partitions
)
1219 partition_t partition
;
1220 int nrw
= number_of_rw_in_rdg (rdg
);
1222 FOR_EACH_VEC_ELT (partition_t
, partitions
, i
, partition
)
1223 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1229 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1230 distributed loops. */
/* Driver for one loop: partition the RDG seeded from STARTING_VERTICES,
   classify/fuse the partitions, and emit one loop (or builtin call)
   per remaining partition.  Also reads/writes the file-scope bitmaps
   remaining_stmts and upstream_mem_writes (allocated and freed here).
   NOTE(review): several declarations (i, j, v, nbp, into, any_builtin),
   brace lines, `continue'/`break' statements and the final return are
   elided in this extraction.  */
1233 ldist_gen (struct loop
*loop
, struct graph
*rdg
,
1234 VEC (int, heap
) *starting_vertices
)
/* Working vectors: RDG components, the resulting partitions, and the
   memory writes not already covered by STARTING_VERTICES.  */
1237 VEC (rdgc
, heap
) *components
= VEC_alloc (rdgc
, heap
, 3);
1238 VEC (partition_t
, heap
) *partitions
= VEC_alloc (partition_t
, heap
, 3);
1239 VEC (int, heap
) *other_stores
= VEC_alloc (int, heap
, 3);
1240 partition_t partition
;
1241 bitmap processed
= BITMAP_ALLOC (NULL
);
/* File-scope bitmaps used by the partitioning helpers.  */
1244 remaining_stmts
= BITMAP_ALLOC (NULL
);
1245 upstream_mem_writes
= BITMAP_ALLOC (NULL
);
/* Initially every RDG statement is "remaining", i.e. not yet assigned
   to a partition.  */
1247 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1249 bitmap_set_bit (remaining_stmts
, i
);
1251 /* Save in OTHER_STORES all the memory writes that are not in
1252 STARTING_VERTICES. */
1253 if (RDG_MEM_WRITE_STMT (rdg
, i
))
/* Scan the seed vertices; if I is not among them it is an
   "other store".  NOTE(review): the found/skip logic between the scan
   and the push is elided here.  */
1259 FOR_EACH_VEC_ELT (int, starting_vertices
, j
, v
)
1267 VEC_safe_push (int, heap
, other_stores
, i
);
/* Build the partitions: precompute which nodes have memory writes
   above them, group the RDG into components, then form partitions.  */
1271 mark_nodes_having_upstream_mem_writes (rdg
);
1272 rdg_build_components (rdg
, starting_vertices
, &components
);
1273 rdg_build_partitions (rdg
, components
, &other_stores
, &partitions
,
1275 BITMAP_FREE (processed
);
/* Classify each partition; record whether any can be generated as a
   builtin (memset/memcpy per enum partition_kind in this file).  */
1277 any_builtin
= false;
1278 FOR_EACH_VEC_ELT (partition_t
, partitions
, i
, partition
)
1280 classify_partition (loop
, rdg
, partition
);
1281 any_builtin
|= partition_builtin_p (partition
);
1284 /* If we are only distributing patterns fuse all partitions that
1285 were not properly classified as builtins. Else fuse partitions
1286 with similar memory accesses. */
/* Pattern-only mode: -ftree-loop-distribution is off (the gate also
   admits -ftree-loop-distribute-patterns).  */
1287 if (!flag_tree_loop_distribution
)
1290 /* If we did not detect any builtin simply bail out. */
/* Fuse every non-builtin partition into the first non-builtin one
   (INTO), removing the merged partitions from the vector.  */
1296 for (i
= 0; VEC_iterate (partition_t
, partitions
, i
, into
); ++i
)
1297 if (!partition_builtin_p (into
))
1299 for (++i
; VEC_iterate (partition_t
, partitions
, i
, partition
); ++i
)
1300 if (!partition_builtin_p (partition
))
1302 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1303 VEC_ordered_remove (partition_t
, partitions
, i
);
/* Full-distribution mode: fuse non-builtin partitions that touch
   similar memory, to avoid distributing loops that share accesses.  */
1311 for (i
= 0; VEC_iterate (partition_t
, partitions
, i
, into
); ++i
)
1313 if (partition_builtin_p (into
))
1316 VEC_iterate (partition_t
, partitions
, j
, partition
); ++j
)
1318 if (!partition_builtin_p (partition
)
1319 /* ??? The following is horribly inefficient,
1320 we are re-computing and analyzing data-references
1321 of the stmts in the partitions all the time. */
1322 && similar_memory_accesses (rdg
, into
, partition
))
1324 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1326 fprintf (dump_file
, "fusing partitions\n");
1327 dump_bitmap (dump_file
, into
->stmts
);
1328 dump_bitmap (dump_file
, partition
->stmts
);
1329 fprintf (dump_file
, "because they have similar "
1330 "memory accesses\n");
1332 bitmap_ior_into (into
->stmts
, partition
->stmts
);
1333 VEC_ordered_remove (partition_t
, partitions
, j
);
/* Bail-out checks: a single non-builtin partition, or one partition
   holding all reads/writes, means distribution would be pointless.
   NOTE(review): the condition's opening lines are elided here.  */
1340 nbp
= VEC_length (partition_t
, partitions
);
1343 && !partition_builtin_p (VEC_index (partition_t
, partitions
, 0)))
1345 && partition_contains_all_rw (rdg
, partitions
)))
1351 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1352 dump_rdg_partitions (dump_file
, partitions
);
/* Emit code for each partition; the flag marks every partition but the
   last as needing a copy of the loop.  */
1354 FOR_EACH_VEC_ELT (partition_t
, partitions
, i
, partition
)
1355 generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
/* Release the file-scope bitmaps and all per-call storage.  */
1359 BITMAP_FREE (remaining_stmts
);
1360 BITMAP_FREE (upstream_mem_writes
);
1362 FOR_EACH_VEC_ELT (partition_t
, partitions
, i
, partition
)
1363 partition_free (partition
);
1365 VEC_free (int, heap
, other_stores
);
1366 VEC_free (partition_t
, heap
, partitions
);
1367 free_rdg_components (components
);
1371 /* Distributes the code from LOOP in such a way that producer
1372 statements are placed before consumer statements. When STMTS is
1373 NULL, performs the maximal distribution, if STMTS is not NULL,
1374 tries to separate only these statements from the LOOP's body.
1375 Returns the number of distributed loops. */
/* Builds the RDG for LOOP, maps STMTS to RDG vertex indices, and hands
   off to ldist_gen.  Owns (and frees) the data references, dependence
   relations and loop nest collected while building the RDG.
   NOTE(review): declarations of rdg/i/s/res, brace lines, the early
   `return 0' on RDG failure and the final return are elided in this
   extraction.  */
1378 distribute_loop (struct loop
*loop
, VEC (gimple
, heap
) *stmts
)
1384 VEC (int, heap
) *vertices
;
1385 VEC (ddr_p
, heap
) *dependence_relations
;
1386 VEC (data_reference_p
, heap
) *datarefs
;
1387 VEC (loop_p
, heap
) *loop_nest
;
/* Collect data refs / dependences and build the reduced dependence
   graph for LOOP.  */
1389 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
1390 dependence_relations
= VEC_alloc (ddr_p
, heap
, 100);
1391 loop_nest
= VEC_alloc (loop_p
, heap
, 3);
1392 rdg
= build_rdg (loop
, &loop_nest
, &dependence_relations
, &datarefs
);
/* RDG construction failed: report, clean up, and give up on this
   loop.  NOTE(review): the `if (rdg == NULL)' test is elided here.  */
1396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1398 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1401 free_dependence_relations (dependence_relations
);
1402 free_data_refs (datarefs
);
1403 VEC_free (loop_p
, heap
, loop_nest
);
1407 vertices
= VEC_alloc (int, heap
, 3);
1409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1410 dump_rdg (dump_file
, rdg
);
/* Translate each seed statement into its RDG vertex index.
   NOTE(review): handling of statements with no vertex (v < 0) is
   elided in this extraction.  */
1412 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, s
)
1414 int v
= rdg_vertex_for_stmt (rdg
, s
);
1418 VEC_safe_push (int, heap
, vertices
, v
);
1420 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1422 "ldist asked to generate code for vertex %d\n", v
);
/* Perform the actual distribution, then release everything.  */
1426 res
= ldist_gen (loop
, rdg
, vertices
);
1427 VEC_free (int, heap
, vertices
);
1429 free_dependence_relations (dependence_relations
);
1430 free_data_refs (datarefs
);
1431 VEC_free (loop_p
, heap
, loop_nest
);
1435 /* Distribute all loops in the current function. */
/* Pass execute function: seeds each innermost loop's partitions with
   its memory stores and calls distribute_loop on them; on any change,
   updates virtual SSA form and loop-closed SSA.
   NOTE(review): declarations (loop, li, bb, bbs, i), the FOR_EACH_BB
   wrapper around the uid-reset loops, `continue' statements after the
   guard conditions, brace lines, the `changed' update, the `#endif'
   and the return of TODO flags are elided in this extraction.  */
1438 tree_loop_distribution (void)
1442 bool changed
= false;
/* Reset the statement uids; the pass uses them as scratch markers.  */
1447 gimple_stmt_iterator gsi
;
1448 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1449 gimple_set_uid (gsi_stmt (gsi
), -1);
1450 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1451 gimple_set_uid (gsi_stmt (gsi
), -1);
1454 /* We can at the moment only distribute non-nested loops, thus restrict
1455 walking to innermost loops. */
1456 FOR_EACH_LOOP (li
, loop
, LI_ONLY_INNERMOST
)
1458 VEC (gimple
, heap
) *work_list
= NULL
;
/* Remember the loop number now; the loop structure may change during
   distribution.  */
1460 int num
= loop
->num
;
1461 int nb_generated_loops
= 0;
1464 /* If the loop doesn't have a single exit we will fail anyway,
1465 so do that early. */
1466 if (!single_exit (loop
))
1469 /* Only distribute loops with a header and latch for now. */
/* I.e. skip loops whose body spans more than two basic blocks.  */
1470 if (loop
->num_nodes
> 2)
1473 /* Initialize the worklist with stmts we seed the partitions with. */
1474 bbs
= get_loop_body_in_dom_order (loop
);
1475 for (i
= 0; i
< loop
->num_nodes
; ++i
)
1477 gimple_stmt_iterator gsi
;
1478 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1480 gimple stmt
= gsi_stmt (gsi
);
1481 /* Only distribute stores for now.
1482 ??? We should also try to distribute scalar reductions,
1483 thus SSA defs that have scalar uses outside of the loop. */
/* Keep only single assignments whose LHS is not an SSA register,
   i.e. memory stores.  */
1484 if (!gimple_assign_single_p (stmt
)
1485 || is_gimple_reg (gimple_assign_lhs (stmt
)))
1488 VEC_safe_push (gimple
, heap
, work_list
, stmt
);
/* Distribute only when there is at least one seed store.  */
1493 if (VEC_length (gimple
, work_list
) > 0)
1494 nb_generated_loops
= distribute_loop (loop
, work_list
);
1496 if (nb_generated_loops
> 0)
1499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1501 if (nb_generated_loops
> 1)
1502 fprintf (dump_file
, "Loop %d distributed: split to %d loops.\n",
1503 num
, nb_generated_loops
);
1505 fprintf (dump_file
, "Loop %d is the same.\n", num
);
1508 VEC_free (gimple
, heap
, work_list
);
/* Something was distributed: refresh the virtual operand and restore
   loop-closed SSA form.  NOTE(review): the `if (changed)' guard is
   elided in this extraction.  */
1513 mark_sym_for_renaming (gimple_vop (cfun
));
1514 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1517 #ifdef ENABLE_CHECKING
1518 verify_loop_structure ();
/* Gate for the pass: run when either -ftree-loop-distribution or
   -ftree-loop-distribute-patterns is enabled.  */
1525 gate_tree_loop_distribution (void)
1527 return flag_tree_loop_distribution
1528 || flag_tree_loop_distribute_patterns
;
1531 struct gimple_opt_pass pass_loop_distribution
=
1536 gate_tree_loop_distribution
, /* gate */
1537 tree_loop_distribution
, /* execute */
1540 0, /* static_pass_number */
1541 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
1542 PROP_cfg
| PROP_ssa
, /* properties_required */
1543 0, /* properties_provided */
1544 0, /* properties_destroyed */
1545 0, /* todo_flags_start */
1547 | TODO_verify_ssa
/* todo_flags_finish */