/* Loop autoparallelization.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
   Zdenek Dvorak <dvorakz@suse.cz>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
28 #include "tree-flow.h"
31 #include "tree-data-ref.h"
32 #include "diagnostic.h"
33 #include "tree-pass.h"
34 #include "tree-scalar-evolution.h"
36 #include "langhooks.h"
37 #include "tree-vectorizer.h"
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
46 The most of the complexity is in bringing the code into shape expected
48 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
49 variable and that the exit test is at the start of the loop body
50 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
66 currently we use vect_is_simple_reduction() to detect reduction patterns.
67 The code transformation will be introduced by an example.
74 for (i = 0; i < N; i++)
84 # sum_29 = PHI <sum_11(5), 1(3)>
85 # i_28 = PHI <i_12(5), 0(3)>
88 sum_11 = D.1795_8 + sum_29;
96 # sum_21 = PHI <sum_11(4)>
97 printf (&"%d"[0], sum_21);
100 after reduction transformation (only relevant parts):
108 # Storing the initial value given by the user. #
110 .paral_data_store.32.sum.27 = 1;
112 #pragma omp parallel num_threads(4)
114 #pragma omp for schedule(static)
116 # The neutral element corresponding to the particular
117 reduction's operation, e.g. 0 for PLUS_EXPR,
118 1 for MULT_EXPR, etc. replaces the user's initial value. #
120 # sum.27_29 = PHI <sum.27_11, 0>
122 sum.27_11 = D.1827_8 + sum.27_29;
126 # Adding this reduction phi is done at create_phi_for_local_result() #
127 # sum.27_56 = PHI <sum.27_11, 0>
130 # Creating the atomic operation is done at
131 create_call_for_reduction_1() #
133 #pragma omp atomic_load
134 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
135 D.1840_60 = sum.27_56 + D.1839_59;
136 #pragma omp atomic_store (D.1840_60);
140 # collecting the result after the join of the threads is done at
141 create_loads_for_reductions().
142 The value computed by the threads is loaded from the
146 .paral_data_load.33_52 = &.paral_data_store.32;
147 sum_37 = .paral_data_load.33_52->sum.27;
148 sum_43 = D.1795_41 + sum_37;
151 # sum_21 = PHI <sum_43, sum_26>
152 printf (&"%d"[0], sum_21);
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
164 /* Element of the hashtable, representing a
165 reduction in the current loop. */
166 struct reduction_info
168 gimple reduc_stmt
; /* reduction statement. */
169 gimple reduc_phi
; /* The phi node defining the reduction. */
170 enum tree_code reduction_code
;/* code for the reduction operation. */
171 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
172 of the reduction variable when existing the loop. */
173 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
174 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
175 tree init
; /* reduction initialization value. */
176 gimple new_phi
; /* (helper field) Newly created phi node whose result
177 will be passed to the atomic operation. Represents
178 the local result each thread computed for the reduction
182 /* Equality and hash functions for hashtab code. */
185 reduction_info_eq (const void *aa
, const void *bb
)
187 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
188 const struct reduction_info
*b
= (const struct reduction_info
*) bb
;
190 return (a
->reduc_phi
== b
->reduc_phi
);
194 reduction_info_hash (const void *aa
)
196 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
198 return htab_hash_pointer (a
->reduc_phi
);
201 static struct reduction_info
*
202 reduction_phi (htab_t reduction_list
, gimple phi
)
204 struct reduction_info tmpred
, *red
;
206 if (htab_elements (reduction_list
) == 0)
209 tmpred
.reduc_phi
= phi
;
210 red
= (struct reduction_info
*) htab_find (reduction_list
, &tmpred
);
215 /* Element of hashtable of names to copy. */
217 struct name_to_copy_elt
219 unsigned version
; /* The version of the name to copy. */
220 tree new_name
; /* The new name used in the copy. */
221 tree field
; /* The field of the structure used to pass the
225 /* Equality and hash functions for hashtab code. */
228 name_to_copy_elt_eq (const void *aa
, const void *bb
)
230 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
231 const struct name_to_copy_elt
*b
= (const struct name_to_copy_elt
*) bb
;
233 return a
->version
== b
->version
;
237 name_to_copy_elt_hash (const void *aa
)
239 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
241 return (hashval_t
) a
->version
;
245 /* Data dependency analysis. Returns true if the iterations of LOOP
246 are independent on each other (that is, if we can execute them
250 loop_parallel_p (struct loop
*loop
)
252 VEC (ddr_p
, heap
) * dependence_relations
;
253 VEC (data_reference_p
, heap
) *datarefs
;
254 lambda_trans_matrix trans
;
257 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
258 fprintf (dump_file
, "\nConsidering loop %d\n", loop
->num
);
260 /* Check for problems with dependences. If the loop can be reversed,
261 the iterations are independent. */
262 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
263 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
264 compute_data_dependences_for_loop (loop
, true, &datarefs
,
265 &dependence_relations
);
266 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
267 dump_data_dependence_relations (dump_file
, dependence_relations
);
269 trans
= lambda_trans_matrix_new (1, 1);
270 LTM_MATRIX (trans
)[0][0] = -1;
272 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
276 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
278 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
280 " FAILED: data dependencies exist across iterations\n");
282 free_dependence_relations (dependence_relations
);
283 free_data_refs (datarefs
);
288 /* Return true when LOOP contains basic blocks marked with the
289 BB_IRREDUCIBLE_LOOP flag. */
292 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
295 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
298 for (i
= 0; i
< loop
->num_nodes
; i
++)
299 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
308 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
309 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
310 to their addresses that can be reused. The address of OBJ is known to
311 be invariant in the whole function. */
314 take_address_of (tree obj
, tree type
, edge entry
, htab_t decl_address
)
318 struct int_tree_map ielt
, *nielt
;
319 tree
*var_p
, name
, bvar
, addr
;
323 /* Since the address of OBJ is invariant, the trees may be shared.
324 Avoid rewriting unrelated parts of the code. */
325 obj
= unshare_expr (obj
);
327 handled_component_p (*var_p
);
328 var_p
= &TREE_OPERAND (*var_p
, 0))
330 uid
= DECL_UID (*var_p
);
333 dslot
= htab_find_slot_with_hash (decl_address
, &ielt
, uid
, INSERT
);
336 addr
= build_addr (*var_p
, current_function_decl
);
337 bvar
= create_tmp_var (TREE_TYPE (addr
), get_name (*var_p
));
338 add_referenced_var (bvar
);
339 stmt
= gimple_build_assign (bvar
, addr
);
340 name
= make_ssa_name (bvar
, stmt
);
341 gimple_assign_set_lhs (stmt
, name
);
342 gsi_insert_on_edge_immediate (entry
, stmt
);
344 nielt
= XNEW (struct int_tree_map
);
350 name
= ((struct int_tree_map
*) *dslot
)->to
;
354 *var_p
= build1 (INDIRECT_REF
, TREE_TYPE (*var_p
), name
);
355 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
356 &stmts
, true, NULL_TREE
);
357 if (!gimple_seq_empty_p (stmts
))
358 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
361 if (TREE_TYPE (name
) != type
)
363 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
365 if (!gimple_seq_empty_p (stmts
))
366 gsi_insert_seq_on_edge_immediate (entry
, stmts
);
372 /* Callback for htab_traverse. Create the initialization statement
373 for reduction described in SLOT, and place it at the preheader of
374 the loop described in DATA. */
377 initialize_reductions (void **slot
, void *data
)
380 tree bvar
, type
, arg
;
383 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
384 struct loop
*loop
= (struct loop
*) data
;
386 /* Create initialization in preheader:
387 reduction_variable = initialization value of reduction. */
389 /* In the phi node at the header, replace the argument coming
390 from the preheader with the reduction initialization value. */
392 /* Create a new variable to initialize the reduction. */
393 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
394 bvar
= create_tmp_var (type
, "reduction");
395 add_referenced_var (bvar
);
397 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
398 OMP_CLAUSE_REDUCTION
);
399 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
400 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
402 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
405 /* Replace the argument representing the initialization value
406 with the initialization value for the reduction (neutral
407 element for the particular operation, e.g. 0 for PLUS_EXPR,
408 1 for MULT_EXPR, etc).
409 Keep the old value in a new variable "reduction_initial",
410 that will be taken in consideration after the parallel
411 computing is done. */
413 e
= loop_preheader_edge (loop
);
414 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
415 /* Create new variable to hold the initial value. */
417 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
418 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
419 reduc
->initial_value
= arg
;
425 struct walk_stmt_info info
;
431 /* Eliminates references to local variables in *TP out of the single
432 entry single exit region starting at DTA->ENTRY.
433 DECL_ADDRESS contains addresses of the references that had their
434 address taken already. If the expression is changed, CHANGED is
435 set to true. Callback for walk_tree. */
438 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
440 struct elv_data
*const dta
= (struct elv_data
*) data
;
441 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
447 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
450 type
= TREE_TYPE (t
);
451 addr_type
= build_pointer_type (type
);
452 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
);
453 *tp
= build1 (INDIRECT_REF
, TREE_TYPE (*tp
), addr
);
459 if (TREE_CODE (t
) == ADDR_EXPR
)
461 /* ADDR_EXPR may appear in two contexts:
462 -- as a gimple operand, when the address taken is a function invariant
463 -- as gimple rhs, when the resulting address in not a function
465 We do not need to do anything special in the latter case (the base of
466 the memory reference whose address is taken may be replaced in the
467 DECL_P case). The former case is more complicated, as we need to
468 ensure that the new address is still a gimple operand. Thus, it
469 is not sufficient to replace just the base of the memory reference --
470 we need to move the whole computation of the address out of the
472 if (!is_gimple_val (t
))
476 obj
= TREE_OPERAND (t
, 0);
477 var
= get_base_address (obj
);
478 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
481 addr_type
= TREE_TYPE (t
);
482 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
);
495 /* Moves the references to local variables in STMT out of the single
496 entry single exit region starting at ENTRY. DECL_ADDRESS contains
497 addresses of the references that had their address taken
501 eliminate_local_variables_stmt (edge entry
, gimple stmt
,
506 memset (&dta
.info
, '\0', sizeof (dta
.info
));
508 dta
.decl_address
= decl_address
;
511 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
517 /* Eliminates the references to local variables from the single entry
518 single exit region between the ENTRY and EXIT edges.
521 1) Taking address of a local variable -- these are moved out of the
522 region (and temporary variable is created to hold the address if
525 2) Dereferencing a local variable -- these are replaced with indirect
529 eliminate_local_variables (edge entry
, edge exit
)
532 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
534 gimple_stmt_iterator gsi
;
535 htab_t decl_address
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
537 basic_block entry_bb
= entry
->src
;
538 basic_block exit_bb
= exit
->dest
;
540 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
542 for (i
= 0; VEC_iterate (basic_block
, body
, i
, bb
); i
++)
543 if (bb
!= entry_bb
&& bb
!= exit_bb
)
544 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
545 eliminate_local_variables_stmt (entry
, gsi_stmt (gsi
),
548 htab_delete (decl_address
);
549 VEC_free (basic_block
, heap
, body
);
552 /* Returns true if expression EXPR is not defined between ENTRY and
553 EXIT, i.e. if all its operands are defined outside of the region. */
556 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
558 basic_block entry_bb
= entry
->src
;
559 basic_block exit_bb
= exit
->dest
;
562 if (is_gimple_min_invariant (expr
))
565 if (TREE_CODE (expr
) == SSA_NAME
)
567 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
569 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
570 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
579 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
580 The copies are stored to NAME_COPIES, if NAME was already duplicated,
581 its duplicate stored in NAME_COPIES is returned.
583 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
584 duplicated, storing the copies in DECL_COPIES. */
587 separate_decls_in_region_name (tree name
,
588 htab_t name_copies
, htab_t decl_copies
,
591 tree copy
, var
, var_copy
;
592 unsigned idx
, uid
, nuid
;
593 struct int_tree_map ielt
, *nielt
;
594 struct name_to_copy_elt elt
, *nelt
;
595 void **slot
, **dslot
;
597 if (TREE_CODE (name
) != SSA_NAME
)
600 idx
= SSA_NAME_VERSION (name
);
602 slot
= htab_find_slot_with_hash (name_copies
, &elt
, idx
,
603 copy_name_p
? INSERT
: NO_INSERT
);
605 return ((struct name_to_copy_elt
*) *slot
)->new_name
;
607 var
= SSA_NAME_VAR (name
);
608 uid
= DECL_UID (var
);
610 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, uid
, INSERT
);
613 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
614 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
615 add_referenced_var (var_copy
);
616 nielt
= XNEW (struct int_tree_map
);
618 nielt
->to
= var_copy
;
621 /* Ensure that when we meet this decl next time, we won't duplicate
623 nuid
= DECL_UID (var_copy
);
625 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, nuid
, INSERT
);
626 gcc_assert (!*dslot
);
627 nielt
= XNEW (struct int_tree_map
);
629 nielt
->to
= var_copy
;
633 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
637 copy
= duplicate_ssa_name (name
, NULL
);
638 nelt
= XNEW (struct name_to_copy_elt
);
640 nelt
->new_name
= copy
;
641 nelt
->field
= NULL_TREE
;
650 SSA_NAME_VAR (copy
) = var_copy
;
654 /* Finds the ssa names used in STMT that are defined outside the
655 region between ENTRY and EXIT and replaces such ssa names with
656 their duplicates. The duplicates are stored to NAME_COPIES. Base
657 decls of all ssa names used in STMT (including those defined in
658 LOOP) are replaced with the new temporary variables; the
659 replacement decls are stored in DECL_COPIES. */
662 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
663 htab_t name_copies
, htab_t decl_copies
)
671 mark_virtual_ops_for_renaming (stmt
);
673 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
675 name
= DEF_FROM_PTR (def
);
676 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
677 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
679 gcc_assert (copy
== name
);
682 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
684 name
= USE_FROM_PTR (use
);
685 if (TREE_CODE (name
) != SSA_NAME
)
688 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
689 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
695 /* Callback for htab_traverse. Adds a field corresponding to the reduction
696 specified in SLOT. The type is passed in DATA. */
699 add_field_for_reduction (void **slot
, void *data
)
702 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
703 tree
const type
= (tree
) data
;
704 tree var
= SSA_NAME_VAR (gimple_assign_lhs (red
->reduc_stmt
));
705 tree field
= build_decl (gimple_location (red
->reduc_stmt
),
706 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
708 insert_field_into_struct (type
, field
);
715 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
716 described in SLOT. The type is passed in DATA. */
719 add_field_for_name (void **slot
, void *data
)
721 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
722 tree type
= (tree
) data
;
723 tree name
= ssa_name (elt
->version
);
724 tree var
= SSA_NAME_VAR (name
);
725 tree field
= build_decl (DECL_SOURCE_LOCATION (var
),
726 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
728 insert_field_into_struct (type
, field
);
734 /* Callback for htab_traverse. A local result is the intermediate result
736 thread, or the initial value in case no iteration was executed.
737 This function creates a phi node reflecting these values.
738 The phi's result will be stored in NEW_PHI field of the
739 reduction's data structure. */
742 create_phi_for_local_result (void **slot
, void *data
)
744 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
745 const struct loop
*const loop
= (const struct loop
*) data
;
748 basic_block store_bb
;
751 /* STORE_BB is the block where the phi
752 should be stored. It is the destination of the loop exit.
753 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
754 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
756 /* STORE_BB has two predecessors. One coming from the loop
757 (the reduction's result is computed at the loop),
758 and another coming from a block preceding the loop,
760 are executed (the initial value should be taken). */
761 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
762 e
= EDGE_PRED (store_bb
, 1);
764 e
= EDGE_PRED (store_bb
, 0);
766 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
)),
768 new_phi
= create_phi_node (local_res
, store_bb
);
769 SSA_NAME_DEF_STMT (local_res
) = new_phi
;
770 add_phi_arg (new_phi
, reduc
->init
, e
);
771 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
772 FALLTHRU_EDGE (loop
->latch
));
773 reduc
->new_phi
= new_phi
;
783 basic_block store_bb
;
787 /* Callback for htab_traverse. Create an atomic instruction for the
788 reduction described in SLOT.
789 DATA annotates the place in memory the atomic operation relates to,
790 and the basic block it needs to be generated in. */
793 create_call_for_reduction_1 (void **slot
, void *data
)
795 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
796 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
797 gimple_stmt_iterator gsi
;
798 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
799 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
804 tree t
, addr
, addr_type
, ref
, x
;
808 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
809 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
810 addr_type
= build_pointer_type (type
);
812 addr
= build_addr (t
, current_function_decl
);
814 /* Create phi node. */
815 bb
= clsn_data
->load_bb
;
817 e
= split_block (bb
, t
);
820 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
821 add_referenced_var (tmp_load
);
822 tmp_load
= make_ssa_name (tmp_load
, NULL
);
823 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
824 SSA_NAME_DEF_STMT (tmp_load
) = load
;
825 gsi
= gsi_start_bb (new_bb
);
826 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
828 e
= split_block (new_bb
, load
);
830 gsi
= gsi_start_bb (new_bb
);
832 x
= fold_build2 (reduc
->reduction_code
,
833 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
834 PHI_RESULT (reduc
->new_phi
));
836 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
837 GSI_CONTINUE_LINKING
);
839 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
843 /* Create the atomic operation at the join point of the threads.
844 REDUCTION_LIST describes the reductions in the LOOP.
845 LD_ST_DATA describes the shared data structure where
846 shared data is stored in and loaded from. */
848 create_call_for_reduction (struct loop
*loop
, htab_t reduction_list
,
849 struct clsn_data
*ld_st_data
)
851 htab_traverse (reduction_list
, create_phi_for_local_result
, loop
);
852 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
853 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
854 htab_traverse (reduction_list
, create_call_for_reduction_1
, ld_st_data
);
857 /* Callback for htab_traverse. Loads the final reduction value at the
858 join point of all threads, and inserts it in the right place. */
861 create_loads_for_reductions (void **slot
, void *data
)
863 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
864 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
866 gimple_stmt_iterator gsi
;
867 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
868 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
873 gsi
= gsi_after_labels (clsn_data
->load_bb
);
874 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
875 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
879 name
= PHI_RESULT (red
->keep_res
);
880 stmt
= gimple_build_assign (name
, x
);
881 SSA_NAME_DEF_STMT (name
) = stmt
;
883 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
885 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
886 !gsi_end_p (gsi
); gsi_next (&gsi
))
887 if (gsi_stmt (gsi
) == red
->keep_res
)
889 remove_phi_node (&gsi
, false);
895 /* Load the reduction result that was stored in LD_ST_DATA.
896 REDUCTION_LIST describes the list of reductions that the
897 loads should be generated for. */
899 create_final_loads_for_reduction (htab_t reduction_list
,
900 struct clsn_data
*ld_st_data
)
902 gimple_stmt_iterator gsi
;
906 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
907 t
= build_fold_addr_expr (ld_st_data
->store
);
908 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
910 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
911 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
913 htab_traverse (reduction_list
, create_loads_for_reductions
, ld_st_data
);
917 /* Callback for htab_traverse. Store the neutral value for the
918 particular reduction's operation, e.g. 0 for PLUS_EXPR,
919 1 for MULT_EXPR, etc. into the reduction field.
920 The reduction is specified in SLOT. The store information is
924 create_stores_for_reduction (void **slot
, void *data
)
926 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
927 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
930 gimple_stmt_iterator gsi
;
931 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
933 gsi
= gsi_last_bb (clsn_data
->store_bb
);
934 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
935 stmt
= gimple_build_assign (t
, red
->initial_value
);
936 mark_virtual_ops_for_renaming (stmt
);
937 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
942 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
943 store to a field of STORE in STORE_BB for the ssa name and its duplicate
944 specified in SLOT. */
947 create_loads_and_stores_for_name (void **slot
, void *data
)
949 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
950 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
953 gimple_stmt_iterator gsi
;
954 tree type
= TREE_TYPE (elt
->new_name
);
955 tree struct_type
= TREE_TYPE (TREE_TYPE (clsn_data
->load
));
958 gsi
= gsi_last_bb (clsn_data
->store_bb
);
959 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
960 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
961 mark_virtual_ops_for_renaming (stmt
);
962 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
964 gsi
= gsi_last_bb (clsn_data
->load_bb
);
965 load_struct
= fold_build1 (INDIRECT_REF
, struct_type
, clsn_data
->load
);
966 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
967 stmt
= gimple_build_assign (elt
->new_name
, t
);
968 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
969 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
974 /* Moves all the variables used in LOOP and defined outside of it (including
975 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
976 name) to a structure created for this purpose. The code
984 is transformed this way:
999 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1000 pointer `new' is intentionally not initialized (the loop will be split to a
1001 separate function later, and `new' will be initialized from its arguments).
1002 LD_ST_DATA holds information about the shared data structure used to pass
1003 information among the threads. It is initialized here, and
1004 gen_parallel_loop will pass it to create_call_for_reduction that
1005 needs this information. REDUCTION_LIST describes the reductions
1009 separate_decls_in_region (edge entry
, edge exit
, htab_t reduction_list
,
1010 tree
*arg_struct
, tree
*new_arg_struct
,
1011 struct clsn_data
*ld_st_data
)
1014 basic_block bb1
= split_edge (entry
);
1015 basic_block bb0
= single_pred (bb1
);
1016 htab_t name_copies
= htab_create (10, name_to_copy_elt_hash
,
1017 name_to_copy_elt_eq
, free
);
1018 htab_t decl_copies
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
1021 tree type
, type_name
, nvar
;
1022 gimple_stmt_iterator gsi
;
1023 struct clsn_data clsn_data
;
1024 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
1026 basic_block entry_bb
= bb1
;
1027 basic_block exit_bb
= exit
->dest
;
1029 entry
= single_succ_edge (entry_bb
);
1030 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1032 for (i
= 0; VEC_iterate (basic_block
, body
, i
, bb
); i
++)
1034 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1036 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1037 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1038 name_copies
, decl_copies
);
1040 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1041 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1042 name_copies
, decl_copies
);
1046 VEC_free (basic_block
, heap
, body
);
1048 if (htab_elements (name_copies
) == 0 && reduction_list
== 0)
1050 /* It may happen that there is nothing to copy (if there are only
1051 loop carried and external variables in the loop). */
1053 *new_arg_struct
= NULL
;
1057 /* Create the type for the structure to store the ssa names to. */
1058 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1059 type_name
= build_decl (BUILTINS_LOCATION
,
1060 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1062 TYPE_NAME (type
) = type_name
;
1064 htab_traverse (name_copies
, add_field_for_name
, type
);
1065 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1067 /* Create the fields for reductions. */
1068 htab_traverse (reduction_list
, add_field_for_reduction
,
1073 /* Create the loads and stores. */
1074 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1075 add_referenced_var (*arg_struct
);
1076 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1077 add_referenced_var (nvar
);
1078 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1080 ld_st_data
->store
= *arg_struct
;
1081 ld_st_data
->load
= *new_arg_struct
;
1082 ld_st_data
->store_bb
= bb0
;
1083 ld_st_data
->load_bb
= bb1
;
1085 htab_traverse (name_copies
, create_loads_and_stores_for_name
,
1088 /* Load the calculation from memory (after the join of the threads). */
1090 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1092 htab_traverse (reduction_list
, create_stores_for_reduction
,
1094 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1095 clsn_data
.load_bb
= exit
->dest
;
1096 clsn_data
.store
= ld_st_data
->store
;
1097 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1101 htab_delete (decl_copies
);
1102 htab_delete (name_copies
);
1105 /* Bitmap containing uids of functions created by parallelization. We cannot
1106 allocate it from the default obstack, as it must live across compilation
1107 of several functions; we make it gc allocated instead. */
1109 static GTY(()) bitmap parallelized_functions
;
1111 /* Returns true if FN was created by create_loop_fn. */
1114 parallelized_function_p (tree fn
)
1116 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1119 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1122 /* Creates and returns an empty function that will receive the body of
1123 a parallelized loop. */
1126 create_loop_fn (void)
1130 tree decl
, type
, name
, t
;
1131 struct function
*act_cfun
= cfun
;
1132 static unsigned loopfn_num
;
1134 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1135 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1136 clean_symbol_name (tname
);
1137 name
= get_identifier (tname
);
1138 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1140 decl
= build_decl (BUILTINS_LOCATION
,
1141 FUNCTION_DECL
, name
, type
);
1142 if (!parallelized_functions
)
1143 parallelized_functions
= BITMAP_GGC_ALLOC ();
1144 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1146 TREE_STATIC (decl
) = 1;
1147 TREE_USED (decl
) = 1;
1148 DECL_ARTIFICIAL (decl
) = 1;
1149 DECL_IGNORED_P (decl
) = 0;
1150 TREE_PUBLIC (decl
) = 0;
1151 DECL_UNINLINABLE (decl
) = 1;
1152 DECL_EXTERNAL (decl
) = 0;
1153 DECL_CONTEXT (decl
) = NULL_TREE
;
1154 DECL_INITIAL (decl
) = make_node (BLOCK
);
1156 t
= build_decl (BUILTINS_LOCATION
,
1157 RESULT_DECL
, NULL_TREE
, void_type_node
);
1158 DECL_ARTIFICIAL (t
) = 1;
1159 DECL_IGNORED_P (t
) = 1;
1160 DECL_RESULT (decl
) = t
;
1162 t
= build_decl (BUILTINS_LOCATION
,
1163 PARM_DECL
, get_identifier (".paral_data_param"),
1165 DECL_ARTIFICIAL (t
) = 1;
1166 DECL_ARG_TYPE (t
) = ptr_type_node
;
1167 DECL_CONTEXT (t
) = decl
;
1169 DECL_ARGUMENTS (decl
) = t
;
1171 allocate_struct_function (decl
, false);
1173 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1175 set_cfun (act_cfun
);
1180 /* Moves the exit condition of LOOP to the beginning of its header, and
1181 duplicates the part of the last iteration that gets disabled to the
1182 exit of the loop. NIT is the number of iterations of the loop
1183 (used to initialize the variables in the duplicated part).
1185 TODO: the common case is that latch of the loop is empty and immediately
1186 follows the loop exit. In this case, it would be better not to copy the
1187 body of the loop, but only move the entry of the loop directly before the
1188 exit check and increase the number of iterations of the loop by one.
1189 This may need some additional preconditioning in case NIT = ~0.
1190 REDUCTION_LIST describes the reductions in LOOP. */
1193 transform_to_exit_first_loop (struct loop
*loop
, htab_t reduction_list
, tree nit
)
1195 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1198 edge exit
= single_dom_exit (loop
), hpred
;
1199 tree control
, control_name
, res
, t
;
1200 gimple phi
, nphi
, cond_stmt
, stmt
;
1201 gimple_stmt_iterator gsi
;
1203 split_block_after_labels (loop
->header
);
1204 orig_header
= single_succ (loop
->header
);
1205 hpred
= single_succ_edge (loop
->header
);
1207 cond_stmt
= last_stmt (exit
->src
);
1208 control
= gimple_cond_lhs (cond_stmt
);
1209 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1211 /* Make sure that we have phi nodes on exit for all loop header phis
1212 (create_parallel_loop requires that). */
1213 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1215 phi
= gsi_stmt (gsi
);
1216 res
= PHI_RESULT (phi
);
1217 t
= make_ssa_name (SSA_NAME_VAR (res
), phi
);
1218 SET_PHI_RESULT (phi
, t
);
1220 nphi
= create_phi_node (res
, orig_header
);
1221 SSA_NAME_DEF_STMT (res
) = nphi
;
1222 add_phi_arg (nphi
, t
, hpred
);
1226 gimple_cond_set_lhs (cond_stmt
, t
);
1227 update_stmt (cond_stmt
);
1232 bbs
= get_loop_body_in_dom_order (loop
);
1233 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1235 nbbs
= XNEWVEC (basic_block
, n
);
1236 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1243 /* Other than reductions, the only gimple reg that should be copied
1244 out of the loop is the control variable. */
1246 control_name
= NULL_TREE
;
1247 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1249 phi
= gsi_stmt (gsi
);
1250 res
= PHI_RESULT (phi
);
1251 if (!is_gimple_reg (res
))
1257 /* Check if it is a part of reduction. If it is,
1258 keep the phi at the reduction's keep_res field. The
1259 PHI_RESULT of this phi is the resulting value of the reduction
1260 variable when exiting the loop. */
1262 exit
= single_dom_exit (loop
);
1264 if (htab_elements (reduction_list
) > 0)
1266 struct reduction_info
*red
;
1268 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1270 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1273 red
->keep_res
= phi
;
1278 gcc_assert (control_name
== NULL_TREE
1279 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1281 remove_phi_node (&gsi
, false);
1283 gcc_assert (control_name
!= NULL_TREE
);
1285 /* Initialize the control variable to NIT. */
1286 gsi
= gsi_after_labels (ex_bb
);
1287 nit
= force_gimple_operand_gsi (&gsi
,
1288 fold_convert (TREE_TYPE (control_name
), nit
),
1289 false, NULL_TREE
, false, GSI_SAME_STMT
);
1290 stmt
= gimple_build_assign (control_name
, nit
);
1291 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1292 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1295 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1296 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1297 NEW_DATA is the variable that should be initialized from the argument
1298 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1299 basic block containing GIMPLE_OMP_PARALLEL tree. */
1302 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1303 tree new_data
, unsigned n_threads
)
1305 gimple_stmt_iterator gsi
;
1306 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1308 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1309 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1310 edge exit
, nexit
, guard
, end
, e
;
1312 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1313 bb
= loop_preheader_edge (loop
)->src
;
1314 paral_bb
= single_pred (bb
);
1315 gsi
= gsi_last_bb (paral_bb
);
1317 t
= build_omp_clause (BUILTINS_LOCATION
, OMP_CLAUSE_NUM_THREADS
);
1318 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1319 = build_int_cst (integer_type_node
, n_threads
);
1320 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1322 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1324 /* Initialize NEW_DATA. */
1327 gsi
= gsi_after_labels (bb
);
1329 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1330 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1331 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1332 SSA_NAME_DEF_STMT (param
) = stmt
;
1334 stmt
= gimple_build_assign (new_data
,
1335 fold_convert (TREE_TYPE (new_data
), param
));
1336 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1337 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1340 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1341 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1342 gsi
= gsi_last_bb (bb
);
1343 gsi_insert_after (&gsi
, gimple_build_omp_return (false), GSI_NEW_STMT
);
1345 /* Extract data for GIMPLE_OMP_FOR. */
1346 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1347 cond_stmt
= last_stmt (loop
->header
);
1349 cvar
= gimple_cond_lhs (cond_stmt
);
1350 cvar_base
= SSA_NAME_VAR (cvar
);
1351 phi
= SSA_NAME_DEF_STMT (cvar
);
1352 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1353 initvar
= make_ssa_name (cvar_base
, NULL
);
1354 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1356 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1358 gsi
= gsi_last_bb (loop
->latch
);
1359 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1360 gsi_remove (&gsi
, true);
1363 for_bb
= split_edge (loop_preheader_edge (loop
));
1364 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1365 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1366 gcc_assert (exit
== single_dom_exit (loop
));
1368 guard
= make_edge (for_bb
, ex_bb
, 0);
1369 single_succ_edge (loop
->latch
)->flags
= 0;
1370 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1371 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1373 phi
= gsi_stmt (gsi
);
1374 res
= PHI_RESULT (phi
);
1375 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1377 PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
)),
1379 add_phi_arg (phi
, PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
)),
1382 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1383 PENDING_STMT (e
) = NULL
;
1385 /* Emit GIMPLE_OMP_FOR. */
1386 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1387 type
= TREE_TYPE (cvar
);
1388 t
= build_omp_clause (BUILTINS_LOCATION
, OMP_CLAUSE_SCHEDULE
);
1389 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1391 for_stmt
= gimple_build_omp_for (NULL
, t
, 1, NULL
);
1392 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1393 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1394 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1395 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1396 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1398 build_int_cst (type
, 1)));
1400 gsi
= gsi_last_bb (for_bb
);
1401 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1402 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1404 /* Emit GIMPLE_OMP_CONTINUE. */
1405 gsi
= gsi_last_bb (loop
->latch
);
1406 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1407 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1408 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1410 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1411 gsi
= gsi_last_bb (ex_bb
);
1412 gsi_insert_after (&gsi
, gimple_build_omp_return (true), GSI_NEW_STMT
);
1417 /* Generates code to execute the iterations of LOOP in N_THREADS
1418 threads in parallel.
1420 NITER describes number of iterations of LOOP.
1421 REDUCTION_LIST describes the reductions existent in the LOOP. */
1424 gen_parallel_loop (struct loop
*loop
, htab_t reduction_list
,
1425 unsigned n_threads
, struct tree_niter_desc
*niter
)
1429 tree many_iterations_cond
, type
, nit
;
1430 tree arg_struct
, new_arg_struct
;
1432 basic_block parallel_head
;
1434 struct clsn_data clsn_data
;
1439 ---------------------------------------------------------------------
1442 IV = phi (INIT, IV + STEP)
1448 ---------------------------------------------------------------------
1450 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1451 we generate the following code:
1453 ---------------------------------------------------------------------
1456 || NITER < MIN_PER_THREAD * N_THREADS)
1460 store all local loop-invariant variables used in body of the loop to DATA.
1461 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1462 load the variables from DATA.
1463 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1466 GIMPLE_OMP_CONTINUE;
1467 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1468 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1474 IV = phi (INIT, IV + STEP)
1485 /* Create two versions of the loop -- in the old one, we know that the
1486 number of iterations is large enough, and we will transform it into the
1487 loop that will be split to loop_fn, the new one will be used for the
1488 remaining iterations. */
1490 type
= TREE_TYPE (niter
->niter
);
1491 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1494 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1496 many_iterations_cond
=
1497 fold_build2 (GE_EXPR
, boolean_type_node
,
1498 nit
, build_int_cst (type
, MIN_PER_THREAD
* n_threads
));
1499 many_iterations_cond
1500 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1501 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1502 many_iterations_cond
);
1503 many_iterations_cond
1504 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1506 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1507 if (!is_gimple_condexpr (many_iterations_cond
))
1509 many_iterations_cond
1510 = force_gimple_operand (many_iterations_cond
, &stmts
,
1513 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1516 initialize_original_copy_tables ();
1518 /* We assume that the loop usually iterates a lot. */
1519 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1520 nloop
= loop_version (loop
, many_iterations_cond
, NULL
,
1521 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1522 update_ssa (TODO_update_ssa
);
1523 free_original_copy_tables ();
1525 /* Base all the induction variables in LOOP on a single control one. */
1526 canonicalize_loop_ivs (loop
, &nit
);
1528 /* Ensure that the exit condition is the first statement in the loop. */
1529 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1531 /* Generate initializations for reductions. */
1532 if (htab_elements (reduction_list
) > 0)
1533 htab_traverse (reduction_list
, initialize_reductions
, loop
);
1535 /* Eliminate the references to local variables from the loop. */
1536 gcc_assert (single_exit (loop
));
1537 entry
= loop_preheader_edge (loop
);
1538 exit
= single_dom_exit (loop
);
1540 eliminate_local_variables (entry
, exit
);
1541 /* In the old loop, move all variables non-local to the loop to a structure
1542 and back, and create separate decls for the variables used in loop. */
1543 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1544 &new_arg_struct
, &clsn_data
);
1546 /* Create the parallel constructs. */
1547 parallel_head
= create_parallel_loop (loop
, create_loop_fn (), arg_struct
,
1548 new_arg_struct
, n_threads
);
1549 if (htab_elements (reduction_list
) > 0)
1550 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1554 /* Cancel the loop (it is simpler to do it here rather than to teach the
1555 expander to do it). */
1556 cancel_loop_tree (loop
);
1558 /* Free loop bound estimations that could contain references to
1559 removed statements. */
1560 FOR_EACH_LOOP (li
, loop
, 0)
1561 free_numbers_of_iterations_estimates_loop (loop
);
1563 /* Expand the parallel constructs. We do it directly here instead of running
1564 a separate expand_omp pass, since it is more efficient, and less likely to
1565 cause troubles with further analyses not being able to deal with the
1568 omp_expand_local (parallel_head
);
1571 /* Returns true when LOOP contains vector phi nodes. */
1574 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1577 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1578 gimple_stmt_iterator gsi
;
1581 for (i
= 0; i
< loop
->num_nodes
; i
++)
1582 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1583 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1592 /* Create a reduction_info struct, initialize it with REDUC_STMT
1593 and PHI, insert it to the REDUCTION_LIST. */
1596 build_new_reduction (htab_t reduction_list
, gimple reduc_stmt
, gimple phi
)
1599 struct reduction_info
*new_reduction
;
1601 gcc_assert (reduc_stmt
);
1603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1606 "Detected reduction. reduction stmt is: \n");
1607 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1608 fprintf (dump_file
, "\n");
1611 new_reduction
= XCNEW (struct reduction_info
);
1613 new_reduction
->reduc_stmt
= reduc_stmt
;
1614 new_reduction
->reduc_phi
= phi
;
1615 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1616 slot
= htab_find_slot (reduction_list
, new_reduction
, INSERT
);
1617 *slot
= new_reduction
;
1620 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1623 gather_scalar_reductions (loop_p loop
, htab_t reduction_list
)
1625 gimple_stmt_iterator gsi
;
1626 loop_vec_info simple_loop_info
;
1629 simple_loop_info
= vect_analyze_loop_form (loop
);
1631 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1633 gimple phi
= gsi_stmt (gsi
);
1635 tree res
= PHI_RESULT (phi
);
1638 if (!is_gimple_reg (res
))
1641 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1642 && simple_loop_info
)
1644 gimple reduc_stmt
= vect_is_simple_reduction (simple_loop_info
, phi
, true, &double_reduc
);
1646 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
1649 destroy_loop_vec_info (simple_loop_info
, true);
1652 /* Try to initialize NITER for code generation part. */
1655 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
1657 edge exit
= single_dom_exit (loop
);
1661 /* We need to know # of iterations, and there should be no uses of values
1662 defined inside loop outside of it, unless the values are invariants of
1664 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
1666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1667 fprintf (dump_file
, " FAILED: number of iterations not known\n");
1674 /* Try to initialize REDUCTION_LIST for code generation part.
1675 REDUCTION_LIST describes the reductions. */
1678 try_create_reduction_list (loop_p loop
, htab_t reduction_list
)
1680 edge exit
= single_dom_exit (loop
);
1681 gimple_stmt_iterator gsi
;
1685 gather_scalar_reductions (loop
, reduction_list
);
1688 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1690 gimple phi
= gsi_stmt (gsi
);
1691 struct reduction_info
*red
;
1692 imm_use_iterator imm_iter
;
1693 use_operand_p use_p
;
1695 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1697 if (is_gimple_reg (val
))
1699 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1701 fprintf (dump_file
, "phi is ");
1702 print_gimple_stmt (dump_file
, phi
, 0, 0);
1703 fprintf (dump_file
, "arg of phi to exit: value ");
1704 print_generic_expr (dump_file
, val
, 0);
1705 fprintf (dump_file
, " used outside loop\n");
1707 " checking if it a part of reduction pattern: \n");
1709 if (htab_elements (reduction_list
) == 0)
1711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1713 " FAILED: it is not a part of reduction.\n");
1717 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
1719 if (flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
1721 reduc_phi
= USE_STMT (use_p
);
1725 red
= reduction_phi (reduction_list
, reduc_phi
);
1728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1730 " FAILED: it is not a part of reduction.\n");
1733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1735 fprintf (dump_file
, "reduction phi is ");
1736 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
1737 fprintf (dump_file
, "reduction stmt is ");
1738 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
1743 /* The iterations of the loop may communicate only through bivs whose
1744 iteration space can be distributed efficiently. */
1745 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1747 gimple phi
= gsi_stmt (gsi
);
1748 tree def
= PHI_RESULT (phi
);
1751 if (is_gimple_reg (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
1753 struct reduction_info
*red
;
1755 red
= reduction_phi (reduction_list
, phi
);
1758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1760 " FAILED: scalar dependency between iterations\n");
1770 /* Detect parallel loops and generate parallel code using libgomp
1771 primitives. Returns true if some loop was parallelized, false
1775 parallelize_loops (void)
1777 unsigned n_threads
= flag_tree_parallelize_loops
;
1778 bool changed
= false;
1780 struct tree_niter_desc niter_desc
;
1782 htab_t reduction_list
;
1784 /* Do not parallelize loops in the functions created by parallelization. */
1785 if (parallelized_function_p (cfun
->decl
))
1788 reduction_list
= htab_create (10, reduction_info_hash
,
1789 reduction_info_eq
, free
);
1790 init_stmt_vec_info_vec ();
1792 FOR_EACH_LOOP (li
, loop
, 0)
1794 htab_empty (reduction_list
);
1796 /* FIXME: Only consider innermost loops with just one exit. */
1797 if (loop
->inner
|| !single_dom_exit (loop
))
1800 if (/* And of course, the loop must be parallelizable. */
1801 !can_duplicate_loop_p (loop
)
1802 || loop_has_blocks_with_irreducible_flag (loop
)
1803 /* FIXME: the check for vector phi nodes could be removed. */
1804 || loop_has_vector_phi_nodes (loop
))
1807 if (/* Do not bother with loops in cold areas. */
1808 optimize_loop_nest_for_size_p (loop
)
1809 /* Or loops that roll too little. */
1810 || expected_loop_iterations (loop
) <= n_threads
)
1812 if (!try_get_loop_niter (loop
, &niter_desc
))
1815 if (!try_create_reduction_list (loop
, reduction_list
))
1818 if (!loop_parallel_p (loop
))
1822 gen_parallel_loop (loop
, reduction_list
,
1823 n_threads
, &niter_desc
);
1824 verify_flow_info ();
1825 verify_dominators (CDI_DOMINATORS
);
1826 verify_loop_structure ();
1827 verify_loop_closed_ssa ();
1830 free_stmt_vec_info_vec ();
1831 htab_delete (reduction_list
);
1833 /* Parallelization will cause new function calls to be inserted through
1834 which local variables will escape. Reset the points-to solutions
1835 for ESCAPED and CALLUSED. */
1838 pt_solution_reset (&cfun
->gimple_df
->escaped
);
1839 pt_solution_reset (&cfun
->gimple_df
->callused
);
1845 #include "gt-tree-parloops.h"