1 /* Loop autoparallelization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
34 #include "tree-hasher.h"
35 #include "tree-parloops.h"
38 /* This pass tries to distribute iterations of loops into several threads.
39 The implementation is straightforward -- for each loop we test whether its
40 iterations are independent, and if it is the case (and some additional
41 conditions regarding profitability and correctness are satisfied), we
42 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45 Most of the complexity is in bringing the code into shape expected
47 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
48 variable and that the exit test is at the start of the loop body
49 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
50 variables by accesses through pointers, and breaking up ssa chains
51 by storing the values incoming to the parallelized loop to a structure
52 passed to the new function as an argument (something similar is done
53 in omp gimplification, unfortunately only a small part of the code
57 -- if there are several parallelizable loops in a function, it may be
58 possible to generate the threads just once (using synchronization to
59 ensure that cross-loop dependences are obeyed).
60 -- handling of common reduction patterns for outer loops.
62 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
65 Currently we use vect_force_simple_reduction() to detect reduction patterns.
66 The code transformation will be introduced by an example.
73 for (i = 0; i < N; i++)
83 # sum_29 = PHI <sum_11(5), 1(3)>
84 # i_28 = PHI <i_12(5), 0(3)>
87 sum_11 = D.1795_8 + sum_29;
95 # sum_21 = PHI <sum_11(4)>
96 printf (&"%d"[0], sum_21);
99 after reduction transformation (only relevant parts):
107 # Storing the initial value given by the user. #
109 .paral_data_store.32.sum.27 = 1;
111 #pragma omp parallel num_threads(4)
113 #pragma omp for schedule(static)
115 # The neutral element corresponding to the particular
116 reduction's operation, e.g. 0 for PLUS_EXPR,
117 1 for MULT_EXPR, etc. replaces the user's initial value. #
119 # sum.27_29 = PHI <sum.27_11, 0>
121 sum.27_11 = D.1827_8 + sum.27_29;
125 # Adding this reduction phi is done at create_phi_for_local_result() #
126 # sum.27_56 = PHI <sum.27_11, 0>
129 # Creating the atomic operation is done at
130 create_call_for_reduction_1() #
132 #pragma omp atomic_load
133 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
134 D.1840_60 = sum.27_56 + D.1839_59;
135 #pragma omp atomic_store (D.1840_60);
139 # collecting the result after the join of the threads is done at
140 create_loads_for_reductions().
141 The value computed by the threads is loaded from the
145 .paral_data_load.33_52 = &.paral_data_store.32;
146 sum_37 = .paral_data_load.33_52->sum.27;
147 sum_43 = D.1795_41 + sum_37;
150 # sum_21 = PHI <sum_43, sum_26>
151 printf (&"%d"[0], sum_21);
159 /* Minimal number of iterations of a loop that should be executed in each
161 #define MIN_PER_THREAD 100
163 /* Element of the hashtable, representing a
164 reduction in the current loop. */
165 struct reduction_info
167 gimple reduc_stmt
; /* reduction statement. */
168 gimple reduc_phi
; /* The phi node defining the reduction. */
169 enum tree_code reduction_code
;/* code for the reduction operation. */
170 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
172 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
173 of the reduction variable when existing the loop. */
174 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
175 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
176 tree init
; /* reduction initialization value. */
177 gimple new_phi
; /* (helper field) Newly created phi node whose result
178 will be passed to the atomic operation. Represents
179 the local result each thread computed for the reduction
183 /* Reduction info hashtable helpers. */
185 struct reduction_hasher
: typed_free_remove
<reduction_info
>
187 typedef reduction_info value_type
;
188 typedef reduction_info compare_type
;
189 static inline hashval_t
hash (const value_type
*);
190 static inline bool equal (const value_type
*, const compare_type
*);
193 /* Equality and hash functions for hashtab code. */
196 reduction_hasher::equal (const value_type
*a
, const compare_type
*b
)
198 return (a
->reduc_phi
== b
->reduc_phi
);
202 reduction_hasher::hash (const value_type
*a
)
204 return a
->reduc_version
;
207 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
210 static struct reduction_info
*
211 reduction_phi (reduction_info_table_type reduction_list
, gimple phi
)
213 struct reduction_info tmpred
, *red
;
215 if (reduction_list
.elements () == 0 || phi
== NULL
)
218 tmpred
.reduc_phi
= phi
;
219 tmpred
.reduc_version
= gimple_uid (phi
);
220 red
= reduction_list
.find (&tmpred
);
225 /* Element of hashtable of names to copy. */
227 struct name_to_copy_elt
229 unsigned version
; /* The version of the name to copy. */
230 tree new_name
; /* The new name used in the copy. */
231 tree field
; /* The field of the structure used to pass the
235 /* Name copies hashtable helpers. */
237 struct name_to_copy_hasher
: typed_free_remove
<name_to_copy_elt
>
239 typedef name_to_copy_elt value_type
;
240 typedef name_to_copy_elt compare_type
;
241 static inline hashval_t
hash (const value_type
*);
242 static inline bool equal (const value_type
*, const compare_type
*);
245 /* Equality and hash functions for hashtab code. */
248 name_to_copy_hasher::equal (const value_type
*a
, const compare_type
*b
)
250 return a
->version
== b
->version
;
254 name_to_copy_hasher::hash (const value_type
*a
)
256 return (hashval_t
) a
->version
;
259 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
261 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
262 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
263 represents the denominator for every element in the matrix. */
264 typedef struct lambda_trans_matrix_s
266 lambda_matrix matrix
;
270 } *lambda_trans_matrix
;
271 #define LTM_MATRIX(T) ((T)->matrix)
272 #define LTM_ROWSIZE(T) ((T)->rowsize)
273 #define LTM_COLSIZE(T) ((T)->colsize)
274 #define LTM_DENOMINATOR(T) ((T)->denominator)
276 /* Allocate a new transformation matrix. */
278 static lambda_trans_matrix
279 lambda_trans_matrix_new (int colsize
, int rowsize
,
280 struct obstack
* lambda_obstack
)
282 lambda_trans_matrix ret
;
284 ret
= (lambda_trans_matrix
)
285 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
286 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
287 LTM_ROWSIZE (ret
) = rowsize
;
288 LTM_COLSIZE (ret
) = colsize
;
289 LTM_DENOMINATOR (ret
) = 1;
293 /* Multiply a vector VEC by a matrix MAT.
294 MAT is an M*N matrix, and VEC is a vector with length N. The result
295 is stored in DEST which must be a vector of length M. */
298 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
299 lambda_vector vec
, lambda_vector dest
)
303 lambda_vector_clear (dest
, m
);
304 for (i
= 0; i
< m
; i
++)
305 for (j
= 0; j
< n
; j
++)
306 dest
[i
] += matrix
[i
][j
] * vec
[j
];
309 /* Return true if TRANS is a legal transformation matrix that respects
310 the dependence vectors in DISTS and DIRS. The conservative answer
313 "Wolfe proves that a unimodular transformation represented by the
314 matrix T is legal when applied to a loop nest with a set of
315 lexicographically non-negative distance vectors RDG if and only if
316 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
317 i.e.: if and only if it transforms the lexicographically positive
318 distance vectors to lexicographically positive vectors. Note that
319 a unimodular matrix must transform the zero vector (and only it) to
320 the zero vector." S.Muchnick. */
323 lambda_transform_legal_p (lambda_trans_matrix trans
,
325 vec
<ddr_p
> dependence_relations
)
328 lambda_vector distres
;
329 struct data_dependence_relation
*ddr
;
331 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
332 && LTM_ROWSIZE (trans
) == nb_loops
);
334 /* When there are no dependences, the transformation is correct. */
335 if (dependence_relations
.length () == 0)
338 ddr
= dependence_relations
[0];
342 /* When there is an unknown relation in the dependence_relations, we
343 know that it is no worth looking at this loop nest: give up. */
344 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
347 distres
= lambda_vector_new (nb_loops
);
349 /* For each distance vector in the dependence graph. */
350 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
352 /* Don't care about relations for which we know that there is no
353 dependence, nor about read-read (aka. output-dependences):
354 these data accesses can happen in any order. */
355 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
356 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
359 /* Conservatively answer: "this transformation is not valid". */
360 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
363 /* If the dependence could not be captured by a distance vector,
364 conservatively answer that the transform is not valid. */
365 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
368 /* Compute trans.dist_vect */
369 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
371 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
372 DDR_DIST_VECT (ddr
, j
), distres
);
374 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
381 /* Data dependency analysis. Returns true if the iterations of LOOP
382 are independent on each other (that is, if we can execute them
386 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
388 vec
<loop_p
> loop_nest
;
389 vec
<ddr_p
> dependence_relations
;
390 vec
<data_reference_p
> datarefs
;
391 lambda_trans_matrix trans
;
394 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
396 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
398 fprintf (dump_file
, "loop is innermost\n");
400 fprintf (dump_file
, "loop NOT innermost\n");
403 /* Check for problems with dependences. If the loop can be reversed,
404 the iterations are independent. */
405 datarefs
.create (10);
406 dependence_relations
.create (10 * 10);
407 loop_nest
.create (3);
408 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
409 &dependence_relations
))
411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
412 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
417 dump_data_dependence_relations (dump_file
, dependence_relations
);
419 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
420 LTM_MATRIX (trans
)[0][0] = -1;
422 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
425 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
426 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
428 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
430 " FAILED: data dependencies exist across iterations\n");
433 loop_nest
.release ();
434 free_dependence_relations (dependence_relations
);
435 free_data_refs (datarefs
);
440 /* Return true when LOOP contains basic blocks marked with the
441 BB_IRREDUCIBLE_LOOP flag. */
444 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
447 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
450 for (i
= 0; i
< loop
->num_nodes
; i
++)
451 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
460 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
461 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
462 to their addresses that can be reused. The address of OBJ is known to
463 be invariant in the whole function. Other needed statements are placed
467 take_address_of (tree obj
, tree type
, edge entry
,
468 int_tree_htab_type decl_address
, gimple_stmt_iterator
*gsi
)
471 int_tree_map
**dslot
;
472 struct int_tree_map ielt
, *nielt
;
473 tree
*var_p
, name
, addr
;
477 /* Since the address of OBJ is invariant, the trees may be shared.
478 Avoid rewriting unrelated parts of the code. */
479 obj
= unshare_expr (obj
);
481 handled_component_p (*var_p
);
482 var_p
= &TREE_OPERAND (*var_p
, 0))
485 /* Canonicalize the access to base on a MEM_REF. */
487 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
489 /* Assign a canonical SSA name to the address of the base decl used
490 in the address and share it for all accesses and addresses based
492 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
494 dslot
= decl_address
.find_slot_with_hash (&ielt
, uid
, INSERT
);
499 addr
= TREE_OPERAND (*var_p
, 0);
501 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
503 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
505 name
= make_ssa_name (TREE_TYPE (addr
), NULL
);
506 stmt
= gimple_build_assign (name
, addr
);
507 gsi_insert_on_edge_immediate (entry
, stmt
);
509 nielt
= XNEW (struct int_tree_map
);
517 /* Express the address in terms of the canonical SSA name. */
518 TREE_OPERAND (*var_p
, 0) = name
;
520 return build_fold_addr_expr_with_type (obj
, type
);
522 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
523 &stmts
, true, NULL_TREE
);
524 if (!gimple_seq_empty_p (stmts
))
525 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
527 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
529 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
531 if (!gimple_seq_empty_p (stmts
))
532 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
538 /* Callback for htab_traverse. Create the initialization statement
539 for reduction described in SLOT, and place it at the preheader of
540 the loop described in DATA. */
543 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
546 tree bvar
, type
, arg
;
549 struct reduction_info
*const reduc
= *slot
;
551 /* Create initialization in preheader:
552 reduction_variable = initialization value of reduction. */
554 /* In the phi node at the header, replace the argument coming
555 from the preheader with the reduction initialization value. */
557 /* Create a new variable to initialize the reduction. */
558 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
559 bvar
= create_tmp_var (type
, "reduction");
561 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
562 OMP_CLAUSE_REDUCTION
);
563 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
564 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
566 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
569 /* Replace the argument representing the initialization value
570 with the initialization value for the reduction (neutral
571 element for the particular operation, e.g. 0 for PLUS_EXPR,
572 1 for MULT_EXPR, etc).
573 Keep the old value in a new variable "reduction_initial",
574 that will be taken in consideration after the parallel
575 computing is done. */
577 e
= loop_preheader_edge (loop
);
578 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
579 /* Create new variable to hold the initial value. */
581 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
582 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
583 reduc
->initial_value
= arg
;
589 struct walk_stmt_info info
;
591 int_tree_htab_type decl_address
;
592 gimple_stmt_iterator
*gsi
;
597 /* Eliminates references to local variables in *TP out of the single
598 entry single exit region starting at DTA->ENTRY.
599 DECL_ADDRESS contains addresses of the references that had their
600 address taken already. If the expression is changed, CHANGED is
601 set to true. Callback for walk_tree. */
604 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
606 struct elv_data
*const dta
= (struct elv_data
*) data
;
607 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
613 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
616 type
= TREE_TYPE (t
);
617 addr_type
= build_pointer_type (type
);
618 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
620 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
626 *tp
= build_simple_mem_ref (addr
);
632 if (TREE_CODE (t
) == ADDR_EXPR
)
634 /* ADDR_EXPR may appear in two contexts:
635 -- as a gimple operand, when the address taken is a function invariant
636 -- as gimple rhs, when the resulting address in not a function
638 We do not need to do anything special in the latter case (the base of
639 the memory reference whose address is taken may be replaced in the
640 DECL_P case). The former case is more complicated, as we need to
641 ensure that the new address is still a gimple operand. Thus, it
642 is not sufficient to replace just the base of the memory reference --
643 we need to move the whole computation of the address out of the
645 if (!is_gimple_val (t
))
649 obj
= TREE_OPERAND (t
, 0);
650 var
= get_base_address (obj
);
651 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
654 addr_type
= TREE_TYPE (t
);
655 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
657 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
674 /* Moves the references to local variables in STMT at *GSI out of the single
675 entry single exit region starting at ENTRY. DECL_ADDRESS contains
676 addresses of the references that had their address taken
680 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
681 int_tree_htab_type decl_address
)
684 gimple stmt
= gsi_stmt (*gsi
);
686 memset (&dta
.info
, '\0', sizeof (dta
.info
));
688 dta
.decl_address
= decl_address
;
692 if (gimple_debug_bind_p (stmt
))
695 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
696 eliminate_local_variables_1
, &dta
.info
, NULL
);
699 gimple_debug_bind_reset_value (stmt
);
703 else if (gimple_clobber_p (stmt
))
705 stmt
= gimple_build_nop ();
706 gsi_replace (gsi
, stmt
, false);
712 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
719 /* Eliminates the references to local variables from the single entry
720 single exit region between the ENTRY and EXIT edges.
723 1) Taking address of a local variable -- these are moved out of the
724 region (and temporary variable is created to hold the address if
727 2) Dereferencing a local variable -- these are replaced with indirect
731 eliminate_local_variables (edge entry
, edge exit
)
734 vec
<basic_block
> body
;
737 gimple_stmt_iterator gsi
;
738 bool has_debug_stmt
= false;
739 int_tree_htab_type decl_address
;
740 decl_address
.create (10);
741 basic_block entry_bb
= entry
->src
;
742 basic_block exit_bb
= exit
->dest
;
744 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
746 FOR_EACH_VEC_ELT (body
, i
, bb
)
747 if (bb
!= entry_bb
&& bb
!= exit_bb
)
748 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
749 if (is_gimple_debug (gsi_stmt (gsi
)))
751 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
752 has_debug_stmt
= true;
755 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
758 FOR_EACH_VEC_ELT (body
, i
, bb
)
759 if (bb
!= entry_bb
&& bb
!= exit_bb
)
760 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
761 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
762 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
764 decl_address
.dispose ();
768 /* Returns true if expression EXPR is not defined between ENTRY and
769 EXIT, i.e. if all its operands are defined outside of the region. */
772 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
774 basic_block entry_bb
= entry
->src
;
775 basic_block exit_bb
= exit
->dest
;
778 if (is_gimple_min_invariant (expr
))
781 if (TREE_CODE (expr
) == SSA_NAME
)
783 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
785 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
786 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
795 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
796 The copies are stored to NAME_COPIES, if NAME was already duplicated,
797 its duplicate stored in NAME_COPIES is returned.
799 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
800 duplicated, storing the copies in DECL_COPIES. */
803 separate_decls_in_region_name (tree name
, name_to_copy_table_type name_copies
,
804 int_tree_htab_type decl_copies
, bool copy_name_p
)
806 tree copy
, var
, var_copy
;
807 unsigned idx
, uid
, nuid
;
808 struct int_tree_map ielt
, *nielt
;
809 struct name_to_copy_elt elt
, *nelt
;
810 name_to_copy_elt
**slot
;
811 int_tree_map
**dslot
;
813 if (TREE_CODE (name
) != SSA_NAME
)
816 idx
= SSA_NAME_VERSION (name
);
818 slot
= name_copies
.find_slot_with_hash (&elt
, idx
,
819 copy_name_p
? INSERT
: NO_INSERT
);
821 return (*slot
)->new_name
;
825 copy
= duplicate_ssa_name (name
, NULL
);
826 nelt
= XNEW (struct name_to_copy_elt
);
828 nelt
->new_name
= copy
;
829 nelt
->field
= NULL_TREE
;
838 var
= SSA_NAME_VAR (name
);
842 uid
= DECL_UID (var
);
844 dslot
= decl_copies
.find_slot_with_hash (&ielt
, uid
, INSERT
);
847 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
848 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
849 nielt
= XNEW (struct int_tree_map
);
851 nielt
->to
= var_copy
;
854 /* Ensure that when we meet this decl next time, we won't duplicate
856 nuid
= DECL_UID (var_copy
);
858 dslot
= decl_copies
.find_slot_with_hash (&ielt
, nuid
, INSERT
);
859 gcc_assert (!*dslot
);
860 nielt
= XNEW (struct int_tree_map
);
862 nielt
->to
= var_copy
;
866 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
868 replace_ssa_name_symbol (copy
, var_copy
);
872 /* Finds the ssa names used in STMT that are defined outside the
873 region between ENTRY and EXIT and replaces such ssa names with
874 their duplicates. The duplicates are stored to NAME_COPIES. Base
875 decls of all ssa names used in STMT (including those defined in
876 LOOP) are replaced with the new temporary variables; the
877 replacement decls are stored in DECL_COPIES. */
880 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
881 name_to_copy_table_type name_copies
,
882 int_tree_htab_type decl_copies
)
890 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
892 name
= DEF_FROM_PTR (def
);
893 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
894 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
896 gcc_assert (copy
== name
);
899 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
901 name
= USE_FROM_PTR (use
);
902 if (TREE_CODE (name
) != SSA_NAME
)
905 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
906 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
912 /* Finds the ssa names used in STMT that are defined outside the
913 region between ENTRY and EXIT and replaces such ssa names with
914 their duplicates. The duplicates are stored to NAME_COPIES. Base
915 decls of all ssa names used in STMT (including those defined in
916 LOOP) are replaced with the new temporary variables; the
917 replacement decls are stored in DECL_COPIES. */
920 separate_decls_in_region_debug (gimple stmt
,
921 name_to_copy_table_type name_copies
,
922 int_tree_htab_type decl_copies
)
927 struct int_tree_map ielt
;
928 struct name_to_copy_elt elt
;
929 name_to_copy_elt
**slot
;
930 int_tree_map
**dslot
;
932 if (gimple_debug_bind_p (stmt
))
933 var
= gimple_debug_bind_get_var (stmt
);
934 else if (gimple_debug_source_bind_p (stmt
))
935 var
= gimple_debug_source_bind_get_var (stmt
);
938 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
940 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
941 ielt
.uid
= DECL_UID (var
);
942 dslot
= decl_copies
.find_slot_with_hash (&ielt
, ielt
.uid
, NO_INSERT
);
945 if (gimple_debug_bind_p (stmt
))
946 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
947 else if (gimple_debug_source_bind_p (stmt
))
948 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
950 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
952 name
= USE_FROM_PTR (use
);
953 if (TREE_CODE (name
) != SSA_NAME
)
956 elt
.version
= SSA_NAME_VERSION (name
);
957 slot
= name_copies
.find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
960 gimple_debug_bind_reset_value (stmt
);
965 SET_USE (use
, (*slot
)->new_name
);
971 /* Callback for htab_traverse. Adds a field corresponding to the reduction
972 specified in SLOT. The type is passed in DATA. */
975 add_field_for_reduction (reduction_info
**slot
, tree type
)
978 struct reduction_info
*const red
= *slot
;
979 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
980 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
981 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
983 insert_field_into_struct (type
, field
);
990 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
991 described in SLOT. The type is passed in DATA. */
994 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
996 struct name_to_copy_elt
*const elt
= *slot
;
997 tree name
= ssa_name (elt
->version
);
998 tree field
= build_decl (UNKNOWN_LOCATION
,
999 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
1002 insert_field_into_struct (type
, field
);
1008 /* Callback for htab_traverse. A local result is the intermediate result
1009 computed by a single
1010 thread, or the initial value in case no iteration was executed.
1011 This function creates a phi node reflecting these values.
1012 The phi's result will be stored in NEW_PHI field of the
1013 reduction's data structure. */
1016 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1018 struct reduction_info
*const reduc
= *slot
;
1021 basic_block store_bb
;
1023 source_location locus
;
1025 /* STORE_BB is the block where the phi
1026 should be stored. It is the destination of the loop exit.
1027 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1028 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1030 /* STORE_BB has two predecessors. One coming from the loop
1031 (the reduction's result is computed at the loop),
1032 and another coming from a block preceding the loop,
1034 are executed (the initial value should be taken). */
1035 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1036 e
= EDGE_PRED (store_bb
, 1);
1038 e
= EDGE_PRED (store_bb
, 0);
1039 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
), NULL
);
1040 locus
= gimple_location (reduc
->reduc_stmt
);
1041 new_phi
= create_phi_node (local_res
, store_bb
);
1042 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1043 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1044 FALLTHRU_EDGE (loop
->latch
), locus
);
1045 reduc
->new_phi
= new_phi
;
1055 basic_block store_bb
;
1056 basic_block load_bb
;
1059 /* Callback for htab_traverse. Create an atomic instruction for the
1060 reduction described in SLOT.
1061 DATA annotates the place in memory the atomic operation relates to,
1062 and the basic block it needs to be generated in. */
1065 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1067 struct reduction_info
*const reduc
= *slot
;
1068 gimple_stmt_iterator gsi
;
1069 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1074 tree t
, addr
, ref
, x
;
1075 tree tmp_load
, name
;
1078 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1079 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1081 addr
= build_addr (t
, current_function_decl
);
1083 /* Create phi node. */
1084 bb
= clsn_data
->load_bb
;
1086 e
= split_block (bb
, t
);
1089 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1090 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1091 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1092 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1093 gsi
= gsi_start_bb (new_bb
);
1094 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1096 e
= split_block (new_bb
, load
);
1098 gsi
= gsi_start_bb (new_bb
);
1100 x
= fold_build2 (reduc
->reduction_code
,
1101 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1102 PHI_RESULT (reduc
->new_phi
));
1104 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1105 GSI_CONTINUE_LINKING
);
1107 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1111 /* Create the atomic operation at the join point of the threads.
1112 REDUCTION_LIST describes the reductions in the LOOP.
1113 LD_ST_DATA describes the shared data structure where
1114 shared data is stored in and loaded from. */
1116 create_call_for_reduction (struct loop
*loop
,
1117 reduction_info_table_type reduction_list
,
1118 struct clsn_data
*ld_st_data
)
1120 reduction_list
.traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1121 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1122 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1124 .traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
1127 /* Callback for htab_traverse. Loads the final reduction value at the
1128 join point of all threads, and inserts it in the right place. */
1131 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1133 struct reduction_info
*const red
= *slot
;
1135 gimple_stmt_iterator gsi
;
1136 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1141 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1142 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1143 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1147 name
= PHI_RESULT (red
->keep_res
);
1148 stmt
= gimple_build_assign (name
, x
);
1149 SSA_NAME_DEF_STMT (name
) = stmt
;
1151 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1153 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1154 !gsi_end_p (gsi
); gsi_next (&gsi
))
1155 if (gsi_stmt (gsi
) == red
->keep_res
)
1157 remove_phi_node (&gsi
, false);
1163 /* Load the reduction result that was stored in LD_ST_DATA.
1164 REDUCTION_LIST describes the list of reductions that the
1165 loads should be generated for. */
1167 create_final_loads_for_reduction (reduction_info_table_type reduction_list
,
1168 struct clsn_data
*ld_st_data
)
1170 gimple_stmt_iterator gsi
;
1174 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1175 t
= build_fold_addr_expr (ld_st_data
->store
);
1176 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1178 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1179 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
1182 .traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
1186 /* Callback for htab_traverse. Store the neutral value for the
1187 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1188 1 for MULT_EXPR, etc. into the reduction field.
1189 The reduction is specified in SLOT. The store information is
1193 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1195 struct reduction_info
*const red
= *slot
;
1198 gimple_stmt_iterator gsi
;
1199 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1201 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1202 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1203 stmt
= gimple_build_assign (t
, red
->initial_value
);
1204 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1209 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1210 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1211 specified in SLOT. */
1214 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1215 struct clsn_data
*clsn_data
)
1217 struct name_to_copy_elt
*const elt
= *slot
;
1220 gimple_stmt_iterator gsi
;
1221 tree type
= TREE_TYPE (elt
->new_name
);
1224 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1225 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1226 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1227 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1229 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1230 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1231 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1232 stmt
= gimple_build_assign (elt
->new_name
, t
);
1233 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1234 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1239 /* Moves all the variables used in LOOP and defined outside of it (including
1240 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1241 name) to a structure created for this purpose. The code
1249 is transformed this way:
1264 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1265 pointer `new' is intentionally not initialized (the loop will be split to a
1266 separate function later, and `new' will be initialized from its arguments).
1267 LD_ST_DATA holds information about the shared data structure used to pass
1268 information among the threads. It is initialized here, and
1269 gen_parallel_loop will pass it to create_call_for_reduction that
1270 needs this information. REDUCTION_LIST describes the reductions
1274 separate_decls_in_region (edge entry
, edge exit
,
1275 reduction_info_table_type reduction_list
,
1276 tree
*arg_struct
, tree
*new_arg_struct
,
1277 struct clsn_data
*ld_st_data
)
1280 basic_block bb1
= split_edge (entry
);
1281 basic_block bb0
= single_pred (bb1
);
1282 name_to_copy_table_type name_copies
;
1283 name_copies
.create (10);
1284 int_tree_htab_type decl_copies
;
1285 decl_copies
.create (10);
1287 tree type
, type_name
, nvar
;
1288 gimple_stmt_iterator gsi
;
1289 struct clsn_data clsn_data
;
1290 vec
<basic_block
> body
;
1293 basic_block entry_bb
= bb1
;
1294 basic_block exit_bb
= exit
->dest
;
1295 bool has_debug_stmt
= false;
1297 entry
= single_succ_edge (entry_bb
);
1298 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1300 FOR_EACH_VEC_ELT (body
, i
, bb
)
1302 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1304 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1305 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1306 name_copies
, decl_copies
);
1308 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1310 gimple stmt
= gsi_stmt (gsi
);
1312 if (is_gimple_debug (stmt
))
1313 has_debug_stmt
= true;
1315 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1316 name_copies
, decl_copies
);
1321 /* Now process debug bind stmts. We must not create decls while
1322 processing debug stmts, so we defer their processing so as to
1323 make sure we will have debug info for as many variables as
1324 possible (all of those that were dealt with in the loop above),
1325 and discard those for which we know there's nothing we can
1328 FOR_EACH_VEC_ELT (body
, i
, bb
)
1329 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1331 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1333 gimple stmt
= gsi_stmt (gsi
);
1335 if (is_gimple_debug (stmt
))
1337 if (separate_decls_in_region_debug (stmt
, name_copies
,
1340 gsi_remove (&gsi
, true);
1351 if (name_copies
.elements () == 0 && reduction_list
.elements () == 0)
1353 /* It may happen that there is nothing to copy (if there are only
1354 loop carried and external variables in the loop). */
1356 *new_arg_struct
= NULL
;
1360 /* Create the type for the structure to store the ssa names to. */
1361 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1362 type_name
= build_decl (UNKNOWN_LOCATION
,
1363 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1365 TYPE_NAME (type
) = type_name
;
1367 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1368 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1370 /* Create the fields for reductions. */
1371 reduction_list
.traverse
<tree
, add_field_for_reduction
> (type
);
1375 /* Create the loads and stores. */
1376 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1377 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1378 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1380 ld_st_data
->store
= *arg_struct
;
1381 ld_st_data
->load
= *new_arg_struct
;
1382 ld_st_data
->store_bb
= bb0
;
1383 ld_st_data
->load_bb
= bb1
;
1386 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1389 /* Load the calculation from memory (after the join of the threads). */
1391 if (reduction_list
.is_created () && reduction_list
.elements () > 0)
1394 .traverse
<struct clsn_data
*, create_stores_for_reduction
>
1396 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1397 clsn_data
.load_bb
= exit
->dest
;
1398 clsn_data
.store
= ld_st_data
->store
;
1399 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1403 decl_copies
.dispose ();
1404 name_copies
.dispose ();
1407 /* Bitmap containing uids of functions created by parallelization. We cannot
1408 allocate it from the default obstack, as it must live across compilation
1409 of several functions; we make it gc allocated instead. */
1411 static GTY(()) bitmap parallelized_functions
;
1413 /* Returns true if FN was created by create_loop_fn. */
1416 parallelized_function_p (tree fn
)
1418 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1421 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
1424 /* Creates and returns an empty function that will receive the body of
1425 a parallelized loop. */
1428 create_loop_fn (location_t loc
)
1432 tree decl
, type
, name
, t
;
1433 struct function
*act_cfun
= cfun
;
1434 static unsigned loopfn_num
;
1436 loc
= LOCATION_LOCUS (loc
);
1437 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1438 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1439 clean_symbol_name (tname
);
1440 name
= get_identifier (tname
);
1441 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1443 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1444 if (!parallelized_functions
)
1445 parallelized_functions
= BITMAP_GGC_ALLOC ();
1446 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
1448 TREE_STATIC (decl
) = 1;
1449 TREE_USED (decl
) = 1;
1450 DECL_ARTIFICIAL (decl
) = 1;
1451 DECL_IGNORED_P (decl
) = 0;
1452 TREE_PUBLIC (decl
) = 0;
1453 DECL_UNINLINABLE (decl
) = 1;
1454 DECL_EXTERNAL (decl
) = 0;
1455 DECL_CONTEXT (decl
) = NULL_TREE
;
1456 DECL_INITIAL (decl
) = make_node (BLOCK
);
1458 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1459 DECL_ARTIFICIAL (t
) = 1;
1460 DECL_IGNORED_P (t
) = 1;
1461 DECL_RESULT (decl
) = t
;
1463 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1465 DECL_ARTIFICIAL (t
) = 1;
1466 DECL_ARG_TYPE (t
) = ptr_type_node
;
1467 DECL_CONTEXT (t
) = decl
;
1469 DECL_ARGUMENTS (decl
) = t
;
1471 allocate_struct_function (decl
, false);
1473 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1475 set_cfun (act_cfun
);
1480 /* Moves the exit condition of LOOP to the beginning of its header, and
1481 duplicates the part of the last iteration that gets disabled to the
1482 exit of the loop. NIT is the number of iterations of the loop
1483 (used to initialize the variables in the duplicated part).
1485 TODO: the common case is that latch of the loop is empty and immediately
1486 follows the loop exit. In this case, it would be better not to copy the
1487 body of the loop, but only move the entry of the loop directly before the
1488 exit check and increase the number of iterations of the loop by one.
1489 This may need some additional preconditioning in case NIT = ~0.
1490 REDUCTION_LIST describes the reductions in LOOP. */
1493 transform_to_exit_first_loop (struct loop
*loop
,
1494 reduction_info_table_type reduction_list
,
1497 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1500 edge exit
= single_dom_exit (loop
), hpred
;
1501 tree control
, control_name
, res
, t
;
1502 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1503 gimple_stmt_iterator gsi
;
1506 split_block_after_labels (loop
->header
);
1507 orig_header
= single_succ (loop
->header
);
1508 hpred
= single_succ_edge (loop
->header
);
1510 cond_stmt
= last_stmt (exit
->src
);
1511 control
= gimple_cond_lhs (cond_stmt
);
1512 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1514 /* Make sure that we have phi nodes on exit for all loop header phis
1515 (create_parallel_loop requires that). */
1516 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1518 phi
= gsi_stmt (gsi
);
1519 res
= PHI_RESULT (phi
);
1520 t
= copy_ssa_name (res
, phi
);
1521 SET_PHI_RESULT (phi
, t
);
1522 nphi
= create_phi_node (res
, orig_header
);
1523 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1527 gimple_cond_set_lhs (cond_stmt
, t
);
1528 update_stmt (cond_stmt
);
1533 bbs
= get_loop_body_in_dom_order (loop
);
1535 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1537 nbbs
= XNEWVEC (basic_block
, n
);
1538 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1545 /* Other than reductions, the only gimple reg that should be copied
1546 out of the loop is the control variable. */
1547 exit
= single_dom_exit (loop
);
1548 control_name
= NULL_TREE
;
1549 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1551 phi
= gsi_stmt (gsi
);
1552 res
= PHI_RESULT (phi
);
1553 if (virtual_operand_p (res
))
1559 /* Check if it is a part of reduction. If it is,
1560 keep the phi at the reduction's keep_res field. The
1561 PHI_RESULT of this phi is the resulting value of the reduction
1562 variable when exiting the loop. */
1564 if (reduction_list
.elements () > 0)
1566 struct reduction_info
*red
;
1568 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1569 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1572 red
->keep_res
= phi
;
1577 gcc_assert (control_name
== NULL_TREE
1578 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1580 remove_phi_node (&gsi
, false);
1582 gcc_assert (control_name
!= NULL_TREE
);
1584 /* Initialize the control variable to number of iterations
1585 according to the rhs of the exit condition. */
1586 gsi
= gsi_after_labels (ex_bb
);
1587 cond_nit
= last_stmt (exit
->src
);
1588 nit_1
= gimple_cond_rhs (cond_nit
);
1589 nit_1
= force_gimple_operand_gsi (&gsi
,
1590 fold_convert (TREE_TYPE (control_name
), nit_1
),
1591 false, NULL_TREE
, false, GSI_SAME_STMT
);
1592 stmt
= gimple_build_assign (control_name
, nit_1
);
1593 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1594 SSA_NAME_DEF_STMT (control_name
) = stmt
;
1597 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1598 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1599 NEW_DATA is the variable that should be initialized from the argument
1600 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1601 basic block containing GIMPLE_OMP_PARALLEL tree. */
1604 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1605 tree new_data
, unsigned n_threads
, location_t loc
)
1607 gimple_stmt_iterator gsi
;
1608 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1610 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1611 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1612 edge exit
, nexit
, guard
, end
, e
;
1614 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1615 bb
= loop_preheader_edge (loop
)->src
;
1616 paral_bb
= single_pred (bb
);
1617 gsi
= gsi_last_bb (paral_bb
);
1619 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1620 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1621 = build_int_cst (integer_type_node
, n_threads
);
1622 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1623 gimple_set_location (stmt
, loc
);
1625 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1627 /* Initialize NEW_DATA. */
1630 gsi
= gsi_after_labels (bb
);
1632 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1633 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1634 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1635 SSA_NAME_DEF_STMT (param
) = stmt
;
1637 stmt
= gimple_build_assign (new_data
,
1638 fold_convert (TREE_TYPE (new_data
), param
));
1639 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1640 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1643 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1644 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1645 gsi
= gsi_last_bb (bb
);
1646 stmt
= gimple_build_omp_return (false);
1647 gimple_set_location (stmt
, loc
);
1648 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1650 /* Extract data for GIMPLE_OMP_FOR. */
1651 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1652 cond_stmt
= last_stmt (loop
->header
);
1654 cvar
= gimple_cond_lhs (cond_stmt
);
1655 cvar_base
= SSA_NAME_VAR (cvar
);
1656 phi
= SSA_NAME_DEF_STMT (cvar
);
1657 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1658 initvar
= copy_ssa_name (cvar
, NULL
);
1659 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1661 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1663 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1664 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1665 gsi_remove (&gsi
, true);
1668 for_bb
= split_edge (loop_preheader_edge (loop
));
1669 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1670 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1671 gcc_assert (exit
== single_dom_exit (loop
));
1673 guard
= make_edge (for_bb
, ex_bb
, 0);
1674 single_succ_edge (loop
->latch
)->flags
= 0;
1675 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
1676 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1678 source_location locus
;
1680 phi
= gsi_stmt (gsi
);
1681 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1683 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1684 locus
= gimple_phi_arg_location_from_edge (stmt
,
1685 loop_preheader_edge (loop
));
1686 add_phi_arg (phi
, def
, guard
, locus
);
1688 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1689 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1690 add_phi_arg (phi
, def
, end
, locus
);
1692 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1693 PENDING_STMT (e
) = NULL
;
1695 /* Emit GIMPLE_OMP_FOR. */
1696 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1697 type
= TREE_TYPE (cvar
);
1698 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1699 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1701 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
1702 gimple_set_location (for_stmt
, loc
);
1703 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1704 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1705 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1706 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1707 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1709 build_int_cst (type
, 1)));
1711 gsi
= gsi_last_bb (for_bb
);
1712 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1713 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1715 /* Emit GIMPLE_OMP_CONTINUE. */
1716 gsi
= gsi_last_bb (loop
->latch
);
1717 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1718 gimple_set_location (stmt
, loc
);
1719 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1720 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1722 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1723 gsi
= gsi_last_bb (ex_bb
);
1724 stmt
= gimple_build_omp_return (true);
1725 gimple_set_location (stmt
, loc
);
1726 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1728 /* After the above dom info is hosed. Re-compute it. */
1729 free_dominance_info (CDI_DOMINATORS
);
1730 calculate_dominance_info (CDI_DOMINATORS
);
1735 /* Generates code to execute the iterations of LOOP in N_THREADS
1736 threads in parallel.
1738 NITER describes number of iterations of LOOP.
1739 REDUCTION_LIST describes the reductions existent in the LOOP. */
1742 gen_parallel_loop (struct loop
*loop
, reduction_info_table_type reduction_list
,
1743 unsigned n_threads
, struct tree_niter_desc
*niter
)
1746 tree many_iterations_cond
, type
, nit
;
1747 tree arg_struct
, new_arg_struct
;
1749 basic_block parallel_head
;
1751 struct clsn_data clsn_data
;
1755 unsigned int m_p_thread
=2;
1759 ---------------------------------------------------------------------
1762 IV = phi (INIT, IV + STEP)
1768 ---------------------------------------------------------------------
1770 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1771 we generate the following code:
1773 ---------------------------------------------------------------------
1776 || NITER < MIN_PER_THREAD * N_THREADS)
1780 store all local loop-invariant variables used in body of the loop to DATA.
1781 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1782 load the variables from DATA.
1783 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1786 GIMPLE_OMP_CONTINUE;
1787 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1788 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1794 IV = phi (INIT, IV + STEP)
1805 /* Create two versions of the loop -- in the old one, we know that the
1806 number of iterations is large enough, and we will transform it into the
1807 loop that will be split to loop_fn, the new one will be used for the
1808 remaining iterations. */
1810 /* We should compute a better number-of-iterations value for outer loops.
1813 for (i = 0; i < n; ++i)
1814 for (j = 0; j < m; ++j)
1817 we should compute nit = n * m, not nit = n.
1818 Also may_be_zero handling would need to be adjusted. */
1820 type
= TREE_TYPE (niter
->niter
);
1821 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1824 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1829 m_p_thread
=MIN_PER_THREAD
;
1831 many_iterations_cond
=
1832 fold_build2 (GE_EXPR
, boolean_type_node
,
1833 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
1835 many_iterations_cond
1836 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1837 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1838 many_iterations_cond
);
1839 many_iterations_cond
1840 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1842 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1843 if (!is_gimple_condexpr (many_iterations_cond
))
1845 many_iterations_cond
1846 = force_gimple_operand (many_iterations_cond
, &stmts
,
1849 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1852 initialize_original_copy_tables ();
1854 /* We assume that the loop usually iterates a lot. */
1855 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1856 loop_version (loop
, many_iterations_cond
, NULL
,
1857 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1858 update_ssa (TODO_update_ssa
);
1859 free_original_copy_tables ();
1861 /* Base all the induction variables in LOOP on a single control one. */
1862 canonicalize_loop_ivs (loop
, &nit
, true);
1864 /* Ensure that the exit condition is the first statement in the loop. */
1865 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1867 /* Generate initializations for reductions. */
1868 if (reduction_list
.elements () > 0)
1869 reduction_list
.traverse
<struct loop
*, initialize_reductions
> (loop
);
1871 /* Eliminate the references to local variables from the loop. */
1872 gcc_assert (single_exit (loop
));
1873 entry
= loop_preheader_edge (loop
);
1874 exit
= single_dom_exit (loop
);
1876 eliminate_local_variables (entry
, exit
);
1877 /* In the old loop, move all variables non-local to the loop to a structure
1878 and back, and create separate decls for the variables used in loop. */
1879 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1880 &new_arg_struct
, &clsn_data
);
1882 /* Create the parallel constructs. */
1883 loc
= UNKNOWN_LOCATION
;
1884 cond_stmt
= last_stmt (loop
->header
);
1886 loc
= gimple_location (cond_stmt
);
1887 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1888 new_arg_struct
, n_threads
, loc
);
1889 if (reduction_list
.elements () > 0)
1890 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1894 /* Cancel the loop (it is simpler to do it here rather than to teach the
1895 expander to do it). */
1896 cancel_loop_tree (loop
);
1898 /* Free loop bound estimations that could contain references to
1899 removed statements. */
1900 FOR_EACH_LOOP (li
, loop
, 0)
1901 free_numbers_of_iterations_estimates_loop (loop
);
1903 /* Expand the parallel constructs. We do it directly here instead of running
1904 a separate expand_omp pass, since it is more efficient, and less likely to
1905 cause troubles with further analyses not being able to deal with the
1908 omp_expand_local (parallel_head
);
1911 /* Returns true when LOOP contains vector phi nodes. */
1914 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1917 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1918 gimple_stmt_iterator gsi
;
1921 for (i
= 0; i
< loop
->num_nodes
; i
++)
1922 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1923 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
1932 /* Create a reduction_info struct, initialize it with REDUC_STMT
1933 and PHI, insert it to the REDUCTION_LIST. */
1936 build_new_reduction (reduction_info_table_type reduction_list
,
1937 gimple reduc_stmt
, gimple phi
)
1939 reduction_info
**slot
;
1940 struct reduction_info
*new_reduction
;
1942 gcc_assert (reduc_stmt
);
1944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1947 "Detected reduction. reduction stmt is: \n");
1948 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1949 fprintf (dump_file
, "\n");
1952 new_reduction
= XCNEW (struct reduction_info
);
1954 new_reduction
->reduc_stmt
= reduc_stmt
;
1955 new_reduction
->reduc_phi
= phi
;
1956 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1957 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1958 slot
= reduction_list
.find_slot (new_reduction
, INSERT
);
1959 *slot
= new_reduction
;
1962 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1965 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
1967 struct reduction_info
*const red
= *slot
;
1968 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1972 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1975 gather_scalar_reductions (loop_p loop
, reduction_info_table_type reduction_list
)
1977 gimple_stmt_iterator gsi
;
1978 loop_vec_info simple_loop_info
;
1980 simple_loop_info
= vect_analyze_loop_form (loop
);
1982 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1984 gimple phi
= gsi_stmt (gsi
);
1986 tree res
= PHI_RESULT (phi
);
1989 if (virtual_operand_p (res
))
1992 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1993 && simple_loop_info
)
1995 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
1998 if (reduc_stmt
&& !double_reduc
)
1999 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
2002 destroy_loop_vec_info (simple_loop_info
, true);
2004 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2005 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2007 reduction_list
.traverse
<void *, set_reduc_phi_uids
> (NULL
);
2010 /* Try to initialize NITER for code generation part. */
2013 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2015 edge exit
= single_dom_exit (loop
);
2019 /* We need to know # of iterations, and there should be no uses of values
2020 defined inside loop outside of it, unless the values are invariants of
2022 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2025 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2032 /* Try to initialize REDUCTION_LIST for code generation part.
2033 REDUCTION_LIST describes the reductions. */
2036 try_create_reduction_list (loop_p loop
,
2037 reduction_info_table_type reduction_list
)
2039 edge exit
= single_dom_exit (loop
);
2040 gimple_stmt_iterator gsi
;
2044 gather_scalar_reductions (loop
, reduction_list
);
2047 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2049 gimple phi
= gsi_stmt (gsi
);
2050 struct reduction_info
*red
;
2051 imm_use_iterator imm_iter
;
2052 use_operand_p use_p
;
2054 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2056 if (!virtual_operand_p (val
))
2058 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2060 fprintf (dump_file
, "phi is ");
2061 print_gimple_stmt (dump_file
, phi
, 0, 0);
2062 fprintf (dump_file
, "arg of phi to exit: value ");
2063 print_generic_expr (dump_file
, val
, 0);
2064 fprintf (dump_file
, " used outside loop\n");
2066 " checking if it a part of reduction pattern: \n");
2068 if (reduction_list
.elements () == 0)
2070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2072 " FAILED: it is not a part of reduction.\n");
2076 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2078 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2079 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2081 reduc_phi
= USE_STMT (use_p
);
2085 red
= reduction_phi (reduction_list
, reduc_phi
);
2088 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2090 " FAILED: it is not a part of reduction.\n");
2093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2095 fprintf (dump_file
, "reduction phi is ");
2096 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2097 fprintf (dump_file
, "reduction stmt is ");
2098 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2103 /* The iterations of the loop may communicate only through bivs whose
2104 iteration space can be distributed efficiently. */
2105 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2107 gimple phi
= gsi_stmt (gsi
);
2108 tree def
= PHI_RESULT (phi
);
2111 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2113 struct reduction_info
*red
;
2115 red
= reduction_phi (reduction_list
, phi
);
2118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2120 " FAILED: scalar dependency between iterations\n");
2130 /* Detect parallel loops and generate parallel code using libgomp
2131 primitives. Returns true if some loop was parallelized, false
2135 parallelize_loops (void)
2137 unsigned n_threads
= flag_tree_parallelize_loops
;
2138 bool changed
= false;
2140 struct tree_niter_desc niter_desc
;
2142 reduction_info_table_type reduction_list
;
2143 struct obstack parloop_obstack
;
2144 HOST_WIDE_INT estimated
;
2147 /* Do not parallelize loops in the functions created by parallelization. */
2148 if (parallelized_function_p (cfun
->decl
))
2150 if (cfun
->has_nonlocal_label
)
2153 gcc_obstack_init (&parloop_obstack
);
2154 reduction_list
.create (10);
2155 init_stmt_vec_info_vec ();
2157 FOR_EACH_LOOP (li
, loop
, 0)
2159 reduction_list
.empty ();
2160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2162 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2164 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2166 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2169 /* If we use autopar in graphite pass, we use its marked dependency
2170 checking results. */
2171 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2173 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2174 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2178 if (!single_dom_exit (loop
))
2181 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2182 fprintf (dump_file
, "loop is !single_dom_exit\n");
2187 if (/* And of course, the loop must be parallelizable. */
2188 !can_duplicate_loop_p (loop
)
2189 || loop_has_blocks_with_irreducible_flag (loop
)
2190 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2191 /* FIXME: the check for vector phi nodes could be removed. */
2192 || loop_has_vector_phi_nodes (loop
))
2195 estimated
= estimated_stmt_executions_int (loop
);
2196 if (estimated
== -1)
2197 estimated
= max_stmt_executions_int (loop
);
2198 /* FIXME: Bypass this check as graphite doesn't update the
2199 count and frequency correctly now. */
2200 if (!flag_loop_parallelize_all
2201 && ((estimated
!= -1
2202 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2203 /* Do not bother with loops in cold areas. */
2204 || optimize_loop_nest_for_size_p (loop
)))
2207 if (!try_get_loop_niter (loop
, &niter_desc
))
2210 if (!try_create_reduction_list (loop
, reduction_list
))
2213 if (!flag_loop_parallelize_all
2214 && !loop_parallel_p (loop
, &parloop_obstack
))
2218 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2221 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2223 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2224 loop_loc
= find_loop_location (loop
);
2225 if (loop_loc
!= UNKNOWN_LOC
)
2226 fprintf (dump_file
, "\nloop at %s:%d: ",
2227 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2229 gen_parallel_loop (loop
, reduction_list
,
2230 n_threads
, &niter_desc
);
2233 free_stmt_vec_info_vec ();
2234 reduction_list
.dispose ();
2235 obstack_free (&parloop_obstack
, NULL
);
2237 /* Parallelization will cause new function calls to be inserted through
2238 which local variables will escape. Reset the points-to solution
2241 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2246 /* Parallelization. */
2249 gate_tree_parallelize_loops (void)
2251 return flag_tree_parallelize_loops
> 1;
2255 tree_parallelize_loops (void)
2257 if (number_of_loops (cfun
) <= 1)
2260 if (parallelize_loops ())
2261 return TODO_cleanup_cfg
| TODO_rebuild_alias
;
2267 const pass_data pass_data_parallelize_loops
=
2269 GIMPLE_PASS
, /* type */
2270 "parloops", /* name */
2271 OPTGROUP_LOOP
, /* optinfo_flags */
2272 true, /* has_gate */
2273 true, /* has_execute */
2274 TV_TREE_PARALLELIZE_LOOPS
, /* tv_id */
2275 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2276 0, /* properties_provided */
2277 0, /* properties_destroyed */
2278 0, /* todo_flags_start */
2279 TODO_verify_flow
, /* todo_flags_finish */
2282 class pass_parallelize_loops
: public gimple_opt_pass
2285 pass_parallelize_loops (gcc::context
*ctxt
)
2286 : gimple_opt_pass (pass_data_parallelize_loops
, ctxt
)
2289 /* opt_pass methods: */
2290 bool gate () { return gate_tree_parallelize_loops (); }
2291 unsigned int execute () { return tree_parallelize_loops (); }
2293 }; // class pass_parallelize_loops
2298 make_pass_parallelize_loops (gcc::context
*ctxt
)
2300 return new pass_parallelize_loops (ctxt
);
2304 #include "gt-tree-parloops.h"