1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
26 #include "tree-flow.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
42 The most of the complexity is in bringing the code into shape expected
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common reduction patterns for outer loops.
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
63 The code transformation will be introduced by an example.
70 for (i = 0; i < N; i++)
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
84 sum_11 = D.1795_8 + sum_29;
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
96 after reduction transformation (only relevant parts):
104 # Storing the initial value given by the user. #
106 .paral_data_store.32.sum.27 = 1;
108 #pragma omp parallel num_threads(4)
110 #pragma omp for schedule(static)
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
116 # sum.27_29 = PHI <sum.27_11, 0>
118 sum.27_11 = D.1827_8 + sum.27_29;
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
156 /* Minimal number of iterations of a loop that should be executed in each
158 #define MIN_PER_THREAD 100
160 /* Element of the hashtable, representing a
161 reduction in the current loop. */
162 struct reduction_info
164 gimple reduc_stmt
; /* reduction statement. */
165 gimple reduc_phi
; /* The phi node defining the reduction. */
166 enum tree_code reduction_code
;/* code for the reduction operation. */
167 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
169 gimple keep_res
; /* The PHI_RESULT of this phi is the resulting value
170 of the reduction variable when existing the loop. */
171 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
172 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
173 tree init
; /* reduction initialization value. */
174 gimple new_phi
; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
180 /* Equality and hash functions for hashtab code. */
183 reduction_info_eq (const void *aa
, const void *bb
)
185 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
186 const struct reduction_info
*b
= (const struct reduction_info
*) bb
;
188 return (a
->reduc_phi
== b
->reduc_phi
);
192 reduction_info_hash (const void *aa
)
194 const struct reduction_info
*a
= (const struct reduction_info
*) aa
;
196 return a
->reduc_version
;
199 static struct reduction_info
*
200 reduction_phi (htab_t reduction_list
, gimple phi
)
202 struct reduction_info tmpred
, *red
;
204 if (htab_elements (reduction_list
) == 0 || phi
== NULL
)
207 tmpred
.reduc_phi
= phi
;
208 tmpred
.reduc_version
= gimple_uid (phi
);
209 red
= (struct reduction_info
*) htab_find (reduction_list
, &tmpred
);
214 /* Element of hashtable of names to copy. */
216 struct name_to_copy_elt
218 unsigned version
; /* The version of the name to copy. */
219 tree new_name
; /* The new name used in the copy. */
220 tree field
; /* The field of the structure used to pass the
224 /* Equality and hash functions for hashtab code. */
227 name_to_copy_elt_eq (const void *aa
, const void *bb
)
229 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
230 const struct name_to_copy_elt
*b
= (const struct name_to_copy_elt
*) bb
;
232 return a
->version
== b
->version
;
236 name_to_copy_elt_hash (const void *aa
)
238 const struct name_to_copy_elt
*a
= (const struct name_to_copy_elt
*) aa
;
240 return (hashval_t
) a
->version
;
243 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246 typedef struct lambda_trans_matrix_s
248 lambda_matrix matrix
;
252 } *lambda_trans_matrix
;
253 #define LTM_MATRIX(T) ((T)->matrix)
254 #define LTM_ROWSIZE(T) ((T)->rowsize)
255 #define LTM_COLSIZE(T) ((T)->colsize)
256 #define LTM_DENOMINATOR(T) ((T)->denominator)
258 /* Allocate a new transformation matrix. */
260 static lambda_trans_matrix
261 lambda_trans_matrix_new (int colsize
, int rowsize
,
262 struct obstack
* lambda_obstack
)
264 lambda_trans_matrix ret
;
266 ret
= (lambda_trans_matrix
)
267 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
268 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
269 LTM_ROWSIZE (ret
) = rowsize
;
270 LTM_COLSIZE (ret
) = colsize
;
271 LTM_DENOMINATOR (ret
) = 1;
275 /* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
280 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
281 lambda_vector vec
, lambda_vector dest
)
285 lambda_vector_clear (dest
, m
);
286 for (i
= 0; i
< m
; i
++)
287 for (j
= 0; j
< n
; j
++)
288 dest
[i
] += matrix
[i
][j
] * vec
[j
];
291 /* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
305 lambda_transform_legal_p (lambda_trans_matrix trans
,
307 VEC (ddr_p
, heap
) *dependence_relations
)
310 lambda_vector distres
;
311 struct data_dependence_relation
*ddr
;
313 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
314 && LTM_ROWSIZE (trans
) == nb_loops
);
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p
, dependence_relations
) == 0)
320 ddr
= VEC_index (ddr_p
, dependence_relations
, 0);
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
329 distres
= lambda_vector_new (nb_loops
);
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p
, dependence_relations
, i
, ddr
)
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
350 /* Compute trans.dist_vect */
351 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
353 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
354 DDR_DIST_VECT (ddr
, j
), distres
);
356 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
363 /* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent on each other (that is, if we can execute them
368 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
370 VEC (loop_p
, heap
) *loop_nest
;
371 VEC (ddr_p
, heap
) *dependence_relations
;
372 VEC (data_reference_p
, heap
) *datarefs
;
373 lambda_trans_matrix trans
;
376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
378 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
380 fprintf (dump_file
, "loop is innermost\n");
382 fprintf (dump_file
, "loop NOT innermost\n");
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs
= VEC_alloc (data_reference_p
, heap
, 10);
388 dependence_relations
= VEC_alloc (ddr_p
, heap
, 10 * 10);
389 loop_nest
= VEC_alloc (loop_p
, heap
, 3);
390 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
391 &dependence_relations
))
393 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
394 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
399 dump_data_dependence_relations (dump_file
, dependence_relations
);
401 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
402 LTM_MATRIX (trans
)[0][0] = -1;
404 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
408 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
410 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
412 " FAILED: data dependencies exist across iterations\n");
415 VEC_free (loop_p
, heap
, loop_nest
);
416 free_dependence_relations (dependence_relations
);
417 free_data_refs (datarefs
);
422 /* Return true when LOOP contains basic blocks marked with the
423 BB_IRREDUCIBLE_LOOP flag. */
426 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
429 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
432 for (i
= 0; i
< loop
->num_nodes
; i
++)
433 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
442 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
443 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
444 to their addresses that can be reused. The address of OBJ is known to
445 be invariant in the whole function. Other needed statements are placed
449 take_address_of (tree obj
, tree type
, edge entry
, htab_t decl_address
,
450 gimple_stmt_iterator
*gsi
)
454 struct int_tree_map ielt
, *nielt
;
455 tree
*var_p
, name
, bvar
, addr
;
459 /* Since the address of OBJ is invariant, the trees may be shared.
460 Avoid rewriting unrelated parts of the code. */
461 obj
= unshare_expr (obj
);
463 handled_component_p (*var_p
);
464 var_p
= &TREE_OPERAND (*var_p
, 0))
467 /* Canonicalize the access to base on a MEM_REF. */
469 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
471 /* Assign a canonical SSA name to the address of the base decl used
472 in the address and share it for all accesses and addresses based
474 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
476 dslot
= htab_find_slot_with_hash (decl_address
, &ielt
, uid
, INSERT
);
481 addr
= TREE_OPERAND (*var_p
, 0);
482 bvar
= create_tmp_var (TREE_TYPE (addr
),
483 get_name (TREE_OPERAND
484 (TREE_OPERAND (*var_p
, 0), 0)));
485 add_referenced_var (bvar
);
486 stmt
= gimple_build_assign (bvar
, addr
);
487 name
= make_ssa_name (bvar
, stmt
);
488 gimple_assign_set_lhs (stmt
, name
);
489 gsi_insert_on_edge_immediate (entry
, stmt
);
491 nielt
= XNEW (struct int_tree_map
);
497 name
= ((struct int_tree_map
*) *dslot
)->to
;
499 /* Express the address in terms of the canonical SSA name. */
500 TREE_OPERAND (*var_p
, 0) = name
;
502 return build_fold_addr_expr_with_type (obj
, type
);
504 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
505 &stmts
, true, NULL_TREE
);
506 if (!gimple_seq_empty_p (stmts
))
507 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
509 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
511 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
513 if (!gimple_seq_empty_p (stmts
))
514 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
520 /* Callback for htab_traverse. Create the initialization statement
521 for reduction described in SLOT, and place it at the preheader of
522 the loop described in DATA. */
525 initialize_reductions (void **slot
, void *data
)
528 tree bvar
, type
, arg
;
531 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
532 struct loop
*loop
= (struct loop
*) data
;
534 /* Create initialization in preheader:
535 reduction_variable = initialization value of reduction. */
537 /* In the phi node at the header, replace the argument coming
538 from the preheader with the reduction initialization value. */
540 /* Create a new variable to initialize the reduction. */
541 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
542 bvar
= create_tmp_var (type
, "reduction");
543 add_referenced_var (bvar
);
545 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
546 OMP_CLAUSE_REDUCTION
);
547 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
548 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
550 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
553 /* Replace the argument representing the initialization value
554 with the initialization value for the reduction (neutral
555 element for the particular operation, e.g. 0 for PLUS_EXPR,
556 1 for MULT_EXPR, etc).
557 Keep the old value in a new variable "reduction_initial",
558 that will be taken in consideration after the parallel
559 computing is done. */
561 e
= loop_preheader_edge (loop
);
562 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
563 /* Create new variable to hold the initial value. */
565 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
566 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
567 reduc
->initial_value
= arg
;
573 struct walk_stmt_info info
;
576 gimple_stmt_iterator
*gsi
;
581 /* Eliminates references to local variables in *TP out of the single
582 entry single exit region starting at DTA->ENTRY.
583 DECL_ADDRESS contains addresses of the references that had their
584 address taken already. If the expression is changed, CHANGED is
585 set to true. Callback for walk_tree. */
588 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
590 struct elv_data
*const dta
= (struct elv_data
*) data
;
591 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
597 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
600 type
= TREE_TYPE (t
);
601 addr_type
= build_pointer_type (type
);
602 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
604 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
610 *tp
= build_simple_mem_ref (addr
);
616 if (TREE_CODE (t
) == ADDR_EXPR
)
618 /* ADDR_EXPR may appear in two contexts:
619 -- as a gimple operand, when the address taken is a function invariant
620 -- as gimple rhs, when the resulting address in not a function
622 We do not need to do anything special in the latter case (the base of
623 the memory reference whose address is taken may be replaced in the
624 DECL_P case). The former case is more complicated, as we need to
625 ensure that the new address is still a gimple operand. Thus, it
626 is not sufficient to replace just the base of the memory reference --
627 we need to move the whole computation of the address out of the
629 if (!is_gimple_val (t
))
633 obj
= TREE_OPERAND (t
, 0);
634 var
= get_base_address (obj
);
635 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
638 addr_type
= TREE_TYPE (t
);
639 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
641 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
658 /* Moves the references to local variables in STMT at *GSI out of the single
659 entry single exit region starting at ENTRY. DECL_ADDRESS contains
660 addresses of the references that had their address taken
664 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
668 gimple stmt
= gsi_stmt (*gsi
);
670 memset (&dta
.info
, '\0', sizeof (dta
.info
));
672 dta
.decl_address
= decl_address
;
676 if (gimple_debug_bind_p (stmt
))
679 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
680 eliminate_local_variables_1
, &dta
.info
, NULL
);
683 gimple_debug_bind_reset_value (stmt
);
690 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
697 /* Eliminates the references to local variables from the single entry
698 single exit region between the ENTRY and EXIT edges.
701 1) Taking address of a local variable -- these are moved out of the
702 region (and temporary variable is created to hold the address if
705 2) Dereferencing a local variable -- these are replaced with indirect
709 eliminate_local_variables (edge entry
, edge exit
)
712 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
714 gimple_stmt_iterator gsi
;
715 bool has_debug_stmt
= false;
716 htab_t decl_address
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
718 basic_block entry_bb
= entry
->src
;
719 basic_block exit_bb
= exit
->dest
;
721 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
723 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
724 if (bb
!= entry_bb
&& bb
!= exit_bb
)
725 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
726 if (is_gimple_debug (gsi_stmt (gsi
)))
728 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
729 has_debug_stmt
= true;
732 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
735 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
736 if (bb
!= entry_bb
&& bb
!= exit_bb
)
737 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
738 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
739 eliminate_local_variables_stmt (entry
, &gsi
, decl_address
);
741 htab_delete (decl_address
);
742 VEC_free (basic_block
, heap
, body
);
745 /* Returns true if expression EXPR is not defined between ENTRY and
746 EXIT, i.e. if all its operands are defined outside of the region. */
749 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
751 basic_block entry_bb
= entry
->src
;
752 basic_block exit_bb
= exit
->dest
;
755 if (is_gimple_min_invariant (expr
))
758 if (TREE_CODE (expr
) == SSA_NAME
)
760 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
762 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
763 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
772 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773 The copies are stored to NAME_COPIES, if NAME was already duplicated,
774 its duplicate stored in NAME_COPIES is returned.
776 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777 duplicated, storing the copies in DECL_COPIES. */
780 separate_decls_in_region_name (tree name
,
781 htab_t name_copies
, htab_t decl_copies
,
784 tree copy
, var
, var_copy
;
785 unsigned idx
, uid
, nuid
;
786 struct int_tree_map ielt
, *nielt
;
787 struct name_to_copy_elt elt
, *nelt
;
788 void **slot
, **dslot
;
790 if (TREE_CODE (name
) != SSA_NAME
)
793 idx
= SSA_NAME_VERSION (name
);
795 slot
= htab_find_slot_with_hash (name_copies
, &elt
, idx
,
796 copy_name_p
? INSERT
: NO_INSERT
);
798 return ((struct name_to_copy_elt
*) *slot
)->new_name
;
800 var
= SSA_NAME_VAR (name
);
801 uid
= DECL_UID (var
);
803 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, uid
, INSERT
);
806 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
807 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
808 add_referenced_var (var_copy
);
809 nielt
= XNEW (struct int_tree_map
);
811 nielt
->to
= var_copy
;
814 /* Ensure that when we meet this decl next time, we won't duplicate
816 nuid
= DECL_UID (var_copy
);
818 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, nuid
, INSERT
);
819 gcc_assert (!*dslot
);
820 nielt
= XNEW (struct int_tree_map
);
822 nielt
->to
= var_copy
;
826 var_copy
= ((struct int_tree_map
*) *dslot
)->to
;
830 copy
= duplicate_ssa_name (name
, NULL
);
831 nelt
= XNEW (struct name_to_copy_elt
);
833 nelt
->new_name
= copy
;
834 nelt
->field
= NULL_TREE
;
843 SSA_NAME_VAR (copy
) = var_copy
;
847 /* Finds the ssa names used in STMT that are defined outside the
848 region between ENTRY and EXIT and replaces such ssa names with
849 their duplicates. The duplicates are stored to NAME_COPIES. Base
850 decls of all ssa names used in STMT (including those defined in
851 LOOP) are replaced with the new temporary variables; the
852 replacement decls are stored in DECL_COPIES. */
855 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
856 htab_t name_copies
, htab_t decl_copies
)
864 mark_virtual_ops_for_renaming (stmt
);
866 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
868 name
= DEF_FROM_PTR (def
);
869 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
870 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
872 gcc_assert (copy
== name
);
875 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
877 name
= USE_FROM_PTR (use
);
878 if (TREE_CODE (name
) != SSA_NAME
)
881 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
882 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
888 /* Finds the ssa names used in STMT that are defined outside the
889 region between ENTRY and EXIT and replaces such ssa names with
890 their duplicates. The duplicates are stored to NAME_COPIES. Base
891 decls of all ssa names used in STMT (including those defined in
892 LOOP) are replaced with the new temporary variables; the
893 replacement decls are stored in DECL_COPIES. */
896 separate_decls_in_region_debug (gimple stmt
, htab_t name_copies
,
902 struct int_tree_map ielt
;
903 struct name_to_copy_elt elt
;
904 void **slot
, **dslot
;
906 if (gimple_debug_bind_p (stmt
))
907 var
= gimple_debug_bind_get_var (stmt
);
908 else if (gimple_debug_source_bind_p (stmt
))
909 var
= gimple_debug_source_bind_get_var (stmt
);
912 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
914 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
915 ielt
.uid
= DECL_UID (var
);
916 dslot
= htab_find_slot_with_hash (decl_copies
, &ielt
, ielt
.uid
, NO_INSERT
);
919 if (gimple_debug_bind_p (stmt
))
920 gimple_debug_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
921 else if (gimple_debug_source_bind_p (stmt
))
922 gimple_debug_source_bind_set_var (stmt
, ((struct int_tree_map
*) *dslot
)->to
);
924 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
926 name
= USE_FROM_PTR (use
);
927 if (TREE_CODE (name
) != SSA_NAME
)
930 elt
.version
= SSA_NAME_VERSION (name
);
931 slot
= htab_find_slot_with_hash (name_copies
, &elt
, elt
.version
, NO_INSERT
);
934 gimple_debug_bind_reset_value (stmt
);
939 SET_USE (use
, ((struct name_to_copy_elt
*) *slot
)->new_name
);
945 /* Callback for htab_traverse. Adds a field corresponding to the reduction
946 specified in SLOT. The type is passed in DATA. */
949 add_field_for_reduction (void **slot
, void *data
)
952 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
953 tree
const type
= (tree
) data
;
954 tree var
= SSA_NAME_VAR (gimple_assign_lhs (red
->reduc_stmt
));
955 tree field
= build_decl (gimple_location (red
->reduc_stmt
),
956 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
958 insert_field_into_struct (type
, field
);
965 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
966 described in SLOT. The type is passed in DATA. */
969 add_field_for_name (void **slot
, void *data
)
971 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
972 tree type
= (tree
) data
;
973 tree name
= ssa_name (elt
->version
);
974 tree var
= SSA_NAME_VAR (name
);
975 tree field
= build_decl (DECL_SOURCE_LOCATION (var
),
976 FIELD_DECL
, DECL_NAME (var
), TREE_TYPE (var
));
978 insert_field_into_struct (type
, field
);
984 /* Callback for htab_traverse. A local result is the intermediate result
986 thread, or the initial value in case no iteration was executed.
987 This function creates a phi node reflecting these values.
988 The phi's result will be stored in NEW_PHI field of the
989 reduction's data structure. */
992 create_phi_for_local_result (void **slot
, void *data
)
994 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
995 const struct loop
*const loop
= (const struct loop
*) data
;
998 basic_block store_bb
;
1000 source_location locus
;
1003 /* STORE_BB is the block where the phi
1004 should be stored. It is the destination of the loop exit.
1005 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1006 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1008 /* STORE_BB has two predecessors. One coming from the loop
1009 (the reduction's result is computed at the loop),
1010 and another coming from a block preceding the loop,
1012 are executed (the initial value should be taken). */
1013 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1014 e
= EDGE_PRED (store_bb
, 1);
1016 e
= EDGE_PRED (store_bb
, 0);
1018 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
)),
1020 locus
= gimple_location (reduc
->reduc_stmt
);
1021 block
= gimple_block (reduc
->reduc_stmt
);
1022 new_phi
= create_phi_node (local_res
, store_bb
);
1023 SSA_NAME_DEF_STMT (local_res
) = new_phi
;
1024 add_phi_arg (new_phi
, reduc
->init
, e
, locus
, block
);
1025 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1026 FALLTHRU_EDGE (loop
->latch
), locus
, block
);
1027 reduc
->new_phi
= new_phi
;
1037 basic_block store_bb
;
1038 basic_block load_bb
;
1041 /* Callback for htab_traverse. Create an atomic instruction for the
1042 reduction described in SLOT.
1043 DATA annotates the place in memory the atomic operation relates to,
1044 and the basic block it needs to be generated in. */
1047 create_call_for_reduction_1 (void **slot
, void *data
)
1049 struct reduction_info
*const reduc
= (struct reduction_info
*) *slot
;
1050 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1051 gimple_stmt_iterator gsi
;
1052 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1057 tree t
, addr
, ref
, x
;
1058 tree tmp_load
, name
;
1061 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1062 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1064 addr
= build_addr (t
, current_function_decl
);
1066 /* Create phi node. */
1067 bb
= clsn_data
->load_bb
;
1069 e
= split_block (bb
, t
);
1072 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)), NULL
);
1073 add_referenced_var (tmp_load
);
1074 tmp_load
= make_ssa_name (tmp_load
, NULL
);
1075 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1076 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1077 gsi
= gsi_start_bb (new_bb
);
1078 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1080 e
= split_block (new_bb
, load
);
1082 gsi
= gsi_start_bb (new_bb
);
1084 x
= fold_build2 (reduc
->reduction_code
,
1085 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1086 PHI_RESULT (reduc
->new_phi
));
1088 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1089 GSI_CONTINUE_LINKING
);
1091 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1095 /* Create the atomic operation at the join point of the threads.
1096 REDUCTION_LIST describes the reductions in the LOOP.
1097 LD_ST_DATA describes the shared data structure where
1098 shared data is stored in and loaded from. */
1100 create_call_for_reduction (struct loop
*loop
, htab_t reduction_list
,
1101 struct clsn_data
*ld_st_data
)
1103 htab_traverse (reduction_list
, create_phi_for_local_result
, loop
);
1104 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1105 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1106 htab_traverse (reduction_list
, create_call_for_reduction_1
, ld_st_data
);
1109 /* Callback for htab_traverse. Loads the final reduction value at the
1110 join point of all threads, and inserts it in the right place. */
1113 create_loads_for_reductions (void **slot
, void *data
)
1115 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1116 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1118 gimple_stmt_iterator gsi
;
1119 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1124 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1125 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1126 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1130 name
= PHI_RESULT (red
->keep_res
);
1131 stmt
= gimple_build_assign (name
, x
);
1132 SSA_NAME_DEF_STMT (name
) = stmt
;
1134 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1136 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1137 !gsi_end_p (gsi
); gsi_next (&gsi
))
1138 if (gsi_stmt (gsi
) == red
->keep_res
)
1140 remove_phi_node (&gsi
, false);
1146 /* Load the reduction result that was stored in LD_ST_DATA.
1147 REDUCTION_LIST describes the list of reductions that the
1148 loads should be generated for. */
1150 create_final_loads_for_reduction (htab_t reduction_list
,
1151 struct clsn_data
*ld_st_data
)
1153 gimple_stmt_iterator gsi
;
1157 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1158 t
= build_fold_addr_expr (ld_st_data
->store
);
1159 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1161 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1162 SSA_NAME_DEF_STMT (ld_st_data
->load
) = stmt
;
1164 htab_traverse (reduction_list
, create_loads_for_reductions
, ld_st_data
);
/* NOTE(review): extraction-damaged text -- return type, braces and the
   declarations of `t' and `stmt' are missing; code kept verbatim.
   Stores the reduction's neutral/initial value into its field of the
   shared structure at the end of the store block.  */
1168 /* Callback for htab_traverse. Store the neutral value for the
1169 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1170 1 for MULT_EXPR, etc. into the reduction field.
1171 The reduction is specified in SLOT. The store information is
1175 create_stores_for_reduction (void **slot
, void *data
)
1177 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1178 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1181 gimple_stmt_iterator gsi
;
1182 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1184 gsi
= gsi_last_bb (clsn_data
->store_bb
);
/* store.field = initial_value;  */
1185 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1186 stmt
= gimple_build_assign (t
, red
->initial_value
);
1187 mark_virtual_ops_for_renaming (stmt
);
1188 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
/* NOTE(review): extraction-damaged text -- return type, braces and the
   declarations of `t', `stmt' and `load_struct' are missing; code kept
   verbatim.  For one name_to_copy_elt, emits the outgoing store of the
   original ssa name and the incoming load defining its duplicate.  */
1193 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1194 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1195 specified in SLOT. */
1198 create_loads_and_stores_for_name (void **slot
, void *data
)
1200 struct name_to_copy_elt
*const elt
= (struct name_to_copy_elt
*) *slot
;
1201 struct clsn_data
*const clsn_data
= (struct clsn_data
*) data
;
1204 gimple_stmt_iterator gsi
;
1205 tree type
= TREE_TYPE (elt
->new_name
);
/* store.field = old ssa name;  */
1208 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1209 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1210 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1211 mark_virtual_ops_for_renaming (stmt
);
1212 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
/* new_name = (*load).field;  */
1214 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1215 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1216 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1217 stmt
= gimple_build_assign (elt
->new_name
, t
);
1218 SSA_NAME_DEF_STMT (elt
->new_name
) = stmt
;
1219 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
/* NOTE(review): extraction-damaged text -- braces, blank lines, the
   declarations of `i'/`bb' and several argument/continuation lines are
   missing (e.g. the trailing arguments of the build_decl and
   htab_traverse calls); surviving code kept byte-identical.  */
1224 /* Moves all the variables used in LOOP and defined outside of it (including
1225 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1226 name) to a structure created for this purpose. The code
1234 is transformed this way:
1249 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1250 pointer `new' is intentionally not initialized (the loop will be split to a
1251 separate function later, and `new' will be initialized from its arguments).
1252 LD_ST_DATA holds information about the shared data structure used to pass
1253 information among the threads. It is initialized here, and
1254 gen_parallel_loop will pass it to create_call_for_reduction that
1255 needs this information. REDUCTION_LIST describes the reductions
1259 separate_decls_in_region (edge entry
, edge exit
, htab_t reduction_list
,
1260 tree
*arg_struct
, tree
*new_arg_struct
,
1261 struct clsn_data
*ld_st_data
)
/* bb0 = block before the region (stores go here), bb1 = first block of
   the region (loads go here).  */
1264 basic_block bb1
= split_edge (entry
);
1265 basic_block bb0
= single_pred (bb1
);
1266 htab_t name_copies
= htab_create (10, name_to_copy_elt_hash
,
1267 name_to_copy_elt_eq
, free
);
1268 htab_t decl_copies
= htab_create (10, int_tree_map_hash
, int_tree_map_eq
,
1271 tree type
, type_name
, nvar
;
1272 gimple_stmt_iterator gsi
;
1273 struct clsn_data clsn_data
;
1274 VEC (basic_block
, heap
) *body
= VEC_alloc (basic_block
, heap
, 3);
1276 basic_block entry_bb
= bb1
;
1277 basic_block exit_bb
= exit
->dest
;
1278 bool has_debug_stmt
= false;
1280 entry
= single_succ_edge (entry_bb
);
1281 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
/* First pass: create decls/name copies for every non-debug stmt and
   phi in the region.  */
1283 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
1285 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1287 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1288 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1289 name_copies
, decl_copies
);
1291 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1293 gimple stmt
= gsi_stmt (gsi
);
1295 if (is_gimple_debug (stmt
))
1296 has_debug_stmt
= true;
1298 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1299 name_copies
, decl_copies
);
1304 /* Now process debug bind stmts. We must not create decls while
1305 processing debug stmts, so we defer their processing so as to
1306 make sure we will have debug info for as many variables as
1307 possible (all of those that were dealt with in the loop above),
1308 and discard those for which we know there's nothing we can
1311 FOR_EACH_VEC_ELT (basic_block
, body
, i
, bb
)
1312 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1314 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1316 gimple stmt
= gsi_stmt (gsi
);
1318 if (is_gimple_debug (stmt
))
1320 if (separate_decls_in_region_debug (stmt
, name_copies
,
1323 gsi_remove (&gsi
, true);
1332 VEC_free (basic_block
, heap
, body
);
1334 if (htab_elements (name_copies
) == 0 && htab_elements (reduction_list
) == 0)
1336 /* It may happen that there is nothing to copy (if there are only
1337 loop carried and external variables in the loop). */
1339 *new_arg_struct
= NULL
;
1343 /* Create the type for the structure to store the ssa names to. */
1344 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1345 type_name
= build_decl (UNKNOWN_LOCATION
,
1346 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1348 TYPE_NAME (type
) = type_name
;
1350 htab_traverse (name_copies
, add_field_for_name
, type
);
1351 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1353 /* Create the fields for reductions. */
1354 htab_traverse (reduction_list
, add_field_for_reduction
,
1359 /* Create the loads and stores. */
1360 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1361 add_referenced_var (*arg_struct
);
1362 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1363 add_referenced_var (nvar
);
1364 *new_arg_struct
= make_ssa_name (nvar
, NULL
);
1366 ld_st_data
->store
= *arg_struct
;
1367 ld_st_data
->load
= *new_arg_struct
;
1368 ld_st_data
->store_bb
= bb0
;
1369 ld_st_data
->load_bb
= bb1
;
1371 htab_traverse (name_copies
, create_loads_and_stores_for_name
,
1374 /* Load the calculation from memory (after the join of the threads). */
1376 if (reduction_list
&& htab_elements (reduction_list
) > 0)
1378 htab_traverse (reduction_list
, create_stores_for_reduction
,
1380 clsn_data
.load
= make_ssa_name (nvar
, NULL
);
1381 clsn_data
.load_bb
= exit
->dest
;
1382 clsn_data
.store
= ld_st_data
->store
;
1383 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1387 htab_delete (decl_copies
);
1388 htab_delete (name_copies
);
1391 /* Bitmap containing uids of functions created by parallelization. We cannot
1392 allocate it from the default obstack, as it must live across compilation
1393 of several functions; we make it gc allocated instead. */
/* Lazily allocated in create_loop_fn; queried by parallelized_function_p.  */
1395 static GTY(()) bitmap parallelized_functions
;
/* NOTE(review): extraction-damaged text -- return type, braces and the
   early "return false;" line are missing; code kept verbatim.  */
1397 /* Returns true if FN was created by create_loop_fn. */
1400 parallelized_function_p (tree fn
)
/* Only artificial decls can be in the bitmap; cheap rejects first.  */
1402 if (!parallelized_functions
|| !DECL_ARTIFICIAL (fn
))
1405 return bitmap_bit_p (parallelized_functions
, DECL_UID (fn
));
/* NOTE(review): extraction-damaged text -- the return-type line, braces,
   the declarations of `buf'/`tname', the PARM_DECL's type argument and
   the final "return decl;" are missing; surviving code kept verbatim.  */
1408 /* Creates and returns an empty function that will receive the body of
1409 a parallelized loop. */
1412 create_loop_fn (location_t loc
)
1416 tree decl
, type
, name
, t
;
1417 struct function
*act_cfun
= cfun
;
1418 static unsigned loopfn_num
;
/* Build a unique private symbol name "<fn>.$loopfn<N>".  */
1420 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1421 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1422 clean_symbol_name (tname
);
1423 name
= get_identifier (tname
);
1424 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1426 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1427 if (!parallelized_functions
)
1428 parallelized_functions
= BITMAP_GGC_ALLOC ();
1429 bitmap_set_bit (parallelized_functions
, DECL_UID (decl
));
/* Local, artificial, never-inlined function with its own body block.  */
1431 TREE_STATIC (decl
) = 1;
1432 TREE_USED (decl
) = 1;
1433 DECL_ARTIFICIAL (decl
) = 1;
1434 DECL_IGNORED_P (decl
) = 0;
1435 TREE_PUBLIC (decl
) = 0;
1436 DECL_UNINLINABLE (decl
) = 1;
1437 DECL_EXTERNAL (decl
) = 0;
1438 DECL_CONTEXT (decl
) = NULL_TREE
;
1439 DECL_INITIAL (decl
) = make_node (BLOCK
);
/* void result.  */
1441 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1442 DECL_ARTIFICIAL (t
) = 1;
1443 DECL_IGNORED_P (t
) = 1;
1444 DECL_RESULT (decl
) = t
;
/* Single pointer parameter carrying the shared-data struct.  */
1446 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1448 DECL_ARTIFICIAL (t
) = 1;
1449 DECL_ARG_TYPE (t
) = ptr_type_node
;
1450 DECL_CONTEXT (t
) = decl
;
1452 DECL_ARGUMENTS (decl
) = t
;
1454 allocate_struct_function (decl
, false);
1456 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1458 set_cfun (act_cfun
);
/* NOTE(review): extraction-damaged text -- braces, several declarations
   (`n', `ok', `nit_1', the exit-phi/control handling lines between the
   visible statements) and some argument lines are missing; surviving
   code kept byte-identical.  */
1463 /* Moves the exit condition of LOOP to the beginning of its header, and
1464 duplicates the part of the last iteration that gets disabled to the
1465 exit of the loop. NIT is the number of iterations of the loop
1466 (used to initialize the variables in the duplicated part).
1468 TODO: the common case is that latch of the loop is empty and immediately
1469 follows the loop exit. In this case, it would be better not to copy the
1470 body of the loop, but only move the entry of the loop directly before the
1471 exit check and increase the number of iterations of the loop by one.
1472 This may need some additional preconditioning in case NIT = ~0.
1473 REDUCTION_LIST describes the reductions in LOOP. */
1476 transform_to_exit_first_loop (struct loop
*loop
, htab_t reduction_list
, tree nit
)
1478 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1481 edge exit
= single_dom_exit (loop
), hpred
;
1482 tree control
, control_name
, res
, t
;
1483 gimple phi
, nphi
, cond_stmt
, stmt
, cond_nit
;
1484 gimple_stmt_iterator gsi
;
/* Split the header after its labels; the old header body becomes
   orig_header.  */
1487 split_block_after_labels (loop
->header
);
1488 orig_header
= single_succ (loop
->header
);
1489 hpred
= single_succ_edge (loop
->header
);
1491 cond_stmt
= last_stmt (exit
->src
);
1492 control
= gimple_cond_lhs (cond_stmt
);
1493 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1495 /* Make sure that we have phi nodes on exit for all loop header phis
1496 (create_parallel_loop requires that). */
1497 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1499 phi
= gsi_stmt (gsi
);
1500 res
= PHI_RESULT (phi
);
1501 t
= make_ssa_name (SSA_NAME_VAR (res
), phi
);
1502 SET_PHI_RESULT (phi
, t
);
1503 nphi
= create_phi_node (res
, orig_header
);
1504 SSA_NAME_DEF_STMT (res
) = nphi
;
1505 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
, NULL
);
/* Presumably executed only when the control variable's phi was split
   above -- the guarding condition is missing from this chunk; confirm
   against upstream sources.  */
1509 gimple_cond_set_lhs (cond_stmt
, t
);
1510 update_stmt (cond_stmt
);
/* Duplicate the tail of the last iteration past the exit test.  */
1515 bbs
= get_loop_body_in_dom_order (loop
);
1517 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1519 nbbs
= XNEWVEC (basic_block
, n
);
1520 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1527 /* Other than reductions, the only gimple reg that should be copied
1528 out of the loop is the control variable. */
1529 exit
= single_dom_exit (loop
);
1530 control_name
= NULL_TREE
;
1531 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); )
1533 phi
= gsi_stmt (gsi
);
1534 res
= PHI_RESULT (phi
);
1535 if (!is_gimple_reg (res
))
1541 /* Check if it is a part of reduction. If it is,
1542 keep the phi at the reduction's keep_res field. The
1543 PHI_RESULT of this phi is the resulting value of the reduction
1544 variable when exiting the loop. */
1546 if (htab_elements (reduction_list
) > 0)
1548 struct reduction_info
*red
;
1550 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1551 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1554 red
->keep_res
= phi
;
1559 gcc_assert (control_name
== NULL_TREE
1560 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1562 remove_phi_node (&gsi
, false);
1564 gcc_assert (control_name
!= NULL_TREE
);
1566 /* Initialize the control variable to number of iterations
1567 according to the rhs of the exit condition. */
1568 gsi
= gsi_after_labels (ex_bb
);
1569 cond_nit
= last_stmt (exit
->src
);
1570 nit_1
= gimple_cond_rhs (cond_nit
);
1571 nit_1
= force_gimple_operand_gsi (&gsi
,
1572 fold_convert (TREE_TYPE (control_name
), nit_1
),
1573 false, NULL_TREE
, false, GSI_SAME_STMT
);
1574 stmt
= gimple_build_assign (control_name
, nit_1
);
1575 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1576 SSA_NAME_DEF_STMT (control_name
) = stmt
;
/* NOTE(review): extraction-damaged text -- braces and the declarations
   of `t', `param', `def' and `block' are missing, as are a few argument
   lines; surviving code kept byte-identical.  */
1579 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1580 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1581 NEW_DATA is the variable that should be initialized from the argument
1582 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1583 basic block containing GIMPLE_OMP_PARALLEL tree. */
1586 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1587 tree new_data
, unsigned n_threads
, location_t loc
)
1589 gimple_stmt_iterator gsi
;
1590 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1592 gimple stmt
, for_stmt
, phi
, cond_stmt
;
1593 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1594 edge exit
, nexit
, guard
, end
, e
;
1596 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1597 bb
= loop_preheader_edge (loop
)->src
;
1598 paral_bb
= single_pred (bb
);
1599 gsi
= gsi_last_bb (paral_bb
);
1601 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1602 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1603 = build_int_cst (integer_type_node
, n_threads
);
1604 stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1605 gimple_set_location (stmt
, loc
);
1607 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1609 /* Initialize NEW_DATA. */
1612 gsi
= gsi_after_labels (bb
);
/* param = &DATA; new_data = (typeof new_data) param;  */
1614 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
), NULL
);
1615 stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1616 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1617 SSA_NAME_DEF_STMT (param
) = stmt
;
1619 stmt
= gimple_build_assign (new_data
,
1620 fold_convert (TREE_TYPE (new_data
), param
));
1621 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1622 SSA_NAME_DEF_STMT (new_data
) = stmt
;
1625 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1626 bb
= split_loop_exit_edge (single_dom_exit (loop
));
1627 gsi
= gsi_last_bb (bb
);
1628 stmt
= gimple_build_omp_return (false);
1629 gimple_set_location (stmt
, loc
);
1630 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1632 /* Extract data for GIMPLE_OMP_FOR. */
1633 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
1634 cond_stmt
= last_stmt (loop
->header
);
1636 cvar
= gimple_cond_lhs (cond_stmt
);
1637 cvar_base
= SSA_NAME_VAR (cvar
);
1638 phi
= SSA_NAME_DEF_STMT (cvar
);
1639 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1640 initvar
= make_ssa_name (cvar_base
, NULL
);
1641 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
1643 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
/* The increment is re-emitted by the OMP_FOR; drop its old def.  */
1645 gsi
= gsi_last_nondebug_bb (loop
->latch
);
1646 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
1647 gsi_remove (&gsi
, true);
1650 for_bb
= split_edge (loop_preheader_edge (loop
));
1651 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
1652 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
1653 gcc_assert (exit
== single_dom_exit (loop
));
1655 guard
= make_edge (for_bb
, ex_bb
, 0);
1656 single_succ_edge (loop
->latch
)->flags
= 0;
1657 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
/* Complete the exit phis for both the guard and the end edges.  */
1658 for (gsi
= gsi_start_phis (ex_bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1660 source_location locus
;
1663 phi
= gsi_stmt (gsi
);
1664 stmt
= SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
));
1666 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
1667 locus
= gimple_phi_arg_location_from_edge (stmt
,
1668 loop_preheader_edge (loop
));
1669 block
= gimple_phi_arg_block_from_edge (stmt
,
1670 loop_preheader_edge (loop
));
1671 add_phi_arg (phi
, def
, guard
, locus
, block
);
1673 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
1674 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
1675 block
= gimple_phi_arg_block_from_edge (stmt
, loop_latch_edge (loop
));
1676 add_phi_arg (phi
, def
, end
, locus
, block
);
1678 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
1679 PENDING_STMT (e
) = NULL
;
1681 /* Emit GIMPLE_OMP_FOR. */
1682 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
1683 type
= TREE_TYPE (cvar
);
1684 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
1685 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
1687 for_stmt
= gimple_build_omp_for (NULL
, t
, 1, NULL
);
1688 gimple_set_location (for_stmt
, loc
);
1689 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
1690 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
1691 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
1692 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
1693 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
1695 build_int_cst (type
, 1)));
1697 gsi
= gsi_last_bb (for_bb
);
1698 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
1699 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
1701 /* Emit GIMPLE_OMP_CONTINUE. */
1702 gsi
= gsi_last_bb (loop
->latch
);
1703 stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
1704 gimple_set_location (stmt
, loc
);
1705 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1706 SSA_NAME_DEF_STMT (cvar_next
) = stmt
;
1708 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1709 gsi
= gsi_last_bb (ex_bb
);
1710 stmt
= gimple_build_omp_return (true);
1711 gimple_set_location (stmt
, loc
);
1712 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1714 /* After the above dom info is hosed. Re-compute it. */
1715 free_dominance_info (CDI_DOMINATORS
);
1716 calculate_dominance_info (CDI_DOMINATORS
);
/* NOTE(review): extraction-damaged text -- braces, several declarations
   (`stmts', `entry', `exit', `loc', `cond_stmt', `prob', `li'), parts
   of the big transformation-sketch comment and some conditional lines
   are missing; surviving code kept byte-identical.  */
1721 /* Generates code to execute the iterations of LOOP in N_THREADS
1722 threads in parallel.
1724 NITER describes number of iterations of LOOP.
1725 REDUCTION_LIST describes the reductions existent in the LOOP. */
1728 gen_parallel_loop (struct loop
*loop
, htab_t reduction_list
,
1729 unsigned n_threads
, struct tree_niter_desc
*niter
)
1732 tree many_iterations_cond
, type
, nit
;
1733 tree arg_struct
, new_arg_struct
;
1735 basic_block parallel_head
;
1737 struct clsn_data clsn_data
;
1741 unsigned int m_p_thread
=2;
1745 ---------------------------------------------------------------------
1748 IV = phi (INIT, IV + STEP)
1754 ---------------------------------------------------------------------
1756 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1757 we generate the following code:
1759 ---------------------------------------------------------------------
1762 || NITER < MIN_PER_THREAD * N_THREADS)
1766 store all local loop-invariant variables used in body of the loop to DATA.
1767 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1768 load the variables from DATA.
1769 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1772 GIMPLE_OMP_CONTINUE;
1773 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1774 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1780 IV = phi (INIT, IV + STEP)
1791 /* Create two versions of the loop -- in the old one, we know that the
1792 number of iterations is large enough, and we will transform it into the
1793 loop that will be split to loop_fn, the new one will be used for the
1794 remaining iterations. */
1796 /* We should compute a better number-of-iterations value for outer loops.
1799 for (i = 0; i < n; ++i)
1800 for (j = 0; j < m; ++j)
1803 we should compute nit = n * m, not nit = n.
1804 Also may_be_zero handling would need to be adjusted. */
1806 type
= TREE_TYPE (niter
->niter
);
1807 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
1810 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
/* The condition guarding this assignment is missing from this chunk --
   presumably it raises the per-thread minimum for non-innermost or
   profitable loops; confirm against upstream sources.  */
1815 m_p_thread
=MIN_PER_THREAD
;
/* Guard: run in parallel only if NITER >= m_p_thread * n_threads and
   the loop is known to iterate at least once.  */
1817 many_iterations_cond
=
1818 fold_build2 (GE_EXPR
, boolean_type_node
,
1819 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
1821 many_iterations_cond
1822 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1823 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
1824 many_iterations_cond
);
1825 many_iterations_cond
1826 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
1828 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1829 if (!is_gimple_condexpr (many_iterations_cond
))
1831 many_iterations_cond
1832 = force_gimple_operand (many_iterations_cond
, &stmts
,
1835 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
1838 initialize_original_copy_tables ();
1840 /* We assume that the loop usually iterates a lot. */
1841 prob
= 4 * REG_BR_PROB_BASE
/ 5;
1842 loop_version (loop
, many_iterations_cond
, NULL
,
1843 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
1844 update_ssa (TODO_update_ssa
);
1845 free_original_copy_tables ();
1847 /* Base all the induction variables in LOOP on a single control one. */
1848 canonicalize_loop_ivs (loop
, &nit
, true);
1850 /* Ensure that the exit condition is the first statement in the loop. */
1851 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
1853 /* Generate initializations for reductions. */
1854 if (htab_elements (reduction_list
) > 0)
1855 htab_traverse (reduction_list
, initialize_reductions
, loop
);
1857 /* Eliminate the references to local variables from the loop. */
1858 gcc_assert (single_exit (loop
));
1859 entry
= loop_preheader_edge (loop
);
1860 exit
= single_dom_exit (loop
);
1862 eliminate_local_variables (entry
, exit
);
1863 /* In the old loop, move all variables non-local to the loop to a structure
1864 and back, and create separate decls for the variables used in loop. */
1865 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
1866 &new_arg_struct
, &clsn_data
);
1868 /* Create the parallel constructs. */
1869 loc
= UNKNOWN_LOCATION
;
1870 cond_stmt
= last_stmt (loop
->header
);
1872 loc
= gimple_location (cond_stmt
);
1873 parallel_head
= create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
1874 new_arg_struct
, n_threads
, loc
);
1875 if (htab_elements (reduction_list
) > 0)
1876 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
1880 /* Cancel the loop (it is simpler to do it here rather than to teach the
1881 expander to do it). */
1882 cancel_loop_tree (loop
);
1884 /* Free loop bound estimations that could contain references to
1885 removed statements. */
1886 FOR_EACH_LOOP (li
, loop
, 0)
1887 free_numbers_of_iterations_estimates_loop (loop
);
1889 /* Expand the parallel constructs. We do it directly here instead of running
1890 a separate expand_omp pass, since it is more efficient, and less likely to
1891 cause troubles with further analyses not being able to deal with the
1894 omp_expand_local (parallel_head
);
/* NOTE(review): extraction-damaged text -- return type, braces, the
   declaration of `i', the "return true/false" lines and the free of
   `bbs' are missing; surviving code kept verbatim.  */
1897 /* Returns true when LOOP contains vector phi nodes. */
1900 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
1903 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
1904 gimple_stmt_iterator gsi
;
/* Scan every phi in every block of the loop body.  */
1907 for (i
= 0; i
< loop
->num_nodes
; i
++)
1908 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
1909 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi
)))) == VECTOR_TYPE
)
/* NOTE(review): extraction-damaged text -- return type, braces, the
   declaration of `slot' and the fprintf call opening line 1931 are
   missing; surviving code kept verbatim.  */
1918 /* Create a reduction_info struct, initialize it with REDUC_STMT
1919 and PHI, insert it to the REDUCTION_LIST. */
1922 build_new_reduction (htab_t reduction_list
, gimple reduc_stmt
, gimple phi
)
1925 struct reduction_info
*new_reduction
;
1927 gcc_assert (reduc_stmt
);
1929 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1932 "Detected reduction. reduction stmt is: \n");
1933 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
1934 fprintf (dump_file
, "\n");
/* Record the reduction keyed by the version of the header phi result.  */
1937 new_reduction
= XCNEW (struct reduction_info
);
1939 new_reduction
->reduc_stmt
= reduc_stmt
;
1940 new_reduction
->reduc_phi
= phi
;
1941 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
1942 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
1943 slot
= htab_find_slot (reduction_list
, new_reduction
, INSERT
);
1944 *slot
= new_reduction
;
/* NOTE(review): extraction-damaged text -- return type, braces and the
   "return 1;" line are missing; code kept verbatim.  Stamps each
   reduction phi's uid with its ssa version so reduction_phi can find
   it after the vectorizer analysis reused gimple_uid.  */
1947 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1950 set_reduc_phi_uids (void **slot
, void *data ATTRIBUTE_UNUSED
)
1952 struct reduction_info
*const red
= (struct reduction_info
*) *slot
;
1953 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
/* NOTE(review): extraction-damaged text -- return type, braces, the
   declarations of `iv' and `double_reduc', an early-continue guard and
   trailing call arguments are missing; surviving code kept verbatim.  */
1957 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1960 gather_scalar_reductions (loop_p loop
, htab_t reduction_list
)
1962 gimple_stmt_iterator gsi
;
1963 loop_vec_info simple_loop_info
;
/* Reuse the vectorizer's loop-form analysis to recognize reductions.  */
1966 simple_loop_info
= vect_analyze_loop_form (loop
);
1968 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1970 gimple phi
= gsi_stmt (gsi
);
1972 tree res
= PHI_RESULT (phi
);
1975 if (!is_gimple_reg (res
))
/* A phi that is not a simple iv is a reduction candidate.  */
1978 if (!simple_iv (loop
, loop
, res
, &iv
, true)
1979 && simple_loop_info
)
1981 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
1984 if (reduc_stmt
&& !double_reduc
)
1985 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
1988 destroy_loop_vec_info (simple_loop_info
, true);
1990 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1991 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1993 htab_traverse (reduction_list
, set_reduc_phi_uids
, NULL
);
/* NOTE(review): extraction-damaged text -- return type, braces, the
   assertion on `exit' and the "return false/true" lines are missing;
   code kept verbatim.  */
1996 /* Try to initialize NITER for code generation part. */
1999 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
2001 edge exit
= single_dom_exit (loop
);
2005 /* We need to know # of iterations, and there should be no uses of values
2006 defined inside loop outside of it, unless the values are invariants of
2008 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
2010 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2011 fprintf (dump_file
, " FAILED: number of iterations not known\n");
/* NOTE(review): extraction-damaged text -- return type, braces, the
   declarations of `reduc_phi' and `iv', several dump fprintf opening
   lines and all "return false/true" lines are missing; surviving code
   kept byte-identical.  */
2018 /* Try to initialize REDUCTION_LIST for code generation part.
2019 REDUCTION_LIST describes the reductions. */
2022 try_create_reduction_list (loop_p loop
, htab_t reduction_list
)
2024 edge exit
= single_dom_exit (loop
);
2025 gimple_stmt_iterator gsi
;
2029 gather_scalar_reductions (loop
, reduction_list
);
/* Every value live after the loop must be a recognized reduction.  */
2032 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2034 gimple phi
= gsi_stmt (gsi
);
2035 struct reduction_info
*red
;
2036 imm_use_iterator imm_iter
;
2037 use_operand_p use_p
;
2039 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2041 if (is_gimple_reg (val
))
2043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2045 fprintf (dump_file
, "phi is ");
2046 print_gimple_stmt (dump_file
, phi
, 0, 0);
2047 fprintf (dump_file
, "arg of phi to exit: value ");
2048 print_generic_expr (dump_file
, val
, 0);
2049 fprintf (dump_file
, " used outside loop\n");
2051 " checking if it a part of reduction pattern: \n");
2053 if (htab_elements (reduction_list
) == 0)
2055 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2057 " FAILED: it is not a part of reduction.\n");
/* Find the in-loop defining phi for the exit value.  */
2061 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2063 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2064 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2066 reduc_phi
= USE_STMT (use_p
);
2070 red
= reduction_phi (reduction_list
, reduc_phi
);
2073 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2075 " FAILED: it is not a part of reduction.\n");
2078 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2080 fprintf (dump_file
, "reduction phi is ");
2081 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2082 fprintf (dump_file
, "reduction stmt is ");
2083 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2088 /* The iterations of the loop may communicate only through bivs whose
2089 iteration space can be distributed efficiently. */
2090 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2092 gimple phi
= gsi_stmt (gsi
);
2093 tree def
= PHI_RESULT (phi
);
2096 if (is_gimple_reg (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2098 struct reduction_info
*red
;
2100 red
= reduction_phi (reduction_list
, phi
);
2103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2105 " FAILED: scalar dependency between iterations\n");
/* NOTE(review): extraction-damaged text -- return type, braces, the
   declarations of `loop', `li' and `loop_loc', many "continue"/guard
   lines and the final "return changed;" are missing; surviving code
   kept byte-identical.  */
2115 /* Detect parallel loops and generate parallel code using libgomp
2116 primitives. Returns true if some loop was parallelized, false
2120 parallelize_loops (void)
2122 unsigned n_threads
= flag_tree_parallelize_loops
;
2123 bool changed
= false;
2125 struct tree_niter_desc niter_desc
;
2127 htab_t reduction_list
;
2128 struct obstack parloop_obstack
;
2129 HOST_WIDE_INT estimated
;
2132 /* Do not parallelize loops in the functions created by parallelization. */
2133 if (parallelized_function_p (cfun
->decl
))
2135 if (cfun
->has_nonlocal_label
)
2138 gcc_obstack_init (&parloop_obstack
);
2139 reduction_list
= htab_create (10, reduction_info_hash
,
2140 reduction_info_eq
, free
);
2141 init_stmt_vec_info_vec ();
/* Consider every loop in the function as a candidate.  */
2143 FOR_EACH_LOOP (li
, loop
, 0)
2145 htab_empty (reduction_list
);
2146 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2148 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2150 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2152 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2155 /* If we use autopar in graphite pass, we use its marked dependency
2156 checking results. */
2157 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2159 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2160 fprintf (dump_file
, "loop is not parallel according to graphite\n");
2164 if (!single_dom_exit (loop
))
2167 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2168 fprintf (dump_file
, "loop is !single_dom_exit\n");
2173 if (/* And of course, the loop must be parallelizable. */
2174 !can_duplicate_loop_p (loop
)
2175 || loop_has_blocks_with_irreducible_flag (loop
)
2176 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2177 /* FIXME: the check for vector phi nodes could be removed. */
2178 || loop_has_vector_phi_nodes (loop
))
/* Profitability: enough estimated iterations, and not a cold loop.  */
2181 estimated
= estimated_stmt_executions_int (loop
);
2182 if (estimated
== -1)
2183 estimated
= max_stmt_executions_int (loop
);
2184 /* FIXME: Bypass this check as graphite doesn't update the
2185 count and frequency correctly now. */
2186 if (!flag_loop_parallelize_all
2187 && ((estimated
!= -1
2188 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2189 /* Do not bother with loops in cold areas. */
2190 || optimize_loop_nest_for_size_p (loop
)))
2193 if (!try_get_loop_niter (loop
, &niter_desc
))
2196 if (!try_create_reduction_list (loop
, reduction_list
))
2199 if (!flag_loop_parallelize_all
2200 && !loop_parallel_p (loop
, &parloop_obstack
))
2204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2207 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2209 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2210 loop_loc
= find_loop_location (loop
);
2211 if (loop_loc
!= UNKNOWN_LOC
)
2212 fprintf (dump_file
, "\nloop at %s:%d: ",
2213 LOC_FILE (loop_loc
), LOC_LINE (loop_loc
));
2215 gen_parallel_loop (loop
, reduction_list
,
2216 n_threads
, &niter_desc
);
2217 #ifdef ENABLE_CHECKING
2218 verify_flow_info ();
2219 verify_loop_structure ();
2220 verify_loop_closed_ssa (true);
2224 free_stmt_vec_info_vec ();
2225 htab_delete (reduction_list
);
2226 obstack_free (&parloop_obstack
, NULL
);
2228 /* Parallelization will cause new function calls to be inserted through
2229 which local variables will escape. Reset the points-to solution
2232 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2237 #include "gt-tree-parloops.h"