1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "hard-reg-set.h"
32 #include "fold-const.h"
33 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
47 #include "tree-data-ref.h"
48 #include "tree-scalar-evolution.h"
49 #include "gimple-pretty-print.h"
50 #include "tree-pass.h"
51 #include "langhooks.h"
52 #include "tree-vectorizer.h"
53 #include "tree-hasher.h"
54 #include "tree-parloops.h"
56 #include "tree-nested.h"
60 /* This pass tries to distribute iterations of loops into several threads.
61 The implementation is straightforward -- for each loop we test whether its
62 iterations are independent, and if it is the case (and some additional
63 conditions regarding profitability and correctness are satisfied), we
64 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 Most of the complexity is in bringing the code into shape expected
69 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
70 variable and that the exit test is at the start of the loop body
71 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
72 variables by accesses through pointers, and breaking up ssa chains
73 by storing the values incoming to the parallelized loop to a structure
74 passed to the new function as an argument (something similar is done
75 in omp gimplification, unfortunately only a small part of the code
79 -- if there are several parallelizable loops in a function, it may be
80 possible to generate the threads just once (using synchronization to
81 ensure that cross-loop dependences are obeyed).
82 -- handling of common reduction patterns for outer loops.
84 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
87 currently we use vect_force_simple_reduction() to detect reduction patterns.
88 The code transformation will be introduced by an example.
95 for (i = 0; i < N; i++)
105 # sum_29 = PHI <sum_11(5), 1(3)>
106 # i_28 = PHI <i_12(5), 0(3)>
109 sum_11 = D.1795_8 + sum_29;
117 # sum_21 = PHI <sum_11(4)>
118 printf (&"%d"[0], sum_21);
121 after reduction transformation (only relevant parts):
129 # Storing the initial value given by the user. #
131 .paral_data_store.32.sum.27 = 1;
133 #pragma omp parallel num_threads(4)
135 #pragma omp for schedule(static)
137 # The neutral element corresponding to the particular
138 reduction's operation, e.g. 0 for PLUS_EXPR,
139 1 for MULT_EXPR, etc. replaces the user's initial value. #
141 # sum.27_29 = PHI <sum.27_11, 0>
143 sum.27_11 = D.1827_8 + sum.27_29;
147 # Adding this reduction phi is done at create_phi_for_local_result() #
148 # sum.27_56 = PHI <sum.27_11, 0>
151 # Creating the atomic operation is done at
152 create_call_for_reduction_1() #
154 #pragma omp atomic_load
155 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
156 D.1840_60 = sum.27_56 + D.1839_59;
157 #pragma omp atomic_store (D.1840_60);
161 # collecting the result after the join of the threads is done at
162 create_loads_for_reductions().
163 The value computed by the threads is loaded from the
167 .paral_data_load.33_52 = &.paral_data_store.32;
168 sum_37 = .paral_data_load.33_52->sum.27;
169 sum_43 = D.1795_41 + sum_37;
172 # sum_21 = PHI <sum_43, sum_26>
173 printf (&"%d"[0], sum_21);
181 /* Minimal number of iterations of a loop that should be executed in each
183 #define MIN_PER_THREAD 100
185 /* Element of the hashtable, representing a
186 reduction in the current loop. */
187 struct reduction_info
/* NOTE(review): reduc_version doubles as the hash key (see
   reduction_hasher::hash below); reduction_phi () fills the lookup key
   from gimple_uid of the phi.  */
189 gimple reduc_stmt
; /* reduction statement. */
190 gimple reduc_phi
; /* The phi node defining the reduction. */
191 enum tree_code reduction_code
;/* code for the reduction operation. */
192 unsigned reduc_version
; /* SSA_NAME_VERSION of original reduc_phi
194 gphi
*keep_res
; /* The PHI_RESULT of this phi is the resulting value
195 of the reduction variable when exiting the loop. */
196 tree initial_value
; /* The initial value of the reduction var before entering the loop. */
197 tree field
; /* the name of the field in the parloop data structure intended for reduction. */
198 tree init
; /* reduction initialization value. */
199 gphi
*new_phi
; /* (helper field) Newly created phi node whose result
200 will be passed to the atomic operation. Represents
201 the local result each thread computed for the reduction
205 /* Reduction info hashtable helpers. */
/* Hashes on reduc_version; equality compares the reduc_phi pointers
   (see the member definitions below).  */
207 struct reduction_hasher
: free_ptr_hash
<reduction_info
>
209 static inline hashval_t
hash (const reduction_info
*);
210 static inline bool equal (const reduction_info
*, const reduction_info
*);
213 /* Equality and hash functions for hashtab code. */
/* Two entries denote the same reduction iff they refer to the same
   original reduc_phi; the hash value is the stored reduc_version.  */
216 reduction_hasher::equal (const reduction_info
*a
, const reduction_info
*b
)
218 return (a
->reduc_phi
== b
->reduc_phi
);
222 reduction_hasher::hash (const reduction_info
*a
)
224 return a
->reduc_version
;
227 typedef hash_table
<reduction_hasher
> reduction_info_table_type
;
/* Look up the reduction_info for PHI in REDUCTION_LIST.  The lookup key
   is a stack temporary whose reduc_version is taken from gimple_uid (PHI),
   matching the hash used by reduction_hasher.  Bails out early when the
   table is empty or PHI is NULL.  */
230 static struct reduction_info
*
231 reduction_phi (reduction_info_table_type
*reduction_list
, gimple phi
)
233 struct reduction_info tmpred
, *red
;
235 if (reduction_list
->elements () == 0 || phi
== NULL
)
238 tmpred
.reduc_phi
= phi
;
239 tmpred
.reduc_version
= gimple_uid (phi
);
240 red
= reduction_list
->find (&tmpred
);
245 /* Element of hashtable of names to copy. */
/* NOTE(review): VERSION is the SSA_NAME_VERSION, used directly as the
   hash key by name_to_copy_hasher below.  */
247 struct name_to_copy_elt
249 unsigned version
; /* The version of the name to copy. */
250 tree new_name
; /* The new name used in the copy. */
251 tree field
; /* The field of the structure used to pass the
255 /* Name copies hashtable helpers. */
/* Both hash and equality key on the ssa-name version (definitions
   below).  */
257 struct name_to_copy_hasher
: free_ptr_hash
<name_to_copy_elt
>
259 static inline hashval_t
hash (const name_to_copy_elt
*);
260 static inline bool equal (const name_to_copy_elt
*, const name_to_copy_elt
*);
263 /* Equality and hash functions for hashtab code. */
/* Entries are identified purely by the SSA name version number.  */
266 name_to_copy_hasher::equal (const name_to_copy_elt
*a
, const name_to_copy_elt
*b
)
268 return a
->version
== b
->version
;
272 name_to_copy_hasher::hash (const name_to_copy_elt
*a
)
274 return (hashval_t
) a
->version
;
277 typedef hash_table
<name_to_copy_hasher
> name_to_copy_table_type
;
279 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
280 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
281 represents the denominator for every element in the matrix. */
/* Always access the representation through the LTM_* macros below; the
   rowsize/colsize/denominator fields are implied by those accessors.  */
282 typedef struct lambda_trans_matrix_s
284 lambda_matrix matrix
;
288 } *lambda_trans_matrix
;
289 #define LTM_MATRIX(T) ((T)->matrix)
290 #define LTM_ROWSIZE(T) ((T)->rowsize)
291 #define LTM_COLSIZE(T) ((T)->colsize)
292 #define LTM_DENOMINATOR(T) ((T)->denominator)
294 /* Allocate a new transformation matrix. */
/* The matrix is allocated on LAMBDA_OBSTACK (freed with the obstack, not
   individually); the denominator starts at 1, i.e. an integer matrix.  */
296 static lambda_trans_matrix
297 lambda_trans_matrix_new (int colsize
, int rowsize
,
298 struct obstack
* lambda_obstack
)
300 lambda_trans_matrix ret
;
302 ret
= (lambda_trans_matrix
)
303 obstack_alloc (lambda_obstack
, sizeof (struct lambda_trans_matrix_s
));
304 LTM_MATRIX (ret
) = lambda_matrix_new (rowsize
, colsize
, lambda_obstack
);
305 LTM_ROWSIZE (ret
) = rowsize
;
306 LTM_COLSIZE (ret
) = colsize
;
307 LTM_DENOMINATOR (ret
) = 1;
311 /* Multiply a vector VEC by a matrix MAT.
312 MAT is an M*N matrix, and VEC is a vector with length N. The result
313 is stored in DEST which must be a vector of length M. */
/* DEST is cleared first, then accumulates the row-by-row dot products.  */
316 lambda_matrix_vector_mult (lambda_matrix matrix
, int m
, int n
,
317 lambda_vector vec
, lambda_vector dest
)
321 lambda_vector_clear (dest
, m
);
322 for (i
= 0; i
< m
; i
++)
323 for (j
= 0; j
< n
; j
++)
324 dest
[i
] += matrix
[i
][j
] * vec
[j
];
327 /* Return true if TRANS is a legal transformation matrix that respects
328 the dependence vectors in DISTS and DIRS. The conservative answer
331 "Wolfe proves that a unimodular transformation represented by the
332 matrix T is legal when applied to a loop nest with a set of
333 lexicographically non-negative distance vectors RDG if and only if
334 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
335 i.e.: if and only if it transforms the lexicographically positive
336 distance vectors to lexicographically positive vectors. Note that
337 a unimodular matrix must transform the zero vector (and only it) to
338 the zero vector." S.Muchnick. */
/* TRANS must be square, NB_LOOPS x NB_LOOPS (enforced by the assert
   below); each distance vector is multiplied by TRANS and checked for
   lexicographic positivity.  */
341 lambda_transform_legal_p (lambda_trans_matrix trans
,
343 vec
<ddr_p
> dependence_relations
)
346 lambda_vector distres
;
347 struct data_dependence_relation
*ddr
;
349 gcc_assert (LTM_COLSIZE (trans
) == nb_loops
350 && LTM_ROWSIZE (trans
) == nb_loops
);
352 /* When there are no dependences, the transformation is correct. */
353 if (dependence_relations
.length () == 0)
356 ddr
= dependence_relations
[0];
360 /* When there is an unknown relation in the dependence_relations, we
361 know that it is not worth looking at this loop nest: give up. */
362 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
365 distres
= lambda_vector_new (nb_loops
);
367 /* For each distance vector in the dependence graph. */
368 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
370 /* Don't care about relations for which we know that there is no
371 dependence, nor about read-read (NOTE(review): read-read pairs are
372 input dependences, not output dependences):
372 these data accesses can happen in any order. */
373 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
374 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
377 /* Conservatively answer: "this transformation is not valid". */
378 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
381 /* If the dependence could not be captured by a distance vector,
382 conservatively answer that the transform is not valid. */
383 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
386 /* Compute trans.dist_vect */
387 for (j
= 0; j
< DDR_NUM_DIST_VECTS (ddr
); j
++)
389 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
390 DDR_DIST_VECT (ddr
, j
), distres
);
392 if (!lambda_vector_lexico_pos (distres
, nb_loops
))
399 /* Data dependency analysis. Returns true if the iterations of LOOP
400 are independent of each other (that is, if we can execute them
/* Implementation trick: build the 1x1 transformation matrix [-1]
   (loop reversal) and ask lambda_transform_legal_p whether it is legal.
   If the loop can be reversed, its iterations are independent.  */
404 loop_parallel_p (struct loop
*loop
, struct obstack
* parloop_obstack
)
406 vec
<ddr_p
> dependence_relations
;
407 vec
<data_reference_p
> datarefs
;
408 lambda_trans_matrix trans
;
411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
413 fprintf (dump_file
, "Considering loop %d\n", loop
->num
);
415 fprintf (dump_file
, "loop is innermost\n");
417 fprintf (dump_file
, "loop NOT innermost\n");
420 /* Check for problems with dependences. If the loop can be reversed,
421 the iterations are independent. */
422 auto_vec
<loop_p
, 3> loop_nest
;
423 datarefs
.create (10);
424 dependence_relations
.create (100);
425 if (! compute_data_dependences_for_loop (loop
, true, &loop_nest
, &datarefs
,
426 &dependence_relations
))
428 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
429 fprintf (dump_file
, " FAILED: cannot analyze data dependencies\n");
433 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
434 dump_data_dependence_relations (dump_file
, dependence_relations
);
436 trans
= lambda_trans_matrix_new (1, 1, parloop_obstack
);
437 LTM_MATRIX (trans
)[0][0] = -1;
439 if (lambda_transform_legal_p (trans
, 1, dependence_relations
))
442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
443 fprintf (dump_file
, " SUCCESS: may be parallelized\n");
445 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
447 " FAILED: data dependencies exist across iterations\n");
450 free_dependence_relations (dependence_relations
);
451 free_data_refs (datarefs
);
456 /* Return true when LOOP contains basic blocks marked with the
457 BB_IRREDUCIBLE_LOOP flag. */
/* NOTE(review): BBS is heap-allocated by get_loop_body_in_dom_order;
   presumably freed on the exit paths not visible in this chunk --
   confirm against the full source.  */
460 loop_has_blocks_with_irreducible_flag (struct loop
*loop
)
463 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
466 for (i
= 0; i
< loop
->num_nodes
; i
++)
467 if (bbs
[i
]->flags
& BB_IRREDUCIBLE_LOOP
)
476 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
477 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
478 to their addresses that can be reused. The address of OBJ is known to
479 be invariant in the whole function. Other needed statements are placed
/* The access is first canonicalized onto a MEM_REF of the base decl's
   address; one SSA name per base decl is cached in DECL_ADDRESS so all
   accesses share it.  NOTE(review): when GSI is NULL, callers appear to
   treat a NULL_TREE result as "cannot take address here" (see
   eliminate_local_variables_1) -- confirm against the full source.  */
483 take_address_of (tree obj
, tree type
, edge entry
,
484 int_tree_htab_type
*decl_address
, gimple_stmt_iterator
*gsi
)
487 tree
*var_p
, name
, addr
;
491 /* Since the address of OBJ is invariant, the trees may be shared.
492 Avoid rewriting unrelated parts of the code. */
493 obj
= unshare_expr (obj
);
495 handled_component_p (*var_p
);
496 var_p
= &TREE_OPERAND (*var_p
, 0))
499 /* Canonicalize the access to base on a MEM_REF. */
501 *var_p
= build_simple_mem_ref (build_fold_addr_expr (*var_p
));
503 /* Assign a canonical SSA name to the address of the base decl used
504 in the address and share it for all accesses and addresses based
506 uid
= DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
509 int_tree_map
*slot
= decl_address
->find_slot (elt
, INSERT
);
514 addr
= TREE_OPERAND (*var_p
, 0);
516 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p
, 0), 0));
518 name
= make_temp_ssa_name (TREE_TYPE (addr
), NULL
, obj_name
);
520 name
= make_ssa_name (TREE_TYPE (addr
));
521 stmt
= gimple_build_assign (name
, addr
);
522 gsi_insert_on_edge_immediate (entry
, stmt
);
530 /* Express the address in terms of the canonical SSA name. */
531 TREE_OPERAND (*var_p
, 0) = name
;
533 return build_fold_addr_expr_with_type (obj
, type
);
535 name
= force_gimple_operand (build_addr (obj
, current_function_decl
),
536 &stmts
, true, NULL_TREE
);
537 if (!gimple_seq_empty_p (stmts
))
538 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
540 if (!useless_type_conversion_p (type
, TREE_TYPE (name
)))
542 name
= force_gimple_operand (fold_convert (type
, name
), &stmts
, true,
544 if (!gimple_seq_empty_p (stmts
))
545 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
551 /* Callback for htab_traverse. Create the initialization statement
552 for reduction described in SLOT, and place it at the preheader of
553 the loop described in DATA. */
/* The preheader phi argument is replaced by the operation's neutral
   element (via omp_reduction_init); the user's original initial value is
   saved in reduc->initial_value for use after the parallel region.  */
556 initialize_reductions (reduction_info
**slot
, struct loop
*loop
)
559 tree bvar
, type
, arg
;
562 struct reduction_info
*const reduc
= *slot
;
564 /* Create initialization in preheader:
565 reduction_variable = initialization value of reduction. */
567 /* In the phi node at the header, replace the argument coming
568 from the preheader with the reduction initialization value. */
570 /* Create a new variable to initialize the reduction. */
571 type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
572 bvar
= create_tmp_var (type
, "reduction");
574 c
= build_omp_clause (gimple_location (reduc
->reduc_stmt
),
575 OMP_CLAUSE_REDUCTION
);
576 OMP_CLAUSE_REDUCTION_CODE (c
) = reduc
->reduction_code
;
577 OMP_CLAUSE_DECL (c
) = SSA_NAME_VAR (gimple_assign_lhs (reduc
->reduc_stmt
));
579 init
= omp_reduction_init (c
, TREE_TYPE (bvar
));
582 /* Replace the argument representing the initialization value
583 with the initialization value for the reduction (neutral
584 element for the particular operation, e.g. 0 for PLUS_EXPR,
585 1 for MULT_EXPR, etc).
586 Keep the old value in a new variable "reduction_initial",
587 that will be taken in consideration after the parallel
588 computing is done. */
590 e
= loop_preheader_edge (loop
);
591 arg
= PHI_ARG_DEF_FROM_EDGE (reduc
->reduc_phi
, e
);
592 /* Create new variable to hold the initial value. */
594 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
595 (reduc
->reduc_phi
, loop_preheader_edge (loop
)), init
);
596 reduc
->initial_value
= arg
;
/* Fields of the walk data passed to eliminate_local_variables_1 (the
   enclosing struct's head is not visible in this chunk; the callback
   casts its DATA argument to struct elv_data).  */
602 struct walk_stmt_info info
; /* Embedded walker state; its ->info is handed to walk_tree/walk_gimple_op.  */
604 int_tree_htab_type
*decl_address
; /* Cache of already-taken decl addresses (see take_address_of).  */
605 gimple_stmt_iterator
*gsi
; /* Insertion point for helper statements; may be NULL.  */
610 /* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
/* Two rewrites: a plain local decl becomes *(&decl) via a cached address
   SSA name; an ADDR_EXPR that must stay a gimple operand has its whole
   address computation moved out of the region.  */
617 eliminate_local_variables_1 (tree
*tp
, int *walk_subtrees
, void *data
)
619 struct elv_data
*const dta
= (struct elv_data
*) data
;
620 tree t
= *tp
, var
, addr
, addr_type
, type
, obj
;
626 if (!SSA_VAR_P (t
) || DECL_EXTERNAL (t
))
629 type
= TREE_TYPE (t
);
630 addr_type
= build_pointer_type (type
);
631 addr
= take_address_of (t
, addr_type
, dta
->entry
, dta
->decl_address
,
633 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
639 *tp
= build_simple_mem_ref (addr
);
645 if (TREE_CODE (t
) == ADDR_EXPR
)
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address is not a function
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
658 if (!is_gimple_val (t
))
662 obj
= TREE_OPERAND (t
, 0);
663 var
= get_base_address (obj
);
664 if (!var
|| !SSA_VAR_P (var
) || DECL_EXTERNAL (var
))
667 addr_type
= TREE_TYPE (t
);
668 addr
= take_address_of (obj
, addr_type
, dta
->entry
, dta
->decl_address
,
670 if (dta
->gsi
== NULL
&& addr
== NULL_TREE
)
687 /* Moves the references to local variables in STMT at *GSI out of the single
688 entry single exit region starting at ENTRY. DECL_ADDRESS contains
689 addresses of the references that had their address taken
/* Debug binds are walked with a NULL insertion iterator (so no helper
   statements can be emitted for them) and reset when rewriting fails;
   clobbers are simply dropped by replacing them with a nop.  */
693 eliminate_local_variables_stmt (edge entry
, gimple_stmt_iterator
*gsi
,
694 int_tree_htab_type
*decl_address
)
697 gimple stmt
= gsi_stmt (*gsi
);
699 memset (&dta
.info
, '\0', sizeof (dta
.info
));
701 dta
.decl_address
= decl_address
;
705 if (gimple_debug_bind_p (stmt
))
708 walk_tree (gimple_debug_bind_get_value_ptr (stmt
),
709 eliminate_local_variables_1
, &dta
.info
, NULL
);
712 gimple_debug_bind_reset_value (stmt
);
716 else if (gimple_clobber_p (stmt
))
718 stmt
= gimple_build_nop ();
719 gsi_replace (gsi
, stmt
, false);
725 walk_gimple_op (stmt
, eliminate_local_variables_1
, &dta
.info
);
732 /* Eliminates the references to local variables from the single entry
733 single exit region between the ENTRY and EXIT edges.
736 1) Taking address of a local variable -- these are moved out of the
737 region (and temporary variable is created to hold the address if
740 2) Dereferencing a local variable -- these are replaced with indirect
/* Two passes over the region: non-debug statements first, then debug
   binds, so that debug statements see the rewritten decl addresses.  */
744 eliminate_local_variables (edge entry
, edge exit
)
747 auto_vec
<basic_block
, 3> body
;
749 gimple_stmt_iterator gsi
;
750 bool has_debug_stmt
= false;
751 int_tree_htab_type
decl_address (10);
752 basic_block entry_bb
= entry
->src
;
753 basic_block exit_bb
= exit
->dest
;
755 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
757 FOR_EACH_VEC_ELT (body
, i
, bb
)
758 if (bb
!= entry_bb
&& bb
!= exit_bb
)
759 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
760 if (is_gimple_debug (gsi_stmt (gsi
)))
762 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
763 has_debug_stmt
= true;
766 eliminate_local_variables_stmt (entry
, &gsi
, &decl_address
);
769 FOR_EACH_VEC_ELT (body
, i
, bb
)
770 if (bb
!= entry_bb
&& bb
!= exit_bb
)
771 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
772 if (gimple_debug_bind_p (gsi_stmt (gsi
)))
773 eliminate_local_variables_stmt (entry
, &gsi
, &decl_address
);
776 /* Returns true if expression EXPR is not defined between ENTRY and
777 EXIT, i.e. if all its operands are defined outside of the region. */
/* Minimal invariants are trivially region-invariant; an SSA name is
   invariant when its defining block does not lie strictly inside the
   ENTRY/EXIT dominator sandwich.  */
780 expr_invariant_in_region_p (edge entry
, edge exit
, tree expr
)
782 basic_block entry_bb
= entry
->src
;
783 basic_block exit_bb
= exit
->dest
;
786 if (is_gimple_min_invariant (expr
))
789 if (TREE_CODE (expr
) == SSA_NAME
)
791 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
793 && dominated_by_p (CDI_DOMINATORS
, def_bb
, entry_bb
)
794 && !dominated_by_p (CDI_DOMINATORS
, def_bb
, exit_bb
))
803 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
804 The copies are stored to NAME_COPIES, if NAME was already duplicated,
805 its duplicate stored in NAME_COPIES is returned.
807 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
808 duplicated, storing the copies in DECL_COPIES. */
/* The decl copy is registered in DECL_COPIES under both the old and the
   new DECL_UID, so a later lookup of either uid maps to the same copy.  */
811 separate_decls_in_region_name (tree name
, name_to_copy_table_type
*name_copies
,
812 int_tree_htab_type
*decl_copies
,
815 tree copy
, var
, var_copy
;
816 unsigned idx
, uid
, nuid
;
817 struct int_tree_map ielt
;
818 struct name_to_copy_elt elt
, *nelt
;
819 name_to_copy_elt
**slot
;
822 if (TREE_CODE (name
) != SSA_NAME
)
825 idx
= SSA_NAME_VERSION (name
);
827 slot
= name_copies
->find_slot_with_hash (&elt
, idx
,
828 copy_name_p
? INSERT
: NO_INSERT
);
830 return (*slot
)->new_name
;
834 copy
= duplicate_ssa_name (name
, NULL
);
835 nelt
= XNEW (struct name_to_copy_elt
);
837 nelt
->new_name
= copy
;
838 nelt
->field
= NULL_TREE
;
847 var
= SSA_NAME_VAR (name
);
851 uid
= DECL_UID (var
);
853 dslot
= decl_copies
->find_slot_with_hash (ielt
, uid
, INSERT
);
856 var_copy
= create_tmp_var (TREE_TYPE (var
), get_name (var
));
857 DECL_GIMPLE_REG_P (var_copy
) = DECL_GIMPLE_REG_P (var
);
859 dslot
->to
= var_copy
;
861 /* Ensure that when we meet this decl next time, we won't duplicate
863 nuid
= DECL_UID (var_copy
);
865 dslot
= decl_copies
->find_slot_with_hash (ielt
, nuid
, INSERT
);
866 gcc_assert (!dslot
->to
);
868 dslot
->to
= var_copy
;
871 var_copy
= dslot
->to
;
873 replace_ssa_name_symbol (copy
, var_copy
);
877 /* Finds the ssa names used in STMT that are defined outside the
878 region between ENTRY and EXIT and replaces such ssa names with
879 their duplicates. The duplicates are stored to NAME_COPIES. Base
880 decls of all ssa names used in STMT (including those defined in
881 LOOP) are replaced with the new temporary variables; the
882 replacement decls are stored in DECL_COPIES. */
/* Defs inside the region are never renamed (copy_name_p false there --
   the assert checks the name came back unchanged); uses are renamed only
   when the name is invariant in the region, i.e. defined outside.  */
885 separate_decls_in_region_stmt (edge entry
, edge exit
, gimple stmt
,
886 name_to_copy_table_type
*name_copies
,
887 int_tree_htab_type
*decl_copies
)
895 FOR_EACH_PHI_OR_STMT_DEF (def
, stmt
, oi
, SSA_OP_DEF
)
897 name
= DEF_FROM_PTR (def
);
898 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
899 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
901 gcc_assert (copy
== name
);
904 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
906 name
= USE_FROM_PTR (use
);
907 if (TREE_CODE (name
) != SSA_NAME
)
910 copy_name_p
= expr_invariant_in_region_p (entry
, exit
, name
);
911 copy
= separate_decls_in_region_name (name
, name_copies
, decl_copies
,
917 /* Finds the ssa names used in STMT that are defined outside the
918 region between ENTRY and EXIT and replaces such ssa names with
919 their duplicates. The duplicates are stored to NAME_COPIES. Base
920 decls of all ssa names used in STMT (including those defined in
921 LOOP) are replaced with the new temporary variables; the
922 replacement decls are stored in DECL_COPIES. */
/* Debug-statement variant: uses NO_INSERT lookups only, so it never
   creates new copies -- it retargets debug binds at copies made earlier,
   and resets the bind value when a used name has no recorded copy.  */
925 separate_decls_in_region_debug (gimple stmt
,
926 name_to_copy_table_type
*name_copies
,
927 int_tree_htab_type
*decl_copies
)
932 struct int_tree_map ielt
;
933 struct name_to_copy_elt elt
;
934 name_to_copy_elt
**slot
;
937 if (gimple_debug_bind_p (stmt
))
938 var
= gimple_debug_bind_get_var (stmt
);
939 else if (gimple_debug_source_bind_p (stmt
))
940 var
= gimple_debug_source_bind_get_var (stmt
);
943 if (TREE_CODE (var
) == DEBUG_EXPR_DECL
|| TREE_CODE (var
) == LABEL_DECL
)
945 gcc_assert (DECL_P (var
) && SSA_VAR_P (var
));
946 ielt
.uid
= DECL_UID (var
);
947 dslot
= decl_copies
->find_slot_with_hash (ielt
, ielt
.uid
, NO_INSERT
);
950 if (gimple_debug_bind_p (stmt
))
951 gimple_debug_bind_set_var (stmt
, dslot
->to
);
952 else if (gimple_debug_source_bind_p (stmt
))
953 gimple_debug_source_bind_set_var (stmt
, dslot
->to
);
955 FOR_EACH_PHI_OR_STMT_USE (use
, stmt
, oi
, SSA_OP_USE
)
957 name
= USE_FROM_PTR (use
);
958 if (TREE_CODE (name
) != SSA_NAME
)
961 elt
.version
= SSA_NAME_VERSION (name
);
962 slot
= name_copies
->find_slot_with_hash (&elt
, elt
.version
, NO_INSERT
);
965 gimple_debug_bind_reset_value (stmt
);
970 SET_USE (use
, (*slot
)->new_name
);
976 /* Callback for htab_traverse. Adds a field corresponding to the reduction
977 specified in SLOT. The type is passed in DATA. */
/* The field is named after the reduction statement's lhs SSA name and
   carries its type and source location.  */
980 add_field_for_reduction (reduction_info
**slot
, tree type
)
983 struct reduction_info
*const red
= *slot
;
984 tree var
= gimple_assign_lhs (red
->reduc_stmt
);
985 tree field
= build_decl (gimple_location (red
->reduc_stmt
), FIELD_DECL
,
986 SSA_NAME_IDENTIFIER (var
), TREE_TYPE (var
));
988 insert_field_into_struct (type
, field
);
995 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
996 described in SLOT. The type is passed in DATA. */
/* The ssa name is recovered from the stored version number; the new
   field is recorded so loads/stores can reference it later.  */
999 add_field_for_name (name_to_copy_elt
**slot
, tree type
)
1001 struct name_to_copy_elt
*const elt
= *slot
;
1002 tree name
= ssa_name (elt
->version
);
1003 tree field
= build_decl (UNKNOWN_LOCATION
,
1004 FIELD_DECL
, SSA_NAME_IDENTIFIER (name
),
1007 insert_field_into_struct (type
, field
);
1013 /* Callback for htab_traverse. A local result is the intermediate result
1014 computed by a single
1015 thread, or the initial value in case no iteration was executed.
1016 This function creates a phi node reflecting these values.
1017 The phi's result will be stored in NEW_PHI field of the
1018 reduction's data structure. */
/* The phi gets two arguments: reduc->init on the edge bypassing the
   loop, and the reduction statement's lhs on the loop's fallthru edge.  */
1021 create_phi_for_local_result (reduction_info
**slot
, struct loop
*loop
)
1023 struct reduction_info
*const reduc
= *slot
;
1026 basic_block store_bb
;
1028 source_location locus
;
1030 /* STORE_BB is the block where the phi
1031 should be stored. It is the destination of the loop exit.
1032 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1033 store_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1035 /* STORE_BB has two predecessors. One coming from the loop
1036 (the reduction's result is computed at the loop),
1037 and another coming from a block preceding the loop,
1039 are executed (the initial value should be taken). */
1040 if (EDGE_PRED (store_bb
, 0) == FALLTHRU_EDGE (loop
->latch
))
1041 e
= EDGE_PRED (store_bb
, 1);
1043 e
= EDGE_PRED (store_bb
, 0);
1044 local_res
= copy_ssa_name (gimple_assign_lhs (reduc
->reduc_stmt
));
1045 locus
= gimple_location (reduc
->reduc_stmt
);
1046 new_phi
= create_phi_node (local_res
, store_bb
);
1047 add_phi_arg (new_phi
, reduc
->init
, e
, locus
);
1048 add_phi_arg (new_phi
, gimple_assign_lhs (reduc
->reduc_stmt
),
1049 FALLTHRU_EDGE (loop
->latch
), locus
);
1050 reduc
->new_phi
= new_phi
;
/* Fields of the closure/shared-data descriptor used by the reduction
   helpers below (the enclosing struct's head is not visible in this
   chunk; callers pass it as struct clsn_data).  */
1060 basic_block store_bb
; /* Block where stores into the shared struct are emitted.  */
1061 basic_block load_bb
; /* Block where loads from the shared struct are emitted.  */
1064 /* Callback for htab_traverse. Create an atomic instruction for the
1065 reduction described in SLOT.
1066 DATA annotates the place in memory the atomic operation relates to,
1067 and the basic block it needs to be generated in. */
/* Emits an OMP atomic load / combine-with-local-result / atomic store
   sequence, splitting LOAD_BB so each piece gets its own block.  */
1070 create_call_for_reduction_1 (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1072 struct reduction_info
*const reduc
= *slot
;
1073 gimple_stmt_iterator gsi
;
1074 tree type
= TREE_TYPE (PHI_RESULT (reduc
->reduc_phi
));
1079 tree t
, addr
, ref
, x
;
1080 tree tmp_load
, name
;
1083 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1084 t
= build3 (COMPONENT_REF
, type
, load_struct
, reduc
->field
, NULL_TREE
);
1086 addr
= build_addr (t
, current_function_decl
);
1088 /* Create phi node. */
1089 bb
= clsn_data
->load_bb
;
1091 gsi
= gsi_last_bb (bb
);
1092 e
= split_block (bb
, gsi_stmt (gsi
));
1095 tmp_load
= create_tmp_var (TREE_TYPE (TREE_TYPE (addr
)));
1096 tmp_load
= make_ssa_name (tmp_load
);
1097 load
= gimple_build_omp_atomic_load (tmp_load
, addr
);
1098 SSA_NAME_DEF_STMT (tmp_load
) = load
;
1099 gsi
= gsi_start_bb (new_bb
);
1100 gsi_insert_after (&gsi
, load
, GSI_NEW_STMT
);
1102 e
= split_block (new_bb
, load
);
1104 gsi
= gsi_start_bb (new_bb
);
1106 x
= fold_build2 (reduc
->reduction_code
,
1107 TREE_TYPE (PHI_RESULT (reduc
->new_phi
)), ref
,
1108 PHI_RESULT (reduc
->new_phi
));
1110 name
= force_gimple_operand_gsi (&gsi
, x
, true, NULL_TREE
, true,
1111 GSI_CONTINUE_LINKING
);
1113 gsi_insert_after (&gsi
, gimple_build_omp_atomic_store (name
), GSI_NEW_STMT
);
1117 /* Create the atomic operation at the join point of the threads.
1118 REDUCTION_LIST describes the reductions in the LOOP.
1119 LD_ST_DATA describes the shared data structure where
1120 shared data is stored in and loaded from. */
/* First materializes each thread's local-result phi
   (create_phi_for_local_result), then emits the per-reduction atomic
   update (create_call_for_reduction_1) into LD_ST_DATA->load_bb.  */
1122 create_call_for_reduction (struct loop
*loop
,
1123 reduction_info_table_type
*reduction_list
,
1124 struct clsn_data
*ld_st_data
)
1126 reduction_list
->traverse
<struct loop
*, create_phi_for_local_result
> (loop
);
1127 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1128 ld_st_data
->load_bb
= FALLTHRU_EDGE (loop
->latch
)->dest
;
1130 ->traverse
<struct clsn_data
*, create_call_for_reduction_1
> (ld_st_data
);
1133 /* Callback for htab_traverse. Loads the final reduction value at the
1134 join point of all threads, and inserts it in the right place. */
/* Replaces the original exit phi (keep_res): its result is re-assigned
   from the shared struct's reduction field and the phi is removed.  */
1137 create_loads_for_reductions (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1139 struct reduction_info
*const red
= *slot
;
1141 gimple_stmt_iterator gsi
;
1142 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1147 gsi
= gsi_after_labels (clsn_data
->load_bb
);
1148 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1149 load_struct
= build3 (COMPONENT_REF
, type
, load_struct
, red
->field
,
1153 name
= PHI_RESULT (red
->keep_res
);
1154 stmt
= gimple_build_assign (name
, x
);
1156 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1158 for (gsi
= gsi_start_phis (gimple_bb (red
->keep_res
));
1159 !gsi_end_p (gsi
); gsi_next (&gsi
))
1160 if (gsi_stmt (gsi
) == red
->keep_res
)
1162 remove_phi_node (&gsi
, false);
1168 /* Load the reduction result that was stored in LD_ST_DATA.
1169 REDUCTION_LIST describes the list of reductions that the
1170 loads should be generated for. */
/* Emits "load = &store" at the top of LOAD_BB, then generates one load
   per reduction via create_loads_for_reductions.  */
1172 create_final_loads_for_reduction (reduction_info_table_type
*reduction_list
,
1173 struct clsn_data
*ld_st_data
)
1175 gimple_stmt_iterator gsi
;
1179 gsi
= gsi_after_labels (ld_st_data
->load_bb
);
1180 t
= build_fold_addr_expr (ld_st_data
->store
);
1181 stmt
= gimple_build_assign (ld_st_data
->load
, t
);
1183 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1186 ->traverse
<struct clsn_data
*, create_loads_for_reductions
> (ld_st_data
);
1190 /* Callback for htab_traverse. Store the neutral value for the
1191 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1192 1 for MULT_EXPR, etc. into the reduction field.
1193 The reduction is specified in SLOT. The store information is
/* NOTE(review): despite the wording above, the code stores
   red->initial_value (the user's pre-loop value saved by
   initialize_reductions), not the operation's neutral element.  */
1197 create_stores_for_reduction (reduction_info
**slot
, struct clsn_data
*clsn_data
)
1199 struct reduction_info
*const red
= *slot
;
1202 gimple_stmt_iterator gsi
;
1203 tree type
= TREE_TYPE (gimple_assign_lhs (red
->reduc_stmt
));
1205 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1206 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, red
->field
, NULL_TREE
);
1207 stmt
= gimple_build_assign (t
, red
->initial_value
);
1208 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1213 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1214 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1215 specified in SLOT. */
1218 create_loads_and_stores_for_name (name_to_copy_elt
**slot
,
1219 struct clsn_data
*clsn_data
)
1221 struct name_to_copy_elt
*const elt
= *slot
;
1224 gimple_stmt_iterator gsi
;
1225 tree type
= TREE_TYPE (elt
->new_name
);
1228 gsi
= gsi_last_bb (clsn_data
->store_bb
);
1229 t
= build3 (COMPONENT_REF
, type
, clsn_data
->store
, elt
->field
, NULL_TREE
);
1230 stmt
= gimple_build_assign (t
, ssa_name (elt
->version
));
1231 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1233 gsi
= gsi_last_bb (clsn_data
->load_bb
);
1234 load_struct
= build_simple_mem_ref (clsn_data
->load
);
1235 t
= build3 (COMPONENT_REF
, type
, load_struct
, elt
->field
, NULL_TREE
);
1236 stmt
= gimple_build_assign (elt
->new_name
, t
);
1237 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1242 /* Moves all the variables used in LOOP and defined outside of it (including
1243 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1244 name) to a structure created for this purpose. The code
1252 is transformed this way:
1267 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1268 pointer `new' is intentionally not initialized (the loop will be split to a
1269 separate function later, and `new' will be initialized from its arguments).
1270 LD_ST_DATA holds information about the shared data structure used to pass
1271 information among the threads. It is initialized here, and
1272 gen_parallel_loop will pass it to create_call_for_reduction that
1273 needs this information. REDUCTION_LIST describes the reductions
1277 separate_decls_in_region (edge entry
, edge exit
,
1278 reduction_info_table_type
*reduction_list
,
1279 tree
*arg_struct
, tree
*new_arg_struct
,
1280 struct clsn_data
*ld_st_data
)
1283 basic_block bb1
= split_edge (entry
);
1284 basic_block bb0
= single_pred (bb1
);
1285 name_to_copy_table_type
name_copies (10);
1286 int_tree_htab_type
decl_copies (10);
1288 tree type
, type_name
, nvar
;
1289 gimple_stmt_iterator gsi
;
1290 struct clsn_data clsn_data
;
1291 auto_vec
<basic_block
, 3> body
;
1293 basic_block entry_bb
= bb1
;
1294 basic_block exit_bb
= exit
->dest
;
1295 bool has_debug_stmt
= false;
1297 entry
= single_succ_edge (entry_bb
);
1298 gather_blocks_in_sese_region (entry_bb
, exit_bb
, &body
);
1300 FOR_EACH_VEC_ELT (body
, i
, bb
)
1302 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1304 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1305 separate_decls_in_region_stmt (entry
, exit
, gsi_stmt (gsi
),
1306 &name_copies
, &decl_copies
);
1308 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1310 gimple stmt
= gsi_stmt (gsi
);
1312 if (is_gimple_debug (stmt
))
1313 has_debug_stmt
= true;
1315 separate_decls_in_region_stmt (entry
, exit
, stmt
,
1316 &name_copies
, &decl_copies
);
1321 /* Now process debug bind stmts. We must not create decls while
1322 processing debug stmts, so we defer their processing so as to
1323 make sure we will have debug info for as many variables as
1324 possible (all of those that were dealt with in the loop above),
1325 and discard those for which we know there's nothing we can
1328 FOR_EACH_VEC_ELT (body
, i
, bb
)
1329 if (bb
!= entry_bb
&& bb
!= exit_bb
)
1331 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1333 gimple stmt
= gsi_stmt (gsi
);
1335 if (is_gimple_debug (stmt
))
1337 if (separate_decls_in_region_debug (stmt
, &name_copies
,
1340 gsi_remove (&gsi
, true);
1349 if (name_copies
.elements () == 0 && reduction_list
->elements () == 0)
1351 /* It may happen that there is nothing to copy (if there are only
1352 loop carried and external variables in the loop). */
1354 *new_arg_struct
= NULL
;
1358 /* Create the type for the structure to store the ssa names to. */
1359 type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1360 type_name
= build_decl (UNKNOWN_LOCATION
,
1361 TYPE_DECL
, create_tmp_var_name (".paral_data"),
1363 TYPE_NAME (type
) = type_name
;
1365 name_copies
.traverse
<tree
, add_field_for_name
> (type
);
1366 if (reduction_list
&& reduction_list
->elements () > 0)
1368 /* Create the fields for reductions. */
1369 reduction_list
->traverse
<tree
, add_field_for_reduction
> (type
);
1373 /* Create the loads and stores. */
1374 *arg_struct
= create_tmp_var (type
, ".paral_data_store");
1375 nvar
= create_tmp_var (build_pointer_type (type
), ".paral_data_load");
1376 *new_arg_struct
= make_ssa_name (nvar
);
1378 ld_st_data
->store
= *arg_struct
;
1379 ld_st_data
->load
= *new_arg_struct
;
1380 ld_st_data
->store_bb
= bb0
;
1381 ld_st_data
->load_bb
= bb1
;
1384 .traverse
<struct clsn_data
*, create_loads_and_stores_for_name
>
1387 /* Load the calculation from memory (after the join of the threads). */
1389 if (reduction_list
&& reduction_list
->elements () > 0)
1392 ->traverse
<struct clsn_data
*, create_stores_for_reduction
>
1394 clsn_data
.load
= make_ssa_name (nvar
);
1395 clsn_data
.load_bb
= exit
->dest
;
1396 clsn_data
.store
= ld_st_data
->store
;
1397 create_final_loads_for_reduction (reduction_list
, &clsn_data
);
1402 /* Returns true if FN was created to run in parallel. */
1405 parallelized_function_p (tree fndecl
)
1407 cgraph_node
*node
= cgraph_node::get (fndecl
);
1408 gcc_assert (node
!= NULL
);
1409 return node
->parallelized_function
;
1412 /* Creates and returns an empty function that will receive the body of
1413 a parallelized loop. */
1416 create_loop_fn (location_t loc
)
1420 tree decl
, type
, name
, t
;
1421 struct function
*act_cfun
= cfun
;
1422 static unsigned loopfn_num
;
1424 loc
= LOCATION_LOCUS (loc
);
1425 snprintf (buf
, 100, "%s.$loopfn", current_function_name ());
1426 ASM_FORMAT_PRIVATE_NAME (tname
, buf
, loopfn_num
++);
1427 clean_symbol_name (tname
);
1428 name
= get_identifier (tname
);
1429 type
= build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
1431 decl
= build_decl (loc
, FUNCTION_DECL
, name
, type
);
1432 TREE_STATIC (decl
) = 1;
1433 TREE_USED (decl
) = 1;
1434 DECL_ARTIFICIAL (decl
) = 1;
1435 DECL_IGNORED_P (decl
) = 0;
1436 TREE_PUBLIC (decl
) = 0;
1437 DECL_UNINLINABLE (decl
) = 1;
1438 DECL_EXTERNAL (decl
) = 0;
1439 DECL_CONTEXT (decl
) = NULL_TREE
;
1440 DECL_INITIAL (decl
) = make_node (BLOCK
);
1442 t
= build_decl (loc
, RESULT_DECL
, NULL_TREE
, void_type_node
);
1443 DECL_ARTIFICIAL (t
) = 1;
1444 DECL_IGNORED_P (t
) = 1;
1445 DECL_RESULT (decl
) = t
;
1447 t
= build_decl (loc
, PARM_DECL
, get_identifier (".paral_data_param"),
1449 DECL_ARTIFICIAL (t
) = 1;
1450 DECL_ARG_TYPE (t
) = ptr_type_node
;
1451 DECL_CONTEXT (t
) = decl
;
1453 DECL_ARGUMENTS (decl
) = t
;
1455 allocate_struct_function (decl
, false);
1457 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1459 set_cfun (act_cfun
);
1464 /* Replace uses of NAME by VAL in block BB. */
1467 replace_uses_in_bb_by (tree name
, tree val
, basic_block bb
)
1470 imm_use_iterator imm_iter
;
1472 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, name
)
1474 if (gimple_bb (use_stmt
) != bb
)
1477 use_operand_p use_p
;
1478 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
1479 SET_USE (use_p
, val
);
1483 /* Do transformation from:
1490 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1491 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1495 sum_b = sum_a + sum_update
1503 ivtmp_b = ivtmp_a + 1;
1507 sum_z = PHI <sum_b (cond[1]), ...>
1509 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1519 ivtmp_a = PHI <ivtmp_c (latch)>
1520 sum_a = PHI <sum_c (latch)>
1524 sum_b = sum_a + sum_update
1529 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1530 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1531 if (ivtmp_c < n + 1)
1537 ivtmp_b = ivtmp_a + 1;
1541 sum_y = PHI <sum_c (newheader)>
1544 sum_z = PHI <sum_y (newexit), ...>
1547 In unified diff format:
1552 + goto <bb newheader>
1555 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1556 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1557 + ivtmp_a = PHI <ivtmp_c (latch)>
1558 + sum_a = PHI <sum_c (latch)>
1562 sum_b = sum_a + sum_update
1569 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 + if (ivtmp_c < n + 1)
1577 ivtmp_b = ivtmp_a + 1;
1579 + goto <bb newheader>
1582 + sum_y = PHI <sum_c (newheader)>
1585 - sum_z = PHI <sum_b (cond[1]), ...>
1586 + sum_z = PHI <sum_y (newexit), ...>
1588 Note: the example does not show any virtual phis, but these are handled more
1589 or less as reductions.
1592 Moves the exit condition of LOOP to the beginning of its header.
1593 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1597 transform_to_exit_first_loop_alt (struct loop
*loop
,
1598 reduction_info_table_type
*reduction_list
,
1601 basic_block header
= loop
->header
;
1602 basic_block latch
= loop
->latch
;
1603 edge exit
= single_dom_exit (loop
);
1604 basic_block exit_block
= exit
->dest
;
1605 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1606 tree control
= gimple_cond_lhs (cond_stmt
);
1609 /* Rewriting virtuals into loop-closed ssa normal form makes this
1610 transformation simpler. It also ensures that the virtuals are in
1611 loop-closed ssa normal from after the transformation, which is required by
1612 create_parallel_loop. */
1613 rewrite_virtuals_into_loop_closed_ssa (loop
);
1615 /* Create the new_header block. */
1616 basic_block new_header
= split_block_before_cond_jump (exit
->src
);
1617 edge edge_at_split
= single_pred_edge (new_header
);
1619 /* Redirect entry edge to new_header. */
1620 edge entry
= loop_preheader_edge (loop
);
1621 e
= redirect_edge_and_branch (entry
, new_header
);
1622 gcc_assert (e
== entry
);
1624 /* Redirect post_inc_edge to new_header. */
1625 edge post_inc_edge
= single_succ_edge (latch
);
1626 e
= redirect_edge_and_branch (post_inc_edge
, new_header
);
1627 gcc_assert (e
== post_inc_edge
);
1629 /* Redirect post_cond_edge to header. */
1630 edge post_cond_edge
= single_pred_edge (latch
);
1631 e
= redirect_edge_and_branch (post_cond_edge
, header
);
1632 gcc_assert (e
== post_cond_edge
);
1634 /* Redirect edge_at_split to latch. */
1635 e
= redirect_edge_and_branch (edge_at_split
, latch
);
1636 gcc_assert (e
== edge_at_split
);
1638 /* Set the new loop bound. */
1639 gimple_cond_set_rhs (cond_stmt
, bound
);
1640 update_stmt (cond_stmt
);
1642 /* Repair the ssa. */
1643 vec
<edge_var_map
> *v
= redirect_edge_var_map_vector (post_inc_edge
);
1647 for (gsi
= gsi_start_phis (header
), i
= 0;
1648 !gsi_end_p (gsi
) && v
->iterate (i
, &vm
);
1649 gsi_next (&gsi
), i
++)
1651 gphi
*phi
= gsi
.phi ();
1652 tree res_a
= PHI_RESULT (phi
);
1654 /* Create new phi. */
1655 tree res_c
= copy_ssa_name (res_a
, phi
);
1656 gphi
*nphi
= create_phi_node (res_c
, new_header
);
1658 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1659 replace_uses_in_bb_by (res_a
, res_c
, new_header
);
1661 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1662 add_phi_arg (phi
, res_c
, post_cond_edge
, UNKNOWN_LOCATION
);
1664 /* Replace sum_b with sum_c in exit phi. */
1665 tree res_b
= redirect_edge_var_map_def (vm
);
1666 replace_uses_in_bb_by (res_b
, res_c
, exit_block
);
1668 struct reduction_info
*red
= reduction_phi (reduction_list
, phi
);
1669 gcc_assert (virtual_operand_p (res_a
)
1675 /* Register the new reduction phi. */
1676 red
->reduc_phi
= nphi
;
1677 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
1680 gcc_assert (gsi_end_p (gsi
) && !v
->iterate (i
, &vm
));
1682 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1683 flush_pending_stmts (entry
);
1685 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1686 flush_pending_stmts (post_inc_edge
);
1688 /* Create a new empty exit block, inbetween the new loop header and the old
1689 exit block. The function separate_decls_in_region needs this block to
1690 insert code that is active on loop exit, but not any other path. */
1691 basic_block new_exit_block
= split_edge (exit
);
1693 /* Insert and register the reduction exit phis. */
1694 for (gphi_iterator gsi
= gsi_start_phis (exit_block
);
1698 gphi
*phi
= gsi
.phi ();
1699 tree res_z
= PHI_RESULT (phi
);
1701 /* Now that we have a new exit block, duplicate the phi of the old exit
1702 block in the new exit block to preserve loop-closed ssa. */
1703 edge succ_new_exit_block
= single_succ_edge (new_exit_block
);
1704 edge pred_new_exit_block
= single_pred_edge (new_exit_block
);
1705 tree res_y
= copy_ssa_name (res_z
, phi
);
1706 gphi
*nphi
= create_phi_node (res_y
, new_exit_block
);
1707 tree res_c
= PHI_ARG_DEF_FROM_EDGE (phi
, succ_new_exit_block
);
1708 add_phi_arg (nphi
, res_c
, pred_new_exit_block
, UNKNOWN_LOCATION
);
1709 add_phi_arg (phi
, res_y
, succ_new_exit_block
, UNKNOWN_LOCATION
);
1711 if (virtual_operand_p (res_z
))
1714 gimple reduc_phi
= SSA_NAME_DEF_STMT (res_c
);
1715 struct reduction_info
*red
= reduction_phi (reduction_list
, reduc_phi
);
1717 red
->keep_res
= nphi
;
1720 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1721 then we're still using some fields, so only bother about fields that are
1722 still used: header and latch.
1723 The loop has a new header bb, so we update it. The latch bb stays the
1725 loop
->header
= new_header
;
1727 /* Recalculate dominance info. */
1728 free_dominance_info (CDI_DOMINATORS
);
1729 calculate_dominance_info (CDI_DOMINATORS
);
1732 /* Tries to moves the exit condition of LOOP to the beginning of its header
1733 without duplication of the loop body. NIT is the number of iterations of the
1734 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1735 transformation is successful. */
1738 try_transform_to_exit_first_loop_alt (struct loop
*loop
,
1739 reduction_info_table_type
*reduction_list
,
1742 /* Check whether the latch contains a single statement. */
1743 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop
->latch
)))
1746 /* Check whether the latch contains the loop iv increment. */
1747 edge back
= single_succ_edge (loop
->latch
);
1748 edge exit
= single_dom_exit (loop
);
1749 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1750 tree control
= gimple_cond_lhs (cond_stmt
);
1751 gphi
*phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (control
));
1752 tree inc_res
= gimple_phi_arg_def (phi
, back
->dest_idx
);
1753 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res
)) != loop
->latch
)
1756 /* Check whether there's no code between the loop condition and the latch. */
1757 if (!single_pred_p (loop
->latch
)
1758 || single_pred (loop
->latch
) != exit
->src
)
1761 tree alt_bound
= NULL_TREE
;
1762 tree nit_type
= TREE_TYPE (nit
);
1764 /* Figure out whether nit + 1 overflows. */
1765 if (TREE_CODE (nit
) == INTEGER_CST
)
1767 if (!tree_int_cst_equal (nit
, TYPE_MAXVAL (nit_type
)))
1769 alt_bound
= fold_build2_loc (UNKNOWN_LOCATION
, PLUS_EXPR
, nit_type
,
1770 nit
, build_one_cst (nit_type
));
1772 gcc_assert (TREE_CODE (alt_bound
) == INTEGER_CST
);
1773 transform_to_exit_first_loop_alt (loop
, reduction_list
, alt_bound
);
1778 /* Todo: Figure out if we can trigger this, if it's worth to handle
1779 optimally, and if we can handle it optimally. */
1784 gcc_assert (TREE_CODE (nit
) == SSA_NAME
);
1786 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1787 iv with base 0 and step 1 that is incremented in the latch, like this:
1790 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1801 The range of iv_1 is [0, nit]. The latch edge is taken for
1802 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1803 number of latch executions is equal to nit.
1805 The function max_loop_iterations gives us the maximum number of latch
1806 executions, so it gives us the maximum value of nit. */
1808 if (!max_loop_iterations (loop
, &nit_max
))
1811 /* Check if nit + 1 overflows. */
1812 widest_int type_max
= wi::to_widest (TYPE_MAXVAL (nit_type
));
1813 if (!wi::lts_p (nit_max
, type_max
))
1816 gimple def
= SSA_NAME_DEF_STMT (nit
);
1818 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
1820 && is_gimple_assign (def
)
1821 && gimple_assign_rhs_code (def
) == PLUS_EXPR
)
1823 tree op1
= gimple_assign_rhs1 (def
);
1824 tree op2
= gimple_assign_rhs2 (def
);
1825 if (integer_minus_onep (op1
))
1827 else if (integer_minus_onep (op2
))
1831 if (alt_bound
== NULL_TREE
)
1834 transform_to_exit_first_loop_alt (loop
, reduction_list
, alt_bound
);
1838 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1839 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1843 transform_to_exit_first_loop (struct loop
*loop
,
1844 reduction_info_table_type
*reduction_list
,
1847 basic_block
*bbs
, *nbbs
, ex_bb
, orig_header
;
1850 edge exit
= single_dom_exit (loop
), hpred
;
1851 tree control
, control_name
, res
, t
;
1854 gcond
*cond_stmt
, *cond_nit
;
1857 split_block_after_labels (loop
->header
);
1858 orig_header
= single_succ (loop
->header
);
1859 hpred
= single_succ_edge (loop
->header
);
1861 cond_stmt
= as_a
<gcond
*> (last_stmt (exit
->src
));
1862 control
= gimple_cond_lhs (cond_stmt
);
1863 gcc_assert (gimple_cond_rhs (cond_stmt
) == nit
);
1865 /* Make sure that we have phi nodes on exit for all loop header phis
1866 (create_parallel_loop requires that). */
1867 for (gphi_iterator gsi
= gsi_start_phis (loop
->header
);
1872 res
= PHI_RESULT (phi
);
1873 t
= copy_ssa_name (res
, phi
);
1874 SET_PHI_RESULT (phi
, t
);
1875 nphi
= create_phi_node (res
, orig_header
);
1876 add_phi_arg (nphi
, t
, hpred
, UNKNOWN_LOCATION
);
1880 gimple_cond_set_lhs (cond_stmt
, t
);
1881 update_stmt (cond_stmt
);
1886 bbs
= get_loop_body_in_dom_order (loop
);
1888 for (n
= 0; bbs
[n
] != exit
->src
; n
++)
1890 nbbs
= XNEWVEC (basic_block
, n
);
1891 ok
= gimple_duplicate_sese_tail (single_succ_edge (loop
->header
), exit
,
1898 /* Other than reductions, the only gimple reg that should be copied
1899 out of the loop is the control variable. */
1900 exit
= single_dom_exit (loop
);
1901 control_name
= NULL_TREE
;
1902 for (gphi_iterator gsi
= gsi_start_phis (ex_bb
);
1906 res
= PHI_RESULT (phi
);
1907 if (virtual_operand_p (res
))
1913 /* Check if it is a part of reduction. If it is,
1914 keep the phi at the reduction's keep_res field. The
1915 PHI_RESULT of this phi is the resulting value of the reduction
1916 variable when exiting the loop. */
1918 if (reduction_list
->elements () > 0)
1920 struct reduction_info
*red
;
1922 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1923 red
= reduction_phi (reduction_list
, SSA_NAME_DEF_STMT (val
));
1926 red
->keep_res
= phi
;
1931 gcc_assert (control_name
== NULL_TREE
1932 && SSA_NAME_VAR (res
) == SSA_NAME_VAR (control
));
1934 remove_phi_node (&gsi
, false);
1936 gcc_assert (control_name
!= NULL_TREE
);
1938 /* Initialize the control variable to number of iterations
1939 according to the rhs of the exit condition. */
1940 gimple_stmt_iterator gsi
= gsi_after_labels (ex_bb
);
1941 cond_nit
= as_a
<gcond
*> (last_stmt (exit
->src
));
1942 nit_1
= gimple_cond_rhs (cond_nit
);
1943 nit_1
= force_gimple_operand_gsi (&gsi
,
1944 fold_convert (TREE_TYPE (control_name
), nit_1
),
1945 false, NULL_TREE
, false, GSI_SAME_STMT
);
1946 stmt
= gimple_build_assign (control_name
, nit_1
);
1947 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
1950 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1951 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1952 NEW_DATA is the variable that should be initialized from the argument
1953 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1954 basic block containing GIMPLE_OMP_PARALLEL tree. */
1957 create_parallel_loop (struct loop
*loop
, tree loop_fn
, tree data
,
1958 tree new_data
, unsigned n_threads
, location_t loc
)
1960 gimple_stmt_iterator gsi
;
1961 basic_block bb
, paral_bb
, for_bb
, ex_bb
;
1963 gomp_parallel
*omp_par_stmt
;
1964 gimple omp_return_stmt1
, omp_return_stmt2
;
1968 gomp_continue
*omp_cont_stmt
;
1969 tree cvar
, cvar_init
, initvar
, cvar_next
, cvar_base
, type
;
1970 edge exit
, nexit
, guard
, end
, e
;
1972 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1973 bb
= loop_preheader_edge (loop
)->src
;
1974 paral_bb
= single_pred (bb
);
1975 gsi
= gsi_last_bb (paral_bb
);
1977 t
= build_omp_clause (loc
, OMP_CLAUSE_NUM_THREADS
);
1978 OMP_CLAUSE_NUM_THREADS_EXPR (t
)
1979 = build_int_cst (integer_type_node
, n_threads
);
1980 omp_par_stmt
= gimple_build_omp_parallel (NULL
, t
, loop_fn
, data
);
1981 gimple_set_location (omp_par_stmt
, loc
);
1983 gsi_insert_after (&gsi
, omp_par_stmt
, GSI_NEW_STMT
);
1985 /* Initialize NEW_DATA. */
1988 gassign
*assign_stmt
;
1990 gsi
= gsi_after_labels (bb
);
1992 param
= make_ssa_name (DECL_ARGUMENTS (loop_fn
));
1993 assign_stmt
= gimple_build_assign (param
, build_fold_addr_expr (data
));
1994 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
1996 assign_stmt
= gimple_build_assign (new_data
,
1997 fold_convert (TREE_TYPE (new_data
), param
));
1998 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
2001 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2002 bb
= split_loop_exit_edge (single_dom_exit (loop
));
2003 gsi
= gsi_last_bb (bb
);
2004 omp_return_stmt1
= gimple_build_omp_return (false);
2005 gimple_set_location (omp_return_stmt1
, loc
);
2006 gsi_insert_after (&gsi
, omp_return_stmt1
, GSI_NEW_STMT
);
2008 /* Extract data for GIMPLE_OMP_FOR. */
2009 gcc_assert (loop
->header
== single_dom_exit (loop
)->src
);
2010 cond_stmt
= as_a
<gcond
*> (last_stmt (loop
->header
));
2012 cvar
= gimple_cond_lhs (cond_stmt
);
2013 cvar_base
= SSA_NAME_VAR (cvar
);
2014 phi
= SSA_NAME_DEF_STMT (cvar
);
2015 cvar_init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
2016 initvar
= copy_ssa_name (cvar
);
2017 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, loop_preheader_edge (loop
)),
2019 cvar_next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
2021 gsi
= gsi_last_nondebug_bb (loop
->latch
);
2022 gcc_assert (gsi_stmt (gsi
) == SSA_NAME_DEF_STMT (cvar_next
));
2023 gsi_remove (&gsi
, true);
2026 for_bb
= split_edge (loop_preheader_edge (loop
));
2027 ex_bb
= split_loop_exit_edge (single_dom_exit (loop
));
2028 extract_true_false_edges_from_block (loop
->header
, &nexit
, &exit
);
2029 gcc_assert (exit
== single_dom_exit (loop
));
2031 guard
= make_edge (for_bb
, ex_bb
, 0);
2032 single_succ_edge (loop
->latch
)->flags
= 0;
2033 end
= make_edge (loop
->latch
, ex_bb
, EDGE_FALLTHRU
);
2034 for (gphi_iterator gpi
= gsi_start_phis (ex_bb
);
2035 !gsi_end_p (gpi
); gsi_next (&gpi
))
2037 source_location locus
;
2039 gphi
*phi
= gpi
.phi ();
2042 stmt
= as_a
<gphi
*> (
2043 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi
, exit
)));
2045 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_preheader_edge (loop
));
2046 locus
= gimple_phi_arg_location_from_edge (stmt
,
2047 loop_preheader_edge (loop
));
2048 add_phi_arg (phi
, def
, guard
, locus
);
2050 def
= PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (loop
));
2051 locus
= gimple_phi_arg_location_from_edge (stmt
, loop_latch_edge (loop
));
2052 add_phi_arg (phi
, def
, end
, locus
);
2054 e
= redirect_edge_and_branch (exit
, nexit
->dest
);
2055 PENDING_STMT (e
) = NULL
;
2057 /* Emit GIMPLE_OMP_FOR. */
2058 gimple_cond_set_lhs (cond_stmt
, cvar_base
);
2059 type
= TREE_TYPE (cvar
);
2060 t
= build_omp_clause (loc
, OMP_CLAUSE_SCHEDULE
);
2061 OMP_CLAUSE_SCHEDULE_KIND (t
) = OMP_CLAUSE_SCHEDULE_STATIC
;
2063 for_stmt
= gimple_build_omp_for (NULL
, GF_OMP_FOR_KIND_FOR
, t
, 1, NULL
);
2064 gimple_set_location (for_stmt
, loc
);
2065 gimple_omp_for_set_index (for_stmt
, 0, initvar
);
2066 gimple_omp_for_set_initial (for_stmt
, 0, cvar_init
);
2067 gimple_omp_for_set_final (for_stmt
, 0, gimple_cond_rhs (cond_stmt
));
2068 gimple_omp_for_set_cond (for_stmt
, 0, gimple_cond_code (cond_stmt
));
2069 gimple_omp_for_set_incr (for_stmt
, 0, build2 (PLUS_EXPR
, type
,
2071 build_int_cst (type
, 1)));
2073 gsi
= gsi_last_bb (for_bb
);
2074 gsi_insert_after (&gsi
, for_stmt
, GSI_NEW_STMT
);
2075 SSA_NAME_DEF_STMT (initvar
) = for_stmt
;
2077 /* Emit GIMPLE_OMP_CONTINUE. */
2078 gsi
= gsi_last_bb (loop
->latch
);
2079 omp_cont_stmt
= gimple_build_omp_continue (cvar_next
, cvar
);
2080 gimple_set_location (omp_cont_stmt
, loc
);
2081 gsi_insert_after (&gsi
, omp_cont_stmt
, GSI_NEW_STMT
);
2082 SSA_NAME_DEF_STMT (cvar_next
) = omp_cont_stmt
;
2084 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2085 gsi
= gsi_last_bb (ex_bb
);
2086 omp_return_stmt2
= gimple_build_omp_return (true);
2087 gimple_set_location (omp_return_stmt2
, loc
);
2088 gsi_insert_after (&gsi
, omp_return_stmt2
, GSI_NEW_STMT
);
2090 /* After the above dom info is hosed. Re-compute it. */
2091 free_dominance_info (CDI_DOMINATORS
);
2092 calculate_dominance_info (CDI_DOMINATORS
);
2097 /* Generates code to execute the iterations of LOOP in N_THREADS
2098 threads in parallel.
2100 NITER describes number of iterations of LOOP.
2101 REDUCTION_LIST describes the reductions existent in the LOOP. */
2104 gen_parallel_loop (struct loop
*loop
,
2105 reduction_info_table_type
*reduction_list
,
2106 unsigned n_threads
, struct tree_niter_desc
*niter
)
2108 tree many_iterations_cond
, type
, nit
;
2109 tree arg_struct
, new_arg_struct
;
2112 struct clsn_data clsn_data
;
2116 unsigned int m_p_thread
=2;
2120 ---------------------------------------------------------------------
2123 IV = phi (INIT, IV + STEP)
2129 ---------------------------------------------------------------------
2131 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2132 we generate the following code:
2134 ---------------------------------------------------------------------
2137 || NITER < MIN_PER_THREAD * N_THREADS)
2141 store all local loop-invariant variables used in body of the loop to DATA.
2142 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2143 load the variables from DATA.
2144 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2147 GIMPLE_OMP_CONTINUE;
2148 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2149 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2155 IV = phi (INIT, IV + STEP)
2166 /* Create two versions of the loop -- in the old one, we know that the
2167 number of iterations is large enough, and we will transform it into the
2168 loop that will be split to loop_fn, the new one will be used for the
2169 remaining iterations. */
2171 /* We should compute a better number-of-iterations value for outer loops.
2174 for (i = 0; i < n; ++i)
2175 for (j = 0; j < m; ++j)
2178 we should compute nit = n * m, not nit = n.
2179 Also may_be_zero handling would need to be adjusted. */
2181 type
= TREE_TYPE (niter
->niter
);
2182 nit
= force_gimple_operand (unshare_expr (niter
->niter
), &stmts
, true,
2185 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2190 m_p_thread
=MIN_PER_THREAD
;
2192 many_iterations_cond
=
2193 fold_build2 (GE_EXPR
, boolean_type_node
,
2194 nit
, build_int_cst (type
, m_p_thread
* n_threads
));
2196 many_iterations_cond
2197 = fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2198 invert_truthvalue (unshare_expr (niter
->may_be_zero
)),
2199 many_iterations_cond
);
2200 many_iterations_cond
2201 = force_gimple_operand (many_iterations_cond
, &stmts
, false, NULL_TREE
);
2203 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2204 if (!is_gimple_condexpr (many_iterations_cond
))
2206 many_iterations_cond
2207 = force_gimple_operand (many_iterations_cond
, &stmts
,
2210 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2213 initialize_original_copy_tables ();
2215 /* We assume that the loop usually iterates a lot. */
2216 prob
= 4 * REG_BR_PROB_BASE
/ 5;
2217 loop_version (loop
, many_iterations_cond
, NULL
,
2218 prob
, prob
, REG_BR_PROB_BASE
- prob
, true);
2219 update_ssa (TODO_update_ssa
);
2220 free_original_copy_tables ();
2222 /* Base all the induction variables in LOOP on a single control one. */
2223 canonicalize_loop_ivs (loop
, &nit
, true);
2225 /* Ensure that the exit condition is the first statement in the loop.
2226 The common case is that latch of the loop is empty (apart from the
2227 increment) and immediately follows the loop exit test. Attempt to move the
2228 entry of the loop directly before the exit check and increase the number of
2229 iterations of the loop by one. */
2230 if (!try_transform_to_exit_first_loop_alt (loop
, reduction_list
, nit
))
2232 /* Fall back on the method that handles more cases, but duplicates the
2233 loop body: move the exit condition of LOOP to the beginning of its
2234 header, and duplicate the part of the last iteration that gets disabled
2235 to the exit of the loop. */
2236 transform_to_exit_first_loop (loop
, reduction_list
, nit
);
2239 /* Generate initializations for reductions. */
2240 if (reduction_list
->elements () > 0)
2241 reduction_list
->traverse
<struct loop
*, initialize_reductions
> (loop
);
2243 /* Eliminate the references to local variables from the loop. */
2244 gcc_assert (single_exit (loop
));
2245 entry
= loop_preheader_edge (loop
);
2246 exit
= single_dom_exit (loop
);
2248 eliminate_local_variables (entry
, exit
);
2249 /* In the old loop, move all variables non-local to the loop to a structure
2250 and back, and create separate decls for the variables used in loop. */
2251 separate_decls_in_region (entry
, exit
, reduction_list
, &arg_struct
,
2252 &new_arg_struct
, &clsn_data
);
2254 /* Create the parallel constructs. */
2255 loc
= UNKNOWN_LOCATION
;
2256 cond_stmt
= last_stmt (loop
->header
);
2258 loc
= gimple_location (cond_stmt
);
2259 create_parallel_loop (loop
, create_loop_fn (loc
), arg_struct
,
2260 new_arg_struct
, n_threads
, loc
);
2261 if (reduction_list
->elements () > 0)
2262 create_call_for_reduction (loop
, reduction_list
, &clsn_data
);
2266 /* Cancel the loop (it is simpler to do it here rather than to teach the
2267 expander to do it). */
2268 cancel_loop_tree (loop
);
2270 /* Free loop bound estimations that could contain references to
2271 removed statements. */
2272 FOR_EACH_LOOP (loop
, 0)
2273 free_numbers_of_iterations_estimates_loop (loop
);
2276 /* Returns true when LOOP contains vector phi nodes. */
2279 loop_has_vector_phi_nodes (struct loop
*loop ATTRIBUTE_UNUSED
)
2282 basic_block
*bbs
= get_loop_body_in_dom_order (loop
);
2286 for (i
= 0; i
< loop
->num_nodes
; i
++)
2287 for (gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
2288 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi
.phi ()))) == VECTOR_TYPE
)
2297 /* Create a reduction_info struct, initialize it with REDUC_STMT
2298 and PHI, insert it to the REDUCTION_LIST. */
2301 build_new_reduction (reduction_info_table_type
*reduction_list
,
2302 gimple reduc_stmt
, gphi
*phi
)
2304 reduction_info
**slot
;
2305 struct reduction_info
*new_reduction
;
2307 gcc_assert (reduc_stmt
);
2309 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2312 "Detected reduction. reduction stmt is: \n");
2313 print_gimple_stmt (dump_file
, reduc_stmt
, 0, 0);
2314 fprintf (dump_file
, "\n");
2317 new_reduction
= XCNEW (struct reduction_info
);
2319 new_reduction
->reduc_stmt
= reduc_stmt
;
2320 new_reduction
->reduc_phi
= phi
;
2321 new_reduction
->reduc_version
= SSA_NAME_VERSION (gimple_phi_result (phi
));
2322 new_reduction
->reduction_code
= gimple_assign_rhs_code (reduc_stmt
);
2323 slot
= reduction_list
->find_slot (new_reduction
, INSERT
);
2324 *slot
= new_reduction
;
2327 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2330 set_reduc_phi_uids (reduction_info
**slot
, void *data ATTRIBUTE_UNUSED
)
2332 struct reduction_info
*const red
= *slot
;
/* Stash the reduction's SSA version number in the PHI's uid field so the
   PHI can be mapped back to its reduction_info entry later.  */
2333 gimple_set_uid (red
->reduc_phi
, red
->reduc_version
);
2337 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2340 gather_scalar_reductions (loop_p loop
, reduction_info_table_type
*reduction_list
)
2343 loop_vec_info simple_loop_info
;
/* Reuse the vectorizer's loop-form analysis.  Judging from the NULL
   guard below, it can fail and return NULL for loops whose shape it
   cannot analyze.  */
2345 simple_loop_info
= vect_analyze_loop_form (loop
);
/* Walk every PHI node in the loop header.  */
2347 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2349 gphi
*phi
= gsi
.phi ();
2351 tree res
= PHI_RESULT (phi
);
/* Virtual (memory) PHIs cannot be scalar reductions; skip them.  */
2354 if (virtual_operand_p (res
))
/* A header PHI that is not a simple induction variable is a reduction
   candidate -- let the vectorizer machinery classify it.  */
2357 if (!simple_iv (loop
, loop
, res
, &iv
, true)
2358 && simple_loop_info
)
2360 gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info
,
/* Statements flagged as double reductions are not handled here.  */
2363 if (reduc_stmt
&& !double_reduc
)
2364 build_new_reduction (reduction_list
, reduc_stmt
, phi
);
2367 destroy_loop_vec_info (simple_loop_info
, true);
2369 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2370 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2372 reduction_list
->traverse
<void *, set_reduc_phi_uids
> (NULL
);
2375 /* Try to initialize NITER for code generation part. */
2378 try_get_loop_niter (loop_p loop
, struct tree_niter_desc
*niter
)
/* single_dom_exit yields the loop's single exit edge; the caller has
   already checked it exists (see the !single_dom_exit filter in
   parallelize_loops).  */
2380 edge exit
= single_dom_exit (loop
);
2384 /* We need to know # of iterations, and there should be no uses of values
2385 defined inside loop outside of it, unless the values are invariants of
2387 if (!number_of_iterations_exit (loop
, exit
, niter
, false))
/* Without a computable iteration count the loop cannot be rewritten as
   a counted OMP-style loop; note the failure in the details dump.  */
2389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2390 fprintf (dump_file
, " FAILED: number of iterations not known\n");
2397 /* Try to initialize REDUCTION_LIST for code generation part.
2398 REDUCTION_LIST describes the reductions. */
2401 try_create_reduction_list (loop_p loop
,
2402 reduction_info_table_type
*reduction_list
)
2404 edge exit
= single_dom_exit (loop
);
/* First harvest every reduction of LOOP into REDUCTION_LIST.  */
2409 gather_scalar_reductions (loop
, reduction_list
);
/* Phase 1: every scalar value that is live after the loop (i.e. flows
   through a PHI in the exit destination block) must be the result of a
   known reduction, otherwise iterations are not independent.  */
2412 for (gsi
= gsi_start_phis (exit
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2414 gphi
*phi
= gsi
.phi ();
2415 struct reduction_info
*red
;
2416 imm_use_iterator imm_iter
;
2417 use_operand_p use_p
;
/* The value carried out of the loop along the exit edge.  */
2419 tree val
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2421 if (!virtual_operand_p (val
))
2423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2425 fprintf (dump_file
, "phi is ");
2426 print_gimple_stmt (dump_file
, phi
, 0, 0);
2427 fprintf (dump_file
, "arg of phi to exit: value ");
2428 print_generic_expr (dump_file
, val
, 0);
2429 fprintf (dump_file
, " used outside loop\n");
2431 " checking if it a part of reduction pattern: \n");
/* A value escapes the loop but no reductions were detected at all:
   cannot parallelize.  */
2433 if (reduction_list
->elements () == 0)
2435 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2437 " FAILED: it is not a part of reduction.\n");
/* Locate the use of VAL inside the loop (ignoring debug binds); for a
   reduction this should be the reduction PHI.  */
2441 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, val
)
2443 if (!gimple_debug_bind_p (USE_STMT (use_p
))
2444 && flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
2446 reduc_phi
= USE_STMT (use_p
);
/* Look the candidate PHI up in the reduction table; failure means the
   escaping value is not part of any recognized reduction.  */
2450 red
= reduction_phi (reduction_list
, reduc_phi
);
2453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2455 " FAILED: it is not a part of reduction.\n");
2458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2460 fprintf (dump_file
, "reduction phi is ");
2461 print_gimple_stmt (dump_file
, red
->reduc_phi
, 0, 0);
2462 fprintf (dump_file
, "reduction stmt is ");
2463 print_gimple_stmt (dump_file
, red
->reduc_stmt
, 0, 0);
2468 /* The iterations of the loop may communicate only through bivs whose
2469 iteration space can be distributed efficiently. */
/* Phase 2: every non-virtual header PHI must be either a simple
   induction variable or a recognized reduction.  */
2470 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2472 gphi
*phi
= gsi
.phi ();
2473 tree def
= PHI_RESULT (phi
);
2476 if (!virtual_operand_p (def
) && !simple_iv (loop
, loop
, def
, &iv
, true))
2478 struct reduction_info
*red
;
2480 red
= reduction_phi (reduction_list
, phi
);
2483 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2485 " FAILED: scalar dependency between iterations\n");
2495 /* Detect parallel loops and generate parallel code using libgomp
2496 primitives. Returns true if some loop was parallelized, false
2500 parallelize_loops (void)
/* Number of threads requested via -ftree-parallelize-loops=N.  */
2502 unsigned n_threads
= flag_tree_parallelize_loops
;
2503 bool changed
= false;
2505 struct tree_niter_desc niter_desc
;
2506 struct obstack parloop_obstack
;
2507 HOST_WIDE_INT estimated
;
2508 source_location loop_loc
;
2510 /* Do not parallelize loops in the functions created by parallelization. */
2511 if (parallelized_function_p (cfun
->decl
))
/* Nonlocal labels make outlining the loop body unsafe; bail out.  */
2513 if (cfun
->has_nonlocal_label
)
2516 gcc_obstack_init (&parloop_obstack
);
/* Reduction table shared across all loops, emptied per candidate.  */
2517 reduction_info_table_type
reduction_list (10);
2518 init_stmt_vec_info_vec ();
2520 FOR_EACH_LOOP (loop
, 0)
2522 reduction_list
.empty ();
2523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2525 fprintf (dump_file
, "Trying loop %d as candidate\n",loop
->num
);
2527 fprintf (dump_file
, "loop %d is not innermost\n",loop
->num
);
2529 fprintf (dump_file
, "loop %d is innermost\n",loop
->num
);
2532 /* If we use autopar in graphite pass, we use its marked dependency
2533 checking results. */
2534 if (flag_loop_parallelize_all
&& !loop
->can_be_parallel
)
2536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2537 fprintf (dump_file
, "loop is not parallel according to graphite\n");
/* Codegen requires exactly one exit that dominates the latch.  */
2541 if (!single_dom_exit (loop
))
2544 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2545 fprintf (dump_file
, "loop is !single_dom_exit\n");
2550 if (/* And of course, the loop must be parallelizable. */
2551 !can_duplicate_loop_p (loop
)
2552 || loop_has_blocks_with_irreducible_flag (loop
)
2553 || (loop_preheader_edge (loop
)->src
->flags
& BB_IRREDUCIBLE_LOOP
)
2554 /* FIXME: the check for vector phi nodes could be removed. */
2555 || loop_has_vector_phi_nodes (loop
))
/* Profitability: estimate how many statements the loop executes;
   fall back to the worst-case bound when no estimate is recorded.  */
2558 estimated
= estimated_stmt_executions_int (loop
);
2559 if (estimated
== -1)
2560 estimated
= max_stmt_executions_int (loop
);
2561 /* FIXME: Bypass this check as graphite doesn't update the
2562 count and frequency correctly now. */
2563 if (!flag_loop_parallelize_all
2564 && ((estimated
!= -1
2565 && estimated
<= (HOST_WIDE_INT
) n_threads
* MIN_PER_THREAD
)
2566 /* Do not bother with loops in cold areas. */
2567 || optimize_loop_nest_for_size_p (loop
)))
/* Correctness: known iteration count, reductions accounted for, and
   (unless graphite already proved it) no loop-carried dependences.  */
2570 if (!try_get_loop_niter (loop
, &niter_desc
))
2573 if (!try_create_reduction_list (loop
, &reduction_list
))
2576 if (!flag_loop_parallelize_all
2577 && !loop_parallel_p (loop
, &parloop_obstack
))
2581 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2584 fprintf (dump_file
, "parallelizing outer loop %d\n",loop
->header
->index
);
2586 fprintf (dump_file
, "parallelizing inner loop %d\n",loop
->header
->index
);
2587 loop_loc
= find_loop_location (loop
);
2588 if (loop_loc
!= UNKNOWN_LOCATION
)
2589 fprintf (dump_file
, "\nloop at %s:%d: ",
2590 LOCATION_FILE (loop_loc
), LOCATION_LINE (loop_loc
));
/* All checks passed: rewrite the loop with GIMPLE_OMP_PARALLEL /
   GIMPLE_OMP_FOR and let omp expansion do the rest.  */
2592 gen_parallel_loop (loop
, &reduction_list
,
2593 n_threads
, &niter_desc
);
2596 free_stmt_vec_info_vec ();
2597 obstack_free (&parloop_obstack
, NULL
);
2599 /* Parallelization will cause new function calls to be inserted through
2600 which local variables will escape. Reset the points-to solution
2603 pt_solution_reset (&cfun
->gimple_df
->escaped
);
2608 /* Parallelization. */
/* Static descriptor for the "parloops" GIMPLE pass: it runs under the
   TV_TREE_PARALLELIZE_LOOPS timevar and requires a CFG in SSA form.  */
2612 const pass_data pass_data_parallelize_loops
=
2614 GIMPLE_PASS
, /* type */
2615 "parloops", /* name */
2616 OPTGROUP_LOOP
, /* optinfo_flags */
2617 TV_TREE_PARALLELIZE_LOOPS
, /* tv_id */
2618 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2619 0, /* properties_provided */
2620 0, /* properties_destroyed */
2621 0, /* todo_flags_start */
2622 0, /* todo_flags_finish */
/* Pass wrapper: gates on -ftree-parallelize-loops=N with N > 1 (a single
   thread would gain nothing) and delegates the work to execute below.  */
2625 class pass_parallelize_loops
: public gimple_opt_pass
2628 pass_parallelize_loops (gcc::context
*ctxt
)
2629 : gimple_opt_pass (pass_data_parallelize_loops
, ctxt
)
2632 /* opt_pass methods: */
2633 virtual bool gate (function
*) { return flag_tree_parallelize_loops
> 1; }
2634 virtual unsigned int execute (function
*);
2636 }; // class pass_parallelize_loops
/* Run loop parallelization on FUN.  Does nothing for functions with at
   most one loop (number_of_loops counts the artificial outermost loop,
   so <= 1 means no real loops -- TODO confirm against loop.h).  */
2639 pass_parallelize_loops::execute (function
*fun
)
2641 if (number_of_loops (fun
) <= 1)
2644 if (parallelize_loops ())
/* New OMP constructs were inserted, so the function no longer has the
   "OMP expanded" property; request an SSA update.  */
2646 fun
->curr_properties
&= ~(PROP_gimple_eomp
);
2647 return TODO_update_ssa
;
/* Factory used by the pass manager to instantiate the pass.  */
2656 make_pass_parallelize_loops (gcc::context
*ctxt
)
2658 return new pass_parallelize_loops (ctxt
);