]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
arm: Remove empty directory.
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
ddb555ed 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
c75c517d 3 Free Software Foundation, Inc.
5f40b3cb
ZD
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
6da7fc87 11Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
6da7fc87
NC
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
5f40b3cb
ZD
26#include "tree-flow.h"
27#include "cfgloop.h"
5f40b3cb 28#include "tree-data-ref.h"
1bd6497c 29#include "tree-scalar-evolution.h"
cf835838 30#include "gimple-pretty-print.h"
5f40b3cb 31#include "tree-pass.h"
5f40b3cb 32#include "langhooks.h"
a509ebb5 33#include "tree-vectorizer.h"
5f40b3cb
ZD
34
35/* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
b8698a0f 41
5f40b3cb
ZD
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
726a989a
RB
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
52
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common scalar dependence patterns (accumulation, ...)
58 -- handling of non-innermost loops */
59
b8698a0f 60/*
a509ebb5 61 Reduction handling:
8a9ecffd 62 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 63 The code transformation will be introduced by an example.
b8698a0f
L
64
65
a509ebb5
RL
66parloop
67{
68 int sum=1;
69
0eb7e7aa 70 for (i = 0; i < N; i++)
a509ebb5
RL
71 {
72 x[i] = i + 3;
73 sum+=x[i];
74 }
75}
76
0eb7e7aa 77gimple-like code:
a509ebb5
RL
78header_bb:
79
0eb7e7aa
RL
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
88
a509ebb5
RL
89
90exit_bb:
91
0eb7e7aa
RL
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
a509ebb5
RL
94
95
96after reduction transformation (only relevant parts):
97
98parloop
99{
100
101....
102
0eb7e7aa 103
fa10beec 104 # Storing the initial value given by the user. #
0eb7e7aa 105
ae0bce62 106 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
107
108 #pragma omp parallel num_threads(4)
a509ebb5 109
0eb7e7aa 110 #pragma omp for schedule(static)
ae0bce62
RL
111
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
115
116 # sum.27_29 = PHI <sum.27_11, 0>
117
0eb7e7aa 118 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 119
726a989a 120 GIMPLE_OMP_CONTINUE
a509ebb5 121
0eb7e7aa
RL
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 124 GIMPLE_OMP_RETURN
b8698a0f
L
125
126 # Creating the atomic operation is done at
0eb7e7aa 127 create_call_for_reduction_1() #
a509ebb5 128
0eb7e7aa
RL
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
b8698a0f 133
726a989a 134 GIMPLE_OMP_RETURN
b8698a0f 135
0eb7e7aa
RL
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
ae0bce62
RL
138 The value computed by the threads is loaded from the
139 shared struct. #
140
b8698a0f 141
0eb7e7aa 142 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 143 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
144 sum_43 = D.1795_41 + sum_37;
145
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
149
150...
151
a509ebb5
RL
152}
153
154*/
155
5f40b3cb
ZD
156/* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158#define MIN_PER_THREAD 100
159
b8698a0f 160/* Element of the hashtable, representing a
a509ebb5
RL
161 reduction in the current loop. */
162struct reduction_info
163{
726a989a
RB
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
b8698a0f 169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 170 of the reduction variable when existing the loop. */
ae0bce62 171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 173 tree init; /* reduction initialization value. */
b8698a0f 174 gimple new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
178};
179
180/* Equality and hash functions for hashtab code. */
181
182static int
183reduction_info_eq (const void *aa, const void *bb)
184{
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
187
188 return (a->reduc_phi == b->reduc_phi);
189}
190
191static hashval_t
192reduction_info_hash (const void *aa)
193{
194 const struct reduction_info *a = (const struct reduction_info *) aa;
195
5d1fd1de 196 return a->reduc_version;
a509ebb5
RL
197}
198
199static struct reduction_info *
726a989a 200reduction_phi (htab_t reduction_list, gimple phi)
a509ebb5
RL
201{
202 struct reduction_info tmpred, *red;
203
87ebde38 204 if (htab_elements (reduction_list) == 0 || phi == NULL)
a509ebb5
RL
205 return NULL;
206
207 tmpred.reduc_phi = phi;
5d1fd1de 208 tmpred.reduc_version = gimple_uid (phi);
3d9a9f94 209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
a509ebb5
RL
210
211 return red;
212}
213
5f40b3cb
ZD
214/* Element of hashtable of names to copy. */
215
216struct name_to_copy_elt
217{
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
222};
223
224/* Equality and hash functions for hashtab code. */
225
226static int
227name_to_copy_elt_eq (const void *aa, const void *bb)
228{
a509ebb5
RL
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
5f40b3cb
ZD
231
232 return a->version == b->version;
233}
234
235static hashval_t
236name_to_copy_elt_hash (const void *aa)
237{
a509ebb5 238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
5f40b3cb
ZD
239
240 return (hashval_t) a->version;
241}
242
b305e3da
SP
243/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246typedef struct lambda_trans_matrix_s
247{
248 lambda_matrix matrix;
249 int rowsize;
250 int colsize;
251 int denominator;
252} *lambda_trans_matrix;
253#define LTM_MATRIX(T) ((T)->matrix)
254#define LTM_ROWSIZE(T) ((T)->rowsize)
255#define LTM_COLSIZE(T) ((T)->colsize)
256#define LTM_DENOMINATOR(T) ((T)->denominator)
257
258/* Allocate a new transformation matrix. */
259
260static lambda_trans_matrix
261lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
263{
264 lambda_trans_matrix ret;
265
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
273}
274
275/* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
278
279static void
280lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
282{
283 int i, j;
284
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
289}
290
291/* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
293 is false.
294
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
303
304static bool
305lambda_transform_legal_p (lambda_trans_matrix trans,
306 int nb_loops,
307 VEC (ddr_p, heap) *dependence_relations)
308{
309 unsigned int i, j;
310 lambda_vector distres;
311 struct data_dependence_relation *ddr;
312
313 gcc_assert (LTM_COLSIZE (trans) == nb_loops
314 && LTM_ROWSIZE (trans) == nb_loops);
315
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p, dependence_relations) == 0)
318 return true;
319
320 ddr = VEC_index (ddr_p, dependence_relations, 0);
321 if (ddr == NULL)
322 return true;
323
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327 return false;
328
329 distres = lambda_vector_new (nb_loops);
330
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333 {
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339 continue;
340
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343 return false;
344
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr) == 0)
348 return false;
349
350 /* Compute trans.dist_vect */
351 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 {
353 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354 DDR_DIST_VECT (ddr, j), distres);
355
356 if (!lambda_vector_lexico_pos (distres, nb_loops))
357 return false;
358 }
359 }
360 return true;
361}
08dab97a
RL
362
363/* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent on each other (that is, if we can execute them
365 in parallel). */
5f40b3cb
ZD
366
367static bool
f873b205 368loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 369{
01be8516
SP
370 VEC (loop_p, heap) *loop_nest;
371 VEC (ddr_p, heap) *dependence_relations;
726a989a 372 VEC (data_reference_p, heap) *datarefs;
5f40b3cb
ZD
373 lambda_trans_matrix trans;
374 bool ret = false;
5f40b3cb
ZD
375
376 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
377 {
378 fprintf (dump_file, "Considering loop %d\n", loop->num);
379 if (!loop->inner)
380 fprintf (dump_file, "loop is innermost\n");
b8698a0f 381 else
48710229
RL
382 fprintf (dump_file, "loop NOT innermost\n");
383 }
5f40b3cb 384
5f40b3cb
ZD
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs = VEC_alloc (data_reference_p, heap, 10);
388 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
01be8516
SP
389 loop_nest = VEC_alloc (loop_p, heap, 3);
390 compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
5f40b3cb
ZD
391 &dependence_relations);
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 dump_data_dependence_relations (dump_file, dependence_relations);
394
f873b205 395 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
396 LTM_MATRIX (trans)[0][0] = -1;
397
398 if (lambda_transform_legal_p (trans, 1, dependence_relations))
399 {
400 ret = true;
401 if (dump_file && (dump_flags & TDF_DETAILS))
402 fprintf (dump_file, " SUCCESS: may be parallelized\n");
403 }
404 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
405 fprintf (dump_file,
406 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 407
01be8516 408 VEC_free (loop_p, heap, loop_nest);
5f40b3cb
ZD
409 free_dependence_relations (dependence_relations);
410 free_data_refs (datarefs);
411
412 return ret;
413}
414
1d4af1e8
SP
415/* Return true when LOOP contains basic blocks marked with the
416 BB_IRREDUCIBLE_LOOP flag. */
417
418static inline bool
419loop_has_blocks_with_irreducible_flag (struct loop *loop)
420{
421 unsigned i;
422 basic_block *bbs = get_loop_body_in_dom_order (loop);
423 bool res = true;
424
425 for (i = 0; i < loop->num_nodes; i++)
426 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
427 goto end;
428
429 res = false;
430 end:
431 free (bbs);
432 return res;
433}
434
8a171a59 435/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 436 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 437 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
438 be invariant in the whole function. Other needed statements are placed
439 right before GSI. */
5f40b3cb
ZD
440
441static tree
cba1eb61
JJ
442take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
443 gimple_stmt_iterator *gsi)
5f40b3cb 444{
8a171a59 445 int uid;
5f40b3cb
ZD
446 void **dslot;
447 struct int_tree_map ielt, *nielt;
726a989a
RB
448 tree *var_p, name, bvar, addr;
449 gimple stmt;
450 gimple_seq stmts;
5f40b3cb 451
8a171a59
ZD
452 /* Since the address of OBJ is invariant, the trees may be shared.
453 Avoid rewriting unrelated parts of the code. */
454 obj = unshare_expr (obj);
455 for (var_p = &obj;
456 handled_component_p (*var_p);
457 var_p = &TREE_OPERAND (*var_p, 0))
458 continue;
8a171a59 459
c9a410f0
RG
460 /* Canonicalize the access to base on a MEM_REF. */
461 if (DECL_P (*var_p))
462 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
463
464 /* Assign a canonical SSA name to the address of the base decl used
465 in the address and share it for all accesses and addresses based
466 on it. */
467 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
5f40b3cb
ZD
468 ielt.uid = uid;
469 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
470 if (!*dslot)
471 {
cba1eb61
JJ
472 if (gsi == NULL)
473 return NULL;
c9a410f0
RG
474 addr = TREE_OPERAND (*var_p, 0);
475 bvar = create_tmp_var (TREE_TYPE (addr),
476 get_name (TREE_OPERAND
477 (TREE_OPERAND (*var_p, 0), 0)));
5f40b3cb 478 add_referenced_var (bvar);
726a989a 479 stmt = gimple_build_assign (bvar, addr);
5f40b3cb 480 name = make_ssa_name (bvar, stmt);
726a989a
RB
481 gimple_assign_set_lhs (stmt, name);
482 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb
ZD
483
484 nielt = XNEW (struct int_tree_map);
485 nielt->uid = uid;
486 nielt->to = name;
487 *dslot = nielt;
5f40b3cb 488 }
8a171a59
ZD
489 else
490 name = ((struct int_tree_map *) *dslot)->to;
5f40b3cb 491
c9a410f0
RG
492 /* Express the address in terms of the canonical SSA name. */
493 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
494 if (gsi == NULL)
495 return build_fold_addr_expr_with_type (obj, type);
496
c9a410f0
RG
497 name = force_gimple_operand (build_addr (obj, current_function_decl),
498 &stmts, true, NULL_TREE);
499 if (!gimple_seq_empty_p (stmts))
cba1eb61 500 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 501
c9a410f0 502 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 503 {
726a989a 504 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 505 NULL_TREE);
726a989a 506 if (!gimple_seq_empty_p (stmts))
cba1eb61 507 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 508 }
5f40b3cb
ZD
509
510 return name;
511}
512
a509ebb5 513/* Callback for htab_traverse. Create the initialization statement
b8698a0f 514 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
515 the loop described in DATA. */
516
517static int
518initialize_reductions (void **slot, void *data)
519{
a509ebb5 520 tree init, c;
a509ebb5
RL
521 tree bvar, type, arg;
522 edge e;
523
3d9a9f94 524 struct reduction_info *const reduc = (struct reduction_info *) *slot;
a509ebb5
RL
525 struct loop *loop = (struct loop *) data;
526
b8698a0f 527 /* Create initialization in preheader:
a509ebb5
RL
528 reduction_variable = initialization value of reduction. */
529
b8698a0f 530 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
531 from the preheader with the reduction initialization value. */
532
533 /* Create a new variable to initialize the reduction. */
534 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
535 bvar = create_tmp_var (type, "reduction");
536 add_referenced_var (bvar);
537
c2255bc4
AH
538 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
539 OMP_CLAUSE_REDUCTION);
a509ebb5 540 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 541 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
542
543 init = omp_reduction_init (c, TREE_TYPE (bvar));
544 reduc->init = init;
545
b8698a0f
L
546 /* Replace the argument representing the initialization value
547 with the initialization value for the reduction (neutral
548 element for the particular operation, e.g. 0 for PLUS_EXPR,
549 1 for MULT_EXPR, etc).
550 Keep the old value in a new variable "reduction_initial",
551 that will be taken in consideration after the parallel
0eb7e7aa 552 computing is done. */
a509ebb5
RL
553
554 e = loop_preheader_edge (loop);
555 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
556 /* Create new variable to hold the initial value. */
a509ebb5 557
a509ebb5 558 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 559 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 560 reduc->initial_value = arg;
a509ebb5
RL
561 return 1;
562}
5f40b3cb
ZD
563
564struct elv_data
565{
726a989a 566 struct walk_stmt_info info;
9f9f72aa 567 edge entry;
5f40b3cb 568 htab_t decl_address;
cba1eb61 569 gimple_stmt_iterator *gsi;
5f40b3cb 570 bool changed;
cba1eb61 571 bool reset;
5f40b3cb
ZD
572};
573
9f9f72aa
AP
574/* Eliminates references to local variables in *TP out of the single
575 entry single exit region starting at DTA->ENTRY.
576 DECL_ADDRESS contains addresses of the references that had their
577 address taken already. If the expression is changed, CHANGED is
578 set to true. Callback for walk_tree. */
a509ebb5 579
5f40b3cb 580static tree
8a171a59 581eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 582{
3d9a9f94 583 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 584 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
585
586 if (DECL_P (t))
587 {
588 *walk_subtrees = 0;
589
590 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
591 return NULL_TREE;
592
593 type = TREE_TYPE (t);
594 addr_type = build_pointer_type (type);
cba1eb61
JJ
595 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
596 dta->gsi);
597 if (dta->gsi == NULL && addr == NULL_TREE)
598 {
599 dta->reset = true;
600 return NULL_TREE;
601 }
602
70f34814 603 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
604
605 dta->changed = true;
606 return NULL_TREE;
607 }
608
609 if (TREE_CODE (t) == ADDR_EXPR)
610 {
8a171a59
ZD
611 /* ADDR_EXPR may appear in two contexts:
612 -- as a gimple operand, when the address taken is a function invariant
613 -- as gimple rhs, when the resulting address in not a function
614 invariant
615 We do not need to do anything special in the latter case (the base of
616 the memory reference whose address is taken may be replaced in the
617 DECL_P case). The former case is more complicated, as we need to
618 ensure that the new address is still a gimple operand. Thus, it
619 is not sufficient to replace just the base of the memory reference --
620 we need to move the whole computation of the address out of the
621 loop. */
622 if (!is_gimple_val (t))
5f40b3cb
ZD
623 return NULL_TREE;
624
625 *walk_subtrees = 0;
8a171a59
ZD
626 obj = TREE_OPERAND (t, 0);
627 var = get_base_address (obj);
628 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
629 return NULL_TREE;
630
631 addr_type = TREE_TYPE (t);
cba1eb61
JJ
632 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
633 dta->gsi);
634 if (dta->gsi == NULL && addr == NULL_TREE)
635 {
636 dta->reset = true;
637 return NULL_TREE;
638 }
5f40b3cb
ZD
639 *tp = addr;
640
641 dta->changed = true;
642 return NULL_TREE;
643 }
644
726a989a 645 if (!EXPR_P (t))
5f40b3cb
ZD
646 *walk_subtrees = 0;
647
648 return NULL_TREE;
649}
650
cba1eb61 651/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
652 entry single exit region starting at ENTRY. DECL_ADDRESS contains
653 addresses of the references that had their address taken
654 already. */
5f40b3cb
ZD
655
656static void
cba1eb61 657eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
5f40b3cb
ZD
658 htab_t decl_address)
659{
660 struct elv_data dta;
cba1eb61 661 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 662
726a989a 663 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 664 dta.entry = entry;
5f40b3cb
ZD
665 dta.decl_address = decl_address;
666 dta.changed = false;
cba1eb61 667 dta.reset = false;
5f40b3cb 668
b5b8b0ac 669 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
670 {
671 dta.gsi = NULL;
672 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
673 eliminate_local_variables_1, &dta.info, NULL);
674 if (dta.reset)
675 {
676 gimple_debug_bind_reset_value (stmt);
677 dta.changed = true;
678 }
679 }
b5b8b0ac 680 else
cba1eb61
JJ
681 {
682 dta.gsi = gsi;
683 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
684 }
5f40b3cb
ZD
685
686 if (dta.changed)
687 update_stmt (stmt);
688}
689
9f9f72aa
AP
690/* Eliminates the references to local variables from the single entry
691 single exit region between the ENTRY and EXIT edges.
b8698a0f 692
a509ebb5 693 This includes:
b8698a0f
L
694 1) Taking address of a local variable -- these are moved out of the
695 region (and temporary variable is created to hold the address if
a509ebb5 696 necessary).
9f9f72aa 697
5f40b3cb 698 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 699 references. */
5f40b3cb
ZD
700
701static void
9f9f72aa 702eliminate_local_variables (edge entry, edge exit)
5f40b3cb 703{
9f9f72aa
AP
704 basic_block bb;
705 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
5f40b3cb 706 unsigned i;
726a989a 707 gimple_stmt_iterator gsi;
cba1eb61 708 bool has_debug_stmt = false;
5f40b3cb
ZD
709 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
710 free);
9f9f72aa
AP
711 basic_block entry_bb = entry->src;
712 basic_block exit_bb = exit->dest;
5f40b3cb 713
9f9f72aa 714 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 715
ac47786e 716 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 717 if (bb != entry_bb && bb != exit_bb)
726a989a 718 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
719 if (is_gimple_debug (gsi_stmt (gsi)))
720 {
721 if (gimple_debug_bind_p (gsi_stmt (gsi)))
722 has_debug_stmt = true;
723 }
cba1eb61
JJ
724 else
725 eliminate_local_variables_stmt (entry, &gsi, decl_address);
726
727 if (has_debug_stmt)
728 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
729 if (bb != entry_bb && bb != exit_bb)
730 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
731 if (gimple_debug_bind_p (gsi_stmt (gsi)))
732 eliminate_local_variables_stmt (entry, &gsi, decl_address);
5f40b3cb
ZD
733
734 htab_delete (decl_address);
9f9f72aa
AP
735 VEC_free (basic_block, heap, body);
736}
737
738/* Returns true if expression EXPR is not defined between ENTRY and
739 EXIT, i.e. if all its operands are defined outside of the region. */
740
741static bool
742expr_invariant_in_region_p (edge entry, edge exit, tree expr)
743{
744 basic_block entry_bb = entry->src;
745 basic_block exit_bb = exit->dest;
746 basic_block def_bb;
9f9f72aa
AP
747
748 if (is_gimple_min_invariant (expr))
749 return true;
750
751 if (TREE_CODE (expr) == SSA_NAME)
752 {
726a989a 753 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
754 if (def_bb
755 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
756 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
757 return false;
758
759 return true;
760 }
761
726a989a 762 return false;
5f40b3cb
ZD
763}
764
765/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
766 The copies are stored to NAME_COPIES, if NAME was already duplicated,
767 its duplicate stored in NAME_COPIES is returned.
b8698a0f 768
5f40b3cb
ZD
769 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
770 duplicated, storing the copies in DECL_COPIES. */
771
772static tree
9f9f72aa
AP
773separate_decls_in_region_name (tree name,
774 htab_t name_copies, htab_t decl_copies,
775 bool copy_name_p)
5f40b3cb
ZD
776{
777 tree copy, var, var_copy;
778 unsigned idx, uid, nuid;
779 struct int_tree_map ielt, *nielt;
780 struct name_to_copy_elt elt, *nelt;
781 void **slot, **dslot;
782
783 if (TREE_CODE (name) != SSA_NAME)
784 return name;
785
786 idx = SSA_NAME_VERSION (name);
787 elt.version = idx;
788 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
789 copy_name_p ? INSERT : NO_INSERT);
790 if (slot && *slot)
791 return ((struct name_to_copy_elt *) *slot)->new_name;
792
793 var = SSA_NAME_VAR (name);
794 uid = DECL_UID (var);
795 ielt.uid = uid;
796 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
797 if (!*dslot)
798 {
799 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 800 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
801 add_referenced_var (var_copy);
802 nielt = XNEW (struct int_tree_map);
803 nielt->uid = uid;
804 nielt->to = var_copy;
805 *dslot = nielt;
806
807 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 808 it again. */
5f40b3cb
ZD
809 nuid = DECL_UID (var_copy);
810 ielt.uid = nuid;
811 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
812 gcc_assert (!*dslot);
813 nielt = XNEW (struct int_tree_map);
814 nielt->uid = nuid;
815 nielt->to = var_copy;
816 *dslot = nielt;
817 }
818 else
819 var_copy = ((struct int_tree_map *) *dslot)->to;
820
821 if (copy_name_p)
822 {
726a989a 823 copy = duplicate_ssa_name (name, NULL);
5f40b3cb
ZD
824 nelt = XNEW (struct name_to_copy_elt);
825 nelt->version = idx;
826 nelt->new_name = copy;
827 nelt->field = NULL_TREE;
828 *slot = nelt;
829 }
830 else
831 {
832 gcc_assert (!slot);
833 copy = name;
834 }
835
836 SSA_NAME_VAR (copy) = var_copy;
837 return copy;
838}
839
9f9f72aa
AP
840/* Finds the ssa names used in STMT that are defined outside the
841 region between ENTRY and EXIT and replaces such ssa names with
842 their duplicates. The duplicates are stored to NAME_COPIES. Base
843 decls of all ssa names used in STMT (including those defined in
844 LOOP) are replaced with the new temporary variables; the
845 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
846
847static void
726a989a 848separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
9f9f72aa 849 htab_t name_copies, htab_t decl_copies)
5f40b3cb
ZD
850{
851 use_operand_p use;
852 def_operand_p def;
853 ssa_op_iter oi;
854 tree name, copy;
855 bool copy_name_p;
856
857 mark_virtual_ops_for_renaming (stmt);
858
859 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
860 {
861 name = DEF_FROM_PTR (def);
862 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
863 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
864 false);
a509ebb5
RL
865 gcc_assert (copy == name);
866 }
5f40b3cb
ZD
867
868 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
869 {
870 name = USE_FROM_PTR (use);
871 if (TREE_CODE (name) != SSA_NAME)
872 continue;
873
9f9f72aa
AP
874 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
875 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
876 copy_name_p);
a509ebb5
RL
877 SET_USE (use, copy);
878 }
5f40b3cb
ZD
879}
880
b5b8b0ac
AO
881/* Finds the ssa names used in STMT that are defined outside the
882 region between ENTRY and EXIT and replaces such ssa names with
883 their duplicates. The duplicates are stored to NAME_COPIES. Base
884 decls of all ssa names used in STMT (including those defined in
885 LOOP) are replaced with the new temporary variables; the
886 replacement decls are stored in DECL_COPIES. */
887
888static bool
ddb555ed
JJ
889separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
890 htab_t decl_copies)
b5b8b0ac
AO
891{
892 use_operand_p use;
893 ssa_op_iter oi;
894 tree var, name;
895 struct int_tree_map ielt;
896 struct name_to_copy_elt elt;
897 void **slot, **dslot;
898
ddb555ed
JJ
899 if (gimple_debug_bind_p (stmt))
900 var = gimple_debug_bind_get_var (stmt);
901 else if (gimple_debug_source_bind_p (stmt))
902 var = gimple_debug_source_bind_get_var (stmt);
903 else
904 return true;
4f2a9af8
JJ
905 if (TREE_CODE (var) == DEBUG_EXPR_DECL)
906 return true;
b5b8b0ac
AO
907 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
908 ielt.uid = DECL_UID (var);
909 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
910 if (!dslot)
911 return true;
ddb555ed
JJ
912 if (gimple_debug_bind_p (stmt))
913 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
914 else if (gimple_debug_source_bind_p (stmt))
915 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
b5b8b0ac
AO
916
917 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
918 {
919 name = USE_FROM_PTR (use);
920 if (TREE_CODE (name) != SSA_NAME)
921 continue;
922
923 elt.version = SSA_NAME_VERSION (name);
924 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
925 if (!slot)
926 {
927 gimple_debug_bind_reset_value (stmt);
928 update_stmt (stmt);
929 break;
930 }
931
932 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
933 }
934
935 return false;
936}
937
0eb7e7aa
RL
938/* Callback for htab_traverse. Adds a field corresponding to the reduction
939 specified in SLOT. The type is passed in DATA. */
940
941static int
942add_field_for_reduction (void **slot, void *data)
a509ebb5 943{
b8698a0f 944
3d9a9f94
KG
945 struct reduction_info *const red = (struct reduction_info *) *slot;
946 tree const type = (tree) data;
726a989a 947 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
c2255bc4
AH
948 tree field = build_decl (gimple_location (red->reduc_stmt),
949 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
0eb7e7aa
RL
950
951 insert_field_into_struct (type, field);
952
953 red->field = field;
954
955 return 1;
956}
a509ebb5 957
5f40b3cb 958/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 959 described in SLOT. The type is passed in DATA. */
5f40b3cb
ZD
960
961static int
962add_field_for_name (void **slot, void *data)
963{
3d9a9f94
KG
964 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
965 tree type = (tree) data;
5f40b3cb
ZD
966 tree name = ssa_name (elt->version);
967 tree var = SSA_NAME_VAR (name);
c2255bc4
AH
968 tree field = build_decl (DECL_SOURCE_LOCATION (var),
969 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
5f40b3cb
ZD
970
971 insert_field_into_struct (type, field);
972 elt->field = field;
a509ebb5 973
5f40b3cb
ZD
974 return 1;
975}
976
b8698a0f
L
977/* Callback for htab_traverse. A local result is the intermediate result
978 computed by a single
fa10beec 979 thread, or the initial value in case no iteration was executed.
b8698a0f
L
980 This function creates a phi node reflecting these values.
981 The phi's result will be stored in NEW_PHI field of the
982 reduction's data structure. */
a509ebb5
RL
983
984static int
985create_phi_for_local_result (void **slot, void *data)
986{
3d9a9f94
KG
987 struct reduction_info *const reduc = (struct reduction_info *) *slot;
988 const struct loop *const loop = (const struct loop *) data;
a509ebb5 989 edge e;
726a989a 990 gimple new_phi;
a509ebb5
RL
991 basic_block store_bb;
992 tree local_res;
f5045c96 993 source_location locus;
a509ebb5 994
b8698a0f
L
995 /* STORE_BB is the block where the phi
996 should be stored. It is the destination of the loop exit.
726a989a 997 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
a509ebb5
RL
998 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
999
1000 /* STORE_BB has two predecessors. One coming from the loop
1001 (the reduction's result is computed at the loop),
b8698a0f
L
1002 and another coming from a block preceding the loop,
1003 when no iterations
1004 are executed (the initial value should be taken). */
a509ebb5
RL
1005 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1006 e = EDGE_PRED (store_bb, 1);
1007 else
1008 e = EDGE_PRED (store_bb, 0);
726a989a
RB
1009 local_res
1010 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1011 NULL);
f5045c96 1012 locus = gimple_location (reduc->reduc_stmt);
a509ebb5
RL
1013 new_phi = create_phi_node (local_res, store_bb);
1014 SSA_NAME_DEF_STMT (local_res) = new_phi;
f5045c96 1015 add_phi_arg (new_phi, reduc->init, e, locus);
726a989a 1016 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
f5045c96 1017 FALLTHRU_EDGE (loop->latch), locus);
a509ebb5
RL
1018 reduc->new_phi = new_phi;
1019
1020 return 1;
1021}
5f40b3cb
ZD
1022
1023struct clsn_data
1024{
1025 tree store;
1026 tree load;
1027
1028 basic_block store_bb;
1029 basic_block load_bb;
1030};
1031
a509ebb5 1032/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1033 reduction described in SLOT.
a509ebb5
RL
1034 DATA annotates the place in memory the atomic operation relates to,
1035 and the basic block it needs to be generated in. */
1036
1037static int
1038create_call_for_reduction_1 (void **slot, void *data)
1039{
3d9a9f94
KG
1040 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1041 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a 1042 gimple_stmt_iterator gsi;
a509ebb5 1043 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
a509ebb5
RL
1044 tree load_struct;
1045 basic_block bb;
1046 basic_block new_bb;
1047 edge e;
0f900dfa 1048 tree t, addr, ref, x;
726a989a
RB
1049 tree tmp_load, name;
1050 gimple load;
a509ebb5 1051
70f34814 1052 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5 1053 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
a509ebb5
RL
1054
1055 addr = build_addr (t, current_function_decl);
1056
1057 /* Create phi node. */
1058 bb = clsn_data->load_bb;
1059
1060 e = split_block (bb, t);
1061 new_bb = e->dest;
1062
1063 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1064 add_referenced_var (tmp_load);
1065 tmp_load = make_ssa_name (tmp_load, NULL);
726a989a 1066 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1067 SSA_NAME_DEF_STMT (tmp_load) = load;
726a989a
RB
1068 gsi = gsi_start_bb (new_bb);
1069 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
a509ebb5
RL
1070
1071 e = split_block (new_bb, load);
1072 new_bb = e->dest;
726a989a 1073 gsi = gsi_start_bb (new_bb);
a509ebb5 1074 ref = tmp_load;
726a989a
RB
1075 x = fold_build2 (reduc->reduction_code,
1076 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1077 PHI_RESULT (reduc->new_phi));
a509ebb5 1078
726a989a
RB
1079 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1080 GSI_CONTINUE_LINKING);
a509ebb5 1081
726a989a 1082 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
a509ebb5
RL
1083 return 1;
1084}
1085
b8698a0f
L
1086/* Create the atomic operation at the join point of the threads.
1087 REDUCTION_LIST describes the reductions in the LOOP.
1088 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1089 shared data is stored in and loaded from. */
1090static void
b8698a0f 1091create_call_for_reduction (struct loop *loop, htab_t reduction_list,
a509ebb5
RL
1092 struct clsn_data *ld_st_data)
1093{
1094 htab_traverse (reduction_list, create_phi_for_local_result, loop);
726a989a 1095 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
a509ebb5
RL
1096 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1097 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1098}
1099
ae0bce62
RL
1100/* Callback for htab_traverse. Loads the final reduction value at the
1101 join point of all threads, and inserts it in the right place. */
a509ebb5
RL
1102
1103static int
1104create_loads_for_reductions (void **slot, void *data)
1105{
3d9a9f94
KG
1106 struct reduction_info *const red = (struct reduction_info *) *slot;
1107 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1108 gimple stmt;
1109 gimple_stmt_iterator gsi;
1110 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
a509ebb5 1111 tree load_struct;
ae0bce62 1112 tree name;
a509ebb5
RL
1113 tree x;
1114
726a989a 1115 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1116 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5
RL
1117 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1118 NULL_TREE);
a509ebb5 1119
ae0bce62 1120 x = load_struct;
a509ebb5 1121 name = PHI_RESULT (red->keep_res);
726a989a 1122 stmt = gimple_build_assign (name, x);
a509ebb5
RL
1123 SSA_NAME_DEF_STMT (name) = stmt;
1124
726a989a 1125 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1126
726a989a
RB
1127 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1128 !gsi_end_p (gsi); gsi_next (&gsi))
1129 if (gsi_stmt (gsi) == red->keep_res)
1130 {
1131 remove_phi_node (&gsi, false);
1132 return 1;
1133 }
1134 gcc_unreachable ();
a509ebb5
RL
1135}
1136
b8698a0f 1137/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1138 REDUCTION_LIST describes the list of reductions that the
fa10beec 1139 loads should be generated for. */
a509ebb5 1140static void
b8698a0f 1141create_final_loads_for_reduction (htab_t reduction_list,
a509ebb5
RL
1142 struct clsn_data *ld_st_data)
1143{
726a989a 1144 gimple_stmt_iterator gsi;
a509ebb5 1145 tree t;
726a989a 1146 gimple stmt;
a509ebb5 1147
726a989a 1148 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1149 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1150 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1151
726a989a
RB
1152 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1153 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
a509ebb5
RL
1154
1155 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1156
1157}
1158
0eb7e7aa
RL
1159/* Callback for htab_traverse. Store the neutral value for the
1160 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1161 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1162 The reduction is specified in SLOT. The store information is
1163 passed in DATA. */
0eb7e7aa
RL
1164
1165static int
1166create_stores_for_reduction (void **slot, void *data)
1167{
3d9a9f94
KG
1168 struct reduction_info *const red = (struct reduction_info *) *slot;
1169 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1170 tree t;
1171 gimple stmt;
1172 gimple_stmt_iterator gsi;
1173 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1174
1175 gsi = gsi_last_bb (clsn_data->store_bb);
1176 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1177 stmt = gimple_build_assign (t, red->initial_value);
0eb7e7aa 1178 mark_virtual_ops_for_renaming (stmt);
726a989a 1179 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1180
1181 return 1;
1182}
1183
a509ebb5
RL
1184/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1185 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1186 specified in SLOT. */
1187
5f40b3cb
ZD
1188static int
1189create_loads_and_stores_for_name (void **slot, void *data)
1190{
3d9a9f94
KG
1191 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1192 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1193 tree t;
1194 gimple stmt;
1195 gimple_stmt_iterator gsi;
5f40b3cb 1196 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1197 tree load_struct;
1198
726a989a
RB
1199 gsi = gsi_last_bb (clsn_data->store_bb);
1200 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1201 stmt = gimple_build_assign (t, ssa_name (elt->version));
5f40b3cb 1202 mark_virtual_ops_for_renaming (stmt);
726a989a 1203 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1204
726a989a 1205 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1206 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1207 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1208 stmt = gimple_build_assign (elt->new_name, t);
5f40b3cb 1209 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
726a989a 1210 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1211
1212 return 1;
1213}
1214
1215/* Moves all the variables used in LOOP and defined outside of it (including
1216 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1217 name) to a structure created for this purpose. The code
b8698a0f 1218
5f40b3cb
ZD
1219 while (1)
1220 {
1221 use (a);
1222 use (b);
1223 }
1224
1225 is transformed this way:
1226
1227 bb0:
1228 old.a = a;
1229 old.b = b;
1230
1231 bb1:
1232 a' = new->a;
1233 b' = new->b;
1234 while (1)
1235 {
1236 use (a');
1237 use (b');
1238 }
1239
1240 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1241 pointer `new' is intentionally not initialized (the loop will be split to a
1242 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1243 LD_ST_DATA holds information about the shared data structure used to pass
b8698a0f
L
1244 information among the threads. It is initialized here, and
1245 gen_parallel_loop will pass it to create_call_for_reduction that
1246 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1247 in LOOP. */
5f40b3cb
ZD
1248
1249static void
9f9f72aa 1250separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
b8698a0f 1251 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1252 struct clsn_data *ld_st_data)
a509ebb5 1253
5f40b3cb 1254{
9f9f72aa 1255 basic_block bb1 = split_edge (entry);
5f40b3cb
ZD
1256 basic_block bb0 = single_pred (bb1);
1257 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1258 name_to_copy_elt_eq, free);
1259 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1260 free);
5f40b3cb 1261 unsigned i;
726a989a
RB
1262 tree type, type_name, nvar;
1263 gimple_stmt_iterator gsi;
5f40b3cb 1264 struct clsn_data clsn_data;
9f9f72aa
AP
1265 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1266 basic_block bb;
1267 basic_block entry_bb = bb1;
1268 basic_block exit_bb = exit->dest;
b5b8b0ac 1269 bool has_debug_stmt = false;
5f40b3cb 1270
726a989a 1271 entry = single_succ_edge (entry_bb);
9f9f72aa 1272 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1273
ac47786e 1274 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 1275 {
b8698a0f 1276 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1277 {
726a989a
RB
1278 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1279 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1280 name_copies, decl_copies);
1281
1282 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
b5b8b0ac
AO
1283 {
1284 gimple stmt = gsi_stmt (gsi);
1285
1286 if (is_gimple_debug (stmt))
1287 has_debug_stmt = true;
1288 else
1289 separate_decls_in_region_stmt (entry, exit, stmt,
1290 name_copies, decl_copies);
1291 }
9f9f72aa 1292 }
5f40b3cb 1293 }
9f9f72aa 1294
b5b8b0ac
AO
1295 /* Now process debug bind stmts. We must not create decls while
1296 processing debug stmts, so we defer their processing so as to
1297 make sure we will have debug info for as many variables as
1298 possible (all of those that were dealt with in the loop above),
1299 and discard those for which we know there's nothing we can
1300 do. */
1301 if (has_debug_stmt)
ac47786e 1302 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
b5b8b0ac
AO
1303 if (bb != entry_bb && bb != exit_bb)
1304 {
1305 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1306 {
1307 gimple stmt = gsi_stmt (gsi);
1308
ddb555ed 1309 if (is_gimple_debug (stmt))
b5b8b0ac 1310 {
ddb555ed
JJ
1311 if (separate_decls_in_region_debug (stmt, name_copies,
1312 decl_copies))
b5b8b0ac
AO
1313 {
1314 gsi_remove (&gsi, true);
1315 continue;
1316 }
1317 }
1318
1319 gsi_next (&gsi);
1320 }
1321 }
1322
9f9f72aa 1323 VEC_free (basic_block, heap, body);
5f40b3cb 1324
b8698a0f 1325 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
5f40b3cb
ZD
1326 {
1327 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1328 loop carried and external variables in the loop). */
5f40b3cb
ZD
1329 *arg_struct = NULL;
1330 *new_arg_struct = NULL;
1331 }
1332 else
1333 {
1334 /* Create the type for the structure to store the ssa names to. */
1335 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1336 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1337 TYPE_DECL, create_tmp_var_name (".paral_data"),
5f40b3cb
ZD
1338 type);
1339 TYPE_NAME (type) = type_name;
1340
0eb7e7aa 1341 htab_traverse (name_copies, add_field_for_name, type);
9f9f72aa 1342 if (reduction_list && htab_elements (reduction_list) > 0)
0eb7e7aa
RL
1343 {
1344 /* Create the fields for reductions. */
1345 htab_traverse (reduction_list, add_field_for_reduction,
1346 type);
1347 }
5f40b3cb 1348 layout_type (type);
b8698a0f 1349
5f40b3cb
ZD
1350 /* Create the loads and stores. */
1351 *arg_struct = create_tmp_var (type, ".paral_data_store");
1352 add_referenced_var (*arg_struct);
1353 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1354 add_referenced_var (nvar);
726a989a 1355 *new_arg_struct = make_ssa_name (nvar, NULL);
5f40b3cb 1356
a509ebb5
RL
1357 ld_st_data->store = *arg_struct;
1358 ld_st_data->load = *new_arg_struct;
1359 ld_st_data->store_bb = bb0;
1360 ld_st_data->load_bb = bb1;
0eb7e7aa 1361
5f40b3cb 1362 htab_traverse (name_copies, create_loads_and_stores_for_name,
a509ebb5
RL
1363 ld_st_data);
1364
ae0bce62
RL
1365 /* Load the calculation from memory (after the join of the threads). */
1366
9f9f72aa 1367 if (reduction_list && htab_elements (reduction_list) > 0)
a509ebb5 1368 {
0eb7e7aa 1369 htab_traverse (reduction_list, create_stores_for_reduction,
b8698a0f 1370 ld_st_data);
726a989a 1371 clsn_data.load = make_ssa_name (nvar, NULL);
9f9f72aa 1372 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1373 clsn_data.store = ld_st_data->store;
1374 create_final_loads_for_reduction (reduction_list, &clsn_data);
1375 }
5f40b3cb
ZD
1376 }
1377
1378 htab_delete (decl_copies);
1379 htab_delete (name_copies);
1380}
1381
1382/* Bitmap containing uids of functions created by parallelization. We cannot
1383 allocate it from the default obstack, as it must live across compilation
1384 of several functions; we make it gc allocated instead. */
1385
1386static GTY(()) bitmap parallelized_functions;
1387
1388/* Returns true if FN was created by create_loop_fn. */
1389
1390static bool
1391parallelized_function_p (tree fn)
1392{
1393 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1394 return false;
1395
1396 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1397}
1398
1399/* Creates and returns an empty function that will receive the body of
1400 a parallelized loop. */
1401
1402static tree
9ff70652 1403create_loop_fn (location_t loc)
5f40b3cb
ZD
1404{
1405 char buf[100];
1406 char *tname;
1407 tree decl, type, name, t;
1408 struct function *act_cfun = cfun;
1409 static unsigned loopfn_num;
1410
1411 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1412 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1413 clean_symbol_name (tname);
1414 name = get_identifier (tname);
1415 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1416
9ff70652 1417 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1418 if (!parallelized_functions)
1419 parallelized_functions = BITMAP_GGC_ALLOC ();
1420 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1421
1422 TREE_STATIC (decl) = 1;
1423 TREE_USED (decl) = 1;
1424 DECL_ARTIFICIAL (decl) = 1;
1425 DECL_IGNORED_P (decl) = 0;
1426 TREE_PUBLIC (decl) = 0;
1427 DECL_UNINLINABLE (decl) = 1;
1428 DECL_EXTERNAL (decl) = 0;
1429 DECL_CONTEXT (decl) = NULL_TREE;
1430 DECL_INITIAL (decl) = make_node (BLOCK);
1431
9ff70652 1432 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1433 DECL_ARTIFICIAL (t) = 1;
1434 DECL_IGNORED_P (t) = 1;
1435 DECL_RESULT (decl) = t;
1436
9ff70652 1437 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1438 ptr_type_node);
1439 DECL_ARTIFICIAL (t) = 1;
1440 DECL_ARG_TYPE (t) = ptr_type_node;
1441 DECL_CONTEXT (t) = decl;
1442 TREE_USED (t) = 1;
1443 DECL_ARGUMENTS (decl) = t;
1444
182e0d71 1445 allocate_struct_function (decl, false);
5f40b3cb
ZD
1446
1447 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1448 it. */
5576d6f2 1449 set_cfun (act_cfun);
5f40b3cb
ZD
1450
1451 return decl;
1452}
1453
5f40b3cb
ZD
1454/* Moves the exit condition of LOOP to the beginning of its header, and
1455 duplicates the part of the last iteration that gets disabled to the
1456 exit of the loop. NIT is the number of iterations of the loop
1457 (used to initialize the variables in the duplicated part).
b8698a0f 1458
fa10beec 1459 TODO: the common case is that latch of the loop is empty and immediately
5f40b3cb
ZD
1460 follows the loop exit. In this case, it would be better not to copy the
1461 body of the loop, but only move the entry of the loop directly before the
1462 exit check and increase the number of iterations of the loop by one.
b8698a0f 1463 This may need some additional preconditioning in case NIT = ~0.
a509ebb5 1464 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1465
1466static void
a509ebb5 1467transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1468{
1469 basic_block *bbs, *nbbs, ex_bb, orig_header;
1470 unsigned n;
1471 bool ok;
1472 edge exit = single_dom_exit (loop), hpred;
726a989a 1473 tree control, control_name, res, t;
48710229 1474 gimple phi, nphi, cond_stmt, stmt, cond_nit;
726a989a 1475 gimple_stmt_iterator gsi;
48710229 1476 tree nit_1;
12037899
RL
1477 edge exit_1;
1478 tree new_rhs;
5f40b3cb
ZD
1479
1480 split_block_after_labels (loop->header);
1481 orig_header = single_succ (loop->header);
1482 hpred = single_succ_edge (loop->header);
1483
1484 cond_stmt = last_stmt (exit->src);
726a989a
RB
1485 control = gimple_cond_lhs (cond_stmt);
1486 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1487
1488 /* Make sure that we have phi nodes on exit for all loop header phis
1489 (create_parallel_loop requires that). */
726a989a 1490 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1491 {
726a989a 1492 phi = gsi_stmt (gsi);
5f40b3cb
ZD
1493 res = PHI_RESULT (phi);
1494 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1495 SET_PHI_RESULT (phi, t);
5f40b3cb
ZD
1496 nphi = create_phi_node (res, orig_header);
1497 SSA_NAME_DEF_STMT (res) = nphi;
f5045c96 1498 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1499
1500 if (res == control)
1501 {
726a989a 1502 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1503 update_stmt (cond_stmt);
1504 control = t;
1505 }
1506 }
12037899
RL
1507
1508 /* Setting the condition towards peeling the last iteration:
1509 If the block consisting of the exit condition has the latch as
1510 successor, then the body of the loop is executed before
1511 the exit condition is tested. In such case, moving the
1512 condition to the entry, causes that the loop will iterate
1513 one less iteration (which is the wanted outcome, since we
1514 peel out the last iteration). If the body is executed after
1515 the condition, moving the condition to the entry requires
1516 decrementing one iteration. */
1517 exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
1518 if (exit_1->dest == loop->latch)
1519 new_rhs = gimple_cond_rhs (cond_stmt);
1520 else
1521 {
1522 new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
1523 gimple_cond_rhs (cond_stmt),
1524 build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
1525 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
1526 {
1527 basic_block preheader;
1528 gimple_stmt_iterator gsi1;
1529
1530 preheader = loop_preheader_edge(loop)->src;
1531 gsi1 = gsi_after_labels (preheader);
1532 new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
1533 NULL_TREE,false,GSI_CONTINUE_LINKING);
1534 }
1535 }
1536 gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
1537 gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
1538
5f40b3cb 1539 bbs = get_loop_body_in_dom_order (loop);
48710229
RL
1540
1541 for (n = 0; bbs[n] != loop->latch; n++)
5f40b3cb
ZD
1542 continue;
1543 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1544 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1545 bbs + 1, n, nbbs);
5f40b3cb
ZD
1546 gcc_assert (ok);
1547 free (bbs);
1548 ex_bb = nbbs[0];
1549 free (nbbs);
1550
b8698a0f 1551 /* Other than reductions, the only gimple reg that should be copied
726a989a 1552 out of the loop is the control variable. */
a509ebb5 1553
5f40b3cb 1554 control_name = NULL_TREE;
726a989a 1555 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
5f40b3cb 1556 {
726a989a 1557 phi = gsi_stmt (gsi);
5f40b3cb
ZD
1558 res = PHI_RESULT (phi);
1559 if (!is_gimple_reg (res))
726a989a
RB
1560 {
1561 gsi_next (&gsi);
1562 continue;
1563 }
5f40b3cb 1564
a509ebb5 1565 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1566 keep the phi at the reduction's keep_res field. The
1567 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1568 variable when exiting the loop. */
1569
1570 exit = single_dom_exit (loop);
1571
b8698a0f 1572 if (htab_elements (reduction_list) > 0)
a509ebb5
RL
1573 {
1574 struct reduction_info *red;
1575
1576 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1577 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1578 if (red)
726a989a
RB
1579 {
1580 red->keep_res = phi;
1581 gsi_next (&gsi);
1582 continue;
1583 }
a509ebb5 1584 }
726a989a
RB
1585 gcc_assert (control_name == NULL_TREE
1586 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1587 control_name = res;
726a989a 1588 remove_phi_node (&gsi, false);
5f40b3cb
ZD
1589 }
1590 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1591
b8698a0f 1592 /* Initialize the control variable to number of iterations
48710229 1593 according to the rhs of the exit condition. */
726a989a 1594 gsi = gsi_after_labels (ex_bb);
b8698a0f 1595 cond_nit = last_stmt (exit->src);
48710229
RL
1596 nit_1 = gimple_cond_rhs (cond_nit);
1597 nit_1 = force_gimple_operand_gsi (&gsi,
1598 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1599 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1600 stmt = gimple_build_assign (control_name, nit_1);
726a989a
RB
1601 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1602 SSA_NAME_DEF_STMT (control_name) = stmt;
5f40b3cb
ZD
1603}
1604
1605/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1606 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb
ZD
1607 NEW_DATA is the variable that should be initialized from the argument
1608 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1609 basic block containing GIMPLE_OMP_PARALLEL tree. */
5f40b3cb
ZD
1610
1611static basic_block
1612create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1613 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1614{
726a989a 1615 gimple_stmt_iterator gsi;
5f40b3cb 1616 basic_block bb, paral_bb, for_bb, ex_bb;
0f900dfa 1617 tree t, param;
726a989a
RB
1618 gimple stmt, for_stmt, phi, cond_stmt;
1619 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1620 edge exit, nexit, guard, end, e;
1621
726a989a 1622 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1623 bb = loop_preheader_edge (loop)->src;
1624 paral_bb = single_pred (bb);
726a989a 1625 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1626
9ff70652 1627 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1628 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 1629 = build_int_cst (integer_type_node, n_threads);
726a989a 1630 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
9ff70652 1631 gimple_set_location (stmt, loc);
5f40b3cb 1632
726a989a 1633 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1634
1635 /* Initialize NEW_DATA. */
1636 if (data)
1637 {
726a989a
RB
1638 gsi = gsi_after_labels (bb);
1639
1640 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1641 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1642 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1643 SSA_NAME_DEF_STMT (param) = stmt;
1644
1645 stmt = gimple_build_assign (new_data,
1646 fold_convert (TREE_TYPE (new_data), param));
1647 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1648 SSA_NAME_DEF_STMT (new_data) = stmt;
5f40b3cb
ZD
1649 }
1650
726a989a 1651 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 1652 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 1653 gsi = gsi_last_bb (bb);
9ff70652
JJ
1654 stmt = gimple_build_omp_return (false);
1655 gimple_set_location (stmt, loc);
1656 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1657
726a989a 1658 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 1659 gcc_assert (loop->header == single_dom_exit (loop)->src);
726a989a 1660 cond_stmt = last_stmt (loop->header);
5f40b3cb 1661
726a989a 1662 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
1663 cvar_base = SSA_NAME_VAR (cvar);
1664 phi = SSA_NAME_DEF_STMT (cvar);
1665 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
726a989a 1666 initvar = make_ssa_name (cvar_base, NULL);
5f40b3cb
ZD
1667 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1668 initvar);
1669 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1670
1dff453d 1671 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
1672 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1673 gsi_remove (&gsi, true);
5f40b3cb
ZD
1674
1675 /* Prepare cfg. */
1676 for_bb = split_edge (loop_preheader_edge (loop));
1677 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1678 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1679 gcc_assert (exit == single_dom_exit (loop));
1680
1681 guard = make_edge (for_bb, ex_bb, 0);
1682 single_succ_edge (loop->latch)->flags = 0;
1683 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
726a989a 1684 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1685 {
f5045c96
AM
1686 source_location locus;
1687 tree def;
726a989a 1688 phi = gsi_stmt (gsi);
726a989a 1689 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
f5045c96
AM
1690
1691 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 1692 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96
AM
1693 loop_preheader_edge (loop));
1694 add_phi_arg (phi, def, guard, locus);
1695
1696 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1697 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1698 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
1699 }
1700 e = redirect_edge_and_branch (exit, nexit->dest);
1701 PENDING_STMT (e) = NULL;
1702
726a989a
RB
1703 /* Emit GIMPLE_OMP_FOR. */
1704 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 1705 type = TREE_TYPE (cvar);
9ff70652 1706 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
5f40b3cb
ZD
1707 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1708
726a989a 1709 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
9ff70652 1710 gimple_set_location (for_stmt, loc);
726a989a
RB
1711 gimple_omp_for_set_index (for_stmt, 0, initvar);
1712 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1713 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1714 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1715 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1716 cvar_base,
1717 build_int_cst (type, 1)));
1718
1719 gsi = gsi_last_bb (for_bb);
1720 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1721 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1722
726a989a
RB
1723 /* Emit GIMPLE_OMP_CONTINUE. */
1724 gsi = gsi_last_bb (loop->latch);
1725 stmt = gimple_build_omp_continue (cvar_next, cvar);
9ff70652 1726 gimple_set_location (stmt, loc);
726a989a
RB
1727 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1728 SSA_NAME_DEF_STMT (cvar_next) = stmt;
5f40b3cb 1729
726a989a
RB
1730 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1731 gsi = gsi_last_bb (ex_bb);
9ff70652
JJ
1732 stmt = gimple_build_omp_return (true);
1733 gimple_set_location (stmt, loc);
1734 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1735
1736 return paral_bb;
1737}
1738
08dab97a
RL
1739/* Generates code to execute the iterations of LOOP in N_THREADS
1740 threads in parallel.
1741
1742 NITER describes number of iterations of LOOP.
fa10beec 1743 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
1744
1745static void
08dab97a 1746gen_parallel_loop (struct loop *loop, htab_t reduction_list,
a509ebb5 1747 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb 1748{
9326236d 1749 loop_iterator li;
5f40b3cb 1750 tree many_iterations_cond, type, nit;
726a989a
RB
1751 tree arg_struct, new_arg_struct;
1752 gimple_seq stmts;
5f40b3cb 1753 basic_block parallel_head;
9f9f72aa 1754 edge entry, exit;
a509ebb5 1755 struct clsn_data clsn_data;
5f40b3cb 1756 unsigned prob;
9ff70652
JJ
1757 location_t loc;
1758 gimple cond_stmt;
5f40b3cb
ZD
1759
1760 /* From
1761
1762 ---------------------------------------------------------------------
1763 loop
1764 {
1765 IV = phi (INIT, IV + STEP)
1766 BODY1;
1767 if (COND)
1768 break;
1769 BODY2;
1770 }
1771 ---------------------------------------------------------------------
1772
1773 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1774 we generate the following code:
1775
1776 ---------------------------------------------------------------------
1777
1778 if (MAY_BE_ZERO
a509ebb5
RL
1779 || NITER < MIN_PER_THREAD * N_THREADS)
1780 goto original;
5f40b3cb
ZD
1781
1782 BODY1;
1783 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 1784 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 1785 load the variables from DATA.
726a989a 1786 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
1787 BODY2;
1788 BODY1;
726a989a
RB
1789 GIMPLE_OMP_CONTINUE;
1790 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1791 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
1792 goto end;
1793
1794 original:
1795 loop
1796 {
1797 IV = phi (INIT, IV + STEP)
1798 BODY1;
1799 if (COND)
1800 break;
1801 BODY2;
1802 }
1803
1804 end:
1805
1806 */
1807
1808 /* Create two versions of the loop -- in the old one, we know that the
1809 number of iterations is large enough, and we will transform it into the
1810 loop that will be split to loop_fn, the new one will be used for the
1811 remaining iterations. */
a509ebb5 1812
5f40b3cb
ZD
1813 type = TREE_TYPE (niter->niter);
1814 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1815 NULL_TREE);
1816 if (stmts)
726a989a 1817 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1818
1819 many_iterations_cond =
a509ebb5
RL
1820 fold_build2 (GE_EXPR, boolean_type_node,
1821 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
5f40b3cb 1822 many_iterations_cond
a509ebb5
RL
1823 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1824 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1825 many_iterations_cond);
5f40b3cb 1826 many_iterations_cond
a509ebb5 1827 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 1828 if (stmts)
726a989a 1829 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1830 if (!is_gimple_condexpr (many_iterations_cond))
1831 {
1832 many_iterations_cond
a509ebb5
RL
1833 = force_gimple_operand (many_iterations_cond, &stmts,
1834 true, NULL_TREE);
5f40b3cb 1835 if (stmts)
726a989a 1836 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1837 }
1838
1839 initialize_original_copy_tables ();
1840
1841 /* We assume that the loop usually iterates a lot. */
1842 prob = 4 * REG_BR_PROB_BASE / 5;
0f900dfa
JJ
1843 loop_version (loop, many_iterations_cond, NULL,
1844 prob, prob, REG_BR_PROB_BASE - prob, true);
5f40b3cb
ZD
1845 update_ssa (TODO_update_ssa);
1846 free_original_copy_tables ();
1847
1848 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 1849 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb
ZD
1850
1851 /* Ensure that the exit condition is the first statement in the loop. */
a509ebb5
RL
1852 transform_to_exit_first_loop (loop, reduction_list, nit);
1853
fa10beec 1854 /* Generate initializations for reductions. */
b8698a0f 1855 if (htab_elements (reduction_list) > 0)
a509ebb5 1856 htab_traverse (reduction_list, initialize_reductions, loop);
5f40b3cb
ZD
1857
1858 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
1859 gcc_assert (single_exit (loop));
1860 entry = loop_preheader_edge (loop);
1861 exit = single_dom_exit (loop);
5f40b3cb 1862
9f9f72aa 1863 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
1864 /* In the old loop, move all variables non-local to the loop to a structure
1865 and back, and create separate decls for the variables used in loop. */
b8698a0f 1866 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
9f9f72aa 1867 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
1868
1869 /* Create the parallel constructs. */
9ff70652
JJ
1870 loc = UNKNOWN_LOCATION;
1871 cond_stmt = last_stmt (loop->header);
1872 if (cond_stmt)
1873 loc = gimple_location (cond_stmt);
1874 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1875 new_arg_struct, n_threads, loc);
b8698a0f 1876 if (htab_elements (reduction_list) > 0)
a509ebb5 1877 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
1878
1879 scev_reset ();
1880
1881 /* Cancel the loop (it is simpler to do it here rather than to teach the
1882 expander to do it). */
1883 cancel_loop_tree (loop);
1884
92a6bdbd
SP
1885 /* Free loop bound estimations that could contain references to
1886 removed statements. */
1887 FOR_EACH_LOOP (li, loop, 0)
1888 free_numbers_of_iterations_estimates_loop (loop);
1889
5f40b3cb
ZD
1890 /* Expand the parallel constructs. We do it directly here instead of running
1891 a separate expand_omp pass, since it is more efficient, and less likely to
1892 cause troubles with further analyses not being able to deal with the
1893 OMP trees. */
a509ebb5 1894
5f40b3cb
ZD
1895 omp_expand_local (parallel_head);
1896}
1897
9857228c
SP
1898/* Returns true when LOOP contains vector phi nodes. */
1899
1900static bool
726a989a 1901loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
1902{
1903 unsigned i;
1904 basic_block *bbs = get_loop_body_in_dom_order (loop);
726a989a 1905 gimple_stmt_iterator gsi;
9857228c 1906 bool res = true;
9857228c
SP
1907
1908 for (i = 0; i < loop->num_nodes; i++)
726a989a
RB
1909 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1910 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
9857228c
SP
1911 goto end;
1912
1913 res = false;
1914 end:
1915 free (bbs);
1916 return res;
1917}
1918
08dab97a
RL
1919/* Create a reduction_info struct, initialize it with REDUC_STMT
1920 and PHI, insert it to the REDUCTION_LIST. */
1921
1922static void
1923build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1924{
1925 PTR *slot;
1926 struct reduction_info *new_reduction;
1927
1928 gcc_assert (reduc_stmt);
b8698a0f 1929
08dab97a
RL
1930 if (dump_file && (dump_flags & TDF_DETAILS))
1931 {
1932 fprintf (dump_file,
1933 "Detected reduction. reduction stmt is: \n");
1934 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1935 fprintf (dump_file, "\n");
1936 }
b8698a0f 1937
08dab97a 1938 new_reduction = XCNEW (struct reduction_info);
b8698a0f 1939
08dab97a
RL
1940 new_reduction->reduc_stmt = reduc_stmt;
1941 new_reduction->reduc_phi = phi;
5d1fd1de 1942 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a
RL
1943 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1944 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1945 *slot = new_reduction;
1946}
1947
5d1fd1de
JJ
1948/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1949
1950static int
1951set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1952{
1953 struct reduction_info *const red = (struct reduction_info *) *slot;
1954 gimple_set_uid (red->reduc_phi, red->reduc_version);
1955 return 1;
1956}
1957
08dab97a
RL
1958/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1959
1960static void
1961gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1962{
1963 gimple_stmt_iterator gsi;
1964 loop_vec_info simple_loop_info;
1965
1966 vect_dump = NULL;
1967 simple_loop_info = vect_analyze_loop_form (loop);
1968
1969 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1970 {
1971 gimple phi = gsi_stmt (gsi);
1972 affine_iv iv;
1973 tree res = PHI_RESULT (phi);
1974 bool double_reduc;
1975
1976 if (!is_gimple_reg (res))
1977 continue;
1978
1979 if (!simple_iv (loop, loop, res, &iv, true)
1980 && simple_loop_info)
1981 {
8a9ecffd
MM
1982 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1983 phi, true,
1984 &double_reduc);
48710229 1985 if (reduc_stmt && !double_reduc)
08dab97a
RL
1986 build_new_reduction (reduction_list, reduc_stmt, phi);
1987 }
1988 }
5d1fd1de
JJ
1989 destroy_loop_vec_info (simple_loop_info, true);
1990
1991 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1992 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1993 only now. */
1994 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
08dab97a
RL
1995}
1996
1997/* Try to initialize NITER for code generation part. */
1998
1999static bool
2000try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2001{
2002 edge exit = single_dom_exit (loop);
2003
2004 gcc_assert (exit);
2005
2006 /* We need to know # of iterations, and there should be no uses of values
2007 defined inside loop outside of it, unless the values are invariants of
2008 the loop. */
2009 if (!number_of_iterations_exit (loop, exit, niter, false))
2010 {
2011 if (dump_file && (dump_flags & TDF_DETAILS))
2012 fprintf (dump_file, " FAILED: number of iterations not known\n");
2013 return false;
2014 }
2015
2016 return true;
2017}
2018
2019/* Try to initialize REDUCTION_LIST for code generation part.
2020 REDUCTION_LIST describes the reductions. */
2021
2022static bool
2023try_create_reduction_list (loop_p loop, htab_t reduction_list)
2024{
2025 edge exit = single_dom_exit (loop);
2026 gimple_stmt_iterator gsi;
2027
2028 gcc_assert (exit);
2029
2030 gather_scalar_reductions (loop, reduction_list);
2031
b8698a0f 2032
08dab97a
RL
2033 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2034 {
2035 gimple phi = gsi_stmt (gsi);
2036 struct reduction_info *red;
2037 imm_use_iterator imm_iter;
2038 use_operand_p use_p;
2039 gimple reduc_phi;
2040 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2041
2042 if (is_gimple_reg (val))
2043 {
2044 if (dump_file && (dump_flags & TDF_DETAILS))
2045 {
2046 fprintf (dump_file, "phi is ");
2047 print_gimple_stmt (dump_file, phi, 0, 0);
2048 fprintf (dump_file, "arg of phi to exit: value ");
2049 print_generic_expr (dump_file, val, 0);
2050 fprintf (dump_file, " used outside loop\n");
2051 fprintf (dump_file,
2052 " checking if it a part of reduction pattern: \n");
2053 }
2054 if (htab_elements (reduction_list) == 0)
2055 {
2056 if (dump_file && (dump_flags & TDF_DETAILS))
2057 fprintf (dump_file,
2058 " FAILED: it is not a part of reduction.\n");
2059 return false;
2060 }
2061 reduc_phi = NULL;
2062 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2063 {
4942af9b
JJ
2064 if (!gimple_debug_bind_p (USE_STMT (use_p))
2065 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2066 {
2067 reduc_phi = USE_STMT (use_p);
2068 break;
2069 }
2070 }
2071 red = reduction_phi (reduction_list, reduc_phi);
2072 if (red == NULL)
2073 {
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 fprintf (dump_file,
2076 " FAILED: it is not a part of reduction.\n");
2077 return false;
2078 }
2079 if (dump_file && (dump_flags & TDF_DETAILS))
2080 {
2081 fprintf (dump_file, "reduction phi is ");
2082 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2083 fprintf (dump_file, "reduction stmt is ");
2084 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2085 }
2086 }
2087 }
2088
2089 /* The iterations of the loop may communicate only through bivs whose
2090 iteration space can be distributed efficiently. */
2091 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2092 {
2093 gimple phi = gsi_stmt (gsi);
2094 tree def = PHI_RESULT (phi);
2095 affine_iv iv;
2096
2097 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2098 {
2099 struct reduction_info *red;
2100
2101 red = reduction_phi (reduction_list, phi);
2102 if (red == NULL)
2103 {
2104 if (dump_file && (dump_flags & TDF_DETAILS))
2105 fprintf (dump_file,
2106 " FAILED: scalar dependency between iterations\n");
2107 return false;
2108 }
2109 }
2110 }
2111
2112
2113 return true;
2114}
2115
5f40b3cb
ZD
2116/* Detect parallel loops and generate parallel code using libgomp
2117 primitives. Returns true if some loop was parallelized, false
2118 otherwise. */
2119
2120bool
2121parallelize_loops (void)
2122{
2123 unsigned n_threads = flag_tree_parallelize_loops;
2124 bool changed = false;
2125 struct loop *loop;
2126 struct tree_niter_desc niter_desc;
2127 loop_iterator li;
a509ebb5 2128 htab_t reduction_list;
f873b205 2129 struct obstack parloop_obstack;
8adfe01d
RL
2130 HOST_WIDE_INT estimated;
2131 LOC loop_loc;
f873b205 2132
5f40b3cb
ZD
2133 /* Do not parallelize loops in the functions created by parallelization. */
2134 if (parallelized_function_p (cfun->decl))
2135 return false;
8adfe01d
RL
2136 if (cfun->has_nonlocal_label)
2137 return false;
5f40b3cb 2138
f873b205 2139 gcc_obstack_init (&parloop_obstack);
a509ebb5 2140 reduction_list = htab_create (10, reduction_info_hash,
08dab97a 2141 reduction_info_eq, free);
726a989a 2142 init_stmt_vec_info_vec ();
a509ebb5 2143
5f40b3cb
ZD
2144 FOR_EACH_LOOP (li, loop, 0)
2145 {
a509ebb5 2146 htab_empty (reduction_list);
48710229
RL
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 {
2149 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2150 if (loop->inner)
2151 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2152 else
2153 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2154 }
b8698a0f 2155
48710229 2156 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2157 checking results. */
2158 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2159 {
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2161 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2162 continue;
48710229 2163 }
87d4d0ee 2164
48710229
RL
2165 if (!single_dom_exit (loop))
2166 {
b8698a0f 2167
48710229
RL
2168 if (dump_file && (dump_flags & TDF_DETAILS))
2169 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2170
08dab97a 2171 continue;
48710229 2172 }
08dab97a
RL
2173
2174 if (/* And of course, the loop must be parallelizable. */
2175 !can_duplicate_loop_p (loop)
1d4af1e8 2176 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2177 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2178 /* FIXME: the check for vector phi nodes could be removed. */
08dab97a
RL
2179 || loop_has_vector_phi_nodes (loop))
2180 continue;
b4a9343c 2181 estimated = max_stmt_executions_int (loop, false);
87d4d0ee
SP
2182 /* FIXME: Bypass this check as graphite doesn't update the
2183 count and frequency correctly now. */
2184 if (!flag_loop_parallelize_all
8adfe01d
RL
2185 && ((estimated !=-1
2186 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2187 /* Do not bother with loops in cold areas. */
2188 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2189 continue;
b8698a0f 2190
08dab97a
RL
2191 if (!try_get_loop_niter (loop, &niter_desc))
2192 continue;
2193
2194 if (!try_create_reduction_list (loop, reduction_list))
2195 continue;
2196
f873b205
LB
2197 if (!flag_loop_parallelize_all
2198 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2199 continue;
2200
2201 changed = true;
48710229
RL
2202 if (dump_file && (dump_flags & TDF_DETAILS))
2203 {
48710229 2204 if (loop->inner)
8adfe01d 2205 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2206 else
8adfe01d
RL
2207 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2208 loop_loc = find_loop_location (loop);
2209 if (loop_loc != UNKNOWN_LOC)
2210 fprintf (dump_file, "\nloop at %s:%d: ",
2211 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
b8698a0f
L
2212 }
2213 gen_parallel_loop (loop, reduction_list,
08dab97a 2214 n_threads, &niter_desc);
5f40b3cb
ZD
2215 verify_flow_info ();
2216 verify_dominators (CDI_DOMINATORS);
2217 verify_loop_structure ();
a3b9e73c 2218 verify_loop_closed_ssa (true);
5f40b3cb
ZD
2219 }
2220
726a989a 2221 free_stmt_vec_info_vec ();
a509ebb5 2222 htab_delete (reduction_list);
f873b205 2223 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2224
2225 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2226 which local variables will escape. Reset the points-to solution
2227 for ESCAPED. */
6b8ed145 2228 if (changed)
d086d311 2229 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2230
5f40b3cb
ZD
2231 return changed;
2232}
2233
2234#include "gt-tree-parloops.h"