gcc/tree-parloops.c
/* Loop autoparallelization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether
   its iterations are independent, and if that is the case (and some
   additional conditions regarding profitability and correctness are
   satisfied), we add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let
   the omp expansion machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
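/* As an illustration of the effect of the pass (a hedged sketch, not code
   from this file; the function and array names are invented), a loop whose
   iterations are independent is conceptually turned into the equivalent of
   the OpenMP form below, which is what the GIMPLE_OMP_PARALLEL and
   GIMPLE_OMP_FOR codes expand to.  */
#if 0
void
example_parallelized_loop (int *a, int *b, int n)
{
  int i;

#pragma omp parallel num_threads (4)
#pragma omp for schedule (static)
  for (i = 0; i < n; i++)
    a[i] = b[i] * 2;            /* each thread handles a chunk of [0, n) */
}
#endif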
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/

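/* The same scheme in plain C with OpenMP, as a hedged sketch (names such as
   shared_result are invented; the pass really works on the GIMPLE shown
   above): every thread accumulates into a private copy that starts from the
   neutral element, the partial results are merged atomically at the join,
   and the user's initial value is folded in only once, after the parallel
   part.  */
#if 0
int
example_sum_reduction (int *x, int n)
{
  int sum = 1;                  /* the user's initial value             */
  int shared_result = 0;        /* plays the role of .paral_data_store  */
  int i;

#pragma omp parallel num_threads (4)
  {
    int local = 0;              /* neutral element for PLUS_EXPR        */

#pragma omp for schedule (static)
    for (i = 0; i < n; i++)
      {
        x[i] = i + 3;
        local += x[i];
      }

#pragma omp atomic
    shared_result += local;     /* cf. create_call_for_reduction_1 ()   */
  }

  return sum + shared_result;   /* initial value added after the join   */
}
#endif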
156/* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158#define MIN_PER_THREAD 100
159
b8698a0f 160/* Element of the hashtable, representing a
161 reduction in the current loop. */
162struct reduction_info
163{
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
b8698a0f 169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 170 of the reduction variable when exiting the loop. */
ae0bce62 171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 173 tree init; /* reduction initialization value. */
b8698a0f 174 gimple new_phi; /* (helper field) Newly created phi node whose result
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
178};
179
180/* Equality and hash functions for hashtab code. */
181
182static int
183reduction_info_eq (const void *aa, const void *bb)
184{
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
187
188 return (a->reduc_phi == b->reduc_phi);
189}
190
191static hashval_t
192reduction_info_hash (const void *aa)
193{
194 const struct reduction_info *a = (const struct reduction_info *) aa;
195
5d1fd1de 196 return a->reduc_version;
197}
198
199static struct reduction_info *
726a989a 200reduction_phi (htab_t reduction_list, gimple phi)
201{
202 struct reduction_info tmpred, *red;
203
87ebde38 204 if (htab_elements (reduction_list) == 0 || phi == NULL)
205 return NULL;
206
207 tmpred.reduc_phi = phi;
5d1fd1de 208 tmpred.reduc_version = gimple_uid (phi);
3d9a9f94 209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210
211 return red;
212}
213
214/* Element of hashtable of names to copy. */
215
216struct name_to_copy_elt
217{
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
222};
223
224/* Equality and hash functions for hashtab code. */
225
226static int
227name_to_copy_elt_eq (const void *aa, const void *bb)
228{
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231
232 return a->version == b->version;
233}
234
235static hashval_t
236name_to_copy_elt_hash (const void *aa)
237{
a509ebb5 238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239
240 return (hashval_t) a->version;
241}
242
243/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246typedef struct lambda_trans_matrix_s
247{
248 lambda_matrix matrix;
249 int rowsize;
250 int colsize;
251 int denominator;
252} *lambda_trans_matrix;
253#define LTM_MATRIX(T) ((T)->matrix)
254#define LTM_ROWSIZE(T) ((T)->rowsize)
255#define LTM_COLSIZE(T) ((T)->colsize)
256#define LTM_DENOMINATOR(T) ((T)->denominator)
257
258/* Allocate a new transformation matrix. */
259
260static lambda_trans_matrix
261lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
263{
264 lambda_trans_matrix ret;
265
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
273}
274
275/* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
278
279static void
280lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
282{
283 int i, j;
284
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
289}
290
291/* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
293 is false.
294
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
303
304static bool
305lambda_transform_legal_p (lambda_trans_matrix trans,
306 int nb_loops,
307 VEC (ddr_p, heap) *dependence_relations)
308{
309 unsigned int i, j;
310 lambda_vector distres;
311 struct data_dependence_relation *ddr;
312
313 gcc_assert (LTM_COLSIZE (trans) == nb_loops
314 && LTM_ROWSIZE (trans) == nb_loops);
315
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p, dependence_relations) == 0)
318 return true;
319
320 ddr = VEC_index (ddr_p, dependence_relations, 0);
321 if (ddr == NULL)
322 return true;
323
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is not worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327 return false;
328
329 distres = lambda_vector_new (nb_loops);
330
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333 {
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (a.k.a. input dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339 continue;
340
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343 return false;
344
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr) == 0)
348 return false;
349
350 /* Compute trans.dist_vect */
351 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 {
353 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354 DDR_DIST_VECT (ddr, j), distres);
355
356 if (!lambda_vector_lexico_pos (distres, nb_loops))
357 return false;
358 }
359 }
360 return true;
361}
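/* A hedged, stand-alone illustration of the check above (plain ints instead
   of lambda_vector, names invented): with a single loop and the reversal
   transform [-1], a dependence with distance d stays lexicographically
   non-negative only when d is 0, so any loop-carried dependence (d > 0)
   makes the transform, and hence parallelization, illegal.  */
#if 0
static int
example_reversal_legal_p (const int *dist, int n_deps)
{
  int i;

  for (i = 0; i < n_deps; i++)
    if (-dist[i] < 0)           /* -d must stay >= 0 after reversal.  */
      return 0;
  return 1;
}

/* example_reversal_legal_p ((int[]) {0, 0}, 2) returns 1 (parallelizable);
   example_reversal_legal_p ((int[]) {1}, 1) returns 0 (carried dependence).  */
#endif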
362
363/* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent of each other (that is, if we can execute them
365 in parallel). */
366
367static bool
f873b205 368loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 369{
370 VEC (loop_p, heap) *loop_nest;
371 VEC (ddr_p, heap) *dependence_relations;
726a989a 372 VEC (data_reference_p, heap) *datarefs;
373 lambda_trans_matrix trans;
374 bool ret = false;
375
376 if (dump_file && (dump_flags & TDF_DETAILS))
377 {
378 fprintf (dump_file, "Considering loop %d\n", loop->num);
379 if (!loop->inner)
380 fprintf (dump_file, "loop is innermost\n");
b8698a0f 381 else
382 fprintf (dump_file, "loop NOT innermost\n");
383 }
5f40b3cb 384
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs = VEC_alloc (data_reference_p, heap, 10);
388 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
01be8516 389 loop_nest = VEC_alloc (loop_p, heap, 3);
390 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391 &dependence_relations))
392 {
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
395 ret = false;
396 goto end;
397 }
398 if (dump_file && (dump_flags & TDF_DETAILS))
399 dump_data_dependence_relations (dump_file, dependence_relations);
400
f873b205 401 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
402 LTM_MATRIX (trans)[0][0] = -1;
403
404 if (lambda_transform_legal_p (trans, 1, dependence_relations))
405 {
406 ret = true;
407 if (dump_file && (dump_flags & TDF_DETAILS))
408 fprintf (dump_file, " SUCCESS: may be parallelized\n");
409 }
410 else if (dump_file && (dump_flags & TDF_DETAILS))
411 fprintf (dump_file,
412 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 413
9ca3d00e 414 end:
01be8516 415 VEC_free (loop_p, heap, loop_nest);
416 free_dependence_relations (dependence_relations);
417 free_data_refs (datarefs);
418
419 return ret;
420}
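/* Two hypothetical loops (not taken from a testcase) showing what the check
   above accepts and rejects:  */
#if 0
void
example_independent (int *a, int n)
{
  int i;

  /* No iteration reads a value written by another iteration, so the
     reversal test succeeds and the loop may be parallelized.  */
  for (i = 0; i < n; i++)
    a[i] = a[i] + 1;
}

void
example_carried_dependence (int *a, int n)
{
  int i;

  /* a[i] reads a[i - 1] written by the previous iteration (distance 1),
     so loop_parallel_p reports a dependence across iterations.  */
  for (i = 1; i < n; i++)
    a[i] = a[i - 1] + 1;
}
#endif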
421
422/* Return true when LOOP contains basic blocks marked with the
423 BB_IRREDUCIBLE_LOOP flag. */
424
425static inline bool
426loop_has_blocks_with_irreducible_flag (struct loop *loop)
427{
428 unsigned i;
429 basic_block *bbs = get_loop_body_in_dom_order (loop);
430 bool res = true;
431
432 for (i = 0; i < loop->num_nodes; i++)
433 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434 goto end;
435
436 res = false;
437 end:
438 free (bbs);
439 return res;
440}
441
8a171a59 442/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 443 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 444 to their addresses that can be reused. The address of OBJ is known to
445 be invariant in the whole function. Other needed statements are placed
446 right before GSI. */
447
448static tree
449take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450 gimple_stmt_iterator *gsi)
5f40b3cb 451{
8a171a59 452 int uid;
453 void **dslot;
454 struct int_tree_map ielt, *nielt;
83d5977e 455 tree *var_p, name, addr;
456 gimple stmt;
457 gimple_seq stmts;
5f40b3cb 458
459 /* Since the address of OBJ is invariant, the trees may be shared.
460 Avoid rewriting unrelated parts of the code. */
461 obj = unshare_expr (obj);
462 for (var_p = &obj;
463 handled_component_p (*var_p);
464 var_p = &TREE_OPERAND (*var_p, 0))
465 continue;
8a171a59 466
467 /* Canonicalize the access to base on a MEM_REF. */
468 if (DECL_P (*var_p))
469 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470
471 /* Assign a canonical SSA name to the address of the base decl used
472 in the address and share it for all accesses and addresses based
473 on it. */
474 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
475 ielt.uid = uid;
476 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477 if (!*dslot)
478 {
479 if (gsi == NULL)
480 return NULL;
c9a410f0 481 addr = TREE_OPERAND (*var_p, 0);
482 name = make_temp_ssa_name (TREE_TYPE (addr), NULL,
483 get_name (TREE_OPERAND
484 (TREE_OPERAND (*var_p, 0), 0)));
485 stmt = gimple_build_assign (name, addr);
726a989a 486 gsi_insert_on_edge_immediate (entry, stmt);
487
488 nielt = XNEW (struct int_tree_map);
489 nielt->uid = uid;
490 nielt->to = name;
491 *dslot = nielt;
5f40b3cb 492 }
493 else
494 name = ((struct int_tree_map *) *dslot)->to;
5f40b3cb 495
496 /* Express the address in terms of the canonical SSA name. */
497 TREE_OPERAND (*var_p, 0) = name;
498 if (gsi == NULL)
499 return build_fold_addr_expr_with_type (obj, type);
500
501 name = force_gimple_operand (build_addr (obj, current_function_decl),
502 &stmts, true, NULL_TREE);
503 if (!gimple_seq_empty_p (stmts))
cba1eb61 504 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 505
c9a410f0 506 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 507 {
726a989a 508 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 509 NULL_TREE);
726a989a 510 if (!gimple_seq_empty_p (stmts))
cba1eb61 511 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 512 }
513
514 return name;
515}
516
a509ebb5 517/* Callback for htab_traverse. Create the initialization statement
b8698a0f 518 for reduction described in SLOT, and place it at the preheader of
519 the loop described in DATA. */
520
521static int
522initialize_reductions (void **slot, void *data)
523{
a509ebb5 524 tree init, c;
525 tree bvar, type, arg;
526 edge e;
527
3d9a9f94 528 struct reduction_info *const reduc = (struct reduction_info *) *slot;
529 struct loop *loop = (struct loop *) data;
530
b8698a0f 531 /* Create initialization in preheader:
532 reduction_variable = initialization value of reduction. */
533
b8698a0f 534 /* In the phi node at the header, replace the argument coming
535 from the preheader with the reduction initialization value. */
536
537 /* Create a new variable to initialize the reduction. */
538 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
539 bvar = create_tmp_var (type, "reduction");
a509ebb5 540
541 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
542 OMP_CLAUSE_REDUCTION);
a509ebb5 543 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 544 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
545
546 init = omp_reduction_init (c, TREE_TYPE (bvar));
547 reduc->init = init;
548
549 /* Replace the argument representing the initialization value
550 with the initialization value for the reduction (neutral
551 element for the particular operation, e.g. 0 for PLUS_EXPR,
552 1 for MULT_EXPR, etc).
553 Keep the old value in a new variable "reduction_initial",
554 which will be taken into consideration after the parallel
0eb7e7aa 555 computation is done. */
556
557 e = loop_preheader_edge (loop);
558 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
559 /* Create new variable to hold the initial value. */
a509ebb5 560
a509ebb5 561 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 562 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 563 reduc->initial_value = arg;
564 return 1;
565}
566
567struct elv_data
568{
726a989a 569 struct walk_stmt_info info;
9f9f72aa 570 edge entry;
5f40b3cb 571 htab_t decl_address;
cba1eb61 572 gimple_stmt_iterator *gsi;
5f40b3cb 573 bool changed;
cba1eb61 574 bool reset;
575};
576
577/* Eliminates references to local variables in *TP out of the single
578 entry single exit region starting at DTA->ENTRY.
579 DECL_ADDRESS contains addresses of the references that had their
580 address taken already. If the expression is changed, CHANGED is
581 set to true. Callback for walk_tree. */
a509ebb5 582
5f40b3cb 583static tree
8a171a59 584eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 585{
3d9a9f94 586 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 587 tree t = *tp, var, addr, addr_type, type, obj;
588
589 if (DECL_P (t))
590 {
591 *walk_subtrees = 0;
592
593 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
594 return NULL_TREE;
595
596 type = TREE_TYPE (t);
597 addr_type = build_pointer_type (type);
598 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
599 dta->gsi);
600 if (dta->gsi == NULL && addr == NULL_TREE)
601 {
602 dta->reset = true;
603 return NULL_TREE;
604 }
605
70f34814 606 *tp = build_simple_mem_ref (addr);
607
608 dta->changed = true;
609 return NULL_TREE;
610 }
611
612 if (TREE_CODE (t) == ADDR_EXPR)
613 {
614 /* ADDR_EXPR may appear in two contexts:
615 -- as a gimple operand, when the address taken is a function invariant
616 -- as gimple rhs, when the resulting address in not a function
617 invariant
618 We do not need to do anything special in the latter case (the base of
619 the memory reference whose address is taken may be replaced in the
620 DECL_P case). The former case is more complicated, as we need to
621 ensure that the new address is still a gimple operand. Thus, it
622 is not sufficient to replace just the base of the memory reference --
623 we need to move the whole computation of the address out of the
624 loop. */
625 if (!is_gimple_val (t))
626 return NULL_TREE;
627
628 *walk_subtrees = 0;
629 obj = TREE_OPERAND (t, 0);
630 var = get_base_address (obj);
631 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
632 return NULL_TREE;
633
634 addr_type = TREE_TYPE (t);
635 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
636 dta->gsi);
637 if (dta->gsi == NULL && addr == NULL_TREE)
638 {
639 dta->reset = true;
640 return NULL_TREE;
641 }
642 *tp = addr;
643
644 dta->changed = true;
645 return NULL_TREE;
646 }
647
726a989a 648 if (!EXPR_P (t))
649 *walk_subtrees = 0;
650
651 return NULL_TREE;
652}
653
cba1eb61 654/* Moves the references to local variables in STMT at *GSI out of the single
655 entry single exit region starting at ENTRY. DECL_ADDRESS contains
656 addresses of the references that had their address taken
657 already. */
658
659static void
cba1eb61 660eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
661 htab_t decl_address)
662{
663 struct elv_data dta;
cba1eb61 664 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 665
726a989a 666 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 667 dta.entry = entry;
668 dta.decl_address = decl_address;
669 dta.changed = false;
cba1eb61 670 dta.reset = false;
5f40b3cb 671
b5b8b0ac 672 if (gimple_debug_bind_p (stmt))
673 {
674 dta.gsi = NULL;
675 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
676 eliminate_local_variables_1, &dta.info, NULL);
677 if (dta.reset)
678 {
679 gimple_debug_bind_reset_value (stmt);
680 dta.changed = true;
681 }
682 }
b5b8b0ac 683 else
684 {
685 dta.gsi = gsi;
686 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
687 }
688
689 if (dta.changed)
690 update_stmt (stmt);
691}
692
693/* Eliminates the references to local variables from the single entry
694 single exit region between the ENTRY and EXIT edges.
b8698a0f 695
a509ebb5 696 This includes:
697 1) Taking address of a local variable -- these are moved out of the
698 region (and temporary variable is created to hold the address if
a509ebb5 699 necessary).
9f9f72aa 700
5f40b3cb 701 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 702 references. */
703
704static void
9f9f72aa 705eliminate_local_variables (edge entry, edge exit)
5f40b3cb 706{
707 basic_block bb;
708 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
5f40b3cb 709 unsigned i;
726a989a 710 gimple_stmt_iterator gsi;
cba1eb61 711 bool has_debug_stmt = false;
712 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
713 free);
714 basic_block entry_bb = entry->src;
715 basic_block exit_bb = exit->dest;
5f40b3cb 716
9f9f72aa 717 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 718
ac47786e 719 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 720 if (bb != entry_bb && bb != exit_bb)
726a989a 721 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
722 if (is_gimple_debug (gsi_stmt (gsi)))
723 {
724 if (gimple_debug_bind_p (gsi_stmt (gsi)))
725 has_debug_stmt = true;
726 }
727 else
728 eliminate_local_variables_stmt (entry, &gsi, decl_address);
729
730 if (has_debug_stmt)
731 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
732 if (bb != entry_bb && bb != exit_bb)
733 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
734 if (gimple_debug_bind_p (gsi_stmt (gsi)))
735 eliminate_local_variables_stmt (entry, &gsi, decl_address);
736
737 htab_delete (decl_address);
738 VEC_free (basic_block, heap, body);
739}
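/* A hedged before/after sketch of the rewrite performed above (accumulate
   and use are hypothetical helpers): inside the region, direct uses of a
   local variable's address are replaced by uses of a pointer that is
   initialized on the region entry edge, so the region can later be outlined
   into a separate thread function.  */
#if 0
extern void accumulate (int *, int);
extern void use (int);

void
example_before (int n)
{
  int local = 0;
  int i;

  for (i = 0; i < n; i++)
    accumulate (&local, i);     /* address of a local taken in the region */
  use (local);
}

void
example_after (int n)
{
  int local = 0;
  int *local_p = &local;        /* hoisted to the region entry edge */
  int i;

  for (i = 0; i < n; i++)
    accumulate (local_p, i);    /* only the pointer is used inside */
  use (*local_p);
}
#endif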
740
741/* Returns true if expression EXPR is not defined between ENTRY and
742 EXIT, i.e. if all its operands are defined outside of the region. */
743
744static bool
745expr_invariant_in_region_p (edge entry, edge exit, tree expr)
746{
747 basic_block entry_bb = entry->src;
748 basic_block exit_bb = exit->dest;
749 basic_block def_bb;
750
751 if (is_gimple_min_invariant (expr))
752 return true;
753
754 if (TREE_CODE (expr) == SSA_NAME)
755 {
726a989a 756 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
757 if (def_bb
758 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
759 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
760 return false;
761
762 return true;
763 }
764
726a989a 765 return false;
766}
767
768/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
769 The copies are stored to NAME_COPIES, if NAME was already duplicated,
770 its duplicate stored in NAME_COPIES is returned.
b8698a0f 771
772 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
773 duplicated, storing the copies in DECL_COPIES. */
774
775static tree
776separate_decls_in_region_name (tree name,
777 htab_t name_copies, htab_t decl_copies,
778 bool copy_name_p)
779{
780 tree copy, var, var_copy;
781 unsigned idx, uid, nuid;
782 struct int_tree_map ielt, *nielt;
783 struct name_to_copy_elt elt, *nelt;
784 void **slot, **dslot;
785
786 if (TREE_CODE (name) != SSA_NAME)
787 return name;
788
789 idx = SSA_NAME_VERSION (name);
790 elt.version = idx;
791 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
792 copy_name_p ? INSERT : NO_INSERT);
793 if (slot && *slot)
794 return ((struct name_to_copy_elt *) *slot)->new_name;
795
796 if (copy_name_p)
797 {
798 copy = duplicate_ssa_name (name, NULL);
799 nelt = XNEW (struct name_to_copy_elt);
800 nelt->version = idx;
801 nelt->new_name = copy;
802 nelt->field = NULL_TREE;
803 *slot = nelt;
804 }
805 else
806 {
807 gcc_assert (!slot);
808 copy = name;
809 }
810
5f40b3cb 811 var = SSA_NAME_VAR (name);
812 if (!var)
813 return copy;
814
815 uid = DECL_UID (var);
816 ielt.uid = uid;
817 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
818 if (!*dslot)
819 {
820 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 821 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
822 nielt = XNEW (struct int_tree_map);
823 nielt->uid = uid;
824 nielt->to = var_copy;
825 *dslot = nielt;
826
827 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 828 it again. */
829 nuid = DECL_UID (var_copy);
830 ielt.uid = nuid;
831 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
832 gcc_assert (!*dslot);
833 nielt = XNEW (struct int_tree_map);
834 nielt->uid = nuid;
835 nielt->to = var_copy;
836 *dslot = nielt;
837 }
838 else
839 var_copy = ((struct int_tree_map *) *dslot)->to;
840
b2ec94d4 841 replace_ssa_name_symbol (copy, var_copy);
842 return copy;
843}
844
845/* Finds the ssa names used in STMT that are defined outside the
846 region between ENTRY and EXIT and replaces such ssa names with
847 their duplicates. The duplicates are stored to NAME_COPIES. Base
848 decls of all ssa names used in STMT (including those defined in
849 LOOP) are replaced with the new temporary variables; the
850 replacement decls are stored in DECL_COPIES. */
851
852static void
726a989a 853separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
9f9f72aa 854 htab_t name_copies, htab_t decl_copies)
855{
856 use_operand_p use;
857 def_operand_p def;
858 ssa_op_iter oi;
859 tree name, copy;
860 bool copy_name_p;
861
5f40b3cb 862 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
863 {
864 name = DEF_FROM_PTR (def);
865 gcc_assert (TREE_CODE (name) == SSA_NAME);
866 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
867 false);
868 gcc_assert (copy == name);
869 }
870
871 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
872 {
873 name = USE_FROM_PTR (use);
874 if (TREE_CODE (name) != SSA_NAME)
875 continue;
876
877 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
878 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
879 copy_name_p);
880 SET_USE (use, copy);
881 }
882}
883
884/* Finds the ssa names used in STMT that are defined outside the
885 region between ENTRY and EXIT and replaces such ssa names with
886 their duplicates. The duplicates are stored to NAME_COPIES. Base
887 decls of all ssa names used in STMT (including those defined in
888 LOOP) are replaced with the new temporary variables; the
889 replacement decls are stored in DECL_COPIES. */
890
891static bool
892separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
893 htab_t decl_copies)
894{
895 use_operand_p use;
896 ssa_op_iter oi;
897 tree var, name;
898 struct int_tree_map ielt;
899 struct name_to_copy_elt elt;
900 void **slot, **dslot;
901
902 if (gimple_debug_bind_p (stmt))
903 var = gimple_debug_bind_get_var (stmt);
904 else if (gimple_debug_source_bind_p (stmt))
905 var = gimple_debug_source_bind_get_var (stmt);
906 else
907 return true;
598e67d7 908 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
4f2a9af8 909 return true;
910 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
911 ielt.uid = DECL_UID (var);
912 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
913 if (!dslot)
914 return true;
915 if (gimple_debug_bind_p (stmt))
916 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
917 else if (gimple_debug_source_bind_p (stmt))
918 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
919
920 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
921 {
922 name = USE_FROM_PTR (use);
923 if (TREE_CODE (name) != SSA_NAME)
924 continue;
925
926 elt.version = SSA_NAME_VERSION (name);
927 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
928 if (!slot)
929 {
930 gimple_debug_bind_reset_value (stmt);
931 update_stmt (stmt);
932 break;
933 }
934
935 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
936 }
937
938 return false;
939}
940
941/* Callback for htab_traverse. Adds a field corresponding to the reduction
942 specified in SLOT. The type is passed in DATA. */
943
944static int
945add_field_for_reduction (void **slot, void *data)
a509ebb5 946{
b8698a0f 947
948 struct reduction_info *const red = (struct reduction_info *) *slot;
949 tree const type = (tree) data;
726a989a 950 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
951 tree field = build_decl (gimple_location (red->reduc_stmt),
952 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
953
954 insert_field_into_struct (type, field);
955
956 red->field = field;
957
958 return 1;
959}
a509ebb5 960
5f40b3cb 961/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 962 described in SLOT. The type is passed in DATA. */
963
964static int
965add_field_for_name (void **slot, void *data)
966{
967 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
968 tree type = (tree) data;
5f40b3cb 969 tree name = ssa_name (elt->version);
970 tree field = build_decl (UNKNOWN_LOCATION,
971 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
972 TREE_TYPE (name));
973
974 insert_field_into_struct (type, field);
975 elt->field = field;
a509ebb5 976
977 return 1;
978}
979
980/* Callback for htab_traverse. A local result is the intermediate result
981 computed by a single
fa10beec 982 thread, or the initial value in case no iteration was executed.
983 This function creates a phi node reflecting these values.
984 The phi's result will be stored in NEW_PHI field of the
985 reduction's data structure. */
986
987static int
988create_phi_for_local_result (void **slot, void *data)
989{
990 struct reduction_info *const reduc = (struct reduction_info *) *slot;
991 const struct loop *const loop = (const struct loop *) data;
a509ebb5 992 edge e;
726a989a 993 gimple new_phi;
994 basic_block store_bb;
995 tree local_res;
f5045c96 996 source_location locus;
a509ebb5 997
998 /* STORE_BB is the block where the phi
999 should be stored. It is the destination of the loop exit.
726a989a 1000 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1001 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1002
1003 /* STORE_BB has two predecessors. One coming from the loop
1004 (the reduction's result is computed at the loop),
1005 and another coming from a block preceding the loop,
1006 when no iterations
1007 are executed (the initial value should be taken). */
1008 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1009 e = EDGE_PRED (store_bb, 1);
1010 else
1011 e = EDGE_PRED (store_bb, 0);
6b4a85ad 1012 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
f5045c96 1013 locus = gimple_location (reduc->reduc_stmt);
a509ebb5 1014 new_phi = create_phi_node (local_res, store_bb);
9e227d60 1015 add_phi_arg (new_phi, reduc->init, e, locus);
726a989a 1016 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
9e227d60 1017 FALLTHRU_EDGE (loop->latch), locus);
1018 reduc->new_phi = new_phi;
1019
1020 return 1;
1021}
1022
1023struct clsn_data
1024{
1025 tree store;
1026 tree load;
1027
1028 basic_block store_bb;
1029 basic_block load_bb;
1030};
1031
a509ebb5 1032/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1033 reduction described in SLOT.
1034 DATA annotates the place in memory the atomic operation relates to,
1035 and the basic block it needs to be generated in. */
1036
1037static int
1038create_call_for_reduction_1 (void **slot, void *data)
1039{
1040 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1041 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a 1042 gimple_stmt_iterator gsi;
a509ebb5 1043 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1044 tree load_struct;
1045 basic_block bb;
1046 basic_block new_bb;
1047 edge e;
0f900dfa 1048 tree t, addr, ref, x;
1049 tree tmp_load, name;
1050 gimple load;
a509ebb5 1051
70f34814 1052 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5 1053 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1054
1055 addr = build_addr (t, current_function_decl);
1056
1057 /* Create phi node. */
1058 bb = clsn_data->load_bb;
1059
1060 e = split_block (bb, t);
1061 new_bb = e->dest;
1062
1063 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
a509ebb5 1064 tmp_load = make_ssa_name (tmp_load, NULL);
726a989a 1065 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1066 SSA_NAME_DEF_STMT (tmp_load) = load;
1067 gsi = gsi_start_bb (new_bb);
1068 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1069
1070 e = split_block (new_bb, load);
1071 new_bb = e->dest;
726a989a 1072 gsi = gsi_start_bb (new_bb);
a509ebb5 1073 ref = tmp_load;
1074 x = fold_build2 (reduc->reduction_code,
1075 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1076 PHI_RESULT (reduc->new_phi));
a509ebb5 1077
1078 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1079 GSI_CONTINUE_LINKING);
a509ebb5 1080
726a989a 1081 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1082 return 1;
1083}
1084
1085/* Create the atomic operation at the join point of the threads.
1086 REDUCTION_LIST describes the reductions in the LOOP.
1087 LD_ST_DATA describes the shared data structure where
1088 shared data is stored in and loaded from. */
1089static void
b8698a0f 1090create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1091 struct clsn_data *ld_st_data)
1092{
1093 htab_traverse (reduction_list, create_phi_for_local_result, loop);
726a989a 1094 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1095 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1096 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1097}
1098
1099/* Callback for htab_traverse. Loads the final reduction value at the
1100 join point of all threads, and inserts it in the right place. */
1101
1102static int
1103create_loads_for_reductions (void **slot, void *data)
1104{
1105 struct reduction_info *const red = (struct reduction_info *) *slot;
1106 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1107 gimple stmt;
1108 gimple_stmt_iterator gsi;
1109 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
a509ebb5 1110 tree load_struct;
ae0bce62 1111 tree name;
1112 tree x;
1113
726a989a 1114 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1115 load_struct = build_simple_mem_ref (clsn_data->load);
1116 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1117 NULL_TREE);
a509ebb5 1118
ae0bce62 1119 x = load_struct;
a509ebb5 1120 name = PHI_RESULT (red->keep_res);
726a989a 1121 stmt = gimple_build_assign (name, x);
1122 SSA_NAME_DEF_STMT (name) = stmt;
1123
726a989a 1124 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1125
1126 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1127 !gsi_end_p (gsi); gsi_next (&gsi))
1128 if (gsi_stmt (gsi) == red->keep_res)
1129 {
1130 remove_phi_node (&gsi, false);
1131 return 1;
1132 }
1133 gcc_unreachable ();
1134}
1135
b8698a0f 1136/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1137 REDUCTION_LIST describes the list of reductions that the
fa10beec 1138 loads should be generated for. */
a509ebb5 1139static void
b8698a0f 1140create_final_loads_for_reduction (htab_t reduction_list,
1141 struct clsn_data *ld_st_data)
1142{
726a989a 1143 gimple_stmt_iterator gsi;
a509ebb5 1144 tree t;
726a989a 1145 gimple stmt;
a509ebb5 1146
726a989a 1147 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1148 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1149 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1150
1151 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1152 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1153
1154 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1155
1156}
1157
1158/* Callback for htab_traverse. Store the neutral value for the
1159 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1160 1 for MULT_EXPR, etc. into the reduction field.
1161 The reduction is specified in SLOT. The store information is
1162 passed in DATA. */
1163
1164static int
1165create_stores_for_reduction (void **slot, void *data)
1166{
1167 struct reduction_info *const red = (struct reduction_info *) *slot;
1168 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1169 tree t;
1170 gimple stmt;
1171 gimple_stmt_iterator gsi;
1172 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1173
1174 gsi = gsi_last_bb (clsn_data->store_bb);
1175 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1176 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1177 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1178
1179 return 1;
1180}
1181
1182/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1183 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1184 specified in SLOT. */
1185
1186static int
1187create_loads_and_stores_for_name (void **slot, void *data)
1188{
1189 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1190 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1191 tree t;
1192 gimple stmt;
1193 gimple_stmt_iterator gsi;
5f40b3cb 1194 tree type = TREE_TYPE (elt->new_name);
1195 tree load_struct;
1196
1197 gsi = gsi_last_bb (clsn_data->store_bb);
1198 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1199 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1200 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1201
726a989a 1202 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1203 load_struct = build_simple_mem_ref (clsn_data->load);
1204 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1205 stmt = gimple_build_assign (elt->new_name, t);
5f40b3cb 1206 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
726a989a 1207 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1208
1209 return 1;
1210}
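/* A hedged sketch of the store/load pattern built above, with invented names
   (struct paral_data_example, do_work): values that are live on entry to the
   region are stored into the .paral_data structure by the original function
   and loaded back from it in the outlined thread function.  */
#if 0
struct paral_data_example
{
  int a;
  int *p;
};

extern void do_work (int, int *);

void
example_store_and_load (int a, int *p)
{
  struct paral_data_example store;      /* .paral_data_store */
  struct paral_data_example *load;      /* .paral_data_load  */

  store.a = a;                          /* stores emitted in STORE_BB */
  store.p = p;

  load = &store;                        /* done in the spawned function */
  do_work (load->a, load->p);           /* loads emitted in LOAD_BB */
}
#endif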
1211
1212/* Moves all the variables used in LOOP and defined outside of it (including
1213 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1214 name) to a structure created for this purpose. The code
b8698a0f 1215
1216 while (1)
1217 {
1218 use (a);
1219 use (b);
1220 }
1221
1222 is transformed this way:
1223
1224 bb0:
1225 old.a = a;
1226 old.b = b;
1227
1228 bb1:
1229 a' = new->a;
1230 b' = new->b;
1231 while (1)
1232 {
1233 use (a');
1234 use (b');
1235 }
1236
1237 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1238 pointer `new' is intentionally not initialized (the loop will be split to a
1239 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1240 LD_ST_DATA holds information about the shared data structure used to pass
1241 information among the threads. It is initialized here, and
1242 gen_parallel_loop will pass it to create_call_for_reduction that
1243 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1244 in LOOP. */
1245
1246static void
9f9f72aa 1247separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
b8698a0f 1248 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1249 struct clsn_data *ld_st_data)
a509ebb5 1250
5f40b3cb 1251{
9f9f72aa 1252 basic_block bb1 = split_edge (entry);
1253 basic_block bb0 = single_pred (bb1);
1254 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1255 name_to_copy_elt_eq, free);
1256 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1257 free);
5f40b3cb 1258 unsigned i;
1259 tree type, type_name, nvar;
1260 gimple_stmt_iterator gsi;
5f40b3cb 1261 struct clsn_data clsn_data;
1262 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1263 basic_block bb;
1264 basic_block entry_bb = bb1;
1265 basic_block exit_bb = exit->dest;
b5b8b0ac 1266 bool has_debug_stmt = false;
5f40b3cb 1267
726a989a 1268 entry = single_succ_edge (entry_bb);
9f9f72aa 1269 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1270
ac47786e 1271 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 1272 {
b8698a0f 1273 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1274 {
1275 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1276 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1277 name_copies, decl_copies);
1278
1279 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1280 {
1281 gimple stmt = gsi_stmt (gsi);
1282
1283 if (is_gimple_debug (stmt))
1284 has_debug_stmt = true;
1285 else
1286 separate_decls_in_region_stmt (entry, exit, stmt,
1287 name_copies, decl_copies);
1288 }
9f9f72aa 1289 }
5f40b3cb 1290 }
9f9f72aa 1291
1292 /* Now process debug bind stmts. We must not create decls while
1293 processing debug stmts, so we defer their processing so as to
1294 make sure we will have debug info for as many variables as
1295 possible (all of those that were dealt with in the loop above),
1296 and discard those for which we know there's nothing we can
1297 do. */
1298 if (has_debug_stmt)
ac47786e 1299 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1300 if (bb != entry_bb && bb != exit_bb)
1301 {
1302 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1303 {
1304 gimple stmt = gsi_stmt (gsi);
1305
ddb555ed 1306 if (is_gimple_debug (stmt))
b5b8b0ac 1307 {
1308 if (separate_decls_in_region_debug (stmt, name_copies,
1309 decl_copies))
1310 {
1311 gsi_remove (&gsi, true);
1312 continue;
1313 }
1314 }
1315
1316 gsi_next (&gsi);
1317 }
1318 }
1319
9f9f72aa 1320 VEC_free (basic_block, heap, body);
5f40b3cb 1321
b8698a0f 1322 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1323 {
1324 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1325 loop carried and external variables in the loop). */
1326 *arg_struct = NULL;
1327 *new_arg_struct = NULL;
1328 }
1329 else
1330 {
1331 /* Create the type for the structure to store the ssa names to. */
1332 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1333 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1334 TYPE_DECL, create_tmp_var_name (".paral_data"),
1335 type);
1336 TYPE_NAME (type) = type_name;
1337
0eb7e7aa 1338 htab_traverse (name_copies, add_field_for_name, type);
9f9f72aa 1339 if (reduction_list && htab_elements (reduction_list) > 0)
1340 {
1341 /* Create the fields for reductions. */
1342 htab_traverse (reduction_list, add_field_for_reduction,
1343 type);
1344 }
5f40b3cb 1345 layout_type (type);
b8698a0f 1346
1347 /* Create the loads and stores. */
1348 *arg_struct = create_tmp_var (type, ".paral_data_store");
5f40b3cb 1349 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
726a989a 1350 *new_arg_struct = make_ssa_name (nvar, NULL);
5f40b3cb 1351
1352 ld_st_data->store = *arg_struct;
1353 ld_st_data->load = *new_arg_struct;
1354 ld_st_data->store_bb = bb0;
1355 ld_st_data->load_bb = bb1;
0eb7e7aa 1356
5f40b3cb 1357 htab_traverse (name_copies, create_loads_and_stores_for_name,
1358 ld_st_data);
1359
1360 /* Load the calculation from memory (after the join of the threads). */
1361
9f9f72aa 1362 if (reduction_list && htab_elements (reduction_list) > 0)
a509ebb5 1363 {
0eb7e7aa 1364 htab_traverse (reduction_list, create_stores_for_reduction,
b8698a0f 1365 ld_st_data);
726a989a 1366 clsn_data.load = make_ssa_name (nvar, NULL);
9f9f72aa 1367 clsn_data.load_bb = exit->dest;
1368 clsn_data.store = ld_st_data->store;
1369 create_final_loads_for_reduction (reduction_list, &clsn_data);
1370 }
1371 }
1372
1373 htab_delete (decl_copies);
1374 htab_delete (name_copies);
1375}
1376
1377/* Bitmap containing uids of functions created by parallelization. We cannot
1378 allocate it from the default obstack, as it must live across compilation
1379 of several functions; we make it gc allocated instead. */
1380
1381static GTY(()) bitmap parallelized_functions;
1382
1383/* Returns true if FN was created by create_loop_fn. */
1384
62e0a1ed 1385bool
1386parallelized_function_p (tree fn)
1387{
1388 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1389 return false;
1390
1391 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1392}
1393
1394/* Creates and returns an empty function that will receive the body of
1395 a parallelized loop. */
1396
1397static tree
9ff70652 1398create_loop_fn (location_t loc)
1399{
1400 char buf[100];
1401 char *tname;
1402 tree decl, type, name, t;
1403 struct function *act_cfun = cfun;
1404 static unsigned loopfn_num;
1405
1406 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1407 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1408 clean_symbol_name (tname);
1409 name = get_identifier (tname);
1410 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1411
9ff70652 1412 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1413 if (!parallelized_functions)
1414 parallelized_functions = BITMAP_GGC_ALLOC ();
1415 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1416
1417 TREE_STATIC (decl) = 1;
1418 TREE_USED (decl) = 1;
1419 DECL_ARTIFICIAL (decl) = 1;
1420 DECL_IGNORED_P (decl) = 0;
1421 TREE_PUBLIC (decl) = 0;
1422 DECL_UNINLINABLE (decl) = 1;
1423 DECL_EXTERNAL (decl) = 0;
1424 DECL_CONTEXT (decl) = NULL_TREE;
1425 DECL_INITIAL (decl) = make_node (BLOCK);
1426
9ff70652 1427 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1428 DECL_ARTIFICIAL (t) = 1;
1429 DECL_IGNORED_P (t) = 1;
1430 DECL_RESULT (decl) = t;
1431
9ff70652 1432 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1433 ptr_type_node);
1434 DECL_ARTIFICIAL (t) = 1;
1435 DECL_ARG_TYPE (t) = ptr_type_node;
1436 DECL_CONTEXT (t) = decl;
1437 TREE_USED (t) = 1;
1438 DECL_ARGUMENTS (decl) = t;
1439
182e0d71 1440 allocate_struct_function (decl, false);
1441
1442 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1443 it. */
5576d6f2 1444 set_cfun (act_cfun);
1445
1446 return decl;
1447}
1448
1449/* Moves the exit condition of LOOP to the beginning of its header, and
1450 duplicates the part of the last iteration that gets disabled to the
1451 exit of the loop. NIT is the number of iterations of the loop
1452 (used to initialize the variables in the duplicated part).
b8698a0f 1453
fa10beec 1454 TODO: the common case is that the latch of the loop is empty and immediately
1455 follows the loop exit. In this case, it would be better not to copy the
1456 body of the loop, but only move the entry of the loop directly before the
1457 exit check and increase the number of iterations of the loop by one.
b8698a0f 1458 This may need some additional preconditioning in case NIT = ~0.
a509ebb5 1459 REDUCTION_LIST describes the reductions in LOOP. */
1460
1461static void
a509ebb5 1462transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1463{
1464 basic_block *bbs, *nbbs, ex_bb, orig_header;
1465 unsigned n;
1466 bool ok;
1467 edge exit = single_dom_exit (loop), hpred;
726a989a 1468 tree control, control_name, res, t;
48710229 1469 gimple phi, nphi, cond_stmt, stmt, cond_nit;
726a989a 1470 gimple_stmt_iterator gsi;
48710229 1471 tree nit_1;
1472
1473 split_block_after_labels (loop->header);
1474 orig_header = single_succ (loop->header);
1475 hpred = single_succ_edge (loop->header);
1476
1477 cond_stmt = last_stmt (exit->src);
1478 control = gimple_cond_lhs (cond_stmt);
1479 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1480
1481 /* Make sure that we have phi nodes on exit for all loop header phis
1482 (create_parallel_loop requires that). */
726a989a 1483 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1484 {
726a989a 1485 phi = gsi_stmt (gsi);
5f40b3cb 1486 res = PHI_RESULT (phi);
070ecdfd 1487 t = copy_ssa_name (res, phi);
5f40b3cb 1488 SET_PHI_RESULT (phi, t);
5f40b3cb 1489 nphi = create_phi_node (res, orig_header);
9e227d60 1490 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1491
1492 if (res == control)
1493 {
726a989a 1494 gimple_cond_set_lhs (cond_stmt, t);
1495 update_stmt (cond_stmt);
1496 control = t;
1497 }
1498 }
12037899 1499
5f40b3cb 1500 bbs = get_loop_body_in_dom_order (loop);
48710229 1501
1502 for (n = 0; bbs[n] != exit->src; n++)
1503 continue;
5f40b3cb 1504 nbbs = XNEWVEC (basic_block, n);
1505 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1506 bbs + 1, n, nbbs);
1507 gcc_assert (ok);
1508 free (bbs);
1509 ex_bb = nbbs[0];
1510 free (nbbs);
1511
b8698a0f 1512 /* Other than reductions, the only gimple reg that should be copied
726a989a 1513 out of the loop is the control variable. */
69958396 1514 exit = single_dom_exit (loop);
5f40b3cb 1515 control_name = NULL_TREE;
726a989a 1516 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
5f40b3cb 1517 {
726a989a 1518 phi = gsi_stmt (gsi);
5f40b3cb 1519 res = PHI_RESULT (phi);
ea057359 1520 if (virtual_operand_p (res))
1521 {
1522 gsi_next (&gsi);
1523 continue;
1524 }
5f40b3cb 1525
a509ebb5 1526 /* Check if it is a part of reduction. If it is,
1527 keep the phi at the reduction's keep_res field. The
1528 PHI_RESULT of this phi is the resulting value of the reduction
1529 variable when exiting the loop. */
1530
b8698a0f 1531 if (htab_elements (reduction_list) > 0)
1532 {
1533 struct reduction_info *red;
1534
1535 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1536 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1537 if (red)
1538 {
1539 red->keep_res = phi;
1540 gsi_next (&gsi);
1541 continue;
1542 }
a509ebb5 1543 }
1544 gcc_assert (control_name == NULL_TREE
1545 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1546 control_name = res;
726a989a 1547 remove_phi_node (&gsi, false);
1548 }
1549 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1550
b8698a0f 1551 /* Initialize the control variable to number of iterations
48710229 1552 according to the rhs of the exit condition. */
726a989a 1553 gsi = gsi_after_labels (ex_bb);
b8698a0f 1554 cond_nit = last_stmt (exit->src);
1555 nit_1 = gimple_cond_rhs (cond_nit);
1556 nit_1 = force_gimple_operand_gsi (&gsi,
1557 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1558 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1559 stmt = gimple_build_assign (control_name, nit_1);
1560 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1561 SSA_NAME_DEF_STMT (control_name) = stmt;
1562}
1563
1564/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1565 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1566 NEW_DATA is the variable that should be initialized from the argument
1567 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1568 basic block containing GIMPLE_OMP_PARALLEL tree. */
1569
1570static basic_block
1571create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1572 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1573{
726a989a 1574 gimple_stmt_iterator gsi;
5f40b3cb 1575 basic_block bb, paral_bb, for_bb, ex_bb;
0f900dfa 1576 tree t, param;
726a989a
RB
1577 gimple stmt, for_stmt, phi, cond_stmt;
1578 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1579 edge exit, nexit, guard, end, e;
1580
726a989a 1581 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1582 bb = loop_preheader_edge (loop)->src;
1583 paral_bb = single_pred (bb);
726a989a 1584 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1585
9ff70652 1586 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1587 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 1588 = build_int_cst (integer_type_node, n_threads);
726a989a 1589 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
9ff70652 1590 gimple_set_location (stmt, loc);
5f40b3cb 1591
726a989a 1592 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1593
1594 /* Initialize NEW_DATA. */
1595 if (data)
1596 {
726a989a
RB
1597 gsi = gsi_after_labels (bb);
1598
1599 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1600 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1601 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1602 SSA_NAME_DEF_STMT (param) = stmt;
1603
1604 stmt = gimple_build_assign (new_data,
1605 fold_convert (TREE_TYPE (new_data), param));
1606 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1607 SSA_NAME_DEF_STMT (new_data) = stmt;
5f40b3cb
ZD
1608 }
1609
726a989a 1610 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 1611 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 1612 gsi = gsi_last_bb (bb);
9ff70652
JJ
1613 stmt = gimple_build_omp_return (false);
1614 gimple_set_location (stmt, loc);
1615 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1616
726a989a 1617 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 1618 gcc_assert (loop->header == single_dom_exit (loop)->src);
726a989a 1619 cond_stmt = last_stmt (loop->header);
5f40b3cb 1620
726a989a 1621 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
1622 cvar_base = SSA_NAME_VAR (cvar);
1623 phi = SSA_NAME_DEF_STMT (cvar);
1624 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
070ecdfd 1625 initvar = copy_ssa_name (cvar, NULL);
5f40b3cb
ZD
1626 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1627 initvar);
1628 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1629
1dff453d 1630 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
1631 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1632 gsi_remove (&gsi, true);
5f40b3cb
ZD
1633
1634 /* Prepare cfg. */
1635 for_bb = split_edge (loop_preheader_edge (loop));
1636 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1637 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1638 gcc_assert (exit == single_dom_exit (loop));
1639
1640 guard = make_edge (for_bb, ex_bb, 0);
1641 single_succ_edge (loop->latch)->flags = 0;
1642 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
726a989a 1643 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1644 {
f5045c96
AM
1645 source_location locus;
1646 tree def;
726a989a 1647 phi = gsi_stmt (gsi);
726a989a 1648 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
f5045c96
AM
1649
1650 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 1651 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 1652 loop_preheader_edge (loop));
9e227d60 1653 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
1654
1655 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1656 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 1657 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
1658 }
1659 e = redirect_edge_and_branch (exit, nexit->dest);
1660 PENDING_STMT (e) = NULL;
1661
726a989a
RB
1662 /* Emit GIMPLE_OMP_FOR. */
1663 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 1664 type = TREE_TYPE (cvar);
9ff70652 1665 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
5f40b3cb
ZD
1666 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1667
726a989a 1668 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
9ff70652 1669 gimple_set_location (for_stmt, loc);
726a989a
RB
1670 gimple_omp_for_set_index (for_stmt, 0, initvar);
1671 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1672 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1673 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1674 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1675 cvar_base,
1676 build_int_cst (type, 1)));
1677
1678 gsi = gsi_last_bb (for_bb);
1679 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1680 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1681
726a989a
RB
1682 /* Emit GIMPLE_OMP_CONTINUE. */
1683 gsi = gsi_last_bb (loop->latch);
1684 stmt = gimple_build_omp_continue (cvar_next, cvar);
9ff70652 1685 gimple_set_location (stmt, loc);
726a989a
RB
1686 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1687 SSA_NAME_DEF_STMT (cvar_next) = stmt;
5f40b3cb 1688
726a989a
RB
1689 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1690 gsi = gsi_last_bb (ex_bb);
9ff70652
JJ
1691 stmt = gimple_build_omp_return (true);
1692 gimple_set_location (stmt, loc);
1693 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1694
cd7d9fd7
RG
1695 /* After the above dom info is hosed. Re-compute it. */
1696 free_dominance_info (CDI_DOMINATORS);
1697 calculate_dominance_info (CDI_DOMINATORS);
1698
5f40b3cb
ZD
1699 return paral_bb;
1700}

/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
                   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
         || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

  */
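
  /* As a rough source-level illustration (the pass itself works on GIMPLE
     and emits the OMP codes directly; the names below are illustrative
     only), a loop such as

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     would conceptually become

       if (n < MIN_PER_THREAD * N_THREADS)
         goto original;
       #pragma omp parallel num_threads (N_THREADS)
       #pragma omp for schedule (static)
       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];
       goto end;
       original:
       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];
       end:;  */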

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

       for (i = 0; i < n; ++i)
         for (j = 0; j < m; ++j)
           ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
                                        new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
        goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
{
  PTR *slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
               "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

static int
set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  vect_dump = NULL;
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
        continue;

      if (!simple_iv (loop, loop, res, &iv, true)
          && simple_loop_info)
        {
          gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
                                                           phi, true,
                                                           &double_reduc);
          if (reduc_stmt && !double_reduc)
            build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
}

/* Try to initialize NITER for code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "phi is ");
              print_gimple_stmt (dump_file, phi, 0, 0);
              fprintf (dump_file, "arg of phi to exit: value ");
              print_generic_expr (dump_file, val, 0);
              fprintf (dump_file, " used outside loop\n");
              fprintf (dump_file,
                       "  checking if it is a part of reduction pattern: \n");
            }
          if (htab_elements (reduction_list) == 0)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not a part of reduction.\n");
              return false;
            }
          reduc_phi = NULL;
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
            {
              if (!gimple_debug_bind_p (USE_STMT (use_p))
                  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
                {
                  reduc_phi = USE_STMT (use_p);
                  break;
                }
            }
          red = reduction_phi (reduction_list, reduc_phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not a part of reduction.\n");
              return false;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "reduction phi is ");
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
              fprintf (dump_file, "reduction stmt is ");
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
            }
        }
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
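  /* For example, a header phi whose latch value is computed by something
     like

       t_2 = t_1 * a[i] + b[i];
       t_1 = PHI <t_2 (latch), t_0 (preheader)>

     is neither a simple induction variable nor a reduction this pass can
     split between threads, so such a loop is rejected below with "scalar
     dependency between iterations".  The SSA names here are illustrative
     only.  */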
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
        {
          struct reduction_info *red;

          red = reduction_phi (reduction_list, phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: scalar dependency between iterations\n");
              return false;
            }
        }
    }

  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list = htab_create (10, reduction_info_hash,
                                reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
          if (loop->inner)
            fprintf (dump_file, "loop %d is not innermost\n", loop->num);
          else
            fprintf (dump_file, "loop %d is innermost\n", loop->num);
        }

      /* If we use autopar in graphite pass, we use its marked dependency
         checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is not parallel according to graphite\n");
          continue;
        }

      if (!single_dom_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is !single_dom_exit\n");

          continue;
        }

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
        estimated = max_stmt_executions_int (loop);
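      /* ESTIMATED now holds (if known) how many times the loop body is
         expected to execute, or -1 when no estimate is available.  For
         instance, assuming MIN_PER_THREAD is defined to 100 earlier in this
         file, with four threads a loop needs an estimate of more than 400
         body executions to pass the profitability check below.  */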
      /* FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
          && ((estimated != -1
               && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
              /* Do not bother with loops in cold areas.  */
              || optimize_loop_nest_for_size_p (loop)))
        continue;

      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, reduction_list))
        continue;

      if (!flag_loop_parallelize_all
          && !loop_parallel_p (loop, &parloop_obstack))
        continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop->inner)
            fprintf (dump_file, "parallelizing outer loop %d\n",
                     loop->header->index);
          else
            fprintf (dump_file, "parallelizing inner loop %d\n",
                     loop->header->index);
          loop_loc = find_loop_location (loop);
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\nloop at %s:%d: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
        }
      gen_parallel_loop (loop, reduction_list,
                         n_threads, &niter_desc);
#ifdef ENABLE_CHECKING
      verify_flow_info ();
      verify_loop_structure ();
      verify_loop_closed_ssa (true);
#endif
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

#include "gt-tree-parloops.h"