]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
* ipa.c (cgraph_address_taken_from_non_vtable_p): Check the ref type.
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
c75c517d
SB
2 Copyright (C) 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5f40b3cb
ZD
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5 Zdenek Dvorak <dvorakz@suse.cz>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
6da7fc87 11Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
6da7fc87
NC
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
5f40b3cb
ZD
26#include "tree-flow.h"
27#include "cfgloop.h"
5f40b3cb 28#include "tree-data-ref.h"
1bd6497c 29#include "tree-scalar-evolution.h"
cf835838 30#include "gimple-pretty-print.h"
5f40b3cb 31#include "tree-pass.h"
5f40b3cb 32#include "langhooks.h"
a509ebb5 33#include "tree-vectorizer.h"
5f40b3cb
ZD
34
35/* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
b8698a0f 41
5f40b3cb
ZD
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
726a989a
RB
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
52
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common scalar dependence patterns (accumulation, ...)
58 -- handling of non-innermost loops */
59
b8698a0f 60/*
a509ebb5 61 Reduction handling:
8a9ecffd 62 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 63 The code transformation will be introduced by an example.
b8698a0f
L
64
65
a509ebb5
RL
66parloop
67{
68 int sum=1;
69
0eb7e7aa 70 for (i = 0; i < N; i++)
a509ebb5
RL
71 {
72 x[i] = i + 3;
73 sum+=x[i];
74 }
75}
76
0eb7e7aa 77gimple-like code:
a509ebb5
RL
78header_bb:
79
0eb7e7aa
RL
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
88
a509ebb5
RL
89
90exit_bb:
91
0eb7e7aa
RL
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
a509ebb5
RL
94
95
96after reduction transformation (only relevant parts):
97
98parloop
99{
100
101....
102
0eb7e7aa 103
fa10beec 104 # Storing the initial value given by the user. #
0eb7e7aa 105
ae0bce62 106 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
107
108 #pragma omp parallel num_threads(4)
a509ebb5 109
0eb7e7aa 110 #pragma omp for schedule(static)
ae0bce62
RL
111
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
115
116 # sum.27_29 = PHI <sum.27_11, 0>
117
0eb7e7aa 118 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 119
726a989a 120 GIMPLE_OMP_CONTINUE
a509ebb5 121
0eb7e7aa
RL
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 124 GIMPLE_OMP_RETURN
b8698a0f
L
125
126 # Creating the atomic operation is done at
0eb7e7aa 127 create_call_for_reduction_1() #
a509ebb5 128
0eb7e7aa
RL
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
b8698a0f 133
726a989a 134 GIMPLE_OMP_RETURN
b8698a0f 135
0eb7e7aa
RL
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
ae0bce62
RL
138 The value computed by the threads is loaded from the
139 shared struct. #
140
b8698a0f 141
0eb7e7aa 142 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 143 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
144 sum_43 = D.1795_41 + sum_37;
145
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
149
150...
151
a509ebb5
RL
152}
153
154*/
155
5f40b3cb
ZD
156/* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158#define MIN_PER_THREAD 100
159
b8698a0f 160/* Element of the hashtable, representing a
a509ebb5
RL
161 reduction in the current loop. */
162struct reduction_info
163{
726a989a
RB
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
b8698a0f 169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 170 of the reduction variable when existing the loop. */
ae0bce62 171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 173 tree init; /* reduction initialization value. */
b8698a0f 174 gimple new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
178};
179
180/* Equality and hash functions for hashtab code. */
181
182static int
183reduction_info_eq (const void *aa, const void *bb)
184{
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
187
188 return (a->reduc_phi == b->reduc_phi);
189}
190
191static hashval_t
192reduction_info_hash (const void *aa)
193{
194 const struct reduction_info *a = (const struct reduction_info *) aa;
195
5d1fd1de 196 return a->reduc_version;
a509ebb5
RL
197}
198
199static struct reduction_info *
726a989a 200reduction_phi (htab_t reduction_list, gimple phi)
a509ebb5
RL
201{
202 struct reduction_info tmpred, *red;
203
87ebde38 204 if (htab_elements (reduction_list) == 0 || phi == NULL)
a509ebb5
RL
205 return NULL;
206
207 tmpred.reduc_phi = phi;
5d1fd1de 208 tmpred.reduc_version = gimple_uid (phi);
3d9a9f94 209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
a509ebb5
RL
210
211 return red;
212}
213
5f40b3cb
ZD
214/* Element of hashtable of names to copy. */
215
216struct name_to_copy_elt
217{
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
222};
223
224/* Equality and hash functions for hashtab code. */
225
226static int
227name_to_copy_elt_eq (const void *aa, const void *bb)
228{
a509ebb5
RL
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
5f40b3cb
ZD
231
232 return a->version == b->version;
233}
234
235static hashval_t
236name_to_copy_elt_hash (const void *aa)
237{
a509ebb5 238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
5f40b3cb
ZD
239
240 return (hashval_t) a->version;
241}
242
b305e3da
SP
243/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246typedef struct lambda_trans_matrix_s
247{
248 lambda_matrix matrix;
249 int rowsize;
250 int colsize;
251 int denominator;
252} *lambda_trans_matrix;
253#define LTM_MATRIX(T) ((T)->matrix)
254#define LTM_ROWSIZE(T) ((T)->rowsize)
255#define LTM_COLSIZE(T) ((T)->colsize)
256#define LTM_DENOMINATOR(T) ((T)->denominator)
257
258/* Allocate a new transformation matrix. */
259
260static lambda_trans_matrix
261lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
263{
264 lambda_trans_matrix ret;
265
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
273}
274
275/* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
278
279static void
280lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
282{
283 int i, j;
284
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
289}
290
291/* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
293 is false.
294
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
303
304static bool
305lambda_transform_legal_p (lambda_trans_matrix trans,
306 int nb_loops,
307 VEC (ddr_p, heap) *dependence_relations)
308{
309 unsigned int i, j;
310 lambda_vector distres;
311 struct data_dependence_relation *ddr;
312
313 gcc_assert (LTM_COLSIZE (trans) == nb_loops
314 && LTM_ROWSIZE (trans) == nb_loops);
315
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p, dependence_relations) == 0)
318 return true;
319
320 ddr = VEC_index (ddr_p, dependence_relations, 0);
321 if (ddr == NULL)
322 return true;
323
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327 return false;
328
329 distres = lambda_vector_new (nb_loops);
330
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333 {
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339 continue;
340
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343 return false;
344
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr) == 0)
348 return false;
349
350 /* Compute trans.dist_vect */
351 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 {
353 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354 DDR_DIST_VECT (ddr, j), distres);
355
356 if (!lambda_vector_lexico_pos (distres, nb_loops))
357 return false;
358 }
359 }
360 return true;
361}
08dab97a
RL
362
363/* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent on each other (that is, if we can execute them
365 in parallel). */
5f40b3cb
ZD
366
367static bool
f873b205 368loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 369{
01be8516
SP
370 VEC (loop_p, heap) *loop_nest;
371 VEC (ddr_p, heap) *dependence_relations;
726a989a 372 VEC (data_reference_p, heap) *datarefs;
5f40b3cb
ZD
373 lambda_trans_matrix trans;
374 bool ret = false;
5f40b3cb
ZD
375
376 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
377 {
378 fprintf (dump_file, "Considering loop %d\n", loop->num);
379 if (!loop->inner)
380 fprintf (dump_file, "loop is innermost\n");
b8698a0f 381 else
48710229
RL
382 fprintf (dump_file, "loop NOT innermost\n");
383 }
5f40b3cb 384
5f40b3cb
ZD
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs = VEC_alloc (data_reference_p, heap, 10);
388 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
01be8516
SP
389 loop_nest = VEC_alloc (loop_p, heap, 3);
390 compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
5f40b3cb
ZD
391 &dependence_relations);
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 dump_data_dependence_relations (dump_file, dependence_relations);
394
f873b205 395 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
396 LTM_MATRIX (trans)[0][0] = -1;
397
398 if (lambda_transform_legal_p (trans, 1, dependence_relations))
399 {
400 ret = true;
401 if (dump_file && (dump_flags & TDF_DETAILS))
402 fprintf (dump_file, " SUCCESS: may be parallelized\n");
403 }
404 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
405 fprintf (dump_file,
406 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 407
01be8516 408 VEC_free (loop_p, heap, loop_nest);
5f40b3cb
ZD
409 free_dependence_relations (dependence_relations);
410 free_data_refs (datarefs);
411
412 return ret;
413}
414
1d4af1e8
SP
415/* Return true when LOOP contains basic blocks marked with the
416 BB_IRREDUCIBLE_LOOP flag. */
417
418static inline bool
419loop_has_blocks_with_irreducible_flag (struct loop *loop)
420{
421 unsigned i;
422 basic_block *bbs = get_loop_body_in_dom_order (loop);
423 bool res = true;
424
425 for (i = 0; i < loop->num_nodes; i++)
426 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
427 goto end;
428
429 res = false;
430 end:
431 free (bbs);
432 return res;
433}
434
8a171a59 435/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 436 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 437 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
438 be invariant in the whole function. Other needed statements are placed
439 right before GSI. */
5f40b3cb
ZD
440
441static tree
cba1eb61
JJ
442take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
443 gimple_stmt_iterator *gsi)
5f40b3cb 444{
8a171a59 445 int uid;
5f40b3cb
ZD
446 void **dslot;
447 struct int_tree_map ielt, *nielt;
726a989a
RB
448 tree *var_p, name, bvar, addr;
449 gimple stmt;
450 gimple_seq stmts;
5f40b3cb 451
8a171a59
ZD
452 /* Since the address of OBJ is invariant, the trees may be shared.
453 Avoid rewriting unrelated parts of the code. */
454 obj = unshare_expr (obj);
455 for (var_p = &obj;
456 handled_component_p (*var_p);
457 var_p = &TREE_OPERAND (*var_p, 0))
458 continue;
8a171a59 459
c9a410f0
RG
460 /* Canonicalize the access to base on a MEM_REF. */
461 if (DECL_P (*var_p))
462 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
463
464 /* Assign a canonical SSA name to the address of the base decl used
465 in the address and share it for all accesses and addresses based
466 on it. */
467 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
5f40b3cb
ZD
468 ielt.uid = uid;
469 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
470 if (!*dslot)
471 {
cba1eb61
JJ
472 if (gsi == NULL)
473 return NULL;
c9a410f0
RG
474 addr = TREE_OPERAND (*var_p, 0);
475 bvar = create_tmp_var (TREE_TYPE (addr),
476 get_name (TREE_OPERAND
477 (TREE_OPERAND (*var_p, 0), 0)));
5f40b3cb 478 add_referenced_var (bvar);
726a989a 479 stmt = gimple_build_assign (bvar, addr);
5f40b3cb 480 name = make_ssa_name (bvar, stmt);
726a989a
RB
481 gimple_assign_set_lhs (stmt, name);
482 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb
ZD
483
484 nielt = XNEW (struct int_tree_map);
485 nielt->uid = uid;
486 nielt->to = name;
487 *dslot = nielt;
5f40b3cb 488 }
8a171a59
ZD
489 else
490 name = ((struct int_tree_map *) *dslot)->to;
5f40b3cb 491
c9a410f0
RG
492 /* Express the address in terms of the canonical SSA name. */
493 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
494 if (gsi == NULL)
495 return build_fold_addr_expr_with_type (obj, type);
496
c9a410f0
RG
497 name = force_gimple_operand (build_addr (obj, current_function_decl),
498 &stmts, true, NULL_TREE);
499 if (!gimple_seq_empty_p (stmts))
cba1eb61 500 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 501
c9a410f0 502 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 503 {
726a989a 504 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 505 NULL_TREE);
726a989a 506 if (!gimple_seq_empty_p (stmts))
cba1eb61 507 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 508 }
5f40b3cb
ZD
509
510 return name;
511}
512
a509ebb5 513/* Callback for htab_traverse. Create the initialization statement
b8698a0f 514 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
515 the loop described in DATA. */
516
517static int
518initialize_reductions (void **slot, void *data)
519{
a509ebb5 520 tree init, c;
a509ebb5
RL
521 tree bvar, type, arg;
522 edge e;
523
3d9a9f94 524 struct reduction_info *const reduc = (struct reduction_info *) *slot;
a509ebb5
RL
525 struct loop *loop = (struct loop *) data;
526
b8698a0f 527 /* Create initialization in preheader:
a509ebb5
RL
528 reduction_variable = initialization value of reduction. */
529
b8698a0f 530 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
531 from the preheader with the reduction initialization value. */
532
533 /* Create a new variable to initialize the reduction. */
534 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
535 bvar = create_tmp_var (type, "reduction");
536 add_referenced_var (bvar);
537
c2255bc4
AH
538 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
539 OMP_CLAUSE_REDUCTION);
a509ebb5 540 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 541 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
542
543 init = omp_reduction_init (c, TREE_TYPE (bvar));
544 reduc->init = init;
545
b8698a0f
L
546 /* Replace the argument representing the initialization value
547 with the initialization value for the reduction (neutral
548 element for the particular operation, e.g. 0 for PLUS_EXPR,
549 1 for MULT_EXPR, etc).
550 Keep the old value in a new variable "reduction_initial",
551 that will be taken in consideration after the parallel
0eb7e7aa 552 computing is done. */
a509ebb5
RL
553
554 e = loop_preheader_edge (loop);
555 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
556 /* Create new variable to hold the initial value. */
a509ebb5 557
a509ebb5 558 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 559 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 560 reduc->initial_value = arg;
a509ebb5
RL
561 return 1;
562}
5f40b3cb
ZD
563
564struct elv_data
565{
726a989a 566 struct walk_stmt_info info;
9f9f72aa 567 edge entry;
5f40b3cb 568 htab_t decl_address;
cba1eb61 569 gimple_stmt_iterator *gsi;
5f40b3cb 570 bool changed;
cba1eb61 571 bool reset;
5f40b3cb
ZD
572};
573
9f9f72aa
AP
574/* Eliminates references to local variables in *TP out of the single
575 entry single exit region starting at DTA->ENTRY.
576 DECL_ADDRESS contains addresses of the references that had their
577 address taken already. If the expression is changed, CHANGED is
578 set to true. Callback for walk_tree. */
a509ebb5 579
5f40b3cb 580static tree
8a171a59 581eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 582{
3d9a9f94 583 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 584 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
585
586 if (DECL_P (t))
587 {
588 *walk_subtrees = 0;
589
590 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
591 return NULL_TREE;
592
593 type = TREE_TYPE (t);
594 addr_type = build_pointer_type (type);
cba1eb61
JJ
595 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
596 dta->gsi);
597 if (dta->gsi == NULL && addr == NULL_TREE)
598 {
599 dta->reset = true;
600 return NULL_TREE;
601 }
602
70f34814 603 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
604
605 dta->changed = true;
606 return NULL_TREE;
607 }
608
609 if (TREE_CODE (t) == ADDR_EXPR)
610 {
8a171a59
ZD
611 /* ADDR_EXPR may appear in two contexts:
612 -- as a gimple operand, when the address taken is a function invariant
613 -- as gimple rhs, when the resulting address in not a function
614 invariant
615 We do not need to do anything special in the latter case (the base of
616 the memory reference whose address is taken may be replaced in the
617 DECL_P case). The former case is more complicated, as we need to
618 ensure that the new address is still a gimple operand. Thus, it
619 is not sufficient to replace just the base of the memory reference --
620 we need to move the whole computation of the address out of the
621 loop. */
622 if (!is_gimple_val (t))
5f40b3cb
ZD
623 return NULL_TREE;
624
625 *walk_subtrees = 0;
8a171a59
ZD
626 obj = TREE_OPERAND (t, 0);
627 var = get_base_address (obj);
628 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
629 return NULL_TREE;
630
631 addr_type = TREE_TYPE (t);
cba1eb61
JJ
632 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
633 dta->gsi);
634 if (dta->gsi == NULL && addr == NULL_TREE)
635 {
636 dta->reset = true;
637 return NULL_TREE;
638 }
5f40b3cb
ZD
639 *tp = addr;
640
641 dta->changed = true;
642 return NULL_TREE;
643 }
644
726a989a 645 if (!EXPR_P (t))
5f40b3cb
ZD
646 *walk_subtrees = 0;
647
648 return NULL_TREE;
649}
650
cba1eb61 651/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
652 entry single exit region starting at ENTRY. DECL_ADDRESS contains
653 addresses of the references that had their address taken
654 already. */
5f40b3cb
ZD
655
656static void
cba1eb61 657eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
5f40b3cb
ZD
658 htab_t decl_address)
659{
660 struct elv_data dta;
cba1eb61 661 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 662
726a989a 663 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 664 dta.entry = entry;
5f40b3cb
ZD
665 dta.decl_address = decl_address;
666 dta.changed = false;
cba1eb61 667 dta.reset = false;
5f40b3cb 668
b5b8b0ac 669 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
670 {
671 dta.gsi = NULL;
672 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
673 eliminate_local_variables_1, &dta.info, NULL);
674 if (dta.reset)
675 {
676 gimple_debug_bind_reset_value (stmt);
677 dta.changed = true;
678 }
679 }
b5b8b0ac 680 else
cba1eb61
JJ
681 {
682 dta.gsi = gsi;
683 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
684 }
5f40b3cb
ZD
685
686 if (dta.changed)
687 update_stmt (stmt);
688}
689
9f9f72aa
AP
690/* Eliminates the references to local variables from the single entry
691 single exit region between the ENTRY and EXIT edges.
b8698a0f 692
a509ebb5 693 This includes:
b8698a0f
L
694 1) Taking address of a local variable -- these are moved out of the
695 region (and temporary variable is created to hold the address if
a509ebb5 696 necessary).
9f9f72aa 697
5f40b3cb 698 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 699 references. */
5f40b3cb
ZD
700
701static void
9f9f72aa 702eliminate_local_variables (edge entry, edge exit)
5f40b3cb 703{
9f9f72aa
AP
704 basic_block bb;
705 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
5f40b3cb 706 unsigned i;
726a989a 707 gimple_stmt_iterator gsi;
cba1eb61 708 bool has_debug_stmt = false;
5f40b3cb
ZD
709 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
710 free);
9f9f72aa
AP
711 basic_block entry_bb = entry->src;
712 basic_block exit_bb = exit->dest;
5f40b3cb 713
9f9f72aa 714 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 715
ac47786e 716 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 717 if (bb != entry_bb && bb != exit_bb)
726a989a 718 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
cba1eb61
JJ
719 if (gimple_debug_bind_p (gsi_stmt (gsi)))
720 has_debug_stmt = true;
721 else
722 eliminate_local_variables_stmt (entry, &gsi, decl_address);
723
724 if (has_debug_stmt)
725 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
726 if (bb != entry_bb && bb != exit_bb)
727 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
728 if (gimple_debug_bind_p (gsi_stmt (gsi)))
729 eliminate_local_variables_stmt (entry, &gsi, decl_address);
5f40b3cb
ZD
730
731 htab_delete (decl_address);
9f9f72aa
AP
732 VEC_free (basic_block, heap, body);
733}
734
735/* Returns true if expression EXPR is not defined between ENTRY and
736 EXIT, i.e. if all its operands are defined outside of the region. */
737
738static bool
739expr_invariant_in_region_p (edge entry, edge exit, tree expr)
740{
741 basic_block entry_bb = entry->src;
742 basic_block exit_bb = exit->dest;
743 basic_block def_bb;
9f9f72aa
AP
744
745 if (is_gimple_min_invariant (expr))
746 return true;
747
748 if (TREE_CODE (expr) == SSA_NAME)
749 {
726a989a 750 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
751 if (def_bb
752 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
753 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
754 return false;
755
756 return true;
757 }
758
726a989a 759 return false;
5f40b3cb
ZD
760}
761
762/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
763 The copies are stored to NAME_COPIES, if NAME was already duplicated,
764 its duplicate stored in NAME_COPIES is returned.
b8698a0f 765
5f40b3cb
ZD
766 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
767 duplicated, storing the copies in DECL_COPIES. */
768
769static tree
9f9f72aa
AP
770separate_decls_in_region_name (tree name,
771 htab_t name_copies, htab_t decl_copies,
772 bool copy_name_p)
5f40b3cb
ZD
773{
774 tree copy, var, var_copy;
775 unsigned idx, uid, nuid;
776 struct int_tree_map ielt, *nielt;
777 struct name_to_copy_elt elt, *nelt;
778 void **slot, **dslot;
779
780 if (TREE_CODE (name) != SSA_NAME)
781 return name;
782
783 idx = SSA_NAME_VERSION (name);
784 elt.version = idx;
785 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
786 copy_name_p ? INSERT : NO_INSERT);
787 if (slot && *slot)
788 return ((struct name_to_copy_elt *) *slot)->new_name;
789
790 var = SSA_NAME_VAR (name);
791 uid = DECL_UID (var);
792 ielt.uid = uid;
793 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
794 if (!*dslot)
795 {
796 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 797 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
798 add_referenced_var (var_copy);
799 nielt = XNEW (struct int_tree_map);
800 nielt->uid = uid;
801 nielt->to = var_copy;
802 *dslot = nielt;
803
804 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 805 it again. */
5f40b3cb
ZD
806 nuid = DECL_UID (var_copy);
807 ielt.uid = nuid;
808 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
809 gcc_assert (!*dslot);
810 nielt = XNEW (struct int_tree_map);
811 nielt->uid = nuid;
812 nielt->to = var_copy;
813 *dslot = nielt;
814 }
815 else
816 var_copy = ((struct int_tree_map *) *dslot)->to;
817
818 if (copy_name_p)
819 {
726a989a 820 copy = duplicate_ssa_name (name, NULL);
5f40b3cb
ZD
821 nelt = XNEW (struct name_to_copy_elt);
822 nelt->version = idx;
823 nelt->new_name = copy;
824 nelt->field = NULL_TREE;
825 *slot = nelt;
826 }
827 else
828 {
829 gcc_assert (!slot);
830 copy = name;
831 }
832
833 SSA_NAME_VAR (copy) = var_copy;
834 return copy;
835}
836
9f9f72aa
AP
837/* Finds the ssa names used in STMT that are defined outside the
838 region between ENTRY and EXIT and replaces such ssa names with
839 their duplicates. The duplicates are stored to NAME_COPIES. Base
840 decls of all ssa names used in STMT (including those defined in
841 LOOP) are replaced with the new temporary variables; the
842 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
843
844static void
726a989a 845separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
9f9f72aa 846 htab_t name_copies, htab_t decl_copies)
5f40b3cb
ZD
847{
848 use_operand_p use;
849 def_operand_p def;
850 ssa_op_iter oi;
851 tree name, copy;
852 bool copy_name_p;
853
854 mark_virtual_ops_for_renaming (stmt);
855
856 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
857 {
858 name = DEF_FROM_PTR (def);
859 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
860 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
861 false);
a509ebb5
RL
862 gcc_assert (copy == name);
863 }
5f40b3cb
ZD
864
865 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
866 {
867 name = USE_FROM_PTR (use);
868 if (TREE_CODE (name) != SSA_NAME)
869 continue;
870
9f9f72aa
AP
871 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
872 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
873 copy_name_p);
a509ebb5
RL
874 SET_USE (use, copy);
875 }
5f40b3cb
ZD
876}
877
b5b8b0ac
AO
878/* Finds the ssa names used in STMT that are defined outside the
879 region between ENTRY and EXIT and replaces such ssa names with
880 their duplicates. The duplicates are stored to NAME_COPIES. Base
881 decls of all ssa names used in STMT (including those defined in
882 LOOP) are replaced with the new temporary variables; the
883 replacement decls are stored in DECL_COPIES. */
884
885static bool
886separate_decls_in_region_debug_bind (gimple stmt,
887 htab_t name_copies, htab_t decl_copies)
888{
889 use_operand_p use;
890 ssa_op_iter oi;
891 tree var, name;
892 struct int_tree_map ielt;
893 struct name_to_copy_elt elt;
894 void **slot, **dslot;
895
896 var = gimple_debug_bind_get_var (stmt);
4f2a9af8
JJ
897 if (TREE_CODE (var) == DEBUG_EXPR_DECL)
898 return true;
b5b8b0ac
AO
899 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
900 ielt.uid = DECL_UID (var);
901 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
902 if (!dslot)
903 return true;
904 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
905
906 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
907 {
908 name = USE_FROM_PTR (use);
909 if (TREE_CODE (name) != SSA_NAME)
910 continue;
911
912 elt.version = SSA_NAME_VERSION (name);
913 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
914 if (!slot)
915 {
916 gimple_debug_bind_reset_value (stmt);
917 update_stmt (stmt);
918 break;
919 }
920
921 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
922 }
923
924 return false;
925}
926
0eb7e7aa
RL
927/* Callback for htab_traverse. Adds a field corresponding to the reduction
928 specified in SLOT. The type is passed in DATA. */
929
930static int
931add_field_for_reduction (void **slot, void *data)
a509ebb5 932{
b8698a0f 933
3d9a9f94
KG
934 struct reduction_info *const red = (struct reduction_info *) *slot;
935 tree const type = (tree) data;
726a989a 936 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
c2255bc4
AH
937 tree field = build_decl (gimple_location (red->reduc_stmt),
938 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
0eb7e7aa
RL
939
940 insert_field_into_struct (type, field);
941
942 red->field = field;
943
944 return 1;
945}
a509ebb5 946
5f40b3cb 947/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 948 described in SLOT. The type is passed in DATA. */
5f40b3cb
ZD
949
950static int
951add_field_for_name (void **slot, void *data)
952{
3d9a9f94
KG
953 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
954 tree type = (tree) data;
5f40b3cb
ZD
955 tree name = ssa_name (elt->version);
956 tree var = SSA_NAME_VAR (name);
c2255bc4
AH
957 tree field = build_decl (DECL_SOURCE_LOCATION (var),
958 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
5f40b3cb
ZD
959
960 insert_field_into_struct (type, field);
961 elt->field = field;
a509ebb5 962
5f40b3cb
ZD
963 return 1;
964}
965
b8698a0f
L
966/* Callback for htab_traverse. A local result is the intermediate result
967 computed by a single
fa10beec 968 thread, or the initial value in case no iteration was executed.
b8698a0f
L
969 This function creates a phi node reflecting these values.
970 The phi's result will be stored in NEW_PHI field of the
971 reduction's data structure. */
a509ebb5
RL
972
973static int
974create_phi_for_local_result (void **slot, void *data)
975{
3d9a9f94
KG
976 struct reduction_info *const reduc = (struct reduction_info *) *slot;
977 const struct loop *const loop = (const struct loop *) data;
a509ebb5 978 edge e;
726a989a 979 gimple new_phi;
a509ebb5
RL
980 basic_block store_bb;
981 tree local_res;
f5045c96 982 source_location locus;
a509ebb5 983
b8698a0f
L
984 /* STORE_BB is the block where the phi
985 should be stored. It is the destination of the loop exit.
726a989a 986 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
a509ebb5
RL
987 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
988
989 /* STORE_BB has two predecessors. One coming from the loop
990 (the reduction's result is computed at the loop),
b8698a0f
L
991 and another coming from a block preceding the loop,
992 when no iterations
993 are executed (the initial value should be taken). */
a509ebb5
RL
994 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
995 e = EDGE_PRED (store_bb, 1);
996 else
997 e = EDGE_PRED (store_bb, 0);
726a989a
RB
998 local_res
999 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1000 NULL);
f5045c96 1001 locus = gimple_location (reduc->reduc_stmt);
a509ebb5
RL
1002 new_phi = create_phi_node (local_res, store_bb);
1003 SSA_NAME_DEF_STMT (local_res) = new_phi;
f5045c96 1004 add_phi_arg (new_phi, reduc->init, e, locus);
726a989a 1005 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
f5045c96 1006 FALLTHRU_EDGE (loop->latch), locus);
a509ebb5
RL
1007 reduc->new_phi = new_phi;
1008
1009 return 1;
1010}
5f40b3cb
ZD
1011
1012struct clsn_data
1013{
1014 tree store;
1015 tree load;
1016
1017 basic_block store_bb;
1018 basic_block load_bb;
1019};
1020
a509ebb5 1021/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1022 reduction described in SLOT.
a509ebb5
RL
1023 DATA annotates the place in memory the atomic operation relates to,
1024 and the basic block it needs to be generated in. */
1025
1026static int
1027create_call_for_reduction_1 (void **slot, void *data)
1028{
3d9a9f94
KG
1029 struct reduction_info *const reduc = (struct reduction_info *) *slot;
1030 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a 1031 gimple_stmt_iterator gsi;
a509ebb5 1032 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
a509ebb5
RL
1033 tree load_struct;
1034 basic_block bb;
1035 basic_block new_bb;
1036 edge e;
0f900dfa 1037 tree t, addr, ref, x;
726a989a
RB
1038 tree tmp_load, name;
1039 gimple load;
a509ebb5 1040
70f34814 1041 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5 1042 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
a509ebb5
RL
1043
1044 addr = build_addr (t, current_function_decl);
1045
1046 /* Create phi node. */
1047 bb = clsn_data->load_bb;
1048
1049 e = split_block (bb, t);
1050 new_bb = e->dest;
1051
1052 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1053 add_referenced_var (tmp_load);
1054 tmp_load = make_ssa_name (tmp_load, NULL);
726a989a 1055 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1056 SSA_NAME_DEF_STMT (tmp_load) = load;
726a989a
RB
1057 gsi = gsi_start_bb (new_bb);
1058 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
a509ebb5
RL
1059
1060 e = split_block (new_bb, load);
1061 new_bb = e->dest;
726a989a 1062 gsi = gsi_start_bb (new_bb);
a509ebb5 1063 ref = tmp_load;
726a989a
RB
1064 x = fold_build2 (reduc->reduction_code,
1065 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1066 PHI_RESULT (reduc->new_phi));
a509ebb5 1067
726a989a
RB
1068 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1069 GSI_CONTINUE_LINKING);
a509ebb5 1070
726a989a 1071 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
a509ebb5
RL
1072 return 1;
1073}
1074
b8698a0f
L
1075/* Create the atomic operation at the join point of the threads.
1076 REDUCTION_LIST describes the reductions in the LOOP.
1077 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1078 shared data is stored in and loaded from. */
1079static void
b8698a0f 1080create_call_for_reduction (struct loop *loop, htab_t reduction_list,
a509ebb5
RL
1081 struct clsn_data *ld_st_data)
1082{
1083 htab_traverse (reduction_list, create_phi_for_local_result, loop);
726a989a 1084 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
a509ebb5
RL
1085 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1086 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1087}
1088
ae0bce62
RL
1089/* Callback for htab_traverse. Loads the final reduction value at the
1090 join point of all threads, and inserts it in the right place. */
a509ebb5
RL
1091
1092static int
1093create_loads_for_reductions (void **slot, void *data)
1094{
3d9a9f94
KG
1095 struct reduction_info *const red = (struct reduction_info *) *slot;
1096 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1097 gimple stmt;
1098 gimple_stmt_iterator gsi;
1099 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
a509ebb5 1100 tree load_struct;
ae0bce62 1101 tree name;
a509ebb5
RL
1102 tree x;
1103
726a989a 1104 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1105 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5
RL
1106 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1107 NULL_TREE);
a509ebb5 1108
ae0bce62 1109 x = load_struct;
a509ebb5 1110 name = PHI_RESULT (red->keep_res);
726a989a 1111 stmt = gimple_build_assign (name, x);
a509ebb5
RL
1112 SSA_NAME_DEF_STMT (name) = stmt;
1113
726a989a 1114 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1115
726a989a
RB
1116 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1117 !gsi_end_p (gsi); gsi_next (&gsi))
1118 if (gsi_stmt (gsi) == red->keep_res)
1119 {
1120 remove_phi_node (&gsi, false);
1121 return 1;
1122 }
1123 gcc_unreachable ();
a509ebb5
RL
1124}
1125
b8698a0f 1126/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1127 REDUCTION_LIST describes the list of reductions that the
fa10beec 1128 loads should be generated for. */
a509ebb5 1129static void
b8698a0f 1130create_final_loads_for_reduction (htab_t reduction_list,
a509ebb5
RL
1131 struct clsn_data *ld_st_data)
1132{
726a989a 1133 gimple_stmt_iterator gsi;
a509ebb5 1134 tree t;
726a989a 1135 gimple stmt;
a509ebb5 1136
726a989a 1137 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1138 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1139 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1140
726a989a
RB
1141 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1142 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
a509ebb5
RL
1143
1144 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1145
1146}
1147
0eb7e7aa
RL
1148/* Callback for htab_traverse. Store the neutral value for the
1149 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1150 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1151 The reduction is specified in SLOT. The store information is
1152 passed in DATA. */
0eb7e7aa
RL
1153
1154static int
1155create_stores_for_reduction (void **slot, void *data)
1156{
3d9a9f94
KG
1157 struct reduction_info *const red = (struct reduction_info *) *slot;
1158 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1159 tree t;
1160 gimple stmt;
1161 gimple_stmt_iterator gsi;
1162 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1163
1164 gsi = gsi_last_bb (clsn_data->store_bb);
1165 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1166 stmt = gimple_build_assign (t, red->initial_value);
0eb7e7aa 1167 mark_virtual_ops_for_renaming (stmt);
726a989a 1168 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1169
1170 return 1;
1171}
1172
a509ebb5
RL
1173/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1174 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1175 specified in SLOT. */
1176
5f40b3cb
ZD
1177static int
1178create_loads_and_stores_for_name (void **slot, void *data)
1179{
3d9a9f94
KG
1180 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1181 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1182 tree t;
1183 gimple stmt;
1184 gimple_stmt_iterator gsi;
5f40b3cb 1185 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1186 tree load_struct;
1187
726a989a
RB
1188 gsi = gsi_last_bb (clsn_data->store_bb);
1189 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1190 stmt = gimple_build_assign (t, ssa_name (elt->version));
5f40b3cb 1191 mark_virtual_ops_for_renaming (stmt);
726a989a 1192 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1193
726a989a 1194 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1195 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1196 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1197 stmt = gimple_build_assign (elt->new_name, t);
5f40b3cb 1198 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
726a989a 1199 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1200
1201 return 1;
1202}
1203
1204/* Moves all the variables used in LOOP and defined outside of it (including
1205 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1206 name) to a structure created for this purpose. The code
b8698a0f 1207
5f40b3cb
ZD
1208 while (1)
1209 {
1210 use (a);
1211 use (b);
1212 }
1213
1214 is transformed this way:
1215
1216 bb0:
1217 old.a = a;
1218 old.b = b;
1219
1220 bb1:
1221 a' = new->a;
1222 b' = new->b;
1223 while (1)
1224 {
1225 use (a');
1226 use (b');
1227 }
1228
1229 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1230 pointer `new' is intentionally not initialized (the loop will be split to a
1231 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1232 LD_ST_DATA holds information about the shared data structure used to pass
b8698a0f
L
1233 information among the threads. It is initialized here, and
1234 gen_parallel_loop will pass it to create_call_for_reduction that
1235 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1236 in LOOP. */
5f40b3cb
ZD
1237
1238static void
9f9f72aa 1239separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
b8698a0f 1240 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1241 struct clsn_data *ld_st_data)
a509ebb5 1242
5f40b3cb 1243{
9f9f72aa 1244 basic_block bb1 = split_edge (entry);
5f40b3cb
ZD
1245 basic_block bb0 = single_pred (bb1);
1246 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1247 name_to_copy_elt_eq, free);
1248 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1249 free);
5f40b3cb 1250 unsigned i;
726a989a
RB
1251 tree type, type_name, nvar;
1252 gimple_stmt_iterator gsi;
5f40b3cb 1253 struct clsn_data clsn_data;
9f9f72aa
AP
1254 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1255 basic_block bb;
1256 basic_block entry_bb = bb1;
1257 basic_block exit_bb = exit->dest;
b5b8b0ac 1258 bool has_debug_stmt = false;
5f40b3cb 1259
726a989a 1260 entry = single_succ_edge (entry_bb);
9f9f72aa 1261 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1262
ac47786e 1263 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 1264 {
b8698a0f 1265 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1266 {
726a989a
RB
1267 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1268 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1269 name_copies, decl_copies);
1270
1271 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
b5b8b0ac
AO
1272 {
1273 gimple stmt = gsi_stmt (gsi);
1274
1275 if (is_gimple_debug (stmt))
1276 has_debug_stmt = true;
1277 else
1278 separate_decls_in_region_stmt (entry, exit, stmt,
1279 name_copies, decl_copies);
1280 }
9f9f72aa 1281 }
5f40b3cb 1282 }
9f9f72aa 1283
b5b8b0ac
AO
1284 /* Now process debug bind stmts. We must not create decls while
1285 processing debug stmts, so we defer their processing so as to
1286 make sure we will have debug info for as many variables as
1287 possible (all of those that were dealt with in the loop above),
1288 and discard those for which we know there's nothing we can
1289 do. */
1290 if (has_debug_stmt)
ac47786e 1291 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
b5b8b0ac
AO
1292 if (bb != entry_bb && bb != exit_bb)
1293 {
1294 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1295 {
1296 gimple stmt = gsi_stmt (gsi);
1297
1298 if (gimple_debug_bind_p (stmt))
1299 {
1300 if (separate_decls_in_region_debug_bind (stmt,
1301 name_copies,
1302 decl_copies))
1303 {
1304 gsi_remove (&gsi, true);
1305 continue;
1306 }
1307 }
1308
1309 gsi_next (&gsi);
1310 }
1311 }
1312
9f9f72aa 1313 VEC_free (basic_block, heap, body);
5f40b3cb 1314
b8698a0f 1315 if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
5f40b3cb
ZD
1316 {
1317 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1318 loop carried and external variables in the loop). */
5f40b3cb
ZD
1319 *arg_struct = NULL;
1320 *new_arg_struct = NULL;
1321 }
1322 else
1323 {
1324 /* Create the type for the structure to store the ssa names to. */
1325 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1326 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1327 TYPE_DECL, create_tmp_var_name (".paral_data"),
5f40b3cb
ZD
1328 type);
1329 TYPE_NAME (type) = type_name;
1330
0eb7e7aa 1331 htab_traverse (name_copies, add_field_for_name, type);
9f9f72aa 1332 if (reduction_list && htab_elements (reduction_list) > 0)
0eb7e7aa
RL
1333 {
1334 /* Create the fields for reductions. */
1335 htab_traverse (reduction_list, add_field_for_reduction,
1336 type);
1337 }
5f40b3cb 1338 layout_type (type);
b8698a0f 1339
5f40b3cb
ZD
1340 /* Create the loads and stores. */
1341 *arg_struct = create_tmp_var (type, ".paral_data_store");
1342 add_referenced_var (*arg_struct);
1343 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1344 add_referenced_var (nvar);
726a989a 1345 *new_arg_struct = make_ssa_name (nvar, NULL);
5f40b3cb 1346
a509ebb5
RL
1347 ld_st_data->store = *arg_struct;
1348 ld_st_data->load = *new_arg_struct;
1349 ld_st_data->store_bb = bb0;
1350 ld_st_data->load_bb = bb1;
0eb7e7aa 1351
5f40b3cb 1352 htab_traverse (name_copies, create_loads_and_stores_for_name,
a509ebb5
RL
1353 ld_st_data);
1354
ae0bce62
RL
1355 /* Load the calculation from memory (after the join of the threads). */
1356
9f9f72aa 1357 if (reduction_list && htab_elements (reduction_list) > 0)
a509ebb5 1358 {
0eb7e7aa 1359 htab_traverse (reduction_list, create_stores_for_reduction,
b8698a0f 1360 ld_st_data);
726a989a 1361 clsn_data.load = make_ssa_name (nvar, NULL);
9f9f72aa 1362 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1363 clsn_data.store = ld_st_data->store;
1364 create_final_loads_for_reduction (reduction_list, &clsn_data);
1365 }
5f40b3cb
ZD
1366 }
1367
1368 htab_delete (decl_copies);
1369 htab_delete (name_copies);
1370}
1371
1372/* Bitmap containing uids of functions created by parallelization. We cannot
1373 allocate it from the default obstack, as it must live across compilation
1374 of several functions; we make it gc allocated instead. */
1375
1376static GTY(()) bitmap parallelized_functions;
1377
1378/* Returns true if FN was created by create_loop_fn. */
1379
1380static bool
1381parallelized_function_p (tree fn)
1382{
1383 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1384 return false;
1385
1386 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1387}
1388
1389/* Creates and returns an empty function that will receive the body of
1390 a parallelized loop. */
1391
1392static tree
9ff70652 1393create_loop_fn (location_t loc)
5f40b3cb
ZD
1394{
1395 char buf[100];
1396 char *tname;
1397 tree decl, type, name, t;
1398 struct function *act_cfun = cfun;
1399 static unsigned loopfn_num;
1400
1401 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1402 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1403 clean_symbol_name (tname);
1404 name = get_identifier (tname);
1405 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1406
9ff70652 1407 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1408 if (!parallelized_functions)
1409 parallelized_functions = BITMAP_GGC_ALLOC ();
1410 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1411
1412 TREE_STATIC (decl) = 1;
1413 TREE_USED (decl) = 1;
1414 DECL_ARTIFICIAL (decl) = 1;
1415 DECL_IGNORED_P (decl) = 0;
1416 TREE_PUBLIC (decl) = 0;
1417 DECL_UNINLINABLE (decl) = 1;
1418 DECL_EXTERNAL (decl) = 0;
1419 DECL_CONTEXT (decl) = NULL_TREE;
1420 DECL_INITIAL (decl) = make_node (BLOCK);
1421
9ff70652 1422 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1423 DECL_ARTIFICIAL (t) = 1;
1424 DECL_IGNORED_P (t) = 1;
1425 DECL_RESULT (decl) = t;
1426
9ff70652 1427 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1428 ptr_type_node);
1429 DECL_ARTIFICIAL (t) = 1;
1430 DECL_ARG_TYPE (t) = ptr_type_node;
1431 DECL_CONTEXT (t) = decl;
1432 TREE_USED (t) = 1;
1433 DECL_ARGUMENTS (decl) = t;
1434
182e0d71 1435 allocate_struct_function (decl, false);
5f40b3cb
ZD
1436
1437 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1438 it. */
5576d6f2 1439 set_cfun (act_cfun);
5f40b3cb
ZD
1440
1441 return decl;
1442}
1443
5f40b3cb
ZD
1444/* Moves the exit condition of LOOP to the beginning of its header, and
1445 duplicates the part of the last iteration that gets disabled to the
1446 exit of the loop. NIT is the number of iterations of the loop
1447 (used to initialize the variables in the duplicated part).
b8698a0f 1448
fa10beec 1449 TODO: the common case is that latch of the loop is empty and immediately
5f40b3cb
ZD
1450 follows the loop exit. In this case, it would be better not to copy the
1451 body of the loop, but only move the entry of the loop directly before the
1452 exit check and increase the number of iterations of the loop by one.
b8698a0f 1453 This may need some additional preconditioning in case NIT = ~0.
a509ebb5 1454 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1455
1456static void
a509ebb5 1457transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1458{
1459 basic_block *bbs, *nbbs, ex_bb, orig_header;
1460 unsigned n;
1461 bool ok;
1462 edge exit = single_dom_exit (loop), hpred;
726a989a 1463 tree control, control_name, res, t;
48710229 1464 gimple phi, nphi, cond_stmt, stmt, cond_nit;
726a989a 1465 gimple_stmt_iterator gsi;
48710229 1466 tree nit_1;
5f40b3cb
ZD
1467
1468 split_block_after_labels (loop->header);
1469 orig_header = single_succ (loop->header);
1470 hpred = single_succ_edge (loop->header);
1471
1472 cond_stmt = last_stmt (exit->src);
726a989a
RB
1473 control = gimple_cond_lhs (cond_stmt);
1474 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1475
1476 /* Make sure that we have phi nodes on exit for all loop header phis
1477 (create_parallel_loop requires that). */
726a989a 1478 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1479 {
726a989a 1480 phi = gsi_stmt (gsi);
5f40b3cb
ZD
1481 res = PHI_RESULT (phi);
1482 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1483 SET_PHI_RESULT (phi, t);
5f40b3cb
ZD
1484 nphi = create_phi_node (res, orig_header);
1485 SSA_NAME_DEF_STMT (res) = nphi;
f5045c96 1486 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1487
1488 if (res == control)
1489 {
726a989a 1490 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1491 update_stmt (cond_stmt);
1492 control = t;
1493 }
1494 }
5f40b3cb 1495 bbs = get_loop_body_in_dom_order (loop);
48710229
RL
1496
1497 for (n = 0; bbs[n] != loop->latch; n++)
5f40b3cb
ZD
1498 continue;
1499 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1500 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1501 bbs + 1, n, nbbs);
5f40b3cb
ZD
1502 gcc_assert (ok);
1503 free (bbs);
1504 ex_bb = nbbs[0];
1505 free (nbbs);
1506
b8698a0f 1507 /* Other than reductions, the only gimple reg that should be copied
726a989a 1508 out of the loop is the control variable. */
a509ebb5 1509
5f40b3cb 1510 control_name = NULL_TREE;
726a989a 1511 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
5f40b3cb 1512 {
726a989a 1513 phi = gsi_stmt (gsi);
5f40b3cb
ZD
1514 res = PHI_RESULT (phi);
1515 if (!is_gimple_reg (res))
726a989a
RB
1516 {
1517 gsi_next (&gsi);
1518 continue;
1519 }
5f40b3cb 1520
a509ebb5 1521 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1522 keep the phi at the reduction's keep_res field. The
1523 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1524 variable when exiting the loop. */
1525
1526 exit = single_dom_exit (loop);
1527
b8698a0f 1528 if (htab_elements (reduction_list) > 0)
a509ebb5
RL
1529 {
1530 struct reduction_info *red;
1531
1532 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1533 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1534 if (red)
726a989a
RB
1535 {
1536 red->keep_res = phi;
1537 gsi_next (&gsi);
1538 continue;
1539 }
a509ebb5 1540 }
726a989a
RB
1541 gcc_assert (control_name == NULL_TREE
1542 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1543 control_name = res;
726a989a 1544 remove_phi_node (&gsi, false);
5f40b3cb
ZD
1545 }
1546 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1547
b8698a0f 1548 /* Initialize the control variable to number of iterations
48710229 1549 according to the rhs of the exit condition. */
726a989a 1550 gsi = gsi_after_labels (ex_bb);
b8698a0f 1551 cond_nit = last_stmt (exit->src);
48710229
RL
1552 nit_1 = gimple_cond_rhs (cond_nit);
1553 nit_1 = force_gimple_operand_gsi (&gsi,
1554 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1555 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1556 stmt = gimple_build_assign (control_name, nit_1);
726a989a
RB
1557 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1558 SSA_NAME_DEF_STMT (control_name) = stmt;
5f40b3cb
ZD
1559}
1560
1561/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1562 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb
ZD
1563 NEW_DATA is the variable that should be initialized from the argument
1564 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1565 basic block containing GIMPLE_OMP_PARALLEL tree. */
5f40b3cb
ZD
1566
1567static basic_block
1568create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1569 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1570{
726a989a 1571 gimple_stmt_iterator gsi;
5f40b3cb 1572 basic_block bb, paral_bb, for_bb, ex_bb;
0f900dfa 1573 tree t, param;
726a989a
RB
1574 gimple stmt, for_stmt, phi, cond_stmt;
1575 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1576 edge exit, nexit, guard, end, e;
1577
726a989a 1578 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1579 bb = loop_preheader_edge (loop)->src;
1580 paral_bb = single_pred (bb);
726a989a 1581 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1582
9ff70652 1583 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1584 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 1585 = build_int_cst (integer_type_node, n_threads);
726a989a 1586 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
9ff70652 1587 gimple_set_location (stmt, loc);
5f40b3cb 1588
726a989a 1589 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1590
1591 /* Initialize NEW_DATA. */
1592 if (data)
1593 {
726a989a
RB
1594 gsi = gsi_after_labels (bb);
1595
1596 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1597 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1598 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1599 SSA_NAME_DEF_STMT (param) = stmt;
1600
1601 stmt = gimple_build_assign (new_data,
1602 fold_convert (TREE_TYPE (new_data), param));
1603 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1604 SSA_NAME_DEF_STMT (new_data) = stmt;
5f40b3cb
ZD
1605 }
1606
726a989a 1607 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 1608 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 1609 gsi = gsi_last_bb (bb);
9ff70652
JJ
1610 stmt = gimple_build_omp_return (false);
1611 gimple_set_location (stmt, loc);
1612 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1613
726a989a 1614 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 1615 gcc_assert (loop->header == single_dom_exit (loop)->src);
726a989a 1616 cond_stmt = last_stmt (loop->header);
5f40b3cb 1617
726a989a 1618 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
1619 cvar_base = SSA_NAME_VAR (cvar);
1620 phi = SSA_NAME_DEF_STMT (cvar);
1621 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
726a989a 1622 initvar = make_ssa_name (cvar_base, NULL);
5f40b3cb
ZD
1623 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1624 initvar);
1625 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1626
1dff453d 1627 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
1628 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1629 gsi_remove (&gsi, true);
5f40b3cb
ZD
1630
1631 /* Prepare cfg. */
1632 for_bb = split_edge (loop_preheader_edge (loop));
1633 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1634 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1635 gcc_assert (exit == single_dom_exit (loop));
1636
1637 guard = make_edge (for_bb, ex_bb, 0);
1638 single_succ_edge (loop->latch)->flags = 0;
1639 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
726a989a 1640 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
5f40b3cb 1641 {
f5045c96
AM
1642 source_location locus;
1643 tree def;
726a989a 1644 phi = gsi_stmt (gsi);
726a989a 1645 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
f5045c96
AM
1646
1647 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 1648 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96
AM
1649 loop_preheader_edge (loop));
1650 add_phi_arg (phi, def, guard, locus);
1651
1652 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1653 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1654 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
1655 }
1656 e = redirect_edge_and_branch (exit, nexit->dest);
1657 PENDING_STMT (e) = NULL;
1658
726a989a
RB
1659 /* Emit GIMPLE_OMP_FOR. */
1660 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 1661 type = TREE_TYPE (cvar);
9ff70652 1662 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
5f40b3cb
ZD
1663 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1664
726a989a 1665 for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
9ff70652 1666 gimple_set_location (for_stmt, loc);
726a989a
RB
1667 gimple_omp_for_set_index (for_stmt, 0, initvar);
1668 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1669 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1670 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1671 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1672 cvar_base,
1673 build_int_cst (type, 1)));
1674
1675 gsi = gsi_last_bb (for_bb);
1676 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1677 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1678
726a989a
RB
1679 /* Emit GIMPLE_OMP_CONTINUE. */
1680 gsi = gsi_last_bb (loop->latch);
1681 stmt = gimple_build_omp_continue (cvar_next, cvar);
9ff70652 1682 gimple_set_location (stmt, loc);
726a989a
RB
1683 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1684 SSA_NAME_DEF_STMT (cvar_next) = stmt;
5f40b3cb 1685
726a989a
RB
1686 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1687 gsi = gsi_last_bb (ex_bb);
9ff70652
JJ
1688 stmt = gimple_build_omp_return (true);
1689 gimple_set_location (stmt, loc);
1690 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1691
1692 return paral_bb;
1693}
1694
08dab97a
RL
1695/* Generates code to execute the iterations of LOOP in N_THREADS
1696 threads in parallel.
1697
1698 NITER describes number of iterations of LOOP.
fa10beec 1699 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
1700
1701static void
08dab97a 1702gen_parallel_loop (struct loop *loop, htab_t reduction_list,
a509ebb5 1703 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb 1704{
9326236d 1705 loop_iterator li;
5f40b3cb 1706 tree many_iterations_cond, type, nit;
726a989a
RB
1707 tree arg_struct, new_arg_struct;
1708 gimple_seq stmts;
5f40b3cb 1709 basic_block parallel_head;
9f9f72aa 1710 edge entry, exit;
a509ebb5 1711 struct clsn_data clsn_data;
5f40b3cb 1712 unsigned prob;
9ff70652
JJ
1713 location_t loc;
1714 gimple cond_stmt;
5f40b3cb
ZD
1715
1716 /* From
1717
1718 ---------------------------------------------------------------------
1719 loop
1720 {
1721 IV = phi (INIT, IV + STEP)
1722 BODY1;
1723 if (COND)
1724 break;
1725 BODY2;
1726 }
1727 ---------------------------------------------------------------------
1728
1729 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1730 we generate the following code:
1731
1732 ---------------------------------------------------------------------
1733
1734 if (MAY_BE_ZERO
a509ebb5
RL
1735 || NITER < MIN_PER_THREAD * N_THREADS)
1736 goto original;
5f40b3cb
ZD
1737
1738 BODY1;
1739 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 1740 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 1741 load the variables from DATA.
726a989a 1742 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
1743 BODY2;
1744 BODY1;
726a989a
RB
1745 GIMPLE_OMP_CONTINUE;
1746 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1747 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
1748 goto end;
1749
1750 original:
1751 loop
1752 {
1753 IV = phi (INIT, IV + STEP)
1754 BODY1;
1755 if (COND)
1756 break;
1757 BODY2;
1758 }
1759
1760 end:
1761
1762 */
1763
1764 /* Create two versions of the loop -- in the old one, we know that the
1765 number of iterations is large enough, and we will transform it into the
1766 loop that will be split to loop_fn, the new one will be used for the
1767 remaining iterations. */
a509ebb5 1768
5f40b3cb
ZD
1769 type = TREE_TYPE (niter->niter);
1770 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1771 NULL_TREE);
1772 if (stmts)
726a989a 1773 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1774
1775 many_iterations_cond =
a509ebb5
RL
1776 fold_build2 (GE_EXPR, boolean_type_node,
1777 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
5f40b3cb 1778 many_iterations_cond
a509ebb5
RL
1779 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1780 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1781 many_iterations_cond);
5f40b3cb 1782 many_iterations_cond
a509ebb5 1783 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 1784 if (stmts)
726a989a 1785 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1786 if (!is_gimple_condexpr (many_iterations_cond))
1787 {
1788 many_iterations_cond
a509ebb5
RL
1789 = force_gimple_operand (many_iterations_cond, &stmts,
1790 true, NULL_TREE);
5f40b3cb 1791 if (stmts)
726a989a 1792 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
1793 }
1794
1795 initialize_original_copy_tables ();
1796
1797 /* We assume that the loop usually iterates a lot. */
1798 prob = 4 * REG_BR_PROB_BASE / 5;
0f900dfa
JJ
1799 loop_version (loop, many_iterations_cond, NULL,
1800 prob, prob, REG_BR_PROB_BASE - prob, true);
5f40b3cb
ZD
1801 update_ssa (TODO_update_ssa);
1802 free_original_copy_tables ();
1803
1804 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 1805 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb
ZD
1806
1807 /* Ensure that the exit condition is the first statement in the loop. */
a509ebb5
RL
1808 transform_to_exit_first_loop (loop, reduction_list, nit);
1809
fa10beec 1810 /* Generate initializations for reductions. */
b8698a0f 1811 if (htab_elements (reduction_list) > 0)
a509ebb5 1812 htab_traverse (reduction_list, initialize_reductions, loop);
5f40b3cb
ZD
1813
1814 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
1815 gcc_assert (single_exit (loop));
1816 entry = loop_preheader_edge (loop);
1817 exit = single_dom_exit (loop);
5f40b3cb 1818
9f9f72aa 1819 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
1820 /* In the old loop, move all variables non-local to the loop to a structure
1821 and back, and create separate decls for the variables used in loop. */
b8698a0f 1822 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
9f9f72aa 1823 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
1824
1825 /* Create the parallel constructs. */
9ff70652
JJ
1826 loc = UNKNOWN_LOCATION;
1827 cond_stmt = last_stmt (loop->header);
1828 if (cond_stmt)
1829 loc = gimple_location (cond_stmt);
1830 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1831 new_arg_struct, n_threads, loc);
b8698a0f 1832 if (htab_elements (reduction_list) > 0)
a509ebb5 1833 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
1834
1835 scev_reset ();
1836
1837 /* Cancel the loop (it is simpler to do it here rather than to teach the
1838 expander to do it). */
1839 cancel_loop_tree (loop);
1840
92a6bdbd
SP
1841 /* Free loop bound estimations that could contain references to
1842 removed statements. */
1843 FOR_EACH_LOOP (li, loop, 0)
1844 free_numbers_of_iterations_estimates_loop (loop);
1845
5f40b3cb
ZD
1846 /* Expand the parallel constructs. We do it directly here instead of running
1847 a separate expand_omp pass, since it is more efficient, and less likely to
1848 cause troubles with further analyses not being able to deal with the
1849 OMP trees. */
a509ebb5 1850
5f40b3cb
ZD
1851 omp_expand_local (parallel_head);
1852}
1853
9857228c
SP
1854/* Returns true when LOOP contains vector phi nodes. */
1855
1856static bool
726a989a 1857loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
1858{
1859 unsigned i;
1860 basic_block *bbs = get_loop_body_in_dom_order (loop);
726a989a 1861 gimple_stmt_iterator gsi;
9857228c 1862 bool res = true;
9857228c
SP
1863
1864 for (i = 0; i < loop->num_nodes; i++)
726a989a
RB
1865 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1866 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
9857228c
SP
1867 goto end;
1868
1869 res = false;
1870 end:
1871 free (bbs);
1872 return res;
1873}
1874
08dab97a
RL
1875/* Create a reduction_info struct, initialize it with REDUC_STMT
1876 and PHI, insert it to the REDUCTION_LIST. */
1877
1878static void
1879build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1880{
1881 PTR *slot;
1882 struct reduction_info *new_reduction;
1883
1884 gcc_assert (reduc_stmt);
b8698a0f 1885
08dab97a
RL
1886 if (dump_file && (dump_flags & TDF_DETAILS))
1887 {
1888 fprintf (dump_file,
1889 "Detected reduction. reduction stmt is: \n");
1890 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1891 fprintf (dump_file, "\n");
1892 }
b8698a0f 1893
08dab97a 1894 new_reduction = XCNEW (struct reduction_info);
b8698a0f 1895
08dab97a
RL
1896 new_reduction->reduc_stmt = reduc_stmt;
1897 new_reduction->reduc_phi = phi;
5d1fd1de 1898 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a
RL
1899 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1900 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1901 *slot = new_reduction;
1902}
1903
5d1fd1de
JJ
1904/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1905
1906static int
1907set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1908{
1909 struct reduction_info *const red = (struct reduction_info *) *slot;
1910 gimple_set_uid (red->reduc_phi, red->reduc_version);
1911 return 1;
1912}
1913
08dab97a
RL
1914/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1915
1916static void
1917gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1918{
1919 gimple_stmt_iterator gsi;
1920 loop_vec_info simple_loop_info;
1921
1922 vect_dump = NULL;
1923 simple_loop_info = vect_analyze_loop_form (loop);
1924
1925 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1926 {
1927 gimple phi = gsi_stmt (gsi);
1928 affine_iv iv;
1929 tree res = PHI_RESULT (phi);
1930 bool double_reduc;
1931
1932 if (!is_gimple_reg (res))
1933 continue;
1934
1935 if (!simple_iv (loop, loop, res, &iv, true)
1936 && simple_loop_info)
1937 {
8a9ecffd
MM
1938 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1939 phi, true,
1940 &double_reduc);
48710229 1941 if (reduc_stmt && !double_reduc)
08dab97a
RL
1942 build_new_reduction (reduction_list, reduc_stmt, phi);
1943 }
1944 }
5d1fd1de
JJ
1945 destroy_loop_vec_info (simple_loop_info, true);
1946
1947 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1948 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1949 only now. */
1950 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
08dab97a
RL
1951}
1952
1953/* Try to initialize NITER for code generation part. */
1954
1955static bool
1956try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1957{
1958 edge exit = single_dom_exit (loop);
1959
1960 gcc_assert (exit);
1961
1962 /* We need to know # of iterations, and there should be no uses of values
1963 defined inside loop outside of it, unless the values are invariants of
1964 the loop. */
1965 if (!number_of_iterations_exit (loop, exit, niter, false))
1966 {
1967 if (dump_file && (dump_flags & TDF_DETAILS))
1968 fprintf (dump_file, " FAILED: number of iterations not known\n");
1969 return false;
1970 }
1971
1972 return true;
1973}
1974
1975/* Try to initialize REDUCTION_LIST for code generation part.
1976 REDUCTION_LIST describes the reductions. */
1977
1978static bool
1979try_create_reduction_list (loop_p loop, htab_t reduction_list)
1980{
1981 edge exit = single_dom_exit (loop);
1982 gimple_stmt_iterator gsi;
1983
1984 gcc_assert (exit);
1985
1986 gather_scalar_reductions (loop, reduction_list);
1987
b8698a0f 1988
08dab97a
RL
1989 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1990 {
1991 gimple phi = gsi_stmt (gsi);
1992 struct reduction_info *red;
1993 imm_use_iterator imm_iter;
1994 use_operand_p use_p;
1995 gimple reduc_phi;
1996 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1997
1998 if (is_gimple_reg (val))
1999 {
2000 if (dump_file && (dump_flags & TDF_DETAILS))
2001 {
2002 fprintf (dump_file, "phi is ");
2003 print_gimple_stmt (dump_file, phi, 0, 0);
2004 fprintf (dump_file, "arg of phi to exit: value ");
2005 print_generic_expr (dump_file, val, 0);
2006 fprintf (dump_file, " used outside loop\n");
2007 fprintf (dump_file,
2008 " checking if it a part of reduction pattern: \n");
2009 }
2010 if (htab_elements (reduction_list) == 0)
2011 {
2012 if (dump_file && (dump_flags & TDF_DETAILS))
2013 fprintf (dump_file,
2014 " FAILED: it is not a part of reduction.\n");
2015 return false;
2016 }
2017 reduc_phi = NULL;
2018 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2019 {
4942af9b
JJ
2020 if (!gimple_debug_bind_p (USE_STMT (use_p))
2021 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2022 {
2023 reduc_phi = USE_STMT (use_p);
2024 break;
2025 }
2026 }
2027 red = reduction_phi (reduction_list, reduc_phi);
2028 if (red == NULL)
2029 {
2030 if (dump_file && (dump_flags & TDF_DETAILS))
2031 fprintf (dump_file,
2032 " FAILED: it is not a part of reduction.\n");
2033 return false;
2034 }
2035 if (dump_file && (dump_flags & TDF_DETAILS))
2036 {
2037 fprintf (dump_file, "reduction phi is ");
2038 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2039 fprintf (dump_file, "reduction stmt is ");
2040 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2041 }
2042 }
2043 }
2044
2045 /* The iterations of the loop may communicate only through bivs whose
2046 iteration space can be distributed efficiently. */
2047 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2048 {
2049 gimple phi = gsi_stmt (gsi);
2050 tree def = PHI_RESULT (phi);
2051 affine_iv iv;
2052
2053 if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2054 {
2055 struct reduction_info *red;
2056
2057 red = reduction_phi (reduction_list, phi);
2058 if (red == NULL)
2059 {
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2061 fprintf (dump_file,
2062 " FAILED: scalar dependency between iterations\n");
2063 return false;
2064 }
2065 }
2066 }
2067
2068
2069 return true;
2070}
2071
5f40b3cb
ZD
2072/* Detect parallel loops and generate parallel code using libgomp
2073 primitives. Returns true if some loop was parallelized, false
2074 otherwise. */
2075
2076bool
2077parallelize_loops (void)
2078{
2079 unsigned n_threads = flag_tree_parallelize_loops;
2080 bool changed = false;
2081 struct loop *loop;
2082 struct tree_niter_desc niter_desc;
2083 loop_iterator li;
a509ebb5 2084 htab_t reduction_list;
f873b205 2085 struct obstack parloop_obstack;
8adfe01d
RL
2086 HOST_WIDE_INT estimated;
2087 LOC loop_loc;
f873b205 2088
5f40b3cb
ZD
2089 /* Do not parallelize loops in the functions created by parallelization. */
2090 if (parallelized_function_p (cfun->decl))
2091 return false;
8adfe01d
RL
2092 if (cfun->has_nonlocal_label)
2093 return false;
5f40b3cb 2094
f873b205 2095 gcc_obstack_init (&parloop_obstack);
a509ebb5 2096 reduction_list = htab_create (10, reduction_info_hash,
08dab97a 2097 reduction_info_eq, free);
726a989a 2098 init_stmt_vec_info_vec ();
a509ebb5 2099
5f40b3cb
ZD
2100 FOR_EACH_LOOP (li, loop, 0)
2101 {
a509ebb5 2102 htab_empty (reduction_list);
48710229
RL
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2104 {
2105 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2106 if (loop->inner)
2107 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2108 else
2109 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2110 }
b8698a0f 2111
48710229 2112 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2113 checking results. */
2114 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2115 {
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2117 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2118 continue;
48710229 2119 }
87d4d0ee 2120
48710229
RL
2121 if (!single_dom_exit (loop))
2122 {
b8698a0f 2123
48710229
RL
2124 if (dump_file && (dump_flags & TDF_DETAILS))
2125 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2126
08dab97a 2127 continue;
48710229 2128 }
08dab97a
RL
2129
2130 if (/* And of course, the loop must be parallelizable. */
2131 !can_duplicate_loop_p (loop)
1d4af1e8 2132 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2133 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2134 /* FIXME: the check for vector phi nodes could be removed. */
08dab97a
RL
2135 || loop_has_vector_phi_nodes (loop))
2136 continue;
8adfe01d 2137 estimated = estimated_loop_iterations_int (loop, false);
87d4d0ee
SP
2138 /* FIXME: Bypass this check as graphite doesn't update the
2139 count and frequency correctly now. */
2140 if (!flag_loop_parallelize_all
8adfe01d
RL
2141 && ((estimated !=-1
2142 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2143 /* Do not bother with loops in cold areas. */
2144 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2145 continue;
b8698a0f 2146
08dab97a
RL
2147 if (!try_get_loop_niter (loop, &niter_desc))
2148 continue;
2149
2150 if (!try_create_reduction_list (loop, reduction_list))
2151 continue;
2152
f873b205
LB
2153 if (!flag_loop_parallelize_all
2154 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2155 continue;
2156
2157 changed = true;
48710229
RL
2158 if (dump_file && (dump_flags & TDF_DETAILS))
2159 {
48710229 2160 if (loop->inner)
8adfe01d 2161 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2162 else
8adfe01d
RL
2163 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2164 loop_loc = find_loop_location (loop);
2165 if (loop_loc != UNKNOWN_LOC)
2166 fprintf (dump_file, "\nloop at %s:%d: ",
2167 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
b8698a0f
L
2168 }
2169 gen_parallel_loop (loop, reduction_list,
08dab97a 2170 n_threads, &niter_desc);
5f40b3cb
ZD
2171 verify_flow_info ();
2172 verify_dominators (CDI_DOMINATORS);
2173 verify_loop_structure ();
a3b9e73c 2174 verify_loop_closed_ssa (true);
5f40b3cb
ZD
2175 }
2176
726a989a 2177 free_stmt_vec_info_vec ();
a509ebb5 2178 htab_delete (reduction_list);
f873b205 2179 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2180
2181 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2182 which local variables will escape. Reset the points-to solution
2183 for ESCAPED. */
6b8ed145 2184 if (changed)
d086d311 2185 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2186
5f40b3cb
ZD
2187 return changed;
2188}
2189
2190#include "gt-tree-parloops.h"