/* NOTE(review): the following header lines were extraction artifacts from a
   git web-interface blame view of gcc/tree-parloops.c and have been folded
   into this comment so the file remains syntactically valid C/C++.  */
5f40b3cb 1/* Loop autoparallelization.
d1e082c2 2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
4d648807 25#include "tree.h"
45b0be94 26#include "gimplify.h"
442b4905
AM
27#include "gimple-ssa.h"
28#include "tree-cfg.h"
29#include "tree-phinodes.h"
30#include "ssa-iterators.h"
31#include "tree-ssanames.h"
e28030cf
AM
32#include "tree-ssa-loop-ivopts.h"
33#include "tree-ssa-loop-manip.h"
34#include "tree-ssa-loop-niter.h"
442b4905
AM
35#include "tree-ssa-loop.h"
36#include "tree-into-ssa.h"
5f40b3cb 37#include "cfgloop.h"
5f40b3cb 38#include "tree-data-ref.h"
1bd6497c 39#include "tree-scalar-evolution.h"
cf835838 40#include "gimple-pretty-print.h"
5f40b3cb 41#include "tree-pass.h"
5f40b3cb 42#include "langhooks.h"
a509ebb5 43#include "tree-vectorizer.h"
4a8fb1a1 44#include "tree-hasher.h"
c1bf2a39 45#include "tree-parloops.h"
0645c1a2 46#include "omp-low.h"
5f40b3cb
ZD
47
48/* This pass tries to distribute iterations of loops into several threads.
49 The implementation is straightforward -- for each loop we test whether its
50 iterations are independent, and if it is the case (and some additional
51 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
52 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
53 machinery do its job.
b8698a0f 54
5f40b3cb
ZD
55 The most of the complexity is in bringing the code into shape expected
56 by the omp expanders:
726a989a
RB
57 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
58 variable and that the exit test is at the start of the loop body
59 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
60 variables by accesses through pointers, and breaking up ssa chains
61 by storing the values incoming to the parallelized loop to a structure
62 passed to the new function as an argument (something similar is done
63 in omp gimplification, unfortunately only a small part of the code
64 can be shared).
65
66 TODO:
67 -- if there are several parallelizable loops in a function, it may be
68 possible to generate the threads just once (using synchronization to
69 ensure that cross-loop dependences are obeyed).
70837b71
RL
70 -- handling of common reduction patterns for outer loops.
71
72 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 73/*
a509ebb5 74 Reduction handling:
8a9ecffd 75 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 76 The code transformation will be introduced by an example.
b8698a0f
L
77
78
a509ebb5
RL
79parloop
80{
81 int sum=1;
82
0eb7e7aa 83 for (i = 0; i < N; i++)
a509ebb5
RL
84 {
85 x[i] = i + 3;
86 sum+=x[i];
87 }
88}
89
0eb7e7aa 90gimple-like code:
a509ebb5
RL
91header_bb:
92
0eb7e7aa
RL
93 # sum_29 = PHI <sum_11(5), 1(3)>
94 # i_28 = PHI <i_12(5), 0(3)>
95 D.1795_8 = i_28 + 3;
96 x[i_28] = D.1795_8;
97 sum_11 = D.1795_8 + sum_29;
98 i_12 = i_28 + 1;
99 if (N_6(D) > i_12)
100 goto header_bb;
101
a509ebb5
RL
102
103exit_bb:
104
0eb7e7aa
RL
105 # sum_21 = PHI <sum_11(4)>
106 printf (&"%d"[0], sum_21);
a509ebb5
RL
107
108
109after reduction transformation (only relevant parts):
110
111parloop
112{
113
114....
115
0eb7e7aa 116
fa10beec 117 # Storing the initial value given by the user. #
0eb7e7aa 118
ae0bce62 119 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
120
121 #pragma omp parallel num_threads(4)
a509ebb5 122
0eb7e7aa 123 #pragma omp for schedule(static)
ae0bce62
RL
124
125 # The neutral element corresponding to the particular
126 reduction's operation, e.g. 0 for PLUS_EXPR,
127 1 for MULT_EXPR, etc. replaces the user's initial value. #
128
129 # sum.27_29 = PHI <sum.27_11, 0>
130
0eb7e7aa 131 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 132
726a989a 133 GIMPLE_OMP_CONTINUE
a509ebb5 134
0eb7e7aa
RL
135 # Adding this reduction phi is done at create_phi_for_local_result() #
136 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 137 GIMPLE_OMP_RETURN
b8698a0f
L
138
139 # Creating the atomic operation is done at
0eb7e7aa 140 create_call_for_reduction_1() #
a509ebb5 141
0eb7e7aa
RL
142 #pragma omp atomic_load
143 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
144 D.1840_60 = sum.27_56 + D.1839_59;
145 #pragma omp atomic_store (D.1840_60);
b8698a0f 146
726a989a 147 GIMPLE_OMP_RETURN
b8698a0f 148
0eb7e7aa
RL
149 # collecting the result after the join of the threads is done at
150 create_loads_for_reductions().
ae0bce62
RL
151 The value computed by the threads is loaded from the
152 shared struct. #
153
b8698a0f 154
0eb7e7aa 155 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 156 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
157 sum_43 = D.1795_41 + sum_37;
158
159 exit bb:
160 # sum_21 = PHI <sum_43, sum_26>
161 printf (&"%d"[0], sum_21);
162
163...
164
a509ebb5
RL
165}
166
167*/
168
5f40b3cb
ZD
169/* Minimal number of iterations of a loop that should be executed in each
170 thread. */
171#define MIN_PER_THREAD 100
172
b8698a0f 173/* Element of the hashtable, representing a
a509ebb5
RL
174 reduction in the current loop. */
175struct reduction_info
176{
726a989a
RB
177 gimple reduc_stmt; /* reduction statement. */
178 gimple reduc_phi; /* The phi node defining the reduction. */
179 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
180 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
181 result. */
b8698a0f 182 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 183 of the reduction variable when existing the loop. */
ae0bce62 184 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 185 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 186 tree init; /* reduction initialization value. */
b8698a0f 187 gimple new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
188 will be passed to the atomic operation. Represents
189 the local result each thread computed for the reduction
190 operation. */
191};
192
4a8fb1a1 193/* Reduction info hashtable helpers. */
a509ebb5 194
4a8fb1a1 195struct reduction_hasher : typed_free_remove <reduction_info>
a509ebb5 196{
4a8fb1a1
LC
197 typedef reduction_info value_type;
198 typedef reduction_info compare_type;
199 static inline hashval_t hash (const value_type *);
200 static inline bool equal (const value_type *, const compare_type *);
201};
202
203/* Equality and hash functions for hashtab code. */
a509ebb5 204
4a8fb1a1
LC
205inline bool
206reduction_hasher::equal (const value_type *a, const compare_type *b)
207{
a509ebb5
RL
208 return (a->reduc_phi == b->reduc_phi);
209}
210
4a8fb1a1
LC
211inline hashval_t
212reduction_hasher::hash (const value_type *a)
a509ebb5 213{
5d1fd1de 214 return a->reduc_version;
a509ebb5
RL
215}
216
4a8fb1a1
LC
217typedef hash_table <reduction_hasher> reduction_info_table_type;
218
219
a509ebb5 220static struct reduction_info *
4a8fb1a1 221reduction_phi (reduction_info_table_type reduction_list, gimple phi)
a509ebb5
RL
222{
223 struct reduction_info tmpred, *red;
224
4a8fb1a1 225 if (reduction_list.elements () == 0 || phi == NULL)
a509ebb5
RL
226 return NULL;
227
228 tmpred.reduc_phi = phi;
5d1fd1de 229 tmpred.reduc_version = gimple_uid (phi);
4a8fb1a1 230 red = reduction_list.find (&tmpred);
a509ebb5
RL
231
232 return red;
233}
234
5f40b3cb
ZD
235/* Element of hashtable of names to copy. */
236
237struct name_to_copy_elt
238{
239 unsigned version; /* The version of the name to copy. */
240 tree new_name; /* The new name used in the copy. */
241 tree field; /* The field of the structure used to pass the
242 value. */
243};
244
4a8fb1a1 245/* Name copies hashtable helpers. */
5f40b3cb 246
4a8fb1a1 247struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
5f40b3cb 248{
4a8fb1a1
LC
249 typedef name_to_copy_elt value_type;
250 typedef name_to_copy_elt compare_type;
251 static inline hashval_t hash (const value_type *);
252 static inline bool equal (const value_type *, const compare_type *);
253};
254
255/* Equality and hash functions for hashtab code. */
5f40b3cb 256
4a8fb1a1
LC
257inline bool
258name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
259{
5f40b3cb
ZD
260 return a->version == b->version;
261}
262
4a8fb1a1
LC
263inline hashval_t
264name_to_copy_hasher::hash (const value_type *a)
5f40b3cb 265{
5f40b3cb
ZD
266 return (hashval_t) a->version;
267}
268
4a8fb1a1
LC
269typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
270
b305e3da
SP
271/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
272 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
273 represents the denominator for every element in the matrix. */
274typedef struct lambda_trans_matrix_s
275{
276 lambda_matrix matrix;
277 int rowsize;
278 int colsize;
279 int denominator;
280} *lambda_trans_matrix;
281#define LTM_MATRIX(T) ((T)->matrix)
282#define LTM_ROWSIZE(T) ((T)->rowsize)
283#define LTM_COLSIZE(T) ((T)->colsize)
284#define LTM_DENOMINATOR(T) ((T)->denominator)
285
286/* Allocate a new transformation matrix. */
287
288static lambda_trans_matrix
289lambda_trans_matrix_new (int colsize, int rowsize,
290 struct obstack * lambda_obstack)
291{
292 lambda_trans_matrix ret;
293
294 ret = (lambda_trans_matrix)
295 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
296 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
297 LTM_ROWSIZE (ret) = rowsize;
298 LTM_COLSIZE (ret) = colsize;
299 LTM_DENOMINATOR (ret) = 1;
300 return ret;
301}
302
303/* Multiply a vector VEC by a matrix MAT.
304 MAT is an M*N matrix, and VEC is a vector with length N. The result
305 is stored in DEST which must be a vector of length M. */
306
307static void
308lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
309 lambda_vector vec, lambda_vector dest)
310{
311 int i, j;
312
313 lambda_vector_clear (dest, m);
314 for (i = 0; i < m; i++)
315 for (j = 0; j < n; j++)
316 dest[i] += matrix[i][j] * vec[j];
317}
318
319/* Return true if TRANS is a legal transformation matrix that respects
320 the dependence vectors in DISTS and DIRS. The conservative answer
321 is false.
322
323 "Wolfe proves that a unimodular transformation represented by the
324 matrix T is legal when applied to a loop nest with a set of
325 lexicographically non-negative distance vectors RDG if and only if
326 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
327 i.e.: if and only if it transforms the lexicographically positive
328 distance vectors to lexicographically positive vectors. Note that
329 a unimodular matrix must transform the zero vector (and only it) to
330 the zero vector." S.Muchnick. */
331
332static bool
333lambda_transform_legal_p (lambda_trans_matrix trans,
334 int nb_loops,
9771b263 335 vec<ddr_p> dependence_relations)
b305e3da
SP
336{
337 unsigned int i, j;
338 lambda_vector distres;
339 struct data_dependence_relation *ddr;
340
341 gcc_assert (LTM_COLSIZE (trans) == nb_loops
342 && LTM_ROWSIZE (trans) == nb_loops);
343
344 /* When there are no dependences, the transformation is correct. */
9771b263 345 if (dependence_relations.length () == 0)
b305e3da
SP
346 return true;
347
9771b263 348 ddr = dependence_relations[0];
b305e3da
SP
349 if (ddr == NULL)
350 return true;
351
352 /* When there is an unknown relation in the dependence_relations, we
353 know that it is no worth looking at this loop nest: give up. */
354 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
355 return false;
356
357 distres = lambda_vector_new (nb_loops);
358
359 /* For each distance vector in the dependence graph. */
9771b263 360 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
361 {
362 /* Don't care about relations for which we know that there is no
363 dependence, nor about read-read (aka. output-dependences):
364 these data accesses can happen in any order. */
365 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
366 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
367 continue;
368
369 /* Conservatively answer: "this transformation is not valid". */
370 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
371 return false;
372
373 /* If the dependence could not be captured by a distance vector,
374 conservatively answer that the transform is not valid. */
375 if (DDR_NUM_DIST_VECTS (ddr) == 0)
376 return false;
377
378 /* Compute trans.dist_vect */
379 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
380 {
381 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
382 DDR_DIST_VECT (ddr, j), distres);
383
384 if (!lambda_vector_lexico_pos (distres, nb_loops))
385 return false;
386 }
387 }
388 return true;
389}
08dab97a
RL
390
391/* Data dependency analysis. Returns true if the iterations of LOOP
392 are independent on each other (that is, if we can execute them
393 in parallel). */
5f40b3cb
ZD
394
395static bool
f873b205 396loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 397{
9771b263
DN
398 vec<ddr_p> dependence_relations;
399 vec<data_reference_p> datarefs;
5f40b3cb
ZD
400 lambda_trans_matrix trans;
401 bool ret = false;
5f40b3cb
ZD
402
403 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
404 {
405 fprintf (dump_file, "Considering loop %d\n", loop->num);
406 if (!loop->inner)
407 fprintf (dump_file, "loop is innermost\n");
b8698a0f 408 else
48710229
RL
409 fprintf (dump_file, "loop NOT innermost\n");
410 }
5f40b3cb 411
5f40b3cb
ZD
412 /* Check for problems with dependences. If the loop can be reversed,
413 the iterations are independent. */
07687835 414 stack_vec<loop_p, 3> loop_nest;
9771b263 415 datarefs.create (10);
07687835 416 dependence_relations.create (100);
9ca3d00e
AB
417 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
418 &dependence_relations))
419 {
420 if (dump_file && (dump_flags & TDF_DETAILS))
421 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
422 ret = false;
423 goto end;
424 }
5f40b3cb
ZD
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 dump_data_dependence_relations (dump_file, dependence_relations);
427
f873b205 428 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
429 LTM_MATRIX (trans)[0][0] = -1;
430
431 if (lambda_transform_legal_p (trans, 1, dependence_relations))
432 {
433 ret = true;
434 if (dump_file && (dump_flags & TDF_DETAILS))
435 fprintf (dump_file, " SUCCESS: may be parallelized\n");
436 }
437 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
438 fprintf (dump_file,
439 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 440
9ca3d00e 441 end:
5f40b3cb
ZD
442 free_dependence_relations (dependence_relations);
443 free_data_refs (datarefs);
444
445 return ret;
446}
447
1d4af1e8
SP
448/* Return true when LOOP contains basic blocks marked with the
449 BB_IRREDUCIBLE_LOOP flag. */
450
451static inline bool
452loop_has_blocks_with_irreducible_flag (struct loop *loop)
453{
454 unsigned i;
455 basic_block *bbs = get_loop_body_in_dom_order (loop);
456 bool res = true;
457
458 for (i = 0; i < loop->num_nodes; i++)
459 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
460 goto end;
461
462 res = false;
463 end:
464 free (bbs);
465 return res;
466}
467
8a171a59 468/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 469 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 470 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
471 be invariant in the whole function. Other needed statements are placed
472 right before GSI. */
5f40b3cb
ZD
473
474static tree
4a8fb1a1
LC
475take_address_of (tree obj, tree type, edge entry,
476 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
5f40b3cb 477{
8a171a59 478 int uid;
4a8fb1a1 479 int_tree_map **dslot;
5f40b3cb 480 struct int_tree_map ielt, *nielt;
83d5977e 481 tree *var_p, name, addr;
726a989a
RB
482 gimple stmt;
483 gimple_seq stmts;
5f40b3cb 484
8a171a59
ZD
485 /* Since the address of OBJ is invariant, the trees may be shared.
486 Avoid rewriting unrelated parts of the code. */
487 obj = unshare_expr (obj);
488 for (var_p = &obj;
489 handled_component_p (*var_p);
490 var_p = &TREE_OPERAND (*var_p, 0))
491 continue;
8a171a59 492
c9a410f0
RG
493 /* Canonicalize the access to base on a MEM_REF. */
494 if (DECL_P (*var_p))
495 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
496
497 /* Assign a canonical SSA name to the address of the base decl used
498 in the address and share it for all accesses and addresses based
499 on it. */
500 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
5f40b3cb 501 ielt.uid = uid;
4a8fb1a1 502 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
5f40b3cb
ZD
503 if (!*dslot)
504 {
cba1eb61
JJ
505 if (gsi == NULL)
506 return NULL;
c9a410f0 507 addr = TREE_OPERAND (*var_p, 0);
29b89442
JJ
508 const char *obj_name
509 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
510 if (obj_name)
511 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
512 else
513 name = make_ssa_name (TREE_TYPE (addr), NULL);
83d5977e 514 stmt = gimple_build_assign (name, addr);
726a989a 515 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb
ZD
516
517 nielt = XNEW (struct int_tree_map);
518 nielt->uid = uid;
519 nielt->to = name;
520 *dslot = nielt;
5f40b3cb 521 }
8a171a59 522 else
4a8fb1a1 523 name = (*dslot)->to;
5f40b3cb 524
c9a410f0
RG
525 /* Express the address in terms of the canonical SSA name. */
526 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
527 if (gsi == NULL)
528 return build_fold_addr_expr_with_type (obj, type);
529
c9a410f0
RG
530 name = force_gimple_operand (build_addr (obj, current_function_decl),
531 &stmts, true, NULL_TREE);
532 if (!gimple_seq_empty_p (stmts))
cba1eb61 533 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 534
c9a410f0 535 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 536 {
726a989a 537 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 538 NULL_TREE);
726a989a 539 if (!gimple_seq_empty_p (stmts))
cba1eb61 540 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 541 }
5f40b3cb
ZD
542
543 return name;
544}
545
a509ebb5 546/* Callback for htab_traverse. Create the initialization statement
b8698a0f 547 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
548 the loop described in DATA. */
549
4a8fb1a1
LC
550int
551initialize_reductions (reduction_info **slot, struct loop *loop)
a509ebb5 552{
a509ebb5 553 tree init, c;
a509ebb5
RL
554 tree bvar, type, arg;
555 edge e;
556
4a8fb1a1 557 struct reduction_info *const reduc = *slot;
a509ebb5 558
b8698a0f 559 /* Create initialization in preheader:
a509ebb5
RL
560 reduction_variable = initialization value of reduction. */
561
b8698a0f 562 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
563 from the preheader with the reduction initialization value. */
564
565 /* Create a new variable to initialize the reduction. */
566 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
567 bvar = create_tmp_var (type, "reduction");
a509ebb5 568
c2255bc4
AH
569 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
570 OMP_CLAUSE_REDUCTION);
a509ebb5 571 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 572 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
573
574 init = omp_reduction_init (c, TREE_TYPE (bvar));
575 reduc->init = init;
576
b8698a0f
L
577 /* Replace the argument representing the initialization value
578 with the initialization value for the reduction (neutral
579 element for the particular operation, e.g. 0 for PLUS_EXPR,
580 1 for MULT_EXPR, etc).
581 Keep the old value in a new variable "reduction_initial",
582 that will be taken in consideration after the parallel
0eb7e7aa 583 computing is done. */
a509ebb5
RL
584
585 e = loop_preheader_edge (loop);
586 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
587 /* Create new variable to hold the initial value. */
a509ebb5 588
a509ebb5 589 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 590 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 591 reduc->initial_value = arg;
a509ebb5
RL
592 return 1;
593}
5f40b3cb
ZD
594
595struct elv_data
596{
726a989a 597 struct walk_stmt_info info;
9f9f72aa 598 edge entry;
4a8fb1a1 599 int_tree_htab_type decl_address;
cba1eb61 600 gimple_stmt_iterator *gsi;
5f40b3cb 601 bool changed;
cba1eb61 602 bool reset;
5f40b3cb
ZD
603};
604
9f9f72aa
AP
605/* Eliminates references to local variables in *TP out of the single
606 entry single exit region starting at DTA->ENTRY.
607 DECL_ADDRESS contains addresses of the references that had their
608 address taken already. If the expression is changed, CHANGED is
609 set to true. Callback for walk_tree. */
a509ebb5 610
5f40b3cb 611static tree
8a171a59 612eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 613{
3d9a9f94 614 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 615 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
616
617 if (DECL_P (t))
618 {
619 *walk_subtrees = 0;
620
621 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
622 return NULL_TREE;
623
624 type = TREE_TYPE (t);
625 addr_type = build_pointer_type (type);
cba1eb61
JJ
626 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
627 dta->gsi);
628 if (dta->gsi == NULL && addr == NULL_TREE)
629 {
630 dta->reset = true;
631 return NULL_TREE;
632 }
633
70f34814 634 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
635
636 dta->changed = true;
637 return NULL_TREE;
638 }
639
640 if (TREE_CODE (t) == ADDR_EXPR)
641 {
8a171a59
ZD
642 /* ADDR_EXPR may appear in two contexts:
643 -- as a gimple operand, when the address taken is a function invariant
644 -- as gimple rhs, when the resulting address in not a function
645 invariant
646 We do not need to do anything special in the latter case (the base of
647 the memory reference whose address is taken may be replaced in the
648 DECL_P case). The former case is more complicated, as we need to
649 ensure that the new address is still a gimple operand. Thus, it
650 is not sufficient to replace just the base of the memory reference --
651 we need to move the whole computation of the address out of the
652 loop. */
653 if (!is_gimple_val (t))
5f40b3cb
ZD
654 return NULL_TREE;
655
656 *walk_subtrees = 0;
8a171a59
ZD
657 obj = TREE_OPERAND (t, 0);
658 var = get_base_address (obj);
659 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
660 return NULL_TREE;
661
662 addr_type = TREE_TYPE (t);
cba1eb61
JJ
663 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
664 dta->gsi);
665 if (dta->gsi == NULL && addr == NULL_TREE)
666 {
667 dta->reset = true;
668 return NULL_TREE;
669 }
5f40b3cb
ZD
670 *tp = addr;
671
672 dta->changed = true;
673 return NULL_TREE;
674 }
675
726a989a 676 if (!EXPR_P (t))
5f40b3cb
ZD
677 *walk_subtrees = 0;
678
679 return NULL_TREE;
680}
681
cba1eb61 682/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
683 entry single exit region starting at ENTRY. DECL_ADDRESS contains
684 addresses of the references that had their address taken
685 already. */
5f40b3cb
ZD
686
687static void
cba1eb61 688eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
4a8fb1a1 689 int_tree_htab_type decl_address)
5f40b3cb
ZD
690{
691 struct elv_data dta;
cba1eb61 692 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 693
726a989a 694 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 695 dta.entry = entry;
5f40b3cb
ZD
696 dta.decl_address = decl_address;
697 dta.changed = false;
cba1eb61 698 dta.reset = false;
5f40b3cb 699
b5b8b0ac 700 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
701 {
702 dta.gsi = NULL;
703 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
704 eliminate_local_variables_1, &dta.info, NULL);
705 if (dta.reset)
706 {
707 gimple_debug_bind_reset_value (stmt);
708 dta.changed = true;
709 }
710 }
29b89442
JJ
711 else if (gimple_clobber_p (stmt))
712 {
713 stmt = gimple_build_nop ();
714 gsi_replace (gsi, stmt, false);
715 dta.changed = true;
716 }
b5b8b0ac 717 else
cba1eb61
JJ
718 {
719 dta.gsi = gsi;
720 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
721 }
5f40b3cb
ZD
722
723 if (dta.changed)
724 update_stmt (stmt);
725}
726
9f9f72aa
AP
727/* Eliminates the references to local variables from the single entry
728 single exit region between the ENTRY and EXIT edges.
b8698a0f 729
a509ebb5 730 This includes:
b8698a0f
L
731 1) Taking address of a local variable -- these are moved out of the
732 region (and temporary variable is created to hold the address if
a509ebb5 733 necessary).
9f9f72aa 734
5f40b3cb 735 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 736 references. */
5f40b3cb
ZD
737
738static void
9f9f72aa 739eliminate_local_variables (edge entry, edge exit)
5f40b3cb 740{
9f9f72aa 741 basic_block bb;
07687835 742 stack_vec<basic_block, 3> body;
5f40b3cb 743 unsigned i;
726a989a 744 gimple_stmt_iterator gsi;
cba1eb61 745 bool has_debug_stmt = false;
4a8fb1a1
LC
746 int_tree_htab_type decl_address;
747 decl_address.create (10);
9f9f72aa
AP
748 basic_block entry_bb = entry->src;
749 basic_block exit_bb = exit->dest;
5f40b3cb 750
9f9f72aa 751 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 752
9771b263 753 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 754 if (bb != entry_bb && bb != exit_bb)
726a989a 755 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
756 if (is_gimple_debug (gsi_stmt (gsi)))
757 {
758 if (gimple_debug_bind_p (gsi_stmt (gsi)))
759 has_debug_stmt = true;
760 }
cba1eb61
JJ
761 else
762 eliminate_local_variables_stmt (entry, &gsi, decl_address);
763
764 if (has_debug_stmt)
9771b263 765 FOR_EACH_VEC_ELT (body, i, bb)
cba1eb61
JJ
766 if (bb != entry_bb && bb != exit_bb)
767 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
768 if (gimple_debug_bind_p (gsi_stmt (gsi)))
769 eliminate_local_variables_stmt (entry, &gsi, decl_address);
5f40b3cb 770
4a8fb1a1 771 decl_address.dispose ();
9f9f72aa
AP
772}
773
774/* Returns true if expression EXPR is not defined between ENTRY and
775 EXIT, i.e. if all its operands are defined outside of the region. */
776
777static bool
778expr_invariant_in_region_p (edge entry, edge exit, tree expr)
779{
780 basic_block entry_bb = entry->src;
781 basic_block exit_bb = exit->dest;
782 basic_block def_bb;
9f9f72aa
AP
783
784 if (is_gimple_min_invariant (expr))
785 return true;
786
787 if (TREE_CODE (expr) == SSA_NAME)
788 {
726a989a 789 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
790 if (def_bb
791 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
792 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
793 return false;
794
795 return true;
796 }
797
726a989a 798 return false;
5f40b3cb
ZD
799}
800
801/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
802 The copies are stored to NAME_COPIES, if NAME was already duplicated,
803 its duplicate stored in NAME_COPIES is returned.
b8698a0f 804
5f40b3cb
ZD
805 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
806 duplicated, storing the copies in DECL_COPIES. */
807
808static tree
4a8fb1a1
LC
809separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
810 int_tree_htab_type decl_copies, bool copy_name_p)
5f40b3cb
ZD
811{
812 tree copy, var, var_copy;
813 unsigned idx, uid, nuid;
814 struct int_tree_map ielt, *nielt;
815 struct name_to_copy_elt elt, *nelt;
4a8fb1a1
LC
816 name_to_copy_elt **slot;
817 int_tree_map **dslot;
5f40b3cb
ZD
818
819 if (TREE_CODE (name) != SSA_NAME)
820 return name;
821
822 idx = SSA_NAME_VERSION (name);
823 elt.version = idx;
4a8fb1a1
LC
824 slot = name_copies.find_slot_with_hash (&elt, idx,
825 copy_name_p ? INSERT : NO_INSERT);
5f40b3cb 826 if (slot && *slot)
4a8fb1a1 827 return (*slot)->new_name;
5f40b3cb 828
70b5e7dc
RG
829 if (copy_name_p)
830 {
831 copy = duplicate_ssa_name (name, NULL);
832 nelt = XNEW (struct name_to_copy_elt);
833 nelt->version = idx;
834 nelt->new_name = copy;
835 nelt->field = NULL_TREE;
836 *slot = nelt;
837 }
838 else
839 {
840 gcc_assert (!slot);
841 copy = name;
842 }
843
5f40b3cb 844 var = SSA_NAME_VAR (name);
70b5e7dc
RG
845 if (!var)
846 return copy;
847
5f40b3cb
ZD
848 uid = DECL_UID (var);
849 ielt.uid = uid;
4a8fb1a1 850 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
5f40b3cb
ZD
851 if (!*dslot)
852 {
853 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 854 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
855 nielt = XNEW (struct int_tree_map);
856 nielt->uid = uid;
857 nielt->to = var_copy;
858 *dslot = nielt;
859
860 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 861 it again. */
5f40b3cb
ZD
862 nuid = DECL_UID (var_copy);
863 ielt.uid = nuid;
4a8fb1a1 864 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
5f40b3cb
ZD
865 gcc_assert (!*dslot);
866 nielt = XNEW (struct int_tree_map);
867 nielt->uid = nuid;
868 nielt->to = var_copy;
869 *dslot = nielt;
870 }
871 else
872 var_copy = ((struct int_tree_map *) *dslot)->to;
873
b2ec94d4 874 replace_ssa_name_symbol (copy, var_copy);
5f40b3cb
ZD
875 return copy;
876}
877
9f9f72aa
AP
878/* Finds the ssa names used in STMT that are defined outside the
879 region between ENTRY and EXIT and replaces such ssa names with
880 their duplicates. The duplicates are stored to NAME_COPIES. Base
881 decls of all ssa names used in STMT (including those defined in
882 LOOP) are replaced with the new temporary variables; the
883 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
884
885static void
726a989a 886separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
4a8fb1a1
LC
887 name_to_copy_table_type name_copies,
888 int_tree_htab_type decl_copies)
5f40b3cb
ZD
889{
890 use_operand_p use;
891 def_operand_p def;
892 ssa_op_iter oi;
893 tree name, copy;
894 bool copy_name_p;
895
5f40b3cb 896 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
897 {
898 name = DEF_FROM_PTR (def);
899 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
900 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
901 false);
a509ebb5
RL
902 gcc_assert (copy == name);
903 }
5f40b3cb
ZD
904
905 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
906 {
907 name = USE_FROM_PTR (use);
908 if (TREE_CODE (name) != SSA_NAME)
909 continue;
910
9f9f72aa
AP
911 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
912 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
913 copy_name_p);
a509ebb5
RL
914 SET_USE (use, copy);
915 }
5f40b3cb
ZD
916}
917
b5b8b0ac
AO
/* Adjusts debug statement STMT after the region has been rewritten:
   the variable bound by a debug bind / source bind is replaced with its
   duplicate from DECL_COPIES, and ssa-name uses are replaced with their
   duplicates from NAME_COPIES.  Returns true if STMT should be removed
   by the caller (it is not a bind, binds a DEBUG_EXPR_DECL/LABEL_DECL,
   or no duplicate exists for its variable), false otherwise.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type name_copies,
				int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  /* No duplicate decl was created for VAR; tell the caller to drop the
     debug statement rather than refer to the stale decl.  */
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    elt.version = SSA_NAME_VERSION (name);
    slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
    /* A use with no recorded duplicate cannot be expressed any more;
       reset the bind value instead of keeping a dangling reference.  */
    if (!slot)
      {
	gimple_debug_bind_reset_value (stmt);
	update_stmt (stmt);
	break;
      }

    SET_USE (use, (*slot)->new_name);
  }

  return false;
}
976
0eb7e7aa
RL
977/* Callback for htab_traverse. Adds a field corresponding to the reduction
978 specified in SLOT. The type is passed in DATA. */
979
4a8fb1a1
LC
980int
981add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 982{
b8698a0f 983
4a8fb1a1 984 struct reduction_info *const red = *slot;
aa06a978
RB
985 tree var = gimple_assign_lhs (red->reduc_stmt);
986 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
987 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
988
989 insert_field_into_struct (type, field);
990
991 red->field = field;
992
993 return 1;
994}
a509ebb5 995
5f40b3cb 996/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 997 described in SLOT. The type is passed in DATA. */
5f40b3cb 998
4a8fb1a1
LC
999int
1000add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1001{
4a8fb1a1 1002 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1003 tree name = ssa_name (elt->version);
70b5e7dc
RG
1004 tree field = build_decl (UNKNOWN_LOCATION,
1005 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1006 TREE_TYPE (name));
5f40b3cb
ZD
1007
1008 insert_field_into_struct (type, field);
1009 elt->field = field;
a509ebb5 1010
5f40b3cb
ZD
1011 return 1;
1012}
1013
b8698a0f
L
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these
   values.  The phi's result is stored in the NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The phi result gets a fresh ssa name based on the reduction lhs so
     both incoming values share one base.  */
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1055
/* Bookkeeping for generating the loads and stores that pass shared data
   between the sequential and the parallelized parts of the loop.  */

struct clsn_data
{
  tree store;			/* The structure variable written before the
				   region (".paral_data_store").  */
  tree load;			/* The pointer through which the region reads
				   the structure back.  */

  basic_block store_bb;		/* Block in which the stores are emitted.  */
  basic_block load_bb;		/* Block in which the loads are emitted.  */
};
1064
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of this reduction's field inside the shared structure;
     the atomic load/store pair below operates on it.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the current shared value into TMP_LOAD.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result using the
     reduction operation, then store the combination back atomically.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1116
b8698a0f
L
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */

static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First give every reduction a phi merging the thread-local result
     with the initial value (stored in each reduction's NEW_PHI); the
     atomic update emitted below consumes that phi.  */
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1132
ae0bce62
RL
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Read the accumulated value from this reduction's field of the
     shared structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign it to the ssa name the exit phi (KEEP_RES) used to define.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now superseded by the load; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1167
b8698a0f 1168/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1169 REDUCTION_LIST describes the list of reductions that the
fa10beec 1170 loads should be generated for. */
a509ebb5 1171static void
4a8fb1a1 1172create_final_loads_for_reduction (reduction_info_table_type reduction_list,
a509ebb5
RL
1173 struct clsn_data *ld_st_data)
1174{
726a989a 1175 gimple_stmt_iterator gsi;
a509ebb5 1176 tree t;
726a989a 1177 gimple stmt;
a509ebb5 1178
726a989a 1179 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1180 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1181 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1182
726a989a 1183 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1184
4a8fb1a1
LC
1185 reduction_list
1186 .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1187
1188}
1189
0eb7e7aa
RL
1190/* Callback for htab_traverse. Store the neutral value for the
1191 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1192 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1193 The reduction is specified in SLOT. The store information is
1194 passed in DATA. */
0eb7e7aa 1195
4a8fb1a1
LC
1196int
1197create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1198{
4a8fb1a1 1199 struct reduction_info *const red = *slot;
726a989a
RB
1200 tree t;
1201 gimple stmt;
1202 gimple_stmt_iterator gsi;
1203 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1204
1205 gsi = gsi_last_bb (clsn_data->store_bb);
1206 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1207 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1208 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1209
1210 return 1;
1211}
1212
a509ebb5
RL
1213/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1214 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1215 specified in SLOT. */
1216
4a8fb1a1
LC
1217int
1218create_loads_and_stores_for_name (name_to_copy_elt **slot,
1219 struct clsn_data *clsn_data)
5f40b3cb 1220{
4a8fb1a1 1221 struct name_to_copy_elt *const elt = *slot;
726a989a
RB
1222 tree t;
1223 gimple stmt;
1224 gimple_stmt_iterator gsi;
5f40b3cb 1225 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1226 tree load_struct;
1227
726a989a
RB
1228 gsi = gsi_last_bb (clsn_data->store_bb);
1229 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1230 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1231 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1232
726a989a 1233 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1234 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1235 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1236 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1237 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1238
1239 return 1;
1240}
1241
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  stack_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Phase 1: rewrite all real statements in the region, collecting the
     name and decl duplicates as a side effect.  Debug statements are
     only noted here and handled in phase 2.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      /* True return means the bind cannot be kept; drop
			 it and stay on the following statement.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      /* One field per copied ssa name, plus one per reduction.  */
      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}
1406
/* Bitmap containing uids of functions created by parallelization
   (see create_loop_fn).  We cannot allocate it from the default
   obstack, as it must live across compilation of several functions;
   we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;
1412
1413/* Returns true if FN was created by create_loop_fn. */
1414
62e0a1ed 1415bool
5f40b3cb
ZD
1416parallelized_function_p (tree fn)
1417{
1418 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1419 return false;
1420
1421 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1422}
1423
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  LOC is the location the new function decl and
   its artificial decls are given.  The function takes one pointer
   argument (".paral_data_param") and returns void; its uid is recorded
   in PARALLELIZED_FUNCTIONS.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;	/* Makes each generated name unique.  */

  loc = LOCATION_LOCUS (loc);
  /* Derive a private assembler name from the current function's.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single pointer parameter carrying the shared data.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1479
5f40b3cb
ZD
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result and let the original name be
	 defined by a new phi in ORIG_HEADER.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* The exit condition must keep testing the renamed control
	     variable.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks dominating the exit test; those following it form
     the tail to duplicate.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Only the control variable may remain; anything else would be a
	 value escaping the loop that we cannot parallelize.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
1595
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  LOC is the
   location used for the emitted OMP statements.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR becomes the OMP_FOR index; the header phi now starts from
     it instead of CVAR_INIT.  */
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment of the control variable is replaced by the
     GIMPLE_OMP_CONTINUE emitted below.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  /* Fix up the exit phis for the two new incoming edges, taking the
     values from the corresponding loop edges.  */
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1731
08dab97a
RL
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  /* Minimum number of iterations each thread must get for the
     parallel version to be worthwhile; lowered to 2 for outer loops.  */
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  /* Materialize the iteration count as a GIMPLE value on the preheader
     edge, so the run-time threshold test below can use it.  */
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Guard: only run the parallel copy when NITER >= m_p_thread * N_THREADS
     and the MAY_BE_ZERO assumption does not hold.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list.elements () > 0)
    reduction_list.traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  Use the location of the loop's exit
     condition for the generated OMP statements when available.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (reduction_list.elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1907
9857228c
SP
1908/* Returns true when LOOP contains vector phi nodes. */
1909
1910static bool
726a989a 1911loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
1912{
1913 unsigned i;
1914 basic_block *bbs = get_loop_body_in_dom_order (loop);
726a989a 1915 gimple_stmt_iterator gsi;
9857228c 1916 bool res = true;
9857228c
SP
1917
1918 for (i = 0; i < loop->num_nodes; i++)
726a989a
RB
1919 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1920 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
9857228c
SP
1921 goto end;
1922
1923 res = false;
1924 end:
1925 free (bbs);
1926 return res;
1927}
1928
08dab97a
RL
1929/* Create a reduction_info struct, initialize it with REDUC_STMT
1930 and PHI, insert it to the REDUCTION_LIST. */
1931
1932static void
4a8fb1a1
LC
1933build_new_reduction (reduction_info_table_type reduction_list,
1934 gimple reduc_stmt, gimple phi)
08dab97a 1935{
4a8fb1a1 1936 reduction_info **slot;
08dab97a
RL
1937 struct reduction_info *new_reduction;
1938
1939 gcc_assert (reduc_stmt);
b8698a0f 1940
08dab97a
RL
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 {
1943 fprintf (dump_file,
1944 "Detected reduction. reduction stmt is: \n");
1945 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1946 fprintf (dump_file, "\n");
1947 }
b8698a0f 1948
08dab97a 1949 new_reduction = XCNEW (struct reduction_info);
b8698a0f 1950
08dab97a
RL
1951 new_reduction->reduc_stmt = reduc_stmt;
1952 new_reduction->reduc_phi = phi;
5d1fd1de 1953 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a 1954 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
4a8fb1a1 1955 slot = reduction_list.find_slot (new_reduction, INSERT);
08dab97a
RL
1956 *slot = new_reduction;
1957}
1958
5d1fd1de
JJ
1959/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1960
4a8fb1a1
LC
1961int
1962set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 1963{
4a8fb1a1 1964 struct reduction_info *const red = *slot;
5d1fd1de
JJ
1965 gimple_set_uid (red->reduc_phi, red->reduc_version);
1966 return 1;
1967}
1968
08dab97a
RL
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Reuse the vectorizer's loop-form analysis to classify reductions.
     NOTE(review): this may return NULL when the loop does not have the
     form the vectorizer expects -- the simple_loop_info checks below
     account for that.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      /* Virtual phis carry memory SSA state, not scalar values.  */
      if (virtual_operand_p (res))
	continue;

      /* A header phi that is not a simple induction variable is a
	 reduction candidate; let the vectorizer decide.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							    phi, true,
							    &double_reduc);
	   /* Nested (double) reductions are not handled by parloops.  */
	   if (reduc_stmt && !double_reduc)
	      build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  /* NOTE(review): simple_loop_info may be NULL here; presumably
     destroy_loop_vec_info tolerates NULL -- confirm in tree-vect-loop.c.  */
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
}
2006
2007/* Try to initialize NITER for code generation part. */
2008
2009static bool
2010try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2011{
2012 edge exit = single_dom_exit (loop);
2013
2014 gcc_assert (exit);
2015
2016 /* We need to know # of iterations, and there should be no uses of values
2017 defined inside loop outside of it, unless the values are invariants of
2018 the loop. */
2019 if (!number_of_iterations_exit (loop, exit, niter, false))
2020 {
2021 if (dump_file && (dump_flags & TDF_DETAILS))
2022 fprintf (dump_file, " FAILED: number of iterations not known\n");
2023 return false;
2024 }
2025
2026 return true;
2027}
2028
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every scalar value live across the exit edge must belong to a
     recognized reduction; otherwise the iterations are not
     independent and the loop cannot be parallelized.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern: \n");
	    }
	  /* No reductions were detected at all, so this live value
	     cannot be one.  */
	  if (reduction_list.elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  /* Find the (non-debug) use of VAL inside the loop: that
	     statement should be the reduction phi.  */
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* A header phi that is neither a simple induction variable nor a
	 detected reduction is a scalar cross-iteration dependency.  */
      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2126
5f40b3cb
ZD
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  reduction_info_table_type reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  /* Non-local labels could jump between the outlined function and the
     original body; bail out.  */
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list.create (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction table is reused across candidates; clear the
	 entries from the previous loop.  */
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Skip loops that likely do not iterate enough to amortize the
	 thread-spawn overhead; -1 means no estimate is available.  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      /* The iteration count must be computable at run time.  */
      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      /* All cross-iteration scalar dependencies must be reductions.  */
      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      /* Finally, run the data-dependence test (skipped when graphite
	 already proved independence via -floop-parallelize-all).  */
      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
	if (loop->inner)
	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	else
	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	loop_loc = find_loop_location (loop);
	if (loop_loc != UNKNOWN_LOC)
	  fprintf (dump_file, "\nloop at %s:%d: ",
		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      }
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  reduction_list.dispose ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
2242
c1bf2a39
AM
2243/* Parallelization. */
2244
2245static bool
2246gate_tree_parallelize_loops (void)
2247{
2248 return flag_tree_parallelize_loops > 1;
2249}
2250
2251static unsigned
2252tree_parallelize_loops (void)
2253{
2254 if (number_of_loops (cfun) <= 1)
2255 return 0;
2256
2257 if (parallelize_loops ())
2258 return TODO_cleanup_cfg | TODO_rebuild_alias;
2259 return 0;
2260}
2261
namespace {

/* Pass descriptor for the loop-autoparallelization pass ("parloops").  */
const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_flow, /* todo_flags_finish */
};

/* Thin opt_pass wrapper delegating to the static gate/execute
   functions above.  */
class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_parallelize_loops (); }
  unsigned int execute () { return tree_parallelize_loops (); }

}; // class pass_parallelize_loops

} // anon namespace
2293
2294gimple_opt_pass *
2295make_pass_parallelize_loops (gcc::context *ctxt)
2296{
2297 return new pass_parallelize_loops (ctxt);
2298}
2299
2300
5f40b3cb 2301#include "gt-tree-parloops.h"